├── README.md
├── SCR2e-errata.html
├── SCR2e-errata.pdf
├── examples-R
│   ├── SCR2e-Intro.R
│   ├── SCR2e-Programming.R
│   ├── SCR2e-ResampAppl.R
│   ├── SCR2e-chBoot.R
│   ├── SCR2e-chDensity.R
│   ├── SCR2e-chMCMC.R
│   ├── SCR2e-chMCint.R
│   ├── SCR2e-chMonteCarlo.R
│   ├── SCR2e-chNumerical.R
│   ├── SCR2e-chOptim.R
│   ├── SCR2e-chPerm.R
│   ├── SCR2e-chRandomProcess.R
│   ├── SCR2e-chRgen.R
│   ├── SCR2e-chVis.R
│   ├── SCR2e-examples-functions.R
│   └── readme.md
├── examples-Rmd
│   ├── FOREARM.DAT
│   ├── Lahman.Rmd
│   ├── SCR2e-Intro.Rmd
│   ├── SCR2e-Programming.Rmd
│   ├── SCR2e-ResampAppl.Rmd
│   ├── SCR2e-chBoot.Rmd
│   ├── SCR2e-chDensity.Rmd
│   ├── SCR2e-chMCMC.Rmd
│   ├── SCR2e-chMCint.Rmd
│   ├── SCR2e-chMonteCarlo.Rmd
│   ├── SCR2e-chNumerical.Rmd
│   ├── SCR2e-chOptim.Rmd
│   ├── SCR2e-chPerm.Rmd
│   ├── SCR2e-chRandomProcess.Rmd
│   ├── SCR2e-chRgen.Rmd
│   ├── SCR2e-chVis.Rmd
│   ├── SCR2e-examples-functions.R
│   └── readme.md
└── examples-cpp
    ├── printme.cpp
    └── readme.md

/README.md:
--------------------------------------------------------------------------------
 1 | # SCR2e
 2 | Resources and files for "Statistical Computing with R, 2nd edition"
 3 | by Maria L. Rizzo, Chapman & Hall/CRC (2019)
 4 | 
 5 | URL: https://www.crcpress.com/Statistical-Computing-with-R-Second-Edition/Rizzo/p/book/9781466553323
 6 | 
 7 | Bibtex entry:
 8 | 
 9 | @book{SCR2,
10 |   title = "Statistical Computing with {R}",
11 |   edition = "second",
12 |   author = "Maria L. Rizzo",
13 |   year = "2019",
14 |   publisher = "Chapman & Hall/CRC",
15 |   series = "The {R} Series",
16 |   isbn = "9781466553323",
17 | }

/SCR2e-errata.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mariarizzo/SCR2e/5c72bc153c6b931f88f5703e3c088bd0b263eeca/SCR2e-errata.pdf

/examples-R/SCR2e-Intro.R:
--------------------------------------------------------------------------------
1 | #######################################################
2 | ### Statistical Computing with R 2e ###
3 | ### Maria L.
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 1 ### 9 | ### Introduction ### 10 | ####################################################### 11 | 12 | 13 | 14 | ### Example 1.1 15 | 16 | sumdice <- function(n) { 17 | k <- sample(1:6, size=n, replace=TRUE) 18 | return(sum(k)) 19 | } 20 | 21 | sumdice(2) 22 | 23 | #to store the result rather than print it 24 | a <- sumdice(100) 25 | 26 | #we expect the mean for 100 dice to be close to 3.5 27 | a / 100 28 | 29 | sumdice <- function(n) 30 | sum(sample(1:6, size=n, replace=TRUE)) 31 | 32 | sumdice <- function(n, sides = 6) { 33 | if (sides < 1) return (0) 34 | k <- sample(1:sides, size=n, replace=TRUE) 35 | return(sum(k)) 36 | } 37 | 38 | sumdice(5) #default 6 sides 39 | sumdice(n=5, sides=4) #4 sides 40 | 41 | ### Example 1.2 (iris data) 42 | 43 | names(iris) 44 | table(iris$Species) 45 | w <- iris[[2]] #Sepal.Width 46 | mean(w) 47 | 48 | attach(iris) 49 | summary(Petal.Length[51:100]) #versicolor petal length 50 | 51 | with(iris, summary(Petal.Length[51:100])) 52 | 53 | out <- with(iris, summary(Petal.Length[51:100])) 54 | 55 | by(iris[,1:4], Species, colMeans) 56 | detach(iris) 57 | 58 | ### Example 1.3 (Arrays) 59 | 60 | x <- 1:24 # vector 61 | dim(x) <- length(x) # 1 dimensional array 62 | matrix(1:24, nrow=4, ncol=6) # 4 by 6 matrix 63 | x <- array(1:24, c(3, 4, 2)) # 3 by 4 by 2 array 64 | 65 | ### Example 1.4 (Matrices) 66 | 67 | A <- matrix(0, nrow=2, ncol=2) 68 | A <- matrix(c(0, 0, 0, 0), nrow=2, ncol=2) 69 | A <- matrix(0, 2, 2) 70 | 71 | A <- matrix(1:8, nrow=2, ncol=4) 72 | 73 | ### Example 1.5 (Iris data cont.) 74 | 75 | x <- as.matrix(iris[,1:4]) #all rows of columns 1 to 4 76 | 77 | mean(x[,2]) #mean of sepal width, all species 78 | mean(x[51:100,3]) #mean of petal length, versicolor 79 | 80 | y <- array(x, dim=c(50, 3, 4)) 81 | mean(y[,,2]) #mean of sepal width, all species 82 | mean(y[,2,3]) #mean of petal length, versicolor 83 | 84 | y <- array(c(x[1:50,], x[51:100,], x[101:150,]), 85 | dim=c(50, 4, 3)) 86 | mean(y[,2,]) #mean of sepal width, all species 87 | mean(y[,3,2]) #mean of petal length, versicolor 88 | 89 | ### Example 1.6 (Run length encoding) 90 | 91 | 92 | n <- 1000 93 | x <- rbinom(n, size = 1, prob = .5) 94 | table(x) 95 | head(x, 30) 96 | 97 | r <- rle(x) 98 | str(r) 99 | 100 | head(r$lengths) 101 | head(r[[1]]) 102 | 103 | max(r$lengths) 104 | log2(length(x)) 105 | 106 | ### Example 1.7 (Named list) 107 | 108 | w <- wilcox.test(rnorm(10), rnorm(10, 2)) 109 | w #print the summary 110 | 111 | w$statistic #stored in object w 112 | w$p.value 113 | unlist(w) 114 | unclass(w) 115 | 116 | ### Example 1.8 (A list of names) 117 | 118 | a <- matrix(runif(8), 4, 2) #a 4x2 matrix 119 | dimnames(a) <- list(NULL, c("x", "y")) 120 | 121 | # if we want row names 122 | dimnames(a) <- list(letters[1:4], c("x", "y")) 123 | a 124 | 125 | # another way to assign row names 126 | row.names(a) <- list("NE", "NW", "SW", "SE") 127 | a 128 | 129 | ### Example 1.9 (Parallel boxplots) 130 | 131 | boxplot(iris$Sepal.Length ~ iris$Species) 132 | 133 | boxplot(iris$Sepal.Length ~ iris$Species, 134 | ylab = "Sepal Length", boxwex = .4) 135 | ### Example 1.10 (Plotting characters and colors) 136 | 137 | plot(0:25, rep(1, 26), pch = 0:25) 138 | text(0:25, 0.9, 0:25) 139 | 140 | 141 | ### Example 1.11 (Barplot for run lengths) 142 | 143 | barplot(table(r$lengths)) #R graphics version 144 | 145 | ## ggplot version 146 | 
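# bar plot of the run length distribution; r is the rle object
# computed in Example 1.6 above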
library(ggplot2) 147 | df <- data.frame(lengths = factor(r$lengths)) 148 | ggplot(df, aes(lengths)) + geom_bar() 149 | 150 | ### Example 1.12 (Scatterplots) 151 | 152 | ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) + 153 | geom_point() 154 | 155 | ggplot(iris, aes(Sepal.Length, Sepal.Width, 156 | color = Species, shape = Species)) + geom_point(size = 2) 157 | 158 | ### Example 1.13 (ggplot: parallel boxplots and violin plots) 159 | 160 | ggplot(iris, aes(Species, Sepal.Length)) + geom_boxplot() 161 | ggplot(iris, aes(Species, Sepal.Length)) + geom_violin() 162 | 163 | ggplot(iris, aes(Species, Sepal.Length)) + 164 | geom_boxplot() + coord_flip() 165 | ggplot(iris, aes(Species, Sepal.Length)) + 166 | geom_violin() + coord_flip() 167 | 168 | ### Example 1.14 (MPG by engine displacement) 169 | 170 | ggplot(mpg, aes(displ, hwy)) + 171 | geom_point() + 172 | facet_wrap(~ class) 173 | 174 | ### Example 1.15 (Import data from a local text file) 175 | 176 | # copy "FOREARM.DAT" into working directory 177 | forearm <- scan(file = "FOREARM.DAT") #a vector 178 | 179 | # use pathname if file is not in working directory 180 | # forearm <- scan(file = "./DATASETS/FOREARM.DAT") #a vector 181 | 182 | head(forearm) 183 | 184 | ### Example 1.16 (Importing data from a web page) 185 | 186 | fileloc <- "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data" 187 | 188 | df <- read.table(file = fileloc, na.strings = "?", as.is = TRUE) 189 | str(df) 190 | names(df) <- c("mpg", "cyl", "displ", "hp", "wt", "accel", 191 | "year", "origin", "name") 192 | summary(df) 193 | 194 | ### Example 1.17 (Importing/exporting .csv files) 195 | 196 | #create a data frame 197 | dates <- c("3/27/1995", "4/3/1995", 198 | "4/10/1995", "4/18/1995") 199 | prices <- c(11.1, 7.9, 1.9, 7.3) 200 | d <- data.frame(dates=dates, prices=prices) 201 | 202 | #create the .csv file 203 | filename <- "temp.csv" 204 | write.table(d, file = filename, sep = ",", 205 | row.names = FALSE) 206 | 207 | #read the .csv file 208 | read.table(file = filename, sep = ",", header = TRUE) 209 | read.csv(file = filename) #same thing 210 | 211 | -------------------------------------------------------------------------------- /examples-R/SCR2e-Programming.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. 
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 15 ### 9 | ### Programming Topics ### 10 | ####################################################### 11 | 12 | # packages to install: 13 | # energy, ggplot2, profvis, pryr, microbenchmark, mvtnorm, rbenchmark 14 | # for the last section: dplyr, Lahman, Rcpp 15 | 16 | ### Example 15.1 (Benchmarking methods to generate a sequence) 17 | 18 | s1 <- 1:10 19 | s2 <- seq(1, 10, 1) 20 | s3 <- seq.int(1, 10, 1) 21 | df <- data.frame(s1=s1, s2=s2, s3=s3) 22 | str(df) 23 | 24 | library(microbenchmark) 25 | library(ggplot2) 26 | 27 | n <- 1000 28 | mb <- microbenchmark( 29 | seq(1, n, 1), 30 | seq.int(1, n, 1), 31 | 1:n 32 | ) 33 | 34 | print(mb) 35 | autoplot(mb) # display a violin plot 36 | 37 | ### Example 15.2 (Benchmarking methods to initialize a vector) 38 | 39 | n <- 100 40 | mb2 <- microbenchmark( 41 | numeric = numeric(n) + 1, 42 | rep = rep(1, n), 43 | seq = seq(from=1, to=1, length=n), 44 | ones = matrix(1, nrow=n, ncol=1), 45 | as.ones = as.matrix(rep(1, n)) 46 | ) 47 | 48 | print(mb2) 49 | 50 | ### Example 15.3 (Timings of two multivariate normal generators) 51 | 52 | library(rbenchmark) 53 | library(MASS) 54 | library(mvtnorm) 55 | n <- 100 #sample size 56 | d <- 30 #dimension 57 | N <- 2000 #iterations 58 | mu <- numeric(d) 59 | 60 | benchmark( 61 | columns = c("test", "replications", "elapsed", "relative"), 62 | replications = 2000, 63 | cov = {S <- cov(matrix(rnorm(n*d), n, d))}, 64 | mvrnorm = mvrnorm(n, mu, S), 65 | rmvnorm = rmvnorm(n, mu, S) 66 | ) 67 | 68 | ### Example 15.4 (Profiling with Rprof) 69 | 70 | x <- rnorm(1000) 71 | y <- rnorm(1000) 72 | 73 | Rprof("pr.out", line.profiling = TRUE) 74 | energy::dcor(x, y) 75 | Rprof(NULL) 76 | summaryRprof("pr.out") 77 | 78 | ### Example 15.5 (profvis interactive visualization) 79 | 80 | library(profvis) 81 | profvis(energy::dcor(x, y)) 82 | 83 | ### Example 15.6 (Object size) 84 | 85 | x <- matrix(rnorm(5000), 1000, 5) #1000 obs in R^5 86 | DF <- as.data.frame(x) 87 | object.size(x) 88 | object.size(DF) 89 | pryr::object_size(x) 90 | pryr::object_size(DF) 91 | pryr::compare_size(x) 92 | 93 | listTwo <- list(x, x) 94 | pryr::compare_size(listTwo) 95 | 96 | ### Example 15.7 (Comparing objects and attributes) 97 | 98 | str(x) 99 | str(DF) 100 | all.equal(x, DF) 101 | names(attributes(x)) 102 | names(attributes(DF)) 103 | all.equal(x, DF, check.attributes = FALSE) 104 | 105 | ### Example 15.8 (Comparing objects for equality) 106 | 107 | try(ifelse(all.equal(x, DF), "T", "F")) # error 108 | ifelse(isTRUE(all.equal(x, DF)), "T", "F") # correct 109 | 110 | x <- 1 - 10e-4 111 | y <- x + 2 112 | x == (y - 2) # equal mathematically but 113 | isTRUE(all.equal(x, y - 2)) #gives expected result 114 | 115 | ## does not necessarily evaluate to TRUE or FALSE 116 | try(ifelse(all.equal(x, y), "T", "F")) 117 | ## returns TRUE or FALSE 118 | ifelse(isTRUE(all.equal(x, y)), "T", "F") 119 | 120 | ### Example 15.9 (Display R function code) 121 | 122 | nclass.scott 123 | 124 | ### Example 15.10 (RSiteSearch) 125 | 126 | if (interactive()) 127 | RSiteSearch("ggcorr") 128 | 129 | ### Example 15.11 (UseMethod) 130 | 131 | body(density) 132 | args(density.default) 133 | body(density.default) 134 | 135 | ### Example 15.12 (Show methods) 136 | 137 | methods(t.test) 138 | getAnywhere(t.test.formula) 139 | body(stats:::t.test.formula) 140 | 141 | ### Example 15.13 (Object not found or not an exported 
object) 142 | 143 | try(perc.ci) 144 | try(boot::perc.ci) 145 | 146 | getAnywhere(perc.ci) 147 | args(boot:::perc.ci) 148 | body(boot:::perc.ci) 149 | boot:::perc.ci 150 | getFromNamespace("perc.ci", "boot") 151 | 152 | ### Example 15.14 (getS3method) 153 | 154 | library(microbenchmark) 155 | library(ggplot2) 156 | getAnywhere(autoplot) 157 | getS3method("autoplot", class = "microbenchmark") 158 | getAnywhere(autoplot.microbenchmark) 159 | 160 | ### Example 15.15 (.Primitive or .Internal) 161 | 162 | if (interactive()) 163 | pryr::show_c_source(.Primitive(cumsum(x))) 164 | 165 | ### Example 15.16 (.Call, .External, .C or .Fortran) 166 | 167 | #dist is implemented by a .Call to C_Cdist 168 | 169 | body(dist) 170 | 171 | ### Example 15.17 (A first Rcpp experiment) 172 | 173 | # In RStudio use the menu 174 | # File > New File > C++ file to display this code 175 | 176 | ### Example 15.18 (cppFunction) 177 | 178 | library(Rcpp) 179 | 180 | set.seed(1) 181 | x <- matrix(rnorm(20), nrow = 5, ncol = 4) 182 | 183 | cppFunction('double vecnorm(NumericVector v) { 184 | // compute the Euclidean norm of vector v 185 | int d = v.size(); 186 | double s = 0.0; 187 | for (int i = 0; i < d; i++) 188 | s += v(i) * v(i); 189 | return sqrt(s); 190 | }') 191 | 192 | print(vecnorm(x[, 1])) 193 | print(apply(x, MARGIN = 2, FUN = "vecnorm")) 194 | 195 | 196 | ### Example 15.19 (sourceCpp) 197 | 198 | # Create the C++ source file "printme.cpp" by editing the template 199 | # in RStudio menu: File > New File > C++ 200 | 201 | library(Rcpp) 202 | sourceCpp("..//examples-cpp//printme.cpp") 203 | x <- sample(1:5) 204 | print_me(x) 205 | 206 | ### Example 15.20 (Lahman baseball data) 207 | 208 | library(Lahman) 209 | str(Batting) 210 | # method 1 211 | S <- subset(Batting, Batting$yearID == 1999, 212 | select = c("playerID", "AB", "H")) 213 | 214 | # method 2 (dplyr) 215 | library(dplyr) 216 | Batting %>% filter(yearID == 1999) -> b 217 | 218 | # method 1 219 | AB <- as.vector(by(S$AB, S$playerID, FUN = sum)) 220 | H <- as.vector(by(S$H, S$playerID, FUN = sum)) 221 | S <- data.frame(playerID = unique(S$playerID), 222 | AB = AB, H = H, AVG = round(H / AB, 3), 223 | stringsAsFactors = FALSE) 224 | S400 <- S[S$AB >= 400, ] 225 | 226 | # method 2 (dplyr) 227 | b %>% group_by(playerID) %>% 228 | summarize(AB = sum(AB), H = sum(H)) -> S 229 | S %>% mutate(AVG = round(H / AB, 3)) -> S 230 | S %>% filter(AB >= 400) -> S400 231 | 232 | # method 1 233 | o <- order(S400$AVG, decreasing = TRUE) 234 | S400 <- S400[o, ] 235 | top <- S400[1:10, ] 236 | 237 | # method 2 (dplyr) 238 | S400 %>% arrange(desc(AVG)) -> S400 239 | slice(S400, 1:10) -> top 240 | top 241 | 242 | People %>% select(playerID, nameFirst, nameLast) -> m 243 | top %>% inner_join(m) %>% 244 | select(nameFirst, nameLast, AVG) 245 | 246 | ### Example 15.21 (Comparison with microbenchmark) 247 | 248 | # The source for this example is in R Markdown format 249 | # Open "Lahman.Rmd" in RStudio and knit to see report 250 | -------------------------------------------------------------------------------- /examples-R/SCR2e-ResampAppl.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. 
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 9 ### 9 | ### Resampling Applications ### 10 | ####################################################### 11 | 12 | 13 | # packages to install: bootstrap, DAAG, ggplot2 14 | 15 | 16 | ### Example 9.1 (Jackknife-after-bootstrap) 17 | 18 | library(boot) 19 | library(bootstrap) 20 | set.seed(1111) 21 | 22 | theta.boot <- function(patch, i) { 23 | # function to compute the patch ratio statistic 24 | y <- patch[i, "y"] 25 | z <- patch[i, "z"] 26 | mean(y) / mean(z) 27 | } 28 | 29 | boot.out <- boot(bootstrap::patch, 30 | statistic = theta.boot, R=2000) 31 | A <- boot.array(boot.out) 32 | head(A, 3) 33 | mean(A[, 1] == 0) 34 | 35 | # jackknife-after-bootstrap to est. se(se) 36 | A <- boot.array(boot.out) 37 | theta.b <- boot.out$t 38 | n <- NROW(patch) 39 | jack.se <- numeric(n) 40 | 41 | for (i in 1:n) { 42 | #in i-th replicate omit all samples with x[i] 43 | keep <- which(A[, i] == 0) 44 | jack.se[i] <- sd(theta.b[keep]) 45 | } 46 | 47 | print(boot.out) #for se_boot 48 | se.bar <- mean(jack.se) 49 | se.se <- sqrt((n-1) * mean((jack.se - se.bar)^2)) 50 | print(paste("Jackknife-after-bootstrap est. se(se)=", se.se)) 51 | 52 | 53 | ### Example 9.2 (Jackknife-after-bootstrap) 54 | 55 | # initialize 56 | data(patch, package = "bootstrap") 57 | y <- patch$y 58 | z <- patch$z 59 | dat <- cbind(y, z) 60 | n <- NROW(dat) 61 | B <- 2000 62 | 63 | # jackknife-after-bootstrap step 1: run the bootstrap 64 | theta_boot <- function(dat, ind) { 65 | # function to compute the statistic 66 | y <- dat[ind, 1] 67 | z <- dat[ind, 2] 68 | mean(y) / mean(z) 69 | } 70 | 71 | boot.obj <- boot(dat, statistic = theta_boot, R=2000) 72 | theta.hat <- boot.obj$t0 73 | theta.b <- boot.obj$t 74 | se.boot <- sd(theta.b) 75 | 76 | # jackknife-after-bootstrap to est. 
se(se) 77 | sample.freq <- boot.array(boot.obj) 78 | se.se.reps <- numeric(n) 79 | N <- 1:n 80 | 81 | for (i in N) { 82 | # jackknife-after-bootstrap 83 | # omit all bootstrap samples that contain obs i 84 | keep <- which(sample.freq[ ,i] == 0) 85 | se.se.reps[i] <- sd(theta.b[keep]) 86 | } 87 | 88 | print(boot.obj) 89 | se.bar <- mean(se.se.reps) 90 | se.se <- sqrt((n-1) * mean((se.se.reps - se.bar)^2)) 91 | se.se 92 | 93 | ### Example 9.3 (ironslag linear model) 94 | 95 | library(ggplot2) 96 | library(DAAG) 97 | L1 <- lm(magnetic ~ chemical, data=ironslag) 98 | cf3 <- round(L1$coeff, 3) 99 | cap <- paste("Fit: magnetic =", cf3[1], "+", cf3[2], "chemical") 100 | 101 | ggplot(data=ironslag, aes(chemical, magnetic)) + 102 | geom_point() + geom_smooth(method="lm") + 103 | ggtitle(cap) 104 | 105 | plot(L1, which=1:2, ask=FALSE) #residual plots 106 | 107 | ### Example 9.4 (mammals data) 108 | 109 | library(MASS) 110 | cor(log(mammals$body), log(mammals$brain)) 111 | summary(mammals) 112 | 113 | y <- log(mammals$brain) 114 | x <- log(mammals$body) 115 | L <- lm(y ~ x) 116 | L 117 | 118 | cap <- paste("Fit: log(brain) =", round(L$coeff[1],3), 119 | "+", round(L$coeff[2],3), "log(body)") 120 | ggplot(data=mammals, aes(x, y)) + 121 | geom_point() + geom_smooth(method="lm") + 122 | labs(x = "log(body)", y = "log(brain)", title = cap) 123 | 124 | summary(L)$r.squared 125 | 126 | ### Example 9.5 (ironslag data, resampling cases) 127 | 128 | x <- ironslag$chemical 129 | y <- ironslag$magnetic 130 | m <- 2000 131 | n <- NROW(x) 132 | L1 <- lm(y ~ x) #estimate the model 133 | b0 <- L1$coeff[1]; b1 <- L1$coeff[2] 134 | 135 | ## run bootstrap of cases 136 | out <- replicate(m, expr={ 137 | i <- sample(1:n, replace=TRUE, size=n) 138 | xstar <- x[i] 139 | ystar <- y[i] 140 | Lb <- lm(ystar ~ xstar) 141 | s <- summary(Lb)$sigma 142 | c(Lb$coeff[1], slope=Lb$coeff[2], s=s) 143 | }) 144 | 145 | bootCases <- t(out) 146 | meanCases <- colMeans(bootCases) 147 | sdCases <- apply(bootCases, 2, "sd") 148 | meanCases 149 | sdCases 150 | 151 | biasInt <- mean(bootCases[,1] - b0) #bias for intercept 152 | biasSlope <- mean(bootCases[,2] - b1) #bias for slope 153 | 154 | rbind(estimate=c(b0, b1), bias=c(biasInt, biasSlope), 155 | se=sdCases[1:2], cv=c(biasInt, cv=biasSlope)/sdCases[1:2]) 156 | 157 | ### Example 9.6 (Resampling cases using the boot function) 158 | 159 | # set.seed(1104) 160 | library(boot) 161 | m <- 2000 162 | stats <- function(dat, i) { 163 | x <- dat$chemical[i] 164 | y <- dat$magnetic[i] 165 | Lb <- lm(y ~ x) 166 | s <- summary(Lb)$sigma 167 | c(Lb$coeff[1], slope=Lb$coeff[2], s=s) 168 | } 169 | 170 | boot.out <- boot(ironslag, statistic=stats, R=2000) 171 | boot.out 172 | boot.out$t0 173 | 174 | sd(boot.out$t[,2]) 175 | boottbl <- broom::tidy(boot.out) 176 | boottbl$std.error[2] 177 | 178 | MASS::truehist(boot.out$t[ ,2], main="", xlab="slopes") 179 | abline(v = boot.out$t0[2], lwd=2) 180 | 181 | boot.ci(boot.out, index=2, type=c("norm","perc","basic","bca")) 182 | 183 | ### Example 9.7 (Resampling errors: mammals data) 184 | 185 | x <- log(mammals$brain) 186 | y <- log(mammals$body) 187 | L <- lm(y ~ x) 188 | 189 | m.resid <- rstandard(L, sd = 1) 190 | r <- m.resid - mean(m.resid) 191 | m <- 1000; n <- NROW(x) 192 | estsErr <- replicate(m, expr={ 193 | estar <- sample(r, replace=TRUE, size=n) 194 | ystar <- L$fitted.values + estar 195 | Lb <- lm(ystar ~ x) 196 | s <- summary(Lb)$sigma 197 | c(b0=Lb$coeff[1], b1=Lb$coeff[2], s=s) 198 | }) 199 | ests <- t(estsErr) 200 | summary(ests) 201 | 202 | ### 
Example 9.8 (Resampling errors, continued) 203 | sd(ests[,2]) 204 | s <- summary(L)$sigma 205 | SSx <- (n - 1) * var(x) 206 | se.beta1 <- sqrt(s^2 / SSx) 207 | se.beta1 208 | s * sqrt(1/n + mean(x)^2 / SSx) 209 | sd(ests[,1]) 210 | 211 | betas <- summary(L)$coeff 212 | betas 213 | betas[, "Std. Error"] 214 | 215 | broom::tidy(summary(L)) 216 | broom::tidy(summary(L))$std.error 217 | 218 | ### Example 9.9 (Model based resampling with the boot function) 219 | regstats <- function(dat, i) { 220 | #dat is a data frame (r, x, yhat) 221 | #r are the modified centered residuals, yhat are the fits 222 | ystar <- dat$yhat + dat$r[i] 223 | xstar <- dat$x 224 | Lnew <- lm(ystar ~ xstar) 225 | Lnew$coefficients 226 | } 227 | 228 | y <- log(mammals$brain) 229 | x <- log(mammals$body) 230 | L <- lm(y ~ x) 231 | r <- rstandard(L, sd=1) 232 | r <- r - mean(r) 233 | df <- data.frame(r=r, x=x, yhat=L$fitted) 234 | head(df) 235 | boot.obj <- boot(data=df, statistic=regstats, R=2000) 236 | broom::tidy(boot.obj) 237 | 238 | 239 | ### Example 9.10 (Empirical influence values for the patch ratio statistic) 240 | 241 | library(boot) 242 | library(bootstrap) 243 | theta_boot <- function(dat, ind) { 244 | # function to compute the patch ratio statistic 245 | mean(dat[ind, ]$y) / mean(dat[ind, ]$z) 246 | } 247 | boot.out <- boot(patch, theta_boot, R = 2000) 248 | infl <- empinf(boot.out, type = "jack") 249 | theta.hat <- boot.out$t0 250 | jack <- theta.hat - infl / (nrow(patch) - 1) 251 | rbind(infl, jack) 252 | 253 | ### Example 9.11 (Jackknife-after-bootstrap plot) 254 | 255 | jack.after.boot(boot.out, useJ=TRUE, stinf=FALSE) 256 | 257 | n <- NROW(patch) 258 | J <- numeric(n) 259 | b.freq <- boot.array(boot.out) 260 | theta.b <- boot.out$t 261 | 262 | for (i in 1:n) { 263 | keep <- which(b.freq[ ,i] == 0) 264 | J[i] <- mean(theta.b[keep]) 265 | } 266 | 267 | # the jackknife influence values 268 | (n - 1) * (mean(J) - J) 269 | 270 | jack.after.boot(boot.out, useJ=TRUE, stinf=TRUE) 271 | 272 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chBoot.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. 
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### March 6, 2019 ### 7 | ### ### 8 | ### R code for Chapter 8 ### 9 | ### Bootstrap and Jackknife ### 10 | ####################################################### 11 | 12 | 13 | # packages to install: bootstrap, DAAG 14 | 15 | 16 | ### Example 8.2 (Bootstrap estimate of standard error) 17 | 18 | library(bootstrap) #for the law data 19 | print(cor(law$LSAT, law$GPA)) 20 | print(cor(law82$LSAT, law82$GPA)) 21 | 22 | #set up the bootstrap 23 | B <- 200 #number of replicates 24 | n <- nrow(law) #sample size 25 | R <- numeric(B) #storage for replicates 26 | 27 | #bootstrap estimate of standard error of R 28 | for (b in 1:B) { 29 | #randomly select the indices 30 | i <- sample(1:n, size = n, replace = TRUE) 31 | LSAT <- law$LSAT[i] #i is a vector of indices 32 | GPA <- law$GPA[i] 33 | R[b] <- cor(LSAT, GPA) 34 | } 35 | #output 36 | print(se.R <- sd(R)) 37 | hist(R, prob = TRUE) 38 | 39 | 40 | ### Example 8.3 (Bootstrap estimate of standard error: boot function) 41 | 42 | r <- function(x, i) { 43 | #want correlation of columns 1 and 2 44 | cor(x[i,1], x[i,2]) 45 | } 46 | 47 | library(boot) #for boot function 48 | obj <- boot(data = law, statistic = r, R = 2000) 49 | obj 50 | y <- obj$t 51 | sd(y) 52 | 53 | 54 | ### Example 8.4 (Bootstrap estimate of bias) 55 | 56 | #sample estimate for n=15 57 | theta.hat <- cor(law$LSAT, law$GPA) 58 | 59 | #bootstrap estimate of bias 60 | B <- 2000 #larger for estimating bias 61 | n <- nrow(law) 62 | theta.b <- numeric(B) 63 | 64 | for (b in 1:B) { 65 | i <- sample(1:n, size = n, replace = TRUE) 66 | LSAT <- law$LSAT[i] 67 | GPA <- law$GPA[i] 68 | theta.b[b] <- cor(LSAT, GPA) 69 | } 70 | bias <- mean(theta.b - theta.hat) 71 | bias 72 | 73 | 74 | ### Example 8.5 (Bootstrap estimate of bias of a ratio estimate) 75 | 76 | 77 | data(patch, package = "bootstrap") 78 | patch 79 | 80 | n <- nrow(patch) #in bootstrap package 81 | B <- 2000 82 | theta.b <- numeric(B) 83 | theta.hat <- mean(patch$y) / mean(patch$z) 84 | 85 | #bootstrap 86 | for (b in 1:B) { 87 | i <- sample(1:n, size = n, replace = TRUE) 88 | y <- patch$y[i] 89 | z <- patch$z[i] 90 | theta.b[b] <- mean(y) / mean(z) 91 | } 92 | bias <- mean(theta.b) - theta.hat 93 | se <- sd(theta.b) 94 | print(list(est=theta.hat, bias = bias, 95 | se = se, cv = bias/se)) 96 | 97 | 98 | ### Example 8.6 (Jackknife estimate of bias) 99 | 100 | data(patch, package = "bootstrap") 101 | n <- nrow(patch) 102 | y <- patch$y 103 | z <- patch$z 104 | theta.hat <- mean(y) / mean(z) 105 | print (theta.hat) 106 | 107 | #compute the jackknife replicates, leave-one-out estimates 108 | theta.jack <- numeric(n) 109 | for (i in 1:n) 110 | theta.jack[i] <- mean(y[-i]) / mean(z[-i]) 111 | bias <- (n - 1) * (mean(theta.jack) - theta.hat) 112 | 113 | print(bias) #jackknife estimate of bias 114 | 115 | 116 | ### Example 8.7 (Jackknife estimate of standard error) 117 | 118 | se <- sqrt((n-1) * 119 | mean((theta.jack - mean(theta.jack))^2)) 120 | print(se) 121 | 122 | 123 | ### Example 8.8 (Failure of jackknife) 124 | 125 | set.seed(123) #for the specific example given 126 | #change the seed to see other examples 127 | 128 | n <- 10 129 | x <- sample(1:100, size = n) 130 | 131 | #jackknife estimate of se 132 | M <- numeric(n) 133 | for (i in 1:n) { #leave one out 134 | y <- x[-i] 135 | M[i] <- median(y) 136 | } 137 | Mbar <- mean(M) 138 | print(sqrt((n-1)/n * sum((M - Mbar)^2))) 139 | 140 | #bootstrap estimate of se 141 | Mb <- replicate(1000, 
expr = { 142 | y <- sample(x, size = n, replace = TRUE) 143 | median(y) }) 144 | print(sd(Mb)) 145 | print(x) 146 | print(M) 147 | print(Mb) 148 | 149 | 150 | ### Example 8.9 (Bootstrap confidence intervals for patch ratio statistic) 151 | 152 | library(boot) #for boot and boot.ci 153 | data(patch, package = "bootstrap") 154 | 155 | theta.boot <- function(dat, ind) { 156 | #function to compute the statistic 157 | y <- dat[ind, 1] 158 | z <- dat[ind, 2] 159 | mean(y) / mean(z) 160 | } 161 | 162 | y <- patch$y 163 | z <- patch$z 164 | dat <- cbind(y, z) 165 | boot.obj <- boot(dat, statistic = theta.boot, R = 2000) 166 | 167 | print(boot.obj) 168 | print(boot.ci(boot.obj, 169 | type = c("basic", "norm", "perc"))) 170 | 171 | 172 | #calculations for bootstrap confidence intervals 173 | alpha <- c(.025, .975) 174 | 175 | #normal 176 | print(boot.obj$t0 + qnorm(alpha) * sd(boot.obj$t)) 177 | 178 | #basic 179 | print(2*boot.obj$t0 - 180 | quantile(boot.obj$t, rev(alpha), type=1)) 181 | 182 | #percentile 183 | print(quantile(boot.obj$t, alpha, type=6)) 184 | 185 | 186 | ### Example 8.10 (Bootstrap confidence intervals for the correlation statistic) 187 | 188 | library(boot) 189 | data(law, package = "bootstrap") 190 | boot.obj <- boot(law, R = 2000, 191 | statistic = function(x, i){cor(x[i,1], x[i,2])}) 192 | print(boot.ci(boot.obj, type=c("basic","norm","perc"))) 193 | 194 | 195 | ### Example 8.11 (Bootstrap t confidence interval) 196 | 197 | boot.t.ci <- 198 | function(x, B = 500, R = 100, level = .95, statistic){ 199 | #compute the bootstrap t CI 200 | x <- as.matrix(x); n <- nrow(x) 201 | stat <- numeric(B); se <- numeric(B) 202 | 203 | boot.se <- function(x, R, f) { 204 | #local function to compute the bootstrap 205 | #estimate of standard error for statistic f(x) 206 | x <- as.matrix(x); m <- nrow(x) 207 | th <- replicate(R, expr = { 208 | i <- sample(1:m, size = m, replace = TRUE) 209 | f(x[i, ]) 210 | }) 211 | return(sd(th)) 212 | } 213 | 214 | for (b in 1:B) { 215 | j <- sample(1:n, size = n, replace = TRUE) 216 | y <- x[j, ] 217 | stat[b] <- statistic(y) 218 | se[b] <- boot.se(y, R = R, f = statistic) 219 | } 220 | stat0 <- statistic(x) 221 | t.stats <- (stat - stat0) / se 222 | se0 <- sd(stat) 223 | alpha <- 1 - level 224 | Qt <- quantile(t.stats, c(alpha/2, 1-alpha/2), type = 1) 225 | names(Qt) <- rev(names(Qt)) 226 | CI <- rev(stat0 - Qt * se0) 227 | } 228 | 229 | 230 | ### Example 8.12 (Bootstrap t confidence interval for patch ratio statistic) 231 | 232 | #boot package and patch data were loaded in Example 8.10 233 | #library(boot) #for boot and boot.ci 234 | #data(patch, package = "bootstrap") 235 | 236 | dat <- cbind(patch$y, patch$z) 237 | stat <- function(dat) { 238 | mean(dat[, 1]) / mean(dat[, 2]) } 239 | ci <- boot.t.ci(dat, statistic = stat, B=2000, R=200) 240 | print(ci) 241 | 242 | 243 | ### Example 8.13 (BCa bootstrap confidence interval) 244 | 245 | boot.BCa <- 246 | function(x, th0, th, stat, conf = .95) { 247 | # bootstrap with BCa bootstrap confidence interval 248 | # th0 is the observed statistic 249 | # th is the vector of bootstrap replicates 250 | # stat is the function to compute the statistic 251 | 252 | x <- as.matrix(x) 253 | n <- nrow(x) #observations in rows 254 | N <- 1:n 255 | alpha <- (1 + c(-conf, conf))/2 256 | zalpha <- qnorm(alpha) 257 | 258 | # the bias correction factor 259 | z0 <- qnorm(sum(th < th0) / length(th)) 260 | 261 | # the acceleration factor (jackknife est.) 
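# leave out one observation at a time to get jackknife replicates
# th.jack; with influence values L = mean(th.jack) - th.jack, the
# acceleration is estimated by a = sum(L^3) / (6 * sum(L^2)^1.5)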
262 | th.jack <- numeric(n) 263 | for (i in 1:n) { 264 | J <- N[1:(n-1)] 265 | th.jack[i] <- stat(x[-i, ], J) 266 | } 267 | L <- mean(th.jack) - th.jack 268 | a <- sum(L^3)/(6 * sum(L^2)^1.5) 269 | 270 | # BCa conf. limits 271 | adj.alpha <- pnorm(z0 + (z0+zalpha)/(1-a*(z0+zalpha))) 272 | limits <- quantile(th, adj.alpha, type=6) 273 | return(list("est"=th0, "BCa"=limits)) 274 | } 275 | 276 | 277 | ### Example 8.14 (BCa bootstrap confidence interval) 278 | 279 | #boot package and patch data were loaded in Example 8.10 280 | #library(boot) #for boot and boot.ci 281 | #data(patch, package = "bootstrap") 282 | 283 | n <- nrow(patch) 284 | B <- 2000 285 | y <- patch$y 286 | z <- patch$z 287 | x <- cbind(y, z) 288 | theta.b <- numeric(B) 289 | theta.hat <- mean(y) / mean(z) 290 | 291 | #bootstrap 292 | for (b in 1:B) { 293 | i <- sample(1:n, size = n, replace = TRUE) 294 | y <- patch$y[i] 295 | z <- patch$z[i] 296 | theta.b[b] <- mean(y) / mean(z) 297 | } 298 | #compute the BCa interval 299 | stat <- function(dat, index) { 300 | mean(dat[index, 1]) / mean(dat[index, 2]) } 301 | 302 | boot.BCa(x, th0 = theta.hat, th = theta.b, stat = stat) 303 | 304 | 305 | 306 | ### Example 8.15 (BCa bootstrap confidence interval using boot.ci) 307 | 308 | #using x from Example 8.15 309 | boot.obj <- boot(x, statistic = stat, R=2000) 310 | boot.ci(boot.obj, type=c("perc", "bca")) 311 | 312 | ### Example 8.16 (Model selection) 313 | 314 | #to prompt for next graph, uncomment line below 315 | #par(ask = TRUE) 316 | 317 | library(DAAG); attach(ironslag) 318 | a <- seq(10, 40, .1) #sequence for plotting fits 319 | 320 | L1 <- lm(magnetic ~ chemical) 321 | plot(chemical, magnetic, main="Linear", pch=16) 322 | yhat1 <- L1$coef[1] + L1$coef[2] * a 323 | lines(a, yhat1, lwd=2) 324 | 325 | L2 <- lm(magnetic ~ chemical + I(chemical^2)) 326 | plot(chemical, magnetic, main="Quadratic", pch=16) 327 | yhat2 <- L2$coef[1] + L2$coef[2] * a + L2$coef[3] * a^2 328 | lines(a, yhat2, lwd=2) 329 | 330 | L3 <- lm(log(magnetic) ~ chemical) 331 | plot(chemical, magnetic, main="Exponential", pch=16) 332 | logyhat3 <- L3$coef[1] + L3$coef[2] * a 333 | yhat3 <- exp(logyhat3) 334 | lines(a, yhat3, lwd=2) 335 | 336 | L4 <- lm(log(magnetic) ~ log(chemical)) 337 | plot(log(chemical), log(magnetic), main="Log-Log", pch=16) 338 | logyhat4 <- L4$coef[1] + L4$coef[2] * log(a) 339 | lines(log(a), logyhat4, lwd=2) 340 | 341 | ### Example 8.17 (Model selection: Cross validation) 342 | 343 | # Example 8.16, cont. 
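# n-fold (leave-one-out) cross validation: for each k, fit the four
# candidate models without observation k and record the prediction
# error at the held-out point (chemical[k], magnetic[k])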
344 | n <- length(magnetic) #in DAAG ironslag 345 | e1 <- e2 <- e3 <- e4 <- numeric(n) 346 | 347 | # for n-fold cross validation 348 | # fit models on leave-one-out samples 349 | for (k in 1:n) { 350 | y <- magnetic[-k] 351 | x <- chemical[-k] 352 | 353 | J1 <- lm(y ~ x) 354 | yhat1 <- J1$coef[1] + J1$coef[2] * chemical[k] 355 | e1[k] <- magnetic[k] - yhat1 356 | 357 | J2 <- lm(y ~ x + I(x^2)) 358 | yhat2 <- J2$coef[1] + J2$coef[2] * chemical[k] + 359 | J2$coef[3] * chemical[k]^2 360 | e2[k] <- magnetic[k] - yhat2 361 | 362 | J3 <- lm(log(y) ~ x) 363 | logyhat3 <- J3$coef[1] + J3$coef[2] * chemical[k] 364 | yhat3 <- exp(logyhat3) 365 | e3[k] <- magnetic[k] - yhat3 366 | 367 | J4 <- lm(log(y) ~ log(x)) 368 | logyhat4 <- J4$coef[1] + J4$coef[2] * log(chemical[k]) 369 | yhat4 <- exp(logyhat4) 370 | e4[k] <- magnetic[k] - yhat4 371 | } 372 | 373 | 374 | c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2)) 375 | 376 | #selected model, fitted in Example 8.16 377 | L2 378 | 379 | par(mfrow = c(2, 2)) #layout for graphs 380 | plot(L2$fit, L2$res) #residuals vs fitted values 381 | abline(0, 0) #reference line 382 | qqnorm(L2$res) #normal probability plot 383 | qqline(L2$res) #reference line 384 | par(mfrow = c(1, 1)) #restore display 385 | 386 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chMCint.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 6 ### 9 | ### Monte Carlo Integration and Variance Reduction ### 10 | ####################################################### 11 | 12 | 13 | ### Example 6.1 (Simple Monte Carlo integration) 14 | 15 | m <- 10000 16 | x <- runif(m) 17 | theta.hat <- mean(exp(-x)) 18 | print(theta.hat) 19 | print(1 - exp(-1)) 20 | 21 | 22 | ### Example 6.2 (Simple Monte Carlo integration, cont.) 23 | 24 | m <- 10000 25 | x <- runif(m, min=2, max=4) 26 | theta.hat <- mean(exp(-x)) * 2 27 | print(theta.hat) 28 | print(exp(-2) - exp(-4)) 29 | 30 | 31 | ### Example 6.3 (Monte Carlo integration, unbounded interval) 32 | 33 | x <- seq(.1, 2.5, length = 10) 34 | m <- 10000 35 | u <- runif(m) 36 | cdf <- numeric(length(x)) 37 | for (i in 1:length(x)) { 38 | g <- x[i] * exp(-(u * x[i])^2 / 2) 39 | cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5 40 | } 41 | 42 | Phi <- pnorm(x) 43 | print(round(rbind(x, cdf, Phi), 3)) 44 | 45 | 46 | ### Example 6.4 (Example 6.3, cont.) 
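# estimate the standard normal cdf Phi(x) directly, by the sample
# proportion of generated N(0,1) variates that fall below x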
47 | 48 | x <- seq(.1, 2.5, length = 10) 49 | m <- 10000 50 | z <- rnorm(m) 51 | dim(x) <- length(x) 52 | p <- apply(x, MARGIN = 1, 53 | FUN = function(x, z) {mean(z < x)}, z = z) 54 | 55 | Phi <- pnorm(x) 56 | print(round(rbind(x, p, Phi), 3)) 57 | 58 | 59 | ### Example 6.5 (Error bounds for MC integration) 60 | 61 | x <- 2 62 | m <- 10000 63 | z <- rnorm(m) 64 | g <- (z < x) #the indicator function 65 | v <- mean((g - mean(g))^2) / m 66 | cdf <- mean(g) 67 | c(cdf, v) 68 | c(cdf - 1.96 * sqrt(v), cdf + 1.96 * sqrt(v)) 69 | 70 | 71 | ### Example 6.6 (Antithetic variables) 72 | 73 | MC.Phi <- function(x, R = 10000, antithetic = TRUE) { 74 | u <- runif(R/2) 75 | if (!antithetic) v <- runif(R/2) else 76 | v <- 1 - u 77 | u <- c(u, v) 78 | cdf <- numeric(length(x)) 79 | for (i in 1:length(x)) { 80 | g <- x[i] * exp(-(u * x[i])^2 / 2) 81 | cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5 82 | } 83 | cdf 84 | } 85 | 86 | 87 | x <- seq(.1, 2.5, length=5) 88 | Phi <- pnorm(x) 89 | set.seed(123) 90 | MC1 <- MC.Phi(x, anti = FALSE) 91 | set.seed(123) 92 | MC2 <- MC.Phi(x) 93 | print(round(rbind(x, MC1, MC2, Phi), 5)) 94 | 95 | 96 | m <- 1000 97 | MC1 <- MC2 <- numeric(m) 98 | x <- 1.95 99 | for (i in 1:m) { 100 | MC1[i] <- MC.Phi(x, R = 1000, anti = FALSE) 101 | MC2[i] <- MC.Phi(x, R = 1000) 102 | } 103 | 104 | print(sd(MC1)) 105 | print(sd(MC2)) 106 | print((var(MC1) - var(MC2))/var(MC1)) 107 | 108 | 109 | ### Example 6.7 (Control variate) 110 | 111 | m <- 10000 112 | a <- - 12 + 6 * (exp(1) - 1) 113 | U <- runif(m) 114 | T1 <- exp(U) #simple MC 115 | T2 <- exp(U) + a * (U - 1/2) #controlled 116 | 117 | mean(T1) 118 | mean(T2) 119 | (var(T1) - var(T2)) / var(T1) 120 | 121 | 122 | ### Example 6.8 (MC integration using control variates) 123 | 124 | f <- function(u) 125 | exp(-.5)/(1+u^2) 126 | 127 | g <- function(u) 128 | exp(-u)/(1+u^2) 129 | 130 | set.seed(510) #needed later 131 | u <- runif(10000) 132 | B <- f(u) 133 | A <- g(u) 134 | 135 | cor(A, B) 136 | a <- -cov(A,B) / var(B) #est of c* 137 | a 138 | 139 | m <- 100000 140 | u <- runif(m) 141 | T1 <- g(u) 142 | T2 <- T1 + a * (f(u) - exp(-.5)*pi/4) 143 | 144 | c(mean(T1), mean(T2)) 145 | c(var(T1), var(T2)) 146 | (var(T1) - var(T2)) / var(T1) 147 | 148 | 149 | ### Example 6.9 (Control variate and regression) 150 | 151 | 152 | set.seed(510) 153 | mu <- exp(-.5)*pi/4 154 | u <- runif(10000) 155 | f <- exp(-.5)/(1+u^2) 156 | g <- exp(-u)/(1+u^2) 157 | L <- lm(g ~ f) 158 | L 159 | c.star <- - L$coeff[2] 160 | c.star 161 | 162 | theta.hat <- sum(L$coeff * c(1, mu)) #pred. value at mu 163 | theta.hat 164 | summary(L)$sigma^2 165 | summary(L)$r.squared 166 | 167 | ### Example 6.10 (Control variates and multiple regression) 168 | 169 | # Example 6.9 continued with a second control variate 170 | # and multiple regression to estimate vector c* 171 | 172 | u <- runif(10000) 173 | f1 <- exp(-.5) / (1+u^2) 174 | f2 <- exp(-u) / (1-exp(-1)) 175 | g <- exp(-u) / (1+u^2) 176 | 177 | L2 <- lm(g ~ f1 + f2) 178 | 179 | L2$coeff 180 | c.star <- - L2$coeff[2:3] 181 | c.star 182 | mu1 <- exp(-.5)*pi/4 183 | mu2 <- 1 184 | mu <- c(mu1, mu2) 185 | 186 | # theta.hat is the predicted response at mu 187 | # alternately can use predict.lm method 188 | 189 | theta.hat <- sum(L2$coeff * c(1, mu)) #pred. value at mu 190 | theta.hat 191 | 192 | ## alternately 193 | df <- data.frame(f1=mu1, f2=mu2) 194 | theta.hat <-predict(L2, df) 195 | 196 | # MSE / n is the est. 
variance of the control estimator 197 | MSE <- summary(L2)$sigma^2 198 | MSE 199 | sqrt(MSE / 10000) 200 | 201 | 202 | # compare with the previous estimates using 203 | # naive MC and control variate f1(u) 204 | # var1=.060231423 var2=.003124814 205 | var0 <- 0.060231423 #naive MC 206 | var1 <- 0.003117644 #controlled estimator with f1 207 | var2 <- MSE #new estimator 208 | 209 | # percent reduction in variance 210 | # it is a weighted average of R^2 values 211 | # so easier to compute directly 212 | 213 | 100 * (var0 - var1) / var0 214 | 100 * (var1 - var2) / var1 215 | 100 * (var0 - var2) / var0 216 | 217 | ### Example 6.11 (Choice of the importance function) 218 | #code for plot is at the end of the file 219 | 220 | m <- 10000 221 | theta.hat <- se <- numeric(5) 222 | g <- function(x) { 223 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) 224 | } 225 | 226 | x <- runif(m) #using f0 227 | fg <- g(x) 228 | theta.hat[1] <- mean(fg) 229 | se[1] <- sd(fg) 230 | 231 | x <- rexp(m, 1) #using f1 232 | fg <- g(x) / exp(-x) 233 | theta.hat[2] <- mean(fg) 234 | se[2] <- sd(fg) 235 | 236 | x <- rcauchy(m) #using f2 237 | i <- c(which(x > 1), which(x < 0)) 238 | x[i] <- 2 #to catch overflow errors in g(x) 239 | fg <- g(x) / dcauchy(x) 240 | theta.hat[3] <- mean(fg) 241 | se[3] <- sd(fg) 242 | 243 | u <- runif(m) #f3, inverse transform method 244 | x <- - log(1 - u * (1 - exp(-1))) 245 | fg <- g(x) / (exp(-x) / (1 - exp(-1))) 246 | theta.hat[4] <- mean(fg) 247 | se[4] <- sd(fg) 248 | 249 | u <- runif(m) #f4, inverse transform method 250 | x <- tan(pi * u / 4) 251 | fg <- g(x) / (4 / ((1 + x^2) * pi)) 252 | theta.hat[5] <- mean(fg) 253 | se[5] <- sd(fg) 254 | 255 | rbind(theta.hat, se / sqrt(m)) 256 | 257 | 258 | ### Example 6.12 (Example 6.11, cont.) 259 | 260 | M <- 20 #number of replicates 261 | T2 <- numeric(4) 262 | estimates <- matrix(0, 10, 2) 263 | 264 | g <- function(x) { 265 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) } 266 | 267 | for (i in 1:10) { 268 | estimates[i, 1] <- mean(g(runif(M))) 269 | T2[1] <- mean(g(runif(M/4, 0, .25))) 270 | T2[2] <- mean(g(runif(M/4, .25, .5))) 271 | T2[3] <- mean(g(runif(M/4, .5, .75))) 272 | T2[4] <- mean(g(runif(M/4, .75, 1))) 273 | estimates[i, 2] <- mean(T2) 274 | } 275 | 276 | estimates 277 | apply(estimates, 2, mean) 278 | apply(estimates, 2, var) 279 | 280 | 281 | ### Example 6.13 (Examples 6.11-6.12, cont.) 
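# stratified sampling: split (0,1) into k equal-width strata, use
# M/k replicates in each stratum, and average the k estimates;
# repeat N times to compare variances with standard Monte Carlo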
282 | 283 | M <- 10000 #number of replicates 284 | k <- 10 #number of strata 285 | r <- M / k #replicates per stratum 286 | N <- 50 #number of times to repeat the estimation 287 | T2 <- numeric(k) 288 | estimates <- matrix(0, N, 2) 289 | 290 | g <- function(x) { 291 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) 292 | } 293 | 294 | for (i in 1:N) { 295 | estimates[i, 1] <- mean(g(runif(M))) 296 | for (j in 1:k) 297 | T2[j] <- mean(g(runif(M/k, (j-1)/k, j/k))) 298 | estimates[i, 2] <- mean(T2) 299 | } 300 | 301 | apply(estimates, 2, mean) 302 | apply(estimates, 2, var) 303 | 304 | 305 | 306 | ### Plot importance functions in Figures 6.1(a) and 6.1.(b) 307 | 308 | #par(ask = TRUE) #uncomment to pause between graphs 309 | 310 | x <- seq(0, 1, .01) 311 | w <- 2 312 | f1 <- exp(-x) 313 | f2 <- (1 / pi) / (1 + x^2) 314 | f3 <- exp(-x) / (1 - exp(-1)) 315 | f4 <- 4 / ((1 + x^2) * pi) 316 | g <- exp(-x) / (1 + x^2) 317 | 318 | #for color change lty to col 319 | 320 | #figure (a) 321 | plot(x, g, type = "l", main = "", ylab = "", 322 | ylim = c(0,2), lwd = w) 323 | lines(x, g/g, lty = 2, lwd = w) 324 | lines(x, f1, lty = 3, lwd = w) 325 | lines(x, f2, lty = 4, lwd = w) 326 | lines(x, f3, lty = 5, lwd = w) 327 | lines(x, f4, lty = 6, lwd = w) 328 | legend("topright", legend = c("g", 0:4), 329 | lty = 1:6, lwd = w, inset = 0.02) 330 | 331 | #figure (b) 332 | plot(x, g, type = "l", main = "", ylab = "", 333 | ylim = c(0,3.2), lwd = w, lty = 2) 334 | lines(x, g/f1, lty = 3, lwd = w) 335 | lines(x, g/f2, lty = 4, lwd = w) 336 | lines(x, g/f3, lty = 5, lwd = w) 337 | lines(x, g/f4, lty = 6, lwd = w) 338 | legend("topright", legend = c(0:4), 339 | lty = 2:6, lwd = w, inset = 0.02) 340 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chMonteCarlo.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 7 ### 9 | ### Monte Carlo Methods in Inference ### 10 | ####################################################### 11 | 12 | # packages to install: energy, ggplot2 13 | 14 | ### Example 7.1 (Basic Monte Carlo estimation) 15 | 16 | m <- 1000 17 | g <- numeric(m) 18 | for (i in 1:m) { 19 | x <- rnorm(2) 20 | g[i] <- abs(x[1] - x[2]) 21 | } 22 | est <- mean(g) 23 | est 24 | 25 | 26 | ### Example 7.2 (Estimating the MSE of a trimmed mean) 27 | 28 | n <- 20 29 | m <- 1000 30 | tmean <- numeric(m) 31 | for (i in 1:m) { 32 | x <- sort(rnorm(n)) 33 | tmean[i] <- sum(x[2:(n-1)]) / (n-2) 34 | } 35 | mse <- mean(tmean^2) 36 | mse 37 | sqrt(sum((tmean - mean(tmean))^2)) / m #se 38 | 39 | n <- 20 40 | m <- 1000 41 | tmean <- numeric(m) 42 | for (i in 1:m) { 43 | x <- sort(rnorm(n)) 44 | tmean[i] <- median(x) 45 | } 46 | mse <- mean(tmean^2) 47 | mse 48 | sqrt(sum((tmean - mean(tmean))^2)) / m #se 49 | 50 | 51 | ### Example 7.3 (MSE of a trimmed mean, cont.) 
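# MSE of k-level trimmed means for samples from the contaminated
# normal mixture p*N(0,1) + (1-p)*N(0,100); each row of mse stores
# (estimate, se) pairs for p = 1, .95, .90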
52 | 53 | set.seed(522) 54 | n <- 20 55 | K <- n/2 - 1 56 | m <- 1000 57 | mse <- matrix(0, n/2, 6) 58 | 59 | trimmed.mse <- function(n, m, k, p) { 60 | #MC est of mse for k-level trimmed mean of 61 | #contaminated normal pN(0,1) + (1-p)N(0,100) 62 | tmean <- numeric(m) 63 | for (i in 1:m) { 64 | sigma <- sample(c(1, 10), size = n, 65 | replace = TRUE, prob = c(p, 1-p)) 66 | x <- sort(rnorm(n, 0, sigma)) 67 | tmean[i] <- sum(x[(k+1):(n-k)]) / (n-2*k) 68 | } 69 | mse.est <- mean(tmean^2) 70 | se.mse <- sqrt(mean((tmean-mean(tmean))^2)) / sqrt(m) 71 | return(c(mse.est, se.mse)) 72 | } 73 | 74 | for (k in 0:K) { 75 | mse[k+1, 1:2] <- trimmed.mse(n=n, m=m, k=k, p=1.0) 76 | mse[k+1, 3:4] <- trimmed.mse(n=n, m=m, k=k, p=.95) 77 | mse[k+1, 5:6] <- trimmed.mse(n=n, m=m, k=k, p=.9) 78 | } 79 | 80 | ### Example 7.4 (Confidence interval for variance) 81 | 82 | n <- 20 83 | alpha <- .05 84 | x <- rnorm(n, mean=0, sd=2) 85 | UCL <- (n-1) * var(x) / qchisq(alpha, df=n-1) 86 | 87 | 88 | ### Example 7.5 (MC estimate of confidence level) 89 | 90 | n <- 20 91 | alpha <- .05 92 | UCL <- replicate(1000, expr = { 93 | x <- rnorm(n, mean = 0, sd = 2) 94 | (n-1) * var(x) / qchisq(alpha, df = n-1) 95 | } ) 96 | #count the number of intervals that contain sigma^2=4 97 | sum(UCL > 4) 98 | #or compute the mean to get the confidence level 99 | mean(UCL > 4) 100 | 101 | 102 | ### Example 7.6 (Empirical confidence level) 103 | 104 | n <- 20 105 | alpha <- .05 106 | UCL <- replicate(1000, expr = { 107 | x <- rchisq(n, df = 2) 108 | (n-1) * var(x) / qchisq(alpha, df = n-1) 109 | } ) 110 | sum(UCL > 4) 111 | mean(UCL > 4) 112 | 113 | 114 | ### Example 7.7 (Empirical Type I error rate) 115 | 116 | n <- 20 117 | alpha <- .05 118 | mu0 <- 500 119 | sigma <- 100 120 | 121 | m <- 10000 #number of replicates 122 | p <- numeric(m) #storage for p-values 123 | for (j in 1:m) { 124 | x <- rnorm(n, mu0, sigma) 125 | ttest <- t.test(x, alternative = "greater", mu = mu0) 126 | p[j] <- ttest$p.value 127 | } 128 | 129 | p.hat <- mean(p < alpha) 130 | se.hat <- sqrt(p.hat * (1 - p.hat) / m) 131 | print(c(p.hat, se.hat)) 132 | 133 | 134 | ### Example 7.8 (Skewness test of normality) 135 | 136 | n <- c(10, 20, 30, 50, 100, 500) #sample sizes 137 | cv <- qnorm(.975, 0, sqrt(6/n)) #crit. values for each n 138 | 139 | sk <- function(x) { 140 | #computes the sample skewness coeff. 141 | xbar <- mean(x) 142 | m3 <- mean((x - xbar)^3) 143 | m2 <- mean((x - xbar)^2) 144 | return( m3 / m2^1.5 ) 145 | } 146 | 147 | #n is a vector of sample sizes 148 | #we are doing length(n) different simulations 149 | 150 | p.reject <- numeric(length(n)) #to store sim. results 151 | m <- 10000 #num. repl. each sim. 
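# for each sample size n[i], estimate the empirical Type I error
# rate of the skewness test as the proportion of m normal samples
# for which |sk(x)| exceeds the critical value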
152 | 153 | for (i in 1:length(n)) { 154 | sktests <- numeric(m) #test decisions 155 | for (j in 1:m) { 156 | x <- rnorm(n[i]) 157 | #test decision is 1 (reject) or 0 158 | sktests[j] <- as.integer(abs(sk(x)) >= cv[i] ) 159 | } 160 | p.reject[i] <- mean(sktests) #proportion rejected 161 | } 162 | 163 | p.reject 164 | 165 | 166 | ### Example 7.9 (Empirical power) 167 | 168 | #set.seed(521) 169 | n <- 20 170 | m <- 1000 171 | mu0 <- 500 172 | sigma <- 100 173 | mu <- c(seq(450, 650, 10)) #alternatives 174 | M <- length(mu) 175 | power <- numeric(M) 176 | for (i in 1:M) { 177 | mu1 <- mu[i] 178 | pvalues <- replicate(m, expr = { 179 | #simulate under alternative mu1 180 | x <- rnorm(n, mean = mu1, sd = sigma) 181 | ttest <- t.test(x, 182 | alternative = "greater", mu = mu0) 183 | ttest$p.value } ) 184 | power[i] <- mean(pvalues <= .05) 185 | } 186 | 187 | se <- sqrt(power * (1-power) / m) 188 | df <- data.frame(mean=mu, power=power, upper=power+2*se, lower=power-2*se) 189 | 190 | library(ggplot2) 191 | ggplot(df, aes(x=mean, y=power)) + 192 | geom_line() + 193 | geom_vline(xintercept=500, lty=2) + 194 | geom_hline(yintercept=c(0,.05), lty=1:2) + 195 | geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.2, lwd=1.5) 196 | 197 | 198 | ### Example 7.10 (Power of the skewness test of normality) 199 | 200 | #set.seed(111) 201 | alpha <- .1 202 | n <- 30 203 | m <- 2500 204 | epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05)) 205 | N <- length(epsilon) 206 | pwr <- numeric(N) 207 | #critical value for the skewness test 208 | cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) 209 | 210 | for (j in 1:N) { #for each epsilon 211 | e <- epsilon[j] 212 | sktests <- numeric(m) 213 | for (i in 1:m) { #for each replicate 214 | sigma <- sample(c(1, 10), replace = TRUE, 215 | size = n, prob = c(1-e, e)) 216 | x <- rnorm(n, 0, sigma) 217 | sktests[i] <- as.integer(abs(sk(x)) >= cv) 218 | } 219 | pwr[j] <- mean(sktests) 220 | } 221 | 222 | se <- sqrt(pwr * (1-pwr) / m) 223 | df <- data.frame(epsilon=epsilon, power=pwr, upper=pwr+2*se, lower=pwr-2*se) 224 | 225 | #plot power vs epsilon 226 | library(ggplot2) 227 | ggplot(df, aes(x=epsilon, y=power)) + 228 | geom_line() + labs(x=bquote(epsilon)) + 229 | geom_hline(yintercept=.1, lty=2) + 230 | geom_pointrange(aes(ymin=lower, ymax=upper)) 231 | 232 | 233 | ### Example 7.11 (Power comparison of tests of normality) 234 | 235 | #only one loop, for epsilon=0.1, was shown in the text 236 | #the simulation below takes several minutes to run 237 | 238 | # initialize input and output 239 | library(energy) 240 | alpha <- .1 241 | n <- 30 242 | m <- 500 #try small m for a trial run 243 | test1 <- test2 <- test3 <- numeric(m) 244 | 245 | #critical value for the skewness test 246 | cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) 247 | sim <- matrix(0, 11, 4) 248 | 249 | # estimate power 250 | for (i in 0:10) { 251 | epsilon <- i * .1 252 | for (j in 1:m) { 253 | e <- epsilon 254 | sigma <- sample(c(1, 10), replace = TRUE, 255 | size = n, prob = c(1-e, e)) 256 | x <- rnorm(n, 0, sigma) 257 | test1[j] <- as.integer(abs(sk(x)) >= cv) 258 | test2[j] <- as.integer( 259 | shapiro.test(x)$p.value <= alpha) 260 | test3[j] <- as.integer( 261 | mvnorm.etest(x, R=200)$p.value <= alpha) 262 | } 263 | print(c(epsilon, mean(test1), mean(test2), mean(test3))) 264 | sim[i+1, ] <- c(epsilon, mean(test1), mean(test2), mean(test3)) 265 | } 266 | detach(package:energy) 267 | 268 | # plot the empirical estimates of power 269 | plot(sim[,1], sim[,2], ylim = c(0, 1), type = "l", 270 | 
xlab = bquote(epsilon), ylab = "power") 271 | lines(sim[,1], sim[,3], lty = 2) 272 | lines(sim[,1], sim[,4], lty = 4) 273 | abline(h = alpha, lty = 3) 274 | legend("topright", 1, c("skewness", "S-W", "energy"), 275 | lty = c(1,2,4), inset = .02) 276 | 277 | 278 | ### Example 7.12 (Count Five test statistic) 279 | 280 | x1 <- rnorm(20, 0, sd = 1) 281 | x2 <- rnorm(20, 0, sd = 1.5) 282 | y <- c(x1, x2) 283 | 284 | group <- rep(1:2, each = length(x1)) 285 | boxplot(y ~ group, boxwex = .3, xlim = c(.5, 2.5), main = "") 286 | points(group, y) 287 | 288 | # now identify the extreme points 289 | range(x1) 290 | range(x2) 291 | 292 | i <- which(x1 < min(x2)) 293 | j <- which(x2 > max(x1)) 294 | 295 | x1[i] 296 | x2[j] 297 | 298 | out1 <- sum(x1 > max(x2)) + sum(x1 < min(x2)) 299 | out2 <- sum(x2 > max(x1)) + sum(x2 < min(x1)) 300 | max(c(out1, out2)) 301 | 302 | 303 | ### Example 7.13 (Count Five test statistic, cont.) 304 | 305 | maxout <- function(x, y) { 306 | X <- x - mean(x) 307 | Y <- y - mean(y) 308 | outx <- sum(X > max(Y)) + sum(X < min(Y)) 309 | outy <- sum(Y > max(X)) + sum(Y < min(X)) 310 | return(max(c(outx, outy))) 311 | } 312 | 313 | n1 <- n2 <- 20 314 | mu1 <- mu2 <- 0 315 | sigma1 <- sigma2 <- 1 316 | m <- 1000 317 | 318 | # generate samples under H0 319 | stat <- replicate(m, expr={ 320 | x <- rnorm(n1, mu1, sigma1) 321 | y <- rnorm(n2, mu2, sigma2) 322 | maxout(x, y) 323 | }) 324 | print(cumsum(table(stat)) / m) 325 | print(quantile(stat, c(.8, .9, .95))) 326 | 327 | 328 | ### Example 7.14 (Count Five test) 329 | 330 | count5test <- function(x, y) { 331 | X <- x - mean(x) 332 | Y <- y - mean(y) 333 | outx <- sum(X > max(Y)) + sum(X < min(Y)) 334 | outy <- sum(Y > max(X)) + sum(Y < min(X)) 335 | # return 1 (reject) or 0 (do not reject H0) 336 | return(as.integer(max(c(outx, outy)) > 5)) 337 | } 338 | 339 | n1 <- n2 <- 20 340 | mu1 <- mu2 <- 0 341 | sigma1 <- sigma2 <- 1 342 | m <- 10000 343 | tests <- replicate(m, expr = { 344 | x <- rnorm(n1, mu1, sigma1) 345 | y <- rnorm(n2, mu2, sigma2) 346 | x <- x - mean(x) #centered by sample mean 347 | y <- y - mean(y) 348 | count5test(x, y) 349 | } ) 350 | 351 | alphahat <- mean(tests) 352 | print(alphahat) 353 | 354 | 355 | ### Example 7.15 (Count Five test, cont.) 356 | 357 | n1 <- 20 358 | n2 <- 30 359 | mu1 <- mu2 <- 0 360 | sigma1 <- sigma2 <- 1 361 | m <- 10000 362 | 363 | alphahat <- mean(replicate(m, expr={ 364 | x <- rnorm(n1, mu1, sigma1) 365 | y <- rnorm(n2, mu2, sigma2) 366 | x <- x - mean(x) #centered by sample mean 367 | y <- y - mean(y) 368 | count5test(x, y) 369 | })) 370 | 371 | print(alphahat) 372 | 373 | 374 | ### Example 7.16 (Count Five, cont.) 375 | 376 | # generate samples under H1 to estimate power 377 | sigma1 <- 1 378 | sigma2 <- 1.5 379 | 380 | power <- mean(replicate(m, expr={ 381 | x <- rnorm(20, 0, sigma1) 382 | y <- rnorm(20, 0, sigma2) 383 | count5test(x, y) 384 | })) 385 | 386 | print(power) 387 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chNumerical.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. 
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 13 ### 9 | ### Introduction to Numerical Methods in R ### 10 | ####################################################### 11 | 12 | # packages to install: gsl 13 | 14 | ### Example 13.1 (Identical and nearly equal) 15 | 16 | isTRUE(all.equal(.2, .3 - .1)) 17 | all.equal(.2, .3) #not a logical value 18 | isTRUE(all.equal(.2, .3)) #always a logical value 19 | 20 | x <- 1:4 21 | y <- 2 22 | y == 2 23 | x == y #not necessarily a single logical value 24 | identical(x, y) #always a single logical value 25 | identical(y, 2) 26 | 27 | 28 | ### Example 13.2 (Ratio of two large numbers) 29 | 30 | n <- 400 31 | (gamma((n-1)/2) / (sqrt(pi) * gamma((n-2)/2))) 32 | exp(lgamma((n-1)/2) - lgamma((n-2)/2)) / sqrt(pi) 33 | 34 | 35 | ### Example 13.3 (Taylor expansion) 36 | 37 | system.time({ 38 | for (i in 1:1000) { 39 | a <- rep(0, 24) 40 | a0 <- pi / 6 41 | a2 <- a0 * a0 42 | a[1] <- -a0^3 / 6 43 | for (i in 2:24) 44 | a[i] <- - a2 * a[i-1] / ((2*i+1)*(2*i)) 45 | a0 + sum(a)} 46 | }) 47 | 48 | system.time({ 49 | for (i in 1:1000) { 50 | K <- 2 * (0:24) + 1 51 | i <- rep(c(1, -1), length=25) 52 | sum(i * (pi/6)^K / factorial(K))} 53 | }) 54 | 55 | 56 | ### Example 13.4 (Derivative of zeta function) 57 | 58 | zeta.deriv <- function(a) { 59 | z <- a - 1 60 | # Stieltjes constants gamma_k for k=1:5 61 | g <- c( 62 | -.7281584548367672e-1, 63 | -.9690363192872318e-2, 64 | .2053834420303346e-2, 65 | .2325370065467300e-2, 66 | .7933238173010627e-3) 67 | i <- c(-1, 1, -1, 1, -1) 68 | n <- 0:4 69 | -1/z^2 + sum(i * g * z^n / factorial(n)) 70 | } 71 | 72 | 73 | ### Example 13.5 (Derivative of zeta function, cont.) 74 | 75 | library(gsl) #for zeta function 76 | z <- c(1.001, 1.01, 1.5, 2, 3, 5) 77 | h <- .Machine$double.eps^0.5 78 | dz <- dq <- rep(0, length(z)) 79 | for (i in 1:length(z)) { 80 | v <- z[i] + h 81 | h <- v - z[i] 82 | a0 <- z[i] - h 83 | if (a0 < 1) a0 <- (1 + z[i])/2 84 | a1 <- z[i] + h 85 | dq[i] <- (zeta(a1) - zeta(a0)) / (a1 - a0) 86 | dz[i] <- zeta.deriv(z[i]) 87 | } 88 | 89 | h 90 | 91 | cbind(z, dz, dq) 92 | 93 | 94 | ### Example 13.6 (Solving f(x)=0) 95 | 96 | f <- function(y, a, n) { 97 | a^2 + y^2 + 2*a*y/(n-1) - (n-2) 98 | } 99 | 100 | a <- 0.5 101 | n <- 20 102 | b0 <- 0 103 | b1 <- 5*n 104 | 105 | #solve using bisection 106 | it <- 0 107 | eps <- .Machine$double.eps^0.25 108 | r <- seq(b0, b1, length=3) 109 | y <- c(f(r[1], a, n), f(r[2], a, n), f(r[3], a, n)) 110 | if (y[1] * y[3] > 0) 111 | stop("f does not have opposite sign at endpoints") 112 | 113 | while(it < 1000 && abs(y[2]) > eps) { 114 | it <- it + 1 115 | if (y[1]*y[2] < 0) { 116 | r[3] <- r[2] 117 | y[3] <- y[2] 118 | } else { 119 | r[1] <- r[2] 120 | y[1] <- y[2] 121 | } 122 | r[2] <- (r[1] + r[3]) / 2 123 | y[2] <- f(r[2], a=a, n=n) 124 | print(c(r[1], y[1], y[3]-y[2])) 125 | } 126 | 127 | 128 | ### Example 13.7 (Solving f(x)=0 with Brent's method: uniroot) 129 | 130 | a <- 0.5 131 | n <- 20 132 | out <- uniroot(function(y) { 133 | a^2 + y^2 + 2*a*y/(n-1) - (n-2) }, 134 | lower = 0, upper = n*5) 135 | unlist(out) 136 | uniroot(function(y) {a^2 + y^2 + 2*a*y/(n-1) - (n-2)}, 137 | interval = c(-n*5, 0))$root 138 | 139 | 140 | ### Example 13.8 (Numerical integration with integrate) 141 | 142 | f <- function(y, N, r, rho) { 143 | (cosh(y) - rho * r)^(1 - N) 144 | } 145 | integrate(f, lower=0, upper=Inf, 146 | rel.tol=.Machine$double.eps^0.25, 147 | N=10, r=0.5, rho=0.2) 
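# evaluate the same integral over a grid of rho values in (-1, 1)
# and plot the integral value as a function of rho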
148 | 
149 | ro <- seq(-.99, .99, .01)
150 | v <- rep(0, length(ro))
151 | for (i in 1:length(ro)) {
152 | v[i] <- integrate(f, lower=0, upper=Inf,
153 | rel.tol=.Machine$double.eps^0.25,
154 | N=10, r=0.5, rho=ro[i])$value
155 | }
156 | plot(ro, v, type="l", xlab=expression(rho),
157 | ylab="Integral Value (n=10, r=0.5)")
158 | 
159 | 
160 | ### Example 13.9 (Density of sample correlation coefficient)
161 | 
162 | .dcorr <- function(r, N, rho=0) {
163 | # compute the density function of sample correlation
164 | if (abs(r) > 1 || abs(rho) > 1) return (0)
165 | if (N < 4) return (NA)
166 | 
167 | if (isTRUE(all.equal(rho, 0.0))) {
168 | a <- exp(lgamma((N - 1)/2) - lgamma((N - 2)/2)) /
169 | sqrt(pi)
170 | return (a * (1 - r^2)^((N - 4)/2))
171 | }
172 | 
173 | # if rho not 0, need to integrate
174 | f <- function(w, R, N, rho)
175 | (cosh(w) - rho * R)^(1 - N)
176 | 
177 | #need to insert some error checking here
178 | i <- integrate(f, lower=0, upper=Inf,
179 | R=r, N=N, rho=rho)$value
180 | c1 <- (N - 2) * (1 - rho^2)^((N - 1)/2)
181 | c2 <- (1 - r^2)^((N - 4) / 2) / pi
182 | return(c1 * c2 * i)
183 | }
184 | 
185 | r <- as.matrix(seq(-1, 1, .01))
186 | d1 <- apply(r, 1, .dcorr, N=10, rho=.0)
187 | d2 <- apply(r, 1, .dcorr, N=10, rho=.5)
188 | d3 <- apply(r, 1, .dcorr, N=10, rho=-.5)
189 | plot(r, d2, type="l", lty=2, lwd=2, ylab="density")
190 | lines(r, d1, lwd=2)
191 | lines(r, d3, lty=4, lwd=2)
192 | legend("top", inset=.02,
193 | c("rho = 0", "rho = 0.5", "rho = -0.5"), lty=c(1,2,4), lwd=2)
194 | 
195 | 
196 | ### Example 13.10 (MLE using mle)
197 | 
198 | #the observed sample
199 | y <- c(0.04304550, 0.50263474)
200 | 
201 | mlogL <- function(theta=1) {
202 | #minus log-likelihood of exp. density, rate theta
203 | return( - (length(y) * log(theta) - theta * sum(y)))
204 | }
205 | 
206 | library(stats4)
207 | fit <- mle(mlogL)
208 | summary(fit)
209 | 
210 | # Alternatively, the initial value for the optimizer could
211 | # be supplied in the call to mle; two examples are
212 | 
213 | mle(mlogL, start=list(theta=1))
214 | mle(mlogL, start=list(theta=mean(y)))
215 | 
216 | ### Application: Evaluating an Expected Value
217 | 
218 | y <- VGAM::rpareto(1000, scale=1, shape=3)
219 | hist(y, prob=TRUE, breaks="scott", main="", ylim=c(0,3))
220 | curve(VGAM::dpareto(x, scale=1, shape=3), add=TRUE)
221 | 
222 | a <- 1
223 | s <- 1
224 | b <- 0.5
225 | 
226 | ### Example 13.11 (Numerical integration)
227 | 
228 | f1 <- function(x, y, s, a, b) {
229 | # the integrand function: |y-x|^b f(x)
230 | (abs(y-x))^b * a * s^a / x^(a+1)
231 | }
232 | 
233 | integrate(f1, lower=s, upper=Inf, y=2, s=1, a=1, b=b)
234 | 
235 | ### Example 13.12 (Direct evaluation)
236 | 
237 | g2 <- function(y, s, b) {
238 | # Compute E|y-X|^b for Pareto(I) with a=1
239 | y0 <- (y - s)/y
240 | c0 <- gsl::hyperg_2F1(b+1, 1, b+2, y0)
241 | ((y - s)^b) - s*b*(y^(b-1)) *
242 | ((y0^b)/b + c0*(y0^(b+1))/(b+1)) +
243 | s*y^(b-1) * beta(b+1,1-b)
244 | }
245 | 
246 | g2(2, s, b)
247 | 
248 | ### Example 13.13 (Plot of expected distance function)
249 | 
250 | p <- c(ppoints(50)/100, ppoints(50))
251 | x <- VGAM::qpareto(p, scale=s, shape=1)
252 | ex <- g2(x, s=s, b=b)
253 | plot(log(x), log(ex), cex=.25, type="l")
254 | for (i in 1:length(x)) {
255 | y <- x[i]
256 | zi <- integrate(f1, lower=s, upper=Inf,
257 | subdivisions=200, rel.tol=.Machine$double.eps^.5,
258 | stop.on.error=FALSE, y=y, s=s, a=1, b=b)
259 | if (zi$message == "OK")
260 | points(log(y), log(zi$value), col=2, cex=.25)
261 | else print(paste("y=", y, zi$message))
262 | }
263 | 
264 | 
integrate(f1, lower=s, upper=Inf, y=s, s=s, a=1, b=b) 265 | g2(s, s, b) 266 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chOptim.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 14 ### 9 | ### Optimization ### 10 | ####################################################### 11 | 12 | 13 | ### Example 14.1 (One-dimensional optimization with optimize) 14 | 15 | f <- function(x) 16 | log(x + log(x))/log(1+x) 17 | 18 | curve(f(x), from = 2, to = 15, ylab = "f(x)") 19 | 20 | optimize(f, lower = 4, upper = 8, maximum = TRUE) 21 | 22 | 23 | ### Example 14.2 (MLE: Gamma distribution) 24 | 25 | m <- 20000 26 | est <- matrix(0, m, 2) 27 | n <- 200 28 | r <- 5 29 | lambda <- 2 30 | 31 | obj <- function(lambda, xbar, logx.bar) { 32 | digamma(lambda * xbar) - logx.bar - log(lambda) 33 | } 34 | 35 | for (i in 1:m) { 36 | x <- rgamma(n, shape=r, rate=lambda) 37 | xbar <- mean(x) 38 | u <- uniroot(obj, lower = .001, upper = 10e5, 39 | xbar = xbar, logx.bar = mean(log(x))) 40 | lambda.hat <- u$root 41 | r.hat <- xbar * lambda.hat 42 | est[i, ] <- c(r.hat, lambda.hat) 43 | } 44 | 45 | ML <- colMeans(est) 46 | 47 | hist(est[, 1], breaks="scott", freq=FALSE, 48 | xlab="r", main="") 49 | points(ML[1], 0, cex=1.5, pch=20) 50 | hist(est[, 2], breaks="scott", freq=FALSE, 51 | xlab=bquote(lambda), main="") 52 | points(ML[2], 0, cex=1.5, pch=20) 53 | 54 | 55 | ### Example 14.3 (Two-dimensional optimization with optim) 56 | 57 | LL <- function(theta, sx, slogx, n) { 58 | r <- theta[1] 59 | lambda <- theta[2] 60 | loglik <- n * r * log(lambda) + (r - 1) * slogx - 61 | lambda * sx - n * log(gamma(r)) 62 | - loglik 63 | } 64 | 65 | n <- 200 66 | r <- 5; lambda <- 2 67 | x <- rgamma(n, shape=r, rate=lambda) 68 | 69 | optim(c(1,1), LL, sx=sum(x), slogx=sum(log(x)), n=n) 70 | 71 | mlests <- replicate(20000, expr = { 72 | x <- rgamma(200, shape = 5, rate = 2) 73 | optim(c(1,1), LL, sx=sum(x), slogx=sum(log(x)), n=n)$par 74 | }) 75 | colMeans(t(mlests)) 76 | 77 | 78 | ### Example 14.4 (MLE for a quadratic form) 79 | 80 | LL <- function(lambda, y) { 81 | lambda3 <- 1 - sum(lambda) 82 | f1 <- dgamma(y, shape=1/2, rate=1/(2*lambda[1])) 83 | f2 <- dgamma(y, shape=1/2, rate=1/(2*lambda[2])) 84 | f3 <- dgamma(y, shape=1/2, rate=1/(2*lambda3)) 85 | f <- f1/3 + f2/3 + f3/3 #density of mixture 86 | #returning -loglikelihood 87 | return( -sum(log(f))) 88 | } 89 | 90 | set.seed(543) 91 | m <- 2000 92 | lambda <- c(.6, .25, .15) #rate is 1/(2lambda) 93 | lam <- sample(lambda, size = 2000, replace = TRUE) 94 | y <- rgamma(m, shape = .5, rate = 1/(2*lam)) 95 | 96 | opt <- optim(c(.5,.3), LL, y=y) 97 | theta <- c(opt$par, 1 - sum(opt$par)) 98 | 99 | as.data.frame(unlist(opt)) 100 | 101 | theta 102 | 103 | 104 | ### Example 14.5 (EM algorithm for a mixture model) 105 | 106 | set.seed(543) 107 | lambda <- c(.6, .25, .15) #rate is 1/(2lambda) 108 | lam <- sample(lambda, size = 2000, replace = TRUE) 109 | y <- rgamma(m, shape = .5, rate = 1/(2*lam)) 110 | 111 | N <- 10000 #max. number of iterations 112 | L <- c(.5, .4, .1) #initial est. 
for lambdas 113 | tol <- .Machine$double.eps^0.5 114 | L.old <- L + 1 115 | 116 | for (j in 1:N) { 117 | f1 <- dgamma(y, shape=1/2, rate=1/(2*L[1])) 118 | f2 <- dgamma(y, shape=1/2, rate=1/(2*L[2])) 119 | f3 <- dgamma(y, shape=1/2, rate=1/(2*L[3])) 120 | py <- f1 / (f1 + f2 + f3) #posterior prob y from 1 121 | qy <- f2 / (f1 + f2 + f3) #posterior prob y from 2 122 | ry <- f3 / (f1 + f2 + f3) #posterior prob y from 3 123 | 124 | mu1 <- sum(y * py) / sum(py) #update means 125 | mu2 <- sum(y * qy) / sum(qy) 126 | mu3 <- sum(y * ry) / sum(ry) 127 | L <- c(mu1, mu2, mu3) #update lambdas 128 | L <- L / sum(L) 129 | 130 | if (sum(abs(L - L.old)/L.old) < tol) break 131 | L.old <- L 132 | } 133 | 134 | print(list(lambda = L/sum(L), iter = j, tol = tol)) 135 | 136 | 137 | ### Example 14.6 (Simplex algorithm) 138 | 139 | library(boot) #for simplex function 140 | A1 <- rbind(c(-2, 1, 1), c(4, -1, 3)) 141 | b1 <- c(1, 3) 142 | a <- c(2, 2, 3) 143 | simplex(a = a, A1 = A1, b1 = b1, maxi = TRUE) 144 | detach(package:boot) 145 | 146 | 147 | ### Example 14.7 (Solving the Morra game) 148 | 149 | solve.game <- function(A) { 150 | #solve the two player zero-sum game by simplex method 151 | #optimize for player 1, then player 2 152 | #maximize v subject to ... 153 | #let x strategies 1:m, and put v as extra variable 154 | #A1, the <= constraints 155 | # 156 | min.A <- min(A) 157 | A <- A - min.A #so that v >= 0 158 | max.A <- max(A) 159 | A <- A / max(A) 160 | m <- nrow(A) 161 | n <- ncol(A) 162 | it <- n^3 163 | a <- c(rep(0, m), 1) #objective function 164 | A1 <- -cbind(t(A), rep(-1, n)) #constraints <= 165 | b1 <- rep(0, n) 166 | A3 <- t(as.matrix(c(rep(1, m), 0))) #constraints sum(x)=1 167 | b3 <- 1 168 | sx <- simplex(a=a, A1=A1, b1=b1, A3=A3, b3=b3, 169 | maxi=TRUE, n.iter=it) 170 | #the 'solution' is [x1,x2,...,xm | value of game] 171 | # 172 | #minimize v subject to ... 173 | #let y strategies 1:n, with v as extra variable 174 | a <- c(rep(0, n), 1) #objective function 175 | A1 <- cbind(A, rep(-1, m)) #constraints <= 176 | b1 <- rep(0, m) 177 | A3 <- t(as.matrix(c(rep(1, n), 0))) #constraints sum(y)=1 178 | b3 <- 1 179 | sy <- simplex(a=a, A1=A1, b1=b1, A3=A3, b3=b3, 180 | maxi=FALSE, n.iter=it) 181 | 182 | soln <- list("A" = A * max.A + min.A, 183 | "x" = sx$soln[1:m], 184 | "y" = sy$soln[1:n], 185 | "v" = sx$soln[m+1] * max.A + min.A) 186 | soln 187 | } 188 | 189 | 190 | #enter the payoff matrix 191 | A <- matrix(c( 0,-2,-2,3,0,0,4,0,0, 192 | 2,0,0,0,-3,-3,4,0,0, 193 | 2,0,0,3,0,0,0,-4,-4, 194 | -3,0,-3,0,4,0,0,5,0, 195 | 0,3,0,-4,0,-4,0,5,0, 196 | 0,3,0,0,4,0,-5,0,-5, 197 | -4,-4,0,0,0,5,0,0,6, 198 | 0,0,4,-5,-5,0,0,0,6, 199 | 0,0,4,0,0,5,-6,-6,0), 9, 9) 200 | 201 | library(boot) #needed for simplex function 202 | 203 | s <- solve.game(A) 204 | round(cbind(s$x, s$y), 7) 205 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chPerm.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. 
Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 10 ### 9 | ### Permutation Tests ### 10 | ####################################################### 11 | 12 | # packages to install: energy, yaImpute 13 | 14 | ### Example 10.1 (Permutation distribution of a statistic) 15 | 16 | attach(chickwts) 17 | x <- sort(weight[feed == "soybean"]) 18 | y <- sort(weight[feed == "linseed"]) 19 | detach(chickwts) 20 | 21 | R <- 999 #number of replicates 22 | z <- c(x, y) #pooled sample 23 | K <- 1:26 24 | reps <- numeric(R) #storage for replicates 25 | t0 <- t.test(x, y)$statistic 26 | 27 | for (i in 1:R) { 28 | #generate indices k for the first sample 29 | k <- sample(K, size = 14, replace = FALSE) 30 | x1 <- z[k] 31 | y1 <- z[-k] #complement of x1 32 | reps[i] <- t.test(x1, y1)$statistic 33 | } 34 | p <- mean(c(t0, reps) >= t0) 35 | p 36 | 37 | hist(reps, main = "", freq = FALSE, xlab = "T (p = 0.202)", 38 | breaks = "scott") 39 | points(t0, 0, cex = 1, pch = 16) #observed T 40 | 41 | 42 | ### Example 10.2 (Permutation distribution of the K-S statistic) 43 | 44 | # continues Example 10.1 45 | R <- 999 #number of replicates 46 | z <- c(x, y) #pooled sample 47 | K <- 1:26 48 | D <- numeric(R) #storage for replicates 49 | options(warn = -1) 50 | D0 <- ks.test(x, y, exact = FALSE)$statistic 51 | for (i in 1:R) { 52 | #generate indices k for the first sample 53 | k <- sample(K, size = 14, replace = FALSE) 54 | x1 <- z[k] 55 | y1 <- z[-k] #complement of x1 56 | D[i] <- ks.test(x1, y1, exact = FALSE)$statistic 57 | } 58 | p <- mean(c(D0, D) >= D0) 59 | options(warn = 0) 60 | p 61 | 62 | hist(D, main = "", freq = FALSE, xlab = "D (p = 0.46)", 63 | breaks = "scott") 64 | points(D0, 0, cex = 1, pch = 16) #observed D 65 | 66 | 67 | ### Example 10.3 (Example 10.2, cont.) 
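## Note: the samples change in this example to sunflower and linseed
## (12 observations each), so the pooled sample z and index set K from
## Example 10.1 no longer apply and are re-formed below.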
68 | 
69 | attach(chickwts)
70 | x <- sort(weight[feed == "sunflower"])
71 | y <- sort(weight[feed == "linseed"])
72 | detach(chickwts)
73 | 
74 | summary(cbind(x, y))
z <- c(x, y)  #re-form the pooled sample (12 sunflower + 12 linseed)
K <- 1:24     #index set for the new pooled sample
75 | options(warn = -1)
76 | D0 <- ks.test(x, y, exact = FALSE)$statistic
77 | for (i in 1:R) {
78 | #generate indices k for the first sample
79 | k <- sample(K, size = 12, replace = FALSE)
80 | x1 <- z[k]
81 | y1 <- z[-k] #complement of x1
82 | D[i] <- ks.test(x1, y1, exact = FALSE)$statistic
83 | }
84 | p <- mean(c(D0, D) >= D0)
85 | options(warn = 0)
86 | p
87 | 
88 | 
89 | ### Example 10.4 (Finding nearest neighbors)
90 | ### using yaImpute::ann
91 | 
92 | set.seed(439)
93 | library(yaImpute) #for ann function
94 | 
95 | #generate a small multivariate data set
96 | x <- matrix(rnorm(12), 3, 4)
97 | y <- matrix(rnorm(12), 3, 4)
98 | z <- rbind(x, y)
99 | k <- nrow(z) #number of nearest neighbors desired
100 | 
101 | ## Do an exact kd-tree search
102 | kd.exact <- ann(ref=z, target=z,
103 | tree.type="kd", k=k, verbose=FALSE)
104 | kd.exact$knnIndexDist[,1:k] #NN indices
105 | round(sqrt(kd.exact$knnIndexDist[,-(1:k)]),2) #Euclidean distances
106 | 
107 | ## Do an approximate kd-tree search
108 | kd.approx <- ann(ref=z, target=z,
109 | tree.type="kd", k=k, eps=100, verbose=FALSE)
110 | kd.approx$knnIndexDist[,1:k] #NN indices
111 | detach(package:yaImpute)
112 | 
113 | 
114 | ### Example 10.5 (Nearest neighbor statistic)
115 | ### using yaImpute::ann
116 | 
117 | library(yaImpute)
118 | attach(chickwts)
119 | x <- weight[feed == "sunflower"]
120 | y <- weight[feed == "linseed"]
121 | z <- as.matrix(c(x, y))
122 | detach(chickwts)
123 | 
124 | k <- 4 #want first 3 nearest neighbors
125 | NN <- ann(ref=z, target=z, tree.type="kd", k=k, verbose=FALSE)
126 | idx <- NN$knnIndexDist[,1:k]
127 | nn.idx <- idx[,-1] #first NN is in column 2
128 | 
129 | block1 <- nn.idx[1:12, ]
130 | block2 <- nn.idx[13:24, ]
131 | i1 <- sum(block1 < 12.5)
132 | i2 <- sum(block2 > 12.5)
133 | 
134 | c(i1, i2)
135 | 
136 | detach(package:yaImpute)
137 | 
138 | 
139 | ### Example 10.6 (Nearest neighbor test)
140 | ### using yaImpute::ann
141 | 
142 | library(boot)
143 | #continues the previous example
144 | 
145 | ## function to return the matrix of indices NN_j of nearest neighbors
146 | NN.idx <- function(x, tree.type="kd", k=NROW(x)) {
147 | x <- as.matrix(x)
148 | k <- min(c(k+1, NROW(x)))
149 | NN <- yaImpute::ann(ref=x, target=x,
150 | tree.type="kd", k=k, verbose=FALSE)
151 | idx <- NN$knnIndexDist[,1:k]
152 | nn.idx <- idx[,-1] #first NN is in column 2
153 | row.names(nn.idx) <- idx[,1]
154 | nn.idx
155 | }
156 | 
157 | ## function to compute the NN statistic T(n,3)
158 | Tn3 <- function(z, ix=1:NROW(z), sizes) {
159 | z <- as.matrix(z)
160 | n1 <- sizes[1]
161 | n2 <- sizes[2]
162 | n <- n1 + n2
163 | z <- as.matrix(z[ix, ])
164 | nn.idx <- NN.idx(z, k=3)
165 | block1 <- nn.idx[1:n1, ]
166 | block2 <- nn.idx[(n1+1):n, ]
167 | i1 <- sum(block1 < n1 + .5)
168 | i2 <- sum(block2 > n1 + .5)
169 | return((i1 + i2) / (3 * n))
170 | }
171 | 
172 | attach(chickwts)
173 | x <- weight[feed == "sunflower"]
174 | y <- weight[feed == "linseed"]
175 | z <- c(x, y)
176 | detach(chickwts)
177 | 
178 | N <- c(NROW(x), NROW(y))
179 | 
180 | boot.obj <- boot(data = z, statistic = Tn3,
181 | sim = "permutation", R = 999, sizes = N)
182 | boot.obj
183 | 
184 | tb <- c(boot.obj$t, boot.obj$t0)
185 | mean(tb >= boot.obj$t0)
186 | 
187 | hist(tb, freq=FALSE, main="",
188 | xlab="replicates of T(n,3) statistic")
189 | points(boot.obj$t0, 0, cex=1, pch=16)
190 | 
191 | 
192 | 
193 | 
194 | ### 
Example 10.7 (Two-sample energy statistic) 195 | 196 | edist.2 <- function(x, ix, sizes) { 197 | # computes the e-statistic between 2 samples 198 | # x: Euclidean distances of pooled sample 199 | # sizes: vector of sample sizes 200 | # ix: a permutation of row indices of x 201 | 202 | dst <- x 203 | n1 <- sizes[1] 204 | n2 <- sizes[2] 205 | ii <- ix[1:n1] 206 | jj <- ix[(n1+1):(n1+n2)] 207 | w <- n1 * n2 / (n1 + n2) 208 | 209 | # permutation applied to rows & cols of dist. matrix 210 | m11 <- sum(dst[ii, ii]) / (n1 * n1) 211 | m22 <- sum(dst[jj, jj]) / (n2 * n2) 212 | m12 <- sum(dst[ii, jj]) / (n1 * n2) 213 | e <- w * ((m12 + m12) - (m11 + m22)) 214 | return (e) 215 | } 216 | 217 | d <- 3 218 | a <- 2 / sqrt(d) 219 | x <- matrix(rnorm(20 * d), nrow = 20, ncol = d) 220 | y <- matrix(rnorm(10 * d, a, 1), nrow = 10, ncol = d) 221 | z <- rbind(x, y) 222 | dst <- as.matrix(dist(z)) 223 | 224 | edist.2(dst, 1:30, sizes = c(20, 10)) 225 | 226 | 227 | ### Example 10.8 (Two-sample energy test) 228 | 229 | library(boot) #for boot function 230 | dst <- as.matrix(dist(z)) 231 | N <- c(20, 10) 232 | 233 | boot.obj <- boot(data = dst, statistic = edist.2, 234 | sim = "permutation", R = 999, sizes = N) 235 | boot.obj 236 | 237 | #calculate the ASL 238 | e <- boot.obj$t0 239 | tb <- c(e, boot.obj$t) 240 | mean(tb >= e) 241 | 242 | hist(tb, main = "", breaks="scott", freq=FALSE, 243 | xlab="Replicates of e") 244 | points(e, 0, cex=1, pch=16) 245 | 246 | 247 | #energy test applied under F=G 248 | d <- 3 249 | a <- 0 250 | x <- matrix(rnorm(20 * d), nrow = 20, ncol = d) 251 | y <- matrix(rnorm(10 * d, a, 1), nrow = 10, ncol = d) 252 | z <- rbind(x, y) 253 | dst <- as.matrix(dist(z)) 254 | 255 | N <- c(20, 10) 256 | dst <- as.matrix(dist(z)) 257 | boot.obj <- boot(data = dst, statistic = edist.2, 258 | sim="permutation", R=999, sizes=N) 259 | boot.obj 260 | 261 | #calculate the ASL 262 | e <- boot.obj$t0 263 | E <- c(boot.obj$t, e) 264 | mean(E >= e) 265 | 266 | hist(E, main = "", breaks="scott", 267 | xlab="Replicates of e", freq=FALSE) 268 | points(e, 0, cex=1, pch=16) 269 | 270 | ### Example 10.9 (k-sample energy distances) 271 | 272 | z <- iris[ , 1:4] 273 | dst <- dist(z) 274 | energy::edist(dst, sizes = c(50, 50, 50), distance = TRUE) 275 | 276 | ### Example 10.10 (Distance Components (disco)) 277 | 278 | set.seed(413) 279 | energy::disco(iris[ , 1:4], factors = iris$Species, R = 999) 280 | 281 | ### Example 10.11 (Power Comparison) 282 | 283 | # results of several simulations summarized in Table 284 | 285 | ### Example 10.12 (Distance covariance statistic) 286 | 287 | dCov <- function(x, y) { 288 | x <- as.matrix(x) 289 | y <- as.matrix(y) 290 | n <- nrow(x) 291 | m <- nrow(y) 292 | if (n != m || n < 2) stop("Sample sizes must agree") 293 | if (! (all(is.finite(c(x, y))))) 294 | stop("Data contains missing or infinite values") 295 | 296 | Akl <- function(x) { 297 | d <- as.matrix(dist(x)) 298 | m <- rowMeans(d) 299 | M <- mean(d) 300 | a <- sweep(d, 1, m) 301 | b <- sweep(a, 2, m) 302 | return(b + M) 303 | } 304 | A <- Akl(x) 305 | B <- Akl(y) 306 | dCov <- sqrt(mean(A * B)) 307 | dCov 308 | } 309 | 310 | z <- as.matrix(iris[1:50, 1:4]) 311 | x <- z[ , 1:2] 312 | y <- z[ , 3:4] 313 | # compute the observed statistic 314 | dCov(x, y) 315 | 316 | ### Example 10.13 (Distance correlation statistic) 317 | 318 | DCOR <- function(x, y) { 319 | x <- as.matrix(x) 320 | y <- as.matrix(y) 321 | n <- nrow(x) 322 | m <- nrow(y) 323 | if (n != m || n < 2) stop("Sample sizes must agree") 324 | if (! 
(all(is.finite(c(x, y))))) 325 | stop("Data contains missing or infinite values") 326 | Akl <- function(x) { 327 | d <- as.matrix(dist(x)) 328 | m <- rowMeans(d) 329 | M <- mean(d) 330 | a <- sweep(d, 1, m) 331 | b <- sweep(a, 2, m) 332 | return(b + M) 333 | } 334 | A <- Akl(x) 335 | B <- Akl(y) 336 | dCov <- sqrt(mean(A * B)) 337 | dVarX <- sqrt(mean(A * A)) 338 | dVarY <- sqrt(mean(B * B)) 339 | dCor <- sqrt(dCov / sqrt(dVarX * dVarY)) 340 | list(dCov=dCov, dCor=dCor, dVarX=dVarX, dVarY=dVarY) 341 | } 342 | 343 | z <- as.matrix(iris[1:50, 1:4]) 344 | x <- z[ , 1:2] 345 | y <- z[ , 3:4] 346 | 347 | DCOR(x, y) 348 | 349 | 350 | ### Example 10.14 (Distance covariance test) 351 | 352 | ndCov2 <- function(z, ix, dims) { 353 | p <- dims[1] 354 | q1 <- p + 1 355 | d <- p + dims[2] 356 | x <- z[ , 1:p] #leave x as is 357 | y <- z[ix, q1:d] #permute rows of y 358 | return(nrow(z) * dCov(x, y)^2) 359 | } 360 | 361 | library(boot) 362 | z <- as.matrix(iris[1:50, 1:4]) 363 | boot.obj <- boot(data = z, statistic = ndCov2, R = 999, 364 | sim = "permutation", dims = c(2, 2)) 365 | 366 | tb <- c(boot.obj$t0, boot.obj$t) 367 | hist(tb, nclass="scott", xlab="", main="", 368 | freq=FALSE) 369 | points(boot.obj$t0, 0, cex=1, pch=16) 370 | 371 | mean(tb >= boot.obj$t0) 372 | boot.obj 373 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chRandomProcess.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 4 ### 9 | ### Generating Random Processes ### 10 | ####################################################### 11 | 12 | 13 | ### Example 4.1 (Poisson process) 14 | 15 | lambda <- 2 16 | t0 <- 3 17 | Tn <- rexp(100, lambda) #interarrival times 18 | Sn <- cumsum(Tn) #arrival times 19 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 20 | 21 | 22 | ### Example 4.2 (Poisson process, cont.) 
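## Note: this algorithm relies on a conditional property of the Poisson
## process: given N(upper) = n, the n unordered arrival times are iid
## Uniform(0, upper), so sorting simulated uniforms recovers the
## arrival times Sn.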
23 | 24 | lambda <- 2 25 | t0 <- 3 26 | upper <- 100 27 | pp <- numeric(10000) 28 | for (i in 1:10000) { 29 | N <- rpois(1, lambda * upper) 30 | Un <- runif(N, 0, upper) #unordered arrival times 31 | Sn <- sort(Un) #arrival times 32 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 33 | pp[i] <- n - 1 #arrivals in [0, t0] 34 | } 35 | 36 | #alternately, the loop can be replaced by replicate function 37 | pp <- replicate(10000, expr = { 38 | N <- rpois(1, lambda * upper) 39 | Un <- runif(N, 0, upper) #unordered arrival times 40 | Sn <- sort(Un) #arrival times 41 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 42 | n - 1 }) #arrivals in [0, t0] 43 | 44 | c(mean(pp), var(pp)) 45 | 46 | 47 | ### Example 4.3 (Nonhomogeneous Poisson process) 48 | 49 | lambda <- 3 50 | upper <- 100 51 | N <- rpois(1, lambda * upper) 52 | Tn <- rexp(N, lambda) 53 | Sn <- cumsum(Tn) 54 | Un <- runif(N) 55 | keep <- (Un <= cos(Sn)^2) #indicator, as logical vector 56 | Sn[keep] 57 | 58 | round(Sn[keep], 4) 59 | 60 | 61 | ### Example 4.4 (Renewal process) 62 | 63 | t0 <- 5 64 | Tn <- rgeom(100, prob = .2) #interarrival times 65 | Sn <- cumsum(Tn) #arrival times 66 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 67 | 68 | Nt0 <- replicate(1000, expr = { 69 | Sn <- cumsum(rgeom(100, prob = .2)) 70 | min(which(Sn > t0)) - 1 71 | }) 72 | table(Nt0)/1000 73 | Nt0 74 | 75 | t0 <- seq(0.1, 30, .1) 76 | mt <- numeric(length(t0)) 77 | 78 | for (i in 1:length(t0)) { 79 | mt[i] <- mean(replicate(1000, 80 | { 81 | Sn <- cumsum(rgeom(100, prob = .2)) 82 | min(which(Sn > t0[i])) - 1 83 | })) 84 | } 85 | plot(t0, mt, type = "l", xlab = "t", ylab = "mean") 86 | abline(0, .25) 87 | 88 | 89 | ### Example 4.5 (Symmetric random walk) 90 | 91 | n <- 400 92 | incr <- sample(c(-1, 1), size = n, replace = TRUE) 93 | S <- as.integer(c(0, cumsum(incr))) 94 | plot(0:n, S, type = "l", main = "", xlab = "i") 95 | 96 | ### Example 4.6 (Generator for the time until return to origin) 97 | 98 | set.seed(12345) 99 | 100 | #compute the probabilities directly 101 | n <- 1:10000 102 | p2n <- exp(lgamma(2*n-1) 103 | - log(n) - (2*n-1)*log(2) - 2*lgamma(n)) 104 | #or compute using dbinom 105 | P2n <- (.5/n) * dbinom(n-1, size = 2*n-2, prob = 0.5) 106 | pP2n <- cumsum(P2n) 107 | 108 | #given n compute the time of the last return to 0 in (0,n] 109 | n <- 200 110 | sumT <- 0 111 | while (sumT <= n) { 112 | u <- runif(1) 113 | s <- sum(u > pP2n) 114 | if (s == length(pP2n)) 115 | warning("T is truncated") 116 | Tj <- 2 * (1 + s) 117 | #print(c(Tj, sumT)) 118 | sumT <- sumT + Tj 119 | } 120 | sumT - Tj 121 | 122 | ### Example 4.7 (Brownian motion) 123 | 124 | 125 | simBM <- function(n, T) { 126 | times <- seq(0, T, length = n+1) 127 | z <- rnorm(n) 128 | w <- rep(0, n) 129 | s <- sqrt(diff(times)) 130 | for (k in 2:n) { 131 | w[k] <- w[k-1] + s[k] * z[k] 132 | } 133 | return (list(w=w, t=times)) 134 | } 135 | 136 | set.seed(1) 137 | n <- 200 138 | x1 <- simBM(n, 1) 139 | x2 <- simBM(n, 1) 140 | x3 <- simBM(n, 1) 141 | r <- range(c(x1$w, x2$w, x3$w)) 142 | plot(x1$w, type="l", main="", xlab="t", ylab="W", ylim=r) 143 | lines(x2$w, lty=2) 144 | lines(x3$w, lty=3) 145 | 146 | interpBM <- function(w, t0, times) { 147 | k1 <- sum(times < t0) 148 | k <- k1 + 1 149 | b <- (t0 - times[k1]) / (times[k] - times[k1]) 150 | return (w[k1] + b * (w[k] - w[k1])) 151 | } 152 | 153 | plot(x1$t[1:10], x1$w[1:10], type="b", main="", xlab="t", ylab="W") 154 | tmids <- x1$t + 0.0025 155 | for (i in 1:10) { 156 | w <- interpBM(x1$w, tmids[i], x1$t) 157 | points(tmids[i], w, 
pch=2) 158 | } 159 | 160 | legend("topleft", c("Generated W", "Interpolated W"), pch=c(1,2), lty=1, bty="n") 161 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chRgen.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 3 ### 9 | ### Methods for Generating Random Variables ### 10 | ####################################################### 11 | 12 | 13 | # packages to install: mvtnorm 14 | 15 | 16 | ### Example 3.1 (Sampling from a finite population) 17 | 18 | #toss some coins 19 | sample(0:1, size = 10, replace = TRUE) 20 | 21 | #choose some lottery numbers 22 | sample(1:100, size = 6, replace = FALSE) 23 | 24 | #permuation of letters a-z 25 | sample(letters) 26 | 27 | #sample from a multinomial distribution 28 | x <- sample(1:3, size = 100, replace = TRUE, 29 | prob = c(.2, .3, .5)) 30 | table(x) 31 | 32 | ### Example 3.2 (Inverse transform method, continuous case) 33 | 34 | n <- 1000 35 | u <- runif(n) 36 | x <- u^(1/3) 37 | hist(x, prob = TRUE, main = bquote(f(x)==3*x^2)) #density histogram of sample 38 | y <- seq(0, 1, .01) 39 | lines(y, 3*y^2) #density curve f(x) 40 | 41 | 42 | ### Example 3.4 (Two point distribution) 43 | 44 | n <- 1000 45 | p <- 0.4 46 | u <- runif(n) 47 | x <- as.integer(u > 0.6) #(u > 0.6) is a logical vector 48 | 49 | mean(x) 50 | var(x) 51 | 52 | 53 | ### Example 3.5 (Geometric distribution) 54 | 55 | n <- 1000 56 | p <- 0.25 57 | u <- runif(n) 58 | k <- ceiling(log(1-u) / log(1-p)) - 1 59 | 60 | # more efficient 61 | k <- floor(log(u) / log(1-p)) 62 | 63 | 64 | ### Example 3.6 (Logarithmic distribution) 65 | 66 | rlogarithmic <- function(n, theta) { 67 | #returns a random logarithmic(theta) sample size n 68 | u <- runif(n) 69 | #set the initial length of cdf vector 70 | N <- ceiling(-16 / log10(theta)) 71 | k <- 1:N 72 | a <- -1/log(1-theta) 73 | fk <- exp(log(a) + k * log(theta) - log(k)) 74 | Fk <- cumsum(fk) 75 | x <- integer(n) 76 | for (i in 1:n) { 77 | x[i] <- as.integer(sum(u[i] > Fk)) #F^{-1}(u)-1 78 | while (x[i] == N) { 79 | #if x==N we need to extend the cdf 80 | #very unlikely because N is large 81 | logf <- log(a) + (N+1)*log(theta) - log(N+1) 82 | fk <- c(fk, exp(logf)) 83 | Fk <- c(Fk, Fk[N] + fk[N+1]) 84 | N <- N + 1 85 | x[i] <- as.integer(sum(u[i] > Fk)) 86 | } 87 | } 88 | x + 1 89 | } 90 | 91 | n <- 1000 92 | theta <- 0.5 93 | x <- rlogarithmic(n, theta) 94 | #compute density of logarithmic(theta) for comparison 95 | k <- sort(unique(x)) 96 | p <- -1 / log(1 - theta) * theta^k / k 97 | se <- sqrt(p*(1-p)/n) #standard error 98 | 99 | round(rbind(table(x)/n, p, se),3) 100 | 101 | 102 | ### Example 3.7 (Acceptance-rejection method) 103 | 104 | n <- 1000 105 | k <- 0 #counter for accepted 106 | j <- 0 #iterations 107 | y <- numeric(n) 108 | 109 | while (k < n) { 110 | u <- runif(1) 111 | j <- j + 1 112 | x <- runif(1) #random variate from g 113 | if (x * (1-x) > u) { 114 | #we accept x 115 | k <- k + 1 116 | y[k] <- x 117 | } 118 | } 119 | 120 | j 121 | 122 | #compare empirical and theoretical percentiles 123 | p <- seq(.1, .9, .1) 124 | Qhat <- quantile(y, p) #quantiles of sample 125 | Q <- qbeta(p, 2, 2) #theoretical quantiles 126 | se <- sqrt(p * (1-p) / (n * dbeta(Q, 2, 2)^2)) #see 
Ch. 2 127 | round(rbind(Qhat, Q, se), 3) 128 | 129 | 130 | ### Example 3.8 (Beta distribution) 131 | 132 | n <- 1000 133 | a <- 3 134 | b <- 2 135 | u <- rgamma(n, shape=a, rate=1) 136 | v <- rgamma(n, shape=b, rate=1) 137 | x <- u / (u + v) 138 | 139 | q <- qbeta(ppoints(n), a, b) 140 | qqplot(q, x, cex=0.25, xlab="Beta(3, 2)", ylab="Sample") 141 | abline(0, 1) 142 | 143 | 144 | ### Example 3.9 (Logarithmic distribution, version 2) 145 | 146 | n <- 1000 147 | theta <- 0.5 148 | u <- runif(n) #generate logarithmic sample 149 | v <- runif(n) 150 | x <- floor(1 + log(v) / log(1 - (1 - theta)^u)) 151 | k <- 1:max(x) #calc. logarithmic probs. 152 | p <- -1 / log(1 - theta) * theta^k / k 153 | se <- sqrt(p*(1-p)/n) 154 | p.hat <- tabulate(x)/n 155 | 156 | print(round(rbind(p.hat, p, se), 3)) 157 | 158 | # The following function is a simple replacement for 159 | # rlogarithmic in Example 3.6 160 | 161 | rlogarithmic <- function(n, theta) { 162 | stopifnot(all(theta > 0 & theta < 1)) 163 | th <- rep(theta, length=n) 164 | u <- runif(n) 165 | v <- runif(n) 166 | x <- floor(1 + log(v) / log(1 - (1 - th)^u)) 167 | return(x) 168 | } 169 | 170 | 171 | ### Example 3.10 (Chisquare) 172 | 173 | n <- 1000 174 | nu <- 2 175 | X <- matrix(rnorm(n*nu), n, nu)^2 #matrix of sq. normals 176 | #sum the squared normals across each row: method 1 177 | y <- rowSums(X) 178 | #method 2 179 | y <- apply(X, MARGIN=1, FUN=sum) #a vector length n 180 | mean(y) 181 | mean(y^2) 182 | 183 | 184 | ### Example 3.11 (Convolutions and mixtures) 185 | 186 | n <- 1000 187 | x1 <- rgamma(n, 2, 2) 188 | x2 <- rgamma(n, 2, 4) 189 | s <- x1 + x2 #the convolution 190 | u <- runif(n) 191 | k <- as.integer(u > 0.5) #vector of 0's and 1's 192 | x <- k * x1 + (1-k) * x2 #the mixture 193 | 194 | par(mfcol=c(1,2)) #two graphs per page 195 | hist(s, prob=TRUE, xlim=c(0,5), ylim=c(0,1)) 196 | hist(x, prob=TRUE, xlim=c(0,5), ylim=c(0,1)) 197 | par(mfcol=c(1,1)) #restore display 198 | 199 | 200 | ### Example 3.12 (Mixture of several gamma distributions) 201 | # density estimates are plotted 202 | 203 | n <- 5000 204 | k <- sample(1:5, size=n, replace=TRUE, prob=(1:5)/15) 205 | rate <- 1/k 206 | x <- rgamma(n, shape=3, rate=rate) 207 | 208 | #plot the density of the mixture 209 | #with the densities of the components 210 | plot(density(x), xlim=c(0,40), ylim=c(0,.3), 211 | lwd=3, xlab="x", main="") 212 | for (i in 1:5) 213 | lines(density(rgamma(n, 3, 1/i))) 214 | 215 | 216 | ### Example 3.13 (Mixture of several gamma distributions) 217 | 218 | n <- 5000 219 | p <- c(.1,.2,.2,.3,.2) 220 | lambda <- c(1,1.5,2,2.5,3) 221 | k <- sample(1:5, size=n, replace=TRUE, prob=p) 222 | rate <- lambda[k] 223 | x <- rgamma(n, shape=3, rate=rate) 224 | 225 | 226 | ### Example 3.14 (Plot density of mixture) 227 | 228 | f <- function(x, lambda, theta) { 229 | #density of the mixture at the point x 230 | sum(dgamma(x, 3, lambda) * theta) 231 | } 232 | 233 | p <- c(.1,.2,.2,.3,.2) 234 | lambda <- c(1,1.5,2,2.5,3) 235 | 236 | x <- seq(0, 8, length=200) 237 | dim(x) <- length(x) #need for apply 238 | 239 | #compute density of the mixture f(x) along x 240 | y <- apply(x, 1, f, lambda=lambda, theta=p) 241 | 242 | #plot the density of the mixture 243 | plot(x, y, type="l", ylim=c(0,.85), lwd=3, ylab="Density") 244 | 245 | for (j in 1:5) { 246 | #add the j-th gamma density to the plot 247 | y <- apply(x, 1, dgamma, shape=3, rate=lambda[j]) 248 | lines(x, y) 249 | } 250 | 251 | 252 | ### Example 3.15 (Poisson-Gamma mixture) 253 | 254 | #generate a Poisson-Gamma mixture 
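# Note: this is the standard conjugate setup; if lambda ~ Gamma(r, beta)
# and X | lambda ~ Poisson(lambda), then marginally X is negative binomial
# with size r and success probability beta/(1+beta), which is what the
# comparison with dnbinom below checks.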
255 | n <- 1000 256 | r <- 4 257 | beta <- 3 258 | lambda <- rgamma(n, r, beta) #lambda is random 259 | 260 | #now supply the sample of lambda's as the Poisson mean 261 | x <- rpois(n, lambda) #the mixture 262 | 263 | #compare with negative binomial 264 | mix <- tabulate(x+1) / n 265 | negbin <- round(dnbinom(0:max(x), r, beta/(1+beta)), 3) 266 | se <- sqrt(negbin * (1 - negbin) / n) 267 | 268 | round(rbind(mix, negbin, se), 3) 269 | 270 | 271 | ### Example 3.16 (Spectral decomposition method) 272 | 273 | # mean and covariance parameters 274 | mu <- c(0, 0) 275 | Sigma <- matrix(c(1, .9, .9, 1), nrow = 2, ncol = 2) 276 | 277 | rmvn.eigen <- 278 | function(n, mu, Sigma) { 279 | # generate n random vectors from MVN(mu, Sigma) 280 | # dimension is inferred from mu and Sigma 281 | d <- length(mu) 282 | ev <- eigen(Sigma, symmetric = TRUE) 283 | lambda <- ev$values 284 | V <- ev$vectors 285 | R <- V %*% diag(sqrt(lambda)) %*% t(V) 286 | Z <- matrix(rnorm(n*d), nrow = n, ncol = d) 287 | X <- Z %*% R + matrix(mu, n, d, byrow = TRUE) 288 | X 289 | } 290 | 291 | # generate the sample 292 | X <- rmvn.eigen(1000, mu, Sigma) 293 | 294 | plot(X, xlab = "x", ylab = "y", pch = 20) 295 | print(colMeans(X)) 296 | print(cor(X)) 297 | 298 | 299 | ### Example 3.17 (SVD method) 300 | 301 | rmvn.svd <- 302 | function(n, mu, Sigma) { 303 | # generate n random vectors from MVN(mu, Sigma) 304 | # dimension is inferred from mu and Sigma 305 | d <- length(mu) 306 | S <- svd(Sigma) 307 | R <- S$u %*% diag(sqrt(S$d)) %*% t(S$v) #sq. root Sigma 308 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 309 | X <- Z %*% R + matrix(mu, n, d, byrow=TRUE) 310 | X 311 | } 312 | 313 | 314 | ### Example 3.18 (Choleski factorization method) 315 | 316 | rmvn.Choleski <- 317 | function(n, mu, Sigma) { 318 | # generate n random vectors from MVN(mu, Sigma) 319 | # dimension is inferred from mu and Sigma 320 | d <- length(mu) 321 | Q <- chol(Sigma) # Choleski factorization of Sigma 322 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 323 | X <- Z %*% Q + matrix(mu, n, d, byrow=TRUE) 324 | X 325 | } 326 | 327 | #generating the samples according to the mean and covariance 328 | #structure as the four-dimensional iris virginica data 329 | y <- subset(x=iris, Species=="virginica")[, 1:4] 330 | mu <- colMeans(y) 331 | Sigma <- cov(y) 332 | mu 333 | Sigma 334 | 335 | #now generate MVN data with this mean and covariance 336 | X <- rmvn.Choleski(200, mu, Sigma) 337 | pairs(X) 338 | 339 | ### Example 3.19 (Comparing performance of MVN generators) 340 | 341 | library(MASS) 342 | library(mvtnorm) 343 | n <- 100 #sample size 344 | d <- 30 #dimension 345 | N <- 2000 #iterations 346 | mu <- numeric(d) 347 | 348 | set.seed(100) 349 | system.time(for (i in 1:N) 350 | rmvn.eigen(n, mu, cov(matrix(rnorm(n*d), n, d)))) 351 | set.seed(100) 352 | system.time(for (i in 1:N) 353 | rmvn.svd(n, mu, cov(matrix(rnorm(n*d), n, d)))) 354 | set.seed(100) 355 | system.time(for (i in 1:N) 356 | rmvn.Choleski(n, mu, cov(matrix(rnorm(n*d), n, d)))) 357 | set.seed(100) 358 | system.time(for (i in 1:N) 359 | mvrnorm(n, mu, cov(matrix(rnorm(n*d), n, d)))) 360 | set.seed(100) 361 | system.time(for (i in 1:N) 362 | rmvnorm(n, mu, cov(matrix(rnorm(n*d), n, d)))) 363 | set.seed(100) 364 | system.time(for (i in 1:N) 365 | cov(matrix(rnorm(n*d), n, d))) 366 | 367 | detach(package:MASS) 368 | detach(package:mvtnorm) 369 | 370 | ### Example 3.20 (Multivariate normal mixture) 371 | 372 | library(MASS) #for mvrnorm 373 | #inefficient version loc.mix.0 with loops 374 | 375 | loc.mix.0 <- 
function(n, p, mu1, mu2, Sigma) { 376 | #generate sample from BVN location mixture 377 | X <- matrix(0, n, 2) 378 | 379 | for (i in 1:n) { 380 | k <- rbinom(1, size = 1, prob = p) 381 | if (k) 382 | X[i,] <- mvrnorm(1, mu = mu1, Sigma) else 383 | X[i,] <- mvrnorm(1, mu = mu2, Sigma) 384 | } 385 | return(X) 386 | } 387 | 388 | #more efficient version 389 | loc.mix <- function(n, p, mu1, mu2, Sigma) { 390 | #generate sample from BVN location mixture 391 | n1 <- rbinom(1, size = n, prob = p) 392 | n2 <- n - n1 393 | x1 <- mvrnorm(n1, mu = mu1, Sigma) 394 | x2 <- mvrnorm(n2, mu = mu2, Sigma) 395 | X <- rbind(x1, x2) #combine the samples 396 | return(X[sample(1:n), ]) #mix them 397 | } 398 | 399 | x <- loc.mix(1000, .5, rep(0, 4), 2:5, Sigma = diag(4)) 400 | r <- range(x) * 1.2 401 | par(mfrow = c(2, 2)) 402 | for (i in 1:4) 403 | hist(x[ , i], xlim = r, ylim = c(0, .3), freq = FALSE, 404 | main = "", breaks = seq(-5, 10, .5)) 405 | 406 | detach(package:MASS) 407 | par(mfrow = c(1, 1)) 408 | 409 | 410 | ### Example 3.21 (Generating variates on a sphere) 411 | 412 | runif.sphere <- function(n, d) { 413 | # return a random sample uniformly distributed 414 | # on the unit sphere in R ^d 415 | M <- matrix(rnorm(n*d), nrow = n, ncol = d) 416 | L <- apply(M, MARGIN = 1, 417 | FUN = function(x){sqrt(sum(x*x))}) 418 | D <- diag(1 / L) 419 | U <- D %*% M 420 | U 421 | } 422 | 423 | #generate a sample in d=2 and plot 424 | X <- runif.sphere(200, 2) 425 | par(pty = "s") 426 | plot(X, xlab = bquote(x[1]), ylab = bquote(x[2])) 427 | par(pty = "m") 428 | -------------------------------------------------------------------------------- /examples-R/SCR2e-chVis.R: -------------------------------------------------------------------------------- 1 | ####################################################### 2 | ### Statistical Computing with R 2e ### 3 | ### Maria L. Rizzo ### 4 | ### Chapman & Hall/CRC The R Series ### 5 | ### ISBN 9781466553323 - CAT# K15269 ### 6 | ### January 2019 ### 7 | ### ### 8 | ### R code for Chapter 5 ### 9 | ### Visualization of Multivariate Data ### 10 | ####################################################### 11 | 12 | 13 | # packages to install: 14 | # bootstrap, corrplot, DAAG, FactoMineR, ggplot2, hexbin 15 | 16 | 17 | ### Example 5.1 (Scatterplot matrix) 18 | 19 | data(iris) 20 | #virginica data in first 4 columns of the last 50 obs. 21 | 22 | # not shown in text 23 | pairs(iris[101:150, 1:4]) 24 | 25 | panel.d <- function(x, ...) { 26 | usr <- par("usr") 27 | on.exit(par(usr=usr)) 28 | par(usr = c(usr[1:2], 0, .5)) 29 | lines(density(x)) 30 | } 31 | 32 | # Fig. 5.1 33 | x <- scale(iris[101:150, 1:4]) 34 | r <- range(x) 35 | pairs(x, diag.panel = panel.d, xlim = r, ylim = r) 36 | 37 | library(lattice) 38 | splom(iris[101:150, 1:4]) #plot 1 39 | 40 | #for all 3 at once, in color, plot 2 41 | splom(iris[,1:4], groups = iris$Species) 42 | 43 | # Fig. 
5.2 44 | #for all 3 at once, black and white, plot 3 45 | splom(~iris[1:4], groups = Species, data = iris, 46 | col = 1, pch = c(1, 2, 3), cex = c(.5,.5,.5)) 47 | 48 | ### Example 5.2 (Correlation plots: Decathlon data) 49 | 50 | library(FactoMineR) #decathlon data 51 | library(corrplot) 52 | data("decathlon") 53 | str(decathlon) 54 | 55 | corrMat <- cor(decathlon[, 1:10]) 56 | corrplot(corrMat, type="upper", tl.col="black", tl.srt=45) 57 | corrplot(corrMat, type = "upper", method = "square", 58 | addCoef.col = "black", diag=FALSE) 59 | 60 | ### Example 5.3 (Plot bivariate normal density) 61 | 62 | #the standard BVN density 63 | f <- function(x,y) { 64 | z <- (1/(2*pi)) * exp(-.5 * (x^2 + y^2)) 65 | } 66 | 67 | y <- x <- seq(-3, 3, length= 50) 68 | z <- outer(x, y, f) #compute density for all (x,y) 69 | 70 | persp(x, y, z) #the default plot 71 | 72 | persp(x, y, z, theta = 45, phi = 30, expand = 0.6, 73 | ltheta = 120, shade = 0.75, ticktype = "detailed", 74 | xlab = "X", ylab = "Y", zlab = "f(x, y)") 75 | 76 | 77 | ### Example 5.4 (Add elements to perspective plot) 78 | 79 | #store viewing transformation in M 80 | persp(x, y, z, theta = 45, phi = 30, 81 | expand = .4, box = FALSE) -> M 82 | 83 | #add some points along a circle 84 | a <- seq(-pi, pi, pi/16) 85 | newpts <- cbind(cos(a), sin(a)) * 2 86 | newpts <- cbind(newpts, 0, 1) #z=0, t=1 87 | N <- newpts %*% M 88 | points(N[,1]/N[,4], N[,2]/N[,4], col=2) 89 | 90 | #add lines 91 | x2 <- seq(-3, 3, .1) 92 | y2 <- -x2^2 / 3 93 | z2 <- dnorm(x2) * dnorm(y2) 94 | N <- cbind(x2, y2, z2, 1) %*% M 95 | lines(N[,1]/N[,4], N[,2]/N[,4], col=4) 96 | 97 | #add text 98 | x3 <- c(0, 3.1) 99 | y3 <- c(0, -3.1) 100 | z3 <- dnorm(x3) * dnorm(y3) * 1.1 101 | N <- cbind(x3, y3, z3, 1) %*% M 102 | text(N[1,1]/N[1,4], N[1,2]/N[1,4], "f(x,y)") 103 | text(N[2,1]/N[2,4], N[2,2]/N[2,4], bquote(y==-x^2/3)) 104 | 105 | 106 | ### Example 5.5 (Surface plot using wireframe(lattice)) 107 | 108 | library(lattice) 109 | x <- y <- seq(-3, 3, length= 50) 110 | 111 | xy <- expand.grid(x, y) 112 | z <- (1/(2*pi)) * exp(-.5 * (xy[,1]^2 + xy[,2]^2)) 113 | wireframe(z ~ xy[,1] * xy[,2]) 114 | 115 | 116 | ### Example 5.6 (3D scatterplot) 117 | 118 | library(lattice) 119 | attach(iris) 120 | #basic 3 color plot with arrows along axes 121 | print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width, 122 | data=iris, groups=Species)) 123 | 124 | print(cloud(Sepal.Length ~ Petal.Length * Petal.Width, 125 | data = iris, groups = Species, main = "1", pch=1:3, 126 | scales = list(draw = FALSE), zlab = "SL", 127 | screen = list(z = 30, x = -75, y = 0)), 128 | split = c(1, 1, 2, 2), more = TRUE) 129 | 130 | print(cloud(Sepal.Width ~ Petal.Length * Petal.Width, 131 | data = iris, groups = Species, main = "2", pch=1:3, 132 | scales = list(draw = FALSE), zlab = "SW", 133 | screen = list(z = 30, x = -75, y = 0)), 134 | split = c(2, 1, 2, 2), more = TRUE) 135 | 136 | print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width, 137 | data = iris, groups = Species, main = "3", pch=1:3, 138 | scales = list(draw = FALSE), zlab = "PL", 139 | screen = list(z = 30, x = -55, y = 0)), 140 | split = c(1, 2, 2, 2), more = TRUE) 141 | 142 | print(cloud(Petal.Width ~ Sepal.Length * Sepal.Width, 143 | data = iris, groups = Species, main = "4", pch=1:3, 144 | scales = list(draw = FALSE), zlab = "PW", 145 | screen = list(z = 30, x = -55, y = 0)), 146 | split = c(2, 2, 2, 2)) 147 | detach(iris) 148 | 149 | 150 | ### Example 5.7 (Contour plot) 151 | 152 | #contour plot with labels 153 | contour(volcano, asp = 1, labcex 
= 1) 154 | 155 | #another version from lattice package 156 | library(lattice) 157 | contourplot(volcano) #similar to above 158 | 159 | 160 | ### Example 5.8 (Filled contour plots) 161 | 162 | image(volcano, col = terrain.colors(100), axes = FALSE) 163 | contour(volcano, levels = seq(100,200,by = 10), add = TRUE) 164 | 165 | filled.contour(volcano, color = terrain.colors, asp = 1) 166 | levelplot(volcano, scales = list(draw = FALSE), 167 | xlab = "", ylab = "") 168 | 169 | 170 | ### Example 5.9 (2D histogram) 171 | 172 | library(hexbin) 173 | x <- matrix(rnorm(4000), 2000, 2) 174 | plot(hexbin(x[,1], x[,2])) 175 | 176 | # ggplot version 177 | library(ggplot2) 178 | x <- data.frame(x) 179 | ggplot(x, aes(x[,1], x[,2])) + geom_hex() 180 | 181 | ### Example 5.10 (Andrews curves) 182 | 183 | library(DAAG) 184 | attach(leafshape17) 185 | 186 | f <- function(a, v) { 187 | #Andrews curve f(a) for a data vector v in R^3 188 | v[1]/sqrt(2) + v[2]*sin(a) + v[3]*cos(a) 189 | } 190 | 191 | 192 | #scale data to range [-1, 1] 193 | x <- cbind(bladelen, petiole, bladewid) 194 | n <- nrow(x) 195 | mins <- apply(x, 2, min) #column minimums 196 | maxs <- apply(x, 2, max) #column maximums 197 | r <- maxs - mins #column ranges 198 | y <- sweep(x, 2, mins) #subtract column mins 199 | y <- sweep(y, 2, r, "/") #divide by range 200 | x <- 2 * y - 1 #now has range [-1, 1] 201 | 202 | #set up plot window, but plot nothing yet 203 | plot(0, 0, xlim = c(-pi, pi), ylim = c(-3,3), 204 | xlab = "t", ylab = "Andrews Curves", 205 | main = "", type = "n") 206 | 207 | #now add the Andrews curves for each observation 208 | #line type corresponds to leaf architecture 209 | #0=orthotropic, 1=plagiotropic 210 | a <- seq(-pi, pi, len=101) 211 | dim(a) <- length(a) 212 | for (i in 1:n) { 213 | g <- arch[i] + 1 214 | y <- apply(a, MARGIN = 1, FUN = f, v = x[i,]) 215 | lines(a, y, lty = g) 216 | } 217 | legend(3, c("Orthotropic", "Plagiotropic"), lty = 1:2) 218 | detach(leafshape17) 219 | 220 | 221 | ### Example 5.11 (Parallel coordinates) 222 | 223 | library(MASS) 224 | library(lattice) 225 | #trellis.device(color = FALSE) #black and white display 226 | x <- crabs[seq(5, 200, 5), ] #get every fifth obs. 227 | parallelplot(~x[4:8] | sp*sex, x) 228 | 229 | #trellis.device(color = FALSE) #black and white display 230 | x <- crabs[seq(5, 200, 5), ] #get every fifth obs. 231 | a <- x$CW * x$CL #area of carapace 232 | x[4:8] <- x[4:8] / sqrt(a) #adjust for size 233 | parallelplot(~x[4:8] | sp*sex, x) 234 | 235 | ### Example 5.12 (Segment plot) 236 | 237 | #segment plot 238 | x <- MASS::crabs[seq(5, 200, 5), ] #get every fifth obs. 
239 | x <- subset(x, sex == "M") #keep just the males 240 | a <- x$CW * x$CL #area of carapace 241 | x[4:8] <- x[4:8] / sqrt(a) #adjust for size 242 | 243 | #use default color palette or other colors 244 | #palette(gray(seq(.4, .95, len = 5))) #use gray scale 245 | palette(rainbow(6)) #or use color 246 | stars(x[4:8], draw.segments = TRUE, 247 | labels =levels(x$sp), nrow = 4, 248 | ylim = c(-2,10), key.loc = c(3,-1)) 249 | 250 | #after viewing, restore the default colors 251 | palette("default") 252 | 253 | 254 | ### Example 5.13 (PCA for open and closed book exams) 255 | 256 | library(bootstrap) 257 | str(scor) 258 | pairs(scor) 259 | cor(scor) 260 | 261 | n <- nrow(scor) 262 | x <- scale(scor) #center and scale 263 | s <- cov(x) 264 | e <- eigen(s) 265 | lam <- e$values #vector of eigenvalues 266 | P <- e$vectors #matrix of eigenvectors 267 | 268 | plot(lam, type = "b", xlab = "eigenvalues", main = "") 269 | barplot(lam, xlab = "eigenvalues") 270 | 271 | tab <- rbind(lam / sum(lam), cumsum(lam) / sum(lam)) 272 | tab 273 | 274 | z <- x %*% P 275 | dim(z) 276 | head(z) 277 | 278 | pc <- prcomp(scor, center = TRUE, scale = TRUE) 279 | summary(pc) 280 | 281 | df <- scor[1:5, ] 282 | predict(pc, newdata = df) #same as z above 283 | 284 | head(x %*% pc$rotation, 5) 285 | head(pc$rotation) 286 | head(P) 287 | 288 | 289 | ### Example 5.14 (PC Biplot) 290 | 291 | ## plot scor data in the (PC1, PC2) coordinate system 292 | biplot(pc, pc.biplot = TRUE) 293 | round(cor(x, z), 3) 294 | 295 | ### Example 5.15 (PCA: Decathlon data) 296 | 297 | library(FactoMineR) 298 | data(decathlon) 299 | pc <- princomp(decathlon[,1:10], cor = TRUE, scores = TRUE) 300 | plot(pc) # screeplot 301 | biplot(pc) 302 | summary(pc) 303 | 304 | -------------------------------------------------------------------------------- /examples-R/SCR2e-examples-functions.R: -------------------------------------------------------------------------------- 1 | rlogarithmic <- function(n, theta) { 2 | # generate random sample from Logarithmic(theta) 3 | stopifnot(all(theta > 0 & theta < 1)) 4 | th <- rep(theta, length=n) 5 | u <- runif(n) 6 | v <- runif(n) 7 | x <- floor(1 + log(v) / log(1 - (1 - th)^u)) 8 | return(x) 9 | } 10 | 11 | # generate MVN by spectral decomposition method 12 | 13 | rmvn.eigen <- 14 | function(n, mu, Sigma) { 15 | # generate n random vectors from MVN(mu, Sigma) 16 | # dimension is inferred from mu and Sigma 17 | d <- length(mu) 18 | ev <- eigen(Sigma, symmetric = TRUE) 19 | lambda <- ev$values 20 | V <- ev$vectors 21 | R <- V %*% diag(sqrt(lambda)) %*% t(V) 22 | Z <- matrix(rnorm(n*d), nrow = n, ncol = d) 23 | X <- Z %*% R + matrix(mu, n, d, byrow = TRUE) 24 | X 25 | } 26 | 27 | 28 | # generate MVN by singular value decomposition method 29 | 30 | rmvn.svd <- 31 | function(n, mu, Sigma) { 32 | # generate n random vectors from MVN(mu, Sigma) 33 | # dimension is inferred from mu and Sigma 34 | d <- length(mu) 35 | S <- svd(Sigma) 36 | R <- S$u %*% diag(sqrt(S$d)) %*% t(S$v) #sq. 
root Sigma 37 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 38 | X <- Z %*% R + matrix(mu, n, d, byrow=TRUE) 39 | X 40 | } 41 | 42 | 43 | # generate MVN by Choleski factorization method 44 | 45 | rmvn.Choleski <- 46 | function(n, mu, Sigma) { 47 | # generate n random vectors from MVN(mu, Sigma) 48 | # dimension is inferred from mu and Sigma 49 | d <- length(mu) 50 | Q <- chol(Sigma) # Choleski factorization of Sigma 51 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 52 | X <- Z %*% Q + matrix(mu, n, d, byrow=TRUE) 53 | X 54 | } 55 | 56 | # Bootstrap t confidence interval 57 | 58 | boot.t.ci <- 59 | function(x, B = 500, R = 100, level = .95, statistic){ 60 | #compute the bootstrap t CI 61 | x <- as.matrix(x); n <- nrow(x) 62 | stat <- numeric(B); se <- numeric(B) 63 | 64 | boot.se <- function(x, R, f) { 65 | #local function to compute the bootstrap 66 | #estimate of standard error for statistic f(x) 67 | x <- as.matrix(x); m <- nrow(x) 68 | th <- replicate(R, expr = { 69 | i <- sample(1:m, size = m, replace = TRUE) 70 | f(x[i, ]) 71 | }) 72 | return(sd(th)) 73 | } 74 | 75 | for (b in 1:B) { 76 | j <- sample(1:n, size = n, replace = TRUE) 77 | y <- x[j, ] 78 | stat[b] <- statistic(y) 79 | se[b] <- boot.se(y, R = R, f = statistic) 80 | } 81 | stat0 <- statistic(x) 82 | t.stats <- (stat - stat0) / se 83 | se0 <- sd(stat) 84 | alpha <- 1 - level 85 | Qt <- quantile(t.stats, c(alpha/2, 1-alpha/2), type = 1) 86 | names(Qt) <- rev(names(Qt)) 87 | CI <- rev(stat0 - Qt * se0) 88 | } 89 | 90 | 91 | # BCa bootstrap confidence interval 92 | 93 | boot.BCa <- 94 | function(x, th0, th, stat, conf = .95) { 95 | # bootstrap with BCa bootstrap confidence interval 96 | # th0 is the observed statistic 97 | # th is the vector of bootstrap replicates 98 | # stat is the function to compute the statistic 99 | 100 | x <- as.matrix(x) 101 | n <- nrow(x) #observations in rows 102 | N <- 1:n 103 | alpha <- (1 + c(-conf, conf))/2 104 | zalpha <- qnorm(alpha) 105 | 106 | # the bias correction factor 107 | z0 <- qnorm(sum(th < th0) / length(th)) 108 | 109 | # the acceleration factor (jackknife est.) 110 | th.jack <- numeric(n) 111 | for (i in 1:n) { 112 | J <- N[1:(n-1)] 113 | th.jack[i] <- stat(x[-i, ], J) 114 | } 115 | L <- mean(th.jack) - th.jack 116 | a <- sum(L^3)/(6 * sum(L^2)^1.5) 117 | 118 | # BCa conf. limits 119 | adj.alpha <- pnorm(z0 + (z0+zalpha)/(1-a*(z0+zalpha))) 120 | limits <- quantile(th, adj.alpha, type=6) 121 | return(list("est"=th0, "BCa"=limits)) 122 | } 123 | 124 | # Gelman-Rubin statistic for MCMC convergence 125 | 126 | Gelman.Rubin <- function(psi) { 127 | # psi[i,j] is the statistic psi(X[i,1:j]) 128 | # for chain in i-th row of X 129 | psi <- as.matrix(psi) 130 | n <- ncol(psi) 131 | k <- nrow(psi) 132 | 133 | psi.means <- rowMeans(psi) #row means 134 | B <- n * var(psi.means) #between variance est. 135 | psi.w <- apply(psi, 1, "var") #within variances 136 | W <- mean(psi.w) #within est. 137 | v.hat <- W*(n-1)/n + (B/n) #upper variance est. 
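  # Note: the ratio below compares the pooled estimate v.hat with the
  # mean within-chain variance W; values of r.hat near 1 indicate
  # approximate convergence (r.hat < 1.2 is a common rule of thumb)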
138 | r.hat <- v.hat / W #G-R statistic 139 | return(r.hat) 140 | } 141 | 142 | # binning bivariate data 143 | 144 | bin2d <- 145 | function(x, breaks1 = "Sturges", breaks2 = "Sturges"){ 146 | # Data matrix x is n by 2 147 | # breaks1, breaks2: any valid breaks for hist function 148 | # using same defaults as hist 149 | histg1 <- hist(x[,1], breaks = breaks1, plot = FALSE) 150 | histg2 <- hist(x[,2], breaks = breaks2, plot = FALSE) 151 | brx <- histg1$breaks 152 | bry <- histg2$breaks 153 | 154 | # bin frequencies 155 | freq <- table(cut(x[,1], brx), cut(x[,2], bry)) 156 | 157 | return(list(call = match.call(), freq = freq, 158 | breaks1 = brx, breaks2 = bry, 159 | mids1 = histg1$mids, mids2 = histg2$mids)) 160 | } 161 | -------------------------------------------------------------------------------- /examples-R/readme.md: -------------------------------------------------------------------------------- 1 | Code for the examples in "Statistical Computing with R, 2nd ed.". 2 | -------------------------------------------------------------------------------- /examples-Rmd/FOREARM.DAT: -------------------------------------------------------------------------------- 1 | 17.3 18.4 20.9 16.8 18.7 20.5 17.9 20.4 18.3 20.5 2 | 19.0 17.5 18.1 17.1 18.8 20.0 19.1 19.1 17.9 18.3 3 | 18.2 18.9 19.4 18.9 19.4 20.8 17.3 18.5 18.3 19.4 4 | 19.0 19.0 20.5 19.7 18.5 17.7 19.4 18.3 19.6 21.4 5 | 19.0 20.5 20.4 19.7 18.6 19.9 18.3 19.8 19.6 19.0 6 | 20.4 17.3 16.1 19.2 19.6 18.8 19.3 19.1 21.0 18.6 7 | 18.3 18.3 18.7 20.6 18.5 16.4 17.2 17.5 18.0 19.5 8 | 19.9 18.4 18.8 20.1 20.0 18.5 17.5 18.5 17.9 17.4 9 | 18.7 18.6 17.3 18.8 17.8 19.0 19.6 19.3 18.1 18.5 10 | 20.9 19.8 18.1 17.1 19.8 20.6 17.6 19.1 19.5 18.4 11 | 17.7 20.2 19.9 18.6 16.6 19.2 20.0 17.4 17.1 18.3 12 | 19.1 18.5 19.6 18.0 19.4 17.1 19.9 16.3 18.9 20.7 13 | 19.7 18.5 18.4 18.7 19.3 16.3 16.9 18.2 18.5 19.3 14 | 18.1 18.0 19.5 20.3 20.1 17.2 19.5 18.8 19.2 17.7 15 | -------------------------------------------------------------------------------- /examples-Rmd/Lahman.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Benchmarks on Lahman Batting data" 3 | author: "Maria Rizzo" 4 | date: "September 1, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r} 9 | library(Lahman) 10 | library(dplyr) 11 | library(ggplot2) 12 | library(microbenchmark) 13 | ``` 14 | 15 | ## Define two functions to compare the two methods 16 | 17 | ```{r} 18 | use_dplyr <- function(yr) { 19 | Batting %>% filter(yearID == yr) -> b 20 | b %>% group_by(playerID) %>% 21 | summarize(AB = sum(AB), H = sum(H)) -> S 22 | S %>% mutate(AVG = round(H / AB, 3)) -> S 23 | S %>% filter(AB >= 400) -> S400 24 | S400 %>% arrange(desc(AVG)) -> S400 25 | slice(S400, 1:10) -> top 26 | top 27 | } 28 | 29 | no_dplyr <- function(yr) { 30 | S <- subset(Batting, Batting$yearID == yr, 31 | select = c("playerID", "AB", "H")) 32 | AB <- as.vector(by(S$AB, S$playerID, FUN = sum)) 33 | H <- as.vector(by(S$H, S$playerID, FUN = sum)) 34 | S <- data.frame(playerID = unique(S$playerID), 35 | AB = AB, H = H, AVG = round(H / AB, 3), 36 | stringsAsFactors = FALSE) 37 | S400 <- S[S$AB >= 400, ] 38 | o <- order(S400$AVG, decreasing = TRUE) 39 | S400 <- S400[o, ] 40 | top <- S400[1:10, ] 41 | top 42 | } 43 | ``` 44 | 45 | Check that these functions obtain the same results. 
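Note that when two objects differ, `all.equal` describes the differences in a character vector rather than returning `FALSE`, so a result other than `TRUE` lists what disagrees.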
46 | 47 | ```{r} 48 | all.equal(use_dplyr(1999), no_dplyr(1999)) 49 | ``` 50 | 51 | ## Benchmark comparison for dplyr method vs basic method 52 | 53 | ```{r} 54 | mb <- microbenchmark( 55 | dplyr = use_dplyr(1999), 56 | base = no_dplyr(1999) 57 | ) 58 | 59 | mb 60 | autoplot(mb) 61 | ``` 62 | 63 | Another version of the summary table: 64 | 65 | ```{r} 66 | knitr::kable(summary(mb)) 67 | ``` -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-Intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 1: Introduction 14 | 15 | ## Example 1.1 16 | 17 | ```{r } 18 | sumdice <- function(n) { 19 | k <- sample(1:6, size=n, replace=TRUE) 20 | return(sum(k)) 21 | } 22 | 23 | sumdice(2) 24 | 25 | #to store the result rather than print it 26 | a <- sumdice(100) 27 | 28 | #we expect the mean for 100 dice to be close to 3.5 29 | a / 100 30 | 31 | sumdice <- function(n) 32 | sum(sample(1:6, size=n, replace=TRUE)) 33 | 34 | sumdice <- function(n, sides = 6) { 35 | if (sides < 1) return (0) 36 | k <- sample(1:sides, size=n, replace=TRUE) 37 | return(sum(k)) 38 | } 39 | 40 | sumdice(5) #default 6 sides 41 | sumdice(n=5, sides=4) #4 sides 42 | ``` 43 | 44 | ## Example 1.2 (iris data) 45 | 46 | ```{r } 47 | names(iris) 48 | table(iris$Species) 49 | w <- iris[[2]] #Sepal.Width 50 | mean(w) 51 | 52 | attach(iris) 53 | summary(Petal.Length[51:100]) #versicolor petal length 54 | 55 | with(iris, summary(Petal.Length[51:100])) 56 | 57 | out <- with(iris, summary(Petal.Length[51:100])) 58 | 59 | by(iris[,1:4], Species, colMeans) 60 | detach(iris) 61 | ``` 62 | 63 | ## Example 1.3 (Arrays) 64 | 65 | ```{r } 66 | x <- 1:24 # vector 67 | dim(x) <- length(x) # 1 dimensional array 68 | matrix(1:24, nrow=4, ncol=6) # 4 by 6 matrix 69 | x <- array(1:24, c(3, 4, 2)) # 3 by 4 by 2 array 70 | ``` 71 | 72 | ## Example 1.4 (Matrices) 73 | 74 | ```{r } 75 | A <- matrix(0, nrow=2, ncol=2) 76 | A <- matrix(c(0, 0, 0, 0), nrow=2, ncol=2) 77 | A <- matrix(0, 2, 2) 78 | 79 | A <- matrix(1:8, nrow=2, ncol=4) 80 | ``` 81 | 82 | ## Example 1.5 (Iris data cont.) 
83 | 84 | ```{r } 85 | x <- as.matrix(iris[,1:4]) #all rows of columns 1 to 4 86 | 87 | mean(x[,2]) #mean of sepal width, all species 88 | mean(x[51:100,3]) #mean of petal length, versicolor 89 | 90 | y <- array(x, dim=c(50, 3, 4)) 91 | mean(y[,,2]) #mean of sepal width, all species 92 | mean(y[,2,3]) #mean of petal length, versicolor 93 | 94 | y <- array(c(x[1:50,], x[51:100,], x[101:150,]), 95 | dim=c(50, 4, 3)) 96 | mean(y[,2,]) #mean of sepal width, all species 97 | mean(y[,3,2]) #mean of petal length, versicolor 98 | ``` 99 | 100 | ## Example 1.6 (Run length encoding) 101 | 102 | ```{r } 103 | n <- 1000 104 | x <- rbinom(n, size = 1, prob = .5) 105 | table(x) 106 | head(x, 30) 107 | 108 | r <- rle(x) 109 | str(r) 110 | 111 | head(r$lengths) 112 | head(r[[1]]) 113 | 114 | max(r$lengths) 115 | log2(length(x)) 116 | ``` 117 | 118 | ## Example 1.7 (Named list) 119 | 120 | ```{r } 121 | w <- wilcox.test(rnorm(10), rnorm(10, 2)) 122 | w #print the summary 123 | 124 | w$statistic #stored in object w 125 | w$p.value 126 | unlist(w) 127 | unclass(w) 128 | ``` 129 | 130 | ## Example 1.8 (A list of names) 131 | 132 | ```{r } 133 | a <- matrix(runif(8), 4, 2) #a 4x2 matrix 134 | dimnames(a) <- list(NULL, c("x", "y")) 135 | 136 | # if we want row names 137 | dimnames(a) <- list(letters[1:4], c("x", "y")) 138 | a 139 | 140 | # another way to assign row names 141 | row.names(a) <- list("NE", "NW", "SW", "SE") 142 | a 143 | ``` 144 | 145 | ## Example 1.9 (Parallel boxplots) 146 | 147 | ```{r } 148 | boxplot(iris$Sepal.Length ~ iris$Species) 149 | 150 | boxplot(iris$Sepal.Length ~ iris$Species, 151 | ylab = "Sepal Length", boxwex = .4) 152 | ``` 153 | 154 | ## Example 1.10 (Plotting characters and colors) 155 | 156 | ```{r } 157 | plot(0:25, rep(1, 26), pch = 0:25) 158 | text(0:25, 0.9, 0:25) 159 | ``` 160 | 161 | ## Example 1.11 (Barplot for run lengths) 162 | 163 | ```{r } 164 | barplot(table(r$lengths)) #R graphics version 165 | 166 | ## ggplot version 167 | library(ggplot2) 168 | df <- data.frame(lengths = factor(r$lengths)) 169 | ggplot(df, aes(lengths)) + geom_bar() 170 | ``` 171 | 172 | ## Example 1.12 (Scatterplots) 173 | 174 | ```{r } 175 | ggplot(data = iris, aes(x = Sepal.Length, y = Sepal.Width)) + 176 | geom_point() 177 | 178 | ggplot(iris, aes(Sepal.Length, Sepal.Width, 179 | color = Species, shape = Species)) + geom_point(size = 2) 180 | ``` 181 | 182 | ## Example 1.13 (ggplot: parallel boxplots and violin plots) 183 | 184 | ```{r } 185 | ggplot(iris, aes(Species, Sepal.Length)) + geom_boxplot() 186 | ggplot(iris, aes(Species, Sepal.Length)) + geom_violin() 187 | 188 | ggplot(iris, aes(Species, Sepal.Length)) + 189 | geom_boxplot() + coord_flip() 190 | ggplot(iris, aes(Species, Sepal.Length)) + 191 | geom_violin() + coord_flip() 192 | ``` 193 | 194 | ## Example 1.14 (MPG by engine displacement) 195 | 196 | ```{r } 197 | ggplot(mpg, aes(displ, hwy)) + 198 | geom_point() + 199 | facet_wrap(~ class) 200 | ``` 201 | 202 | ## Example 1.15 (Import data from a local text file) 203 | 204 | ```{r } 205 | # copy "FOREARM.DAT" into working directory 206 | forearm <- scan(file = "FOREARM.DAT") #a vector 207 | 208 | # use pathname if file is not in working directory 209 | # forearm <- scan(file = "./DATASETS/FOREARM.DAT") #a vector 210 | 211 | head(forearm) 212 | ``` 213 | 214 | ## Example 1.16 (Importing data from a web page) 215 | 216 | ```{r } 217 | fileloc <- "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data" 218 | 219 | df <- read.table(file = fileloc, na.strings 
= "?", as.is = TRUE) 220 | str(df) 221 | names(df) <- c("mpg", "cyl", "displ", "hp", "wt", "accel", 222 | "year", "origin", "name") 223 | summary(df) 224 | ``` 225 | 226 | ## Example 1.17 (Importing/exporting .csv files) 227 | 228 | ```{r } 229 | #create a data frame 230 | dates <- c("3/27/1995", "4/3/1995", 231 | "4/10/1995", "4/18/1995") 232 | prices <- c(11.1, 7.9, 1.9, 7.3) 233 | d <- data.frame(dates=dates, prices=prices) 234 | 235 | #create the .csv file 236 | filename <- "temp.csv" 237 | write.table(d, file = filename, sep = ",", 238 | row.names = FALSE) 239 | 240 | #read the .csv file 241 | read.table(file = filename, sep = ",", header = TRUE) 242 | read.csv(file = filename) #same thing 243 | ``` 244 | 245 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-Programming.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | --- 11 | 12 | # Chapter 15: Programming Topics 13 | 14 | packages to install: 15 | energy, ggplot2, profvis, pryr, microbenchmark, mvtnorm, rbenchmark 16 | 17 | for the last section: dplyr, Lahman, Rcpp 18 | 19 | ## Example 15.1 (Benchmarking methods to generate a sequence) 20 | 21 | ```{r } 22 | s1 <- 1:10 23 | s2 <- seq(1, 10, 1) 24 | s3 <- seq.int(1, 10, 1) 25 | df <- data.frame(s1=s1, s2=s2, s3=s3) 26 | str(df) 27 | 28 | library(microbenchmark) 29 | library(ggplot2) 30 | 31 | n <- 1000 32 | mb <- microbenchmark( 33 | seq(1, n, 1), 34 | seq.int(1, n, 1), 35 | 1:n 36 | ) 37 | 38 | print(mb) 39 | autoplot(mb) # display a violin plot 40 | ``` 41 | 42 | ## Example 15.2 (Benchmarking methods to initialize a vector) 43 | 44 | ```{r } 45 | n <- 100 46 | mb2 <- microbenchmark( 47 | numeric = numeric(n) + 1, 48 | rep = rep(1, n), 49 | seq = seq(from=1, to=1, length=n), 50 | ones = matrix(1, nrow=n, ncol=1), 51 | as.ones = as.matrix(rep(1, n)) 52 | ) 53 | 54 | print(mb2) 55 | ``` 56 | 57 | ## Example 15.3 (Timings of two multivariate normal generators) 58 | 59 | ```{r } 60 | library(rbenchmark) 61 | library(MASS) 62 | library(mvtnorm) 63 | n <- 100 #sample size 64 | d <- 30 #dimension 65 | N <- 2000 #iterations 66 | mu <- numeric(d) 67 | 68 | benchmark( 69 | columns = c("test", "replications", "elapsed", "relative"), 70 | replications = 2000, 71 | cov = {S <- cov(matrix(rnorm(n*d), n, d))}, 72 | mvrnorm = mvrnorm(n, mu, S), 73 | rmvnorm = rmvnorm(n, mu, S) 74 | ) 75 | ``` 76 | 77 | ## Example 15.4 (Profiling with Rprof) 78 | 79 | ```{r } 80 | x <- rnorm(1000) 81 | y <- rnorm(1000) 82 | 83 | Rprof("pr.out", line.profiling = TRUE) 84 | energy::dcor(x, y) 85 | Rprof(NULL) 86 | summaryRprof("pr.out") 87 | ``` 88 | 89 | ## Example 15.5 (profvis interactive visualization) 90 | 91 | ```{r } 92 | library(profvis) 93 | profvis(energy::dcor(x, y)) 94 | ``` 95 | 96 | ## Example 15.6 (Object size) 97 | 98 | ```{r } 99 | x <- matrix(rnorm(5000), 1000, 5) #1000 obs in R^5 100 | DF <- as.data.frame(x) 101 | object.size(x) 102 | object.size(DF) 103 | pryr::object_size(x) 104 | pryr::object_size(DF) 105 | pryr::compare_size(x) 106 | 107 | listTwo <- list(x, x) 108 | pryr::compare_size(listTwo) 109 | ``` 110 | 111 | ## Example 15.7 (Comparing objects and attributes) 112 | 113 | ```{r } 114 | str(x) 115 | str(DF) 116 | 
all.equal(x, DF) 117 | names(attributes(x)) 118 | names(attributes(DF)) 119 | all.equal(x, DF, check.attributes = FALSE) 120 | ``` 121 | 122 | ## Example 15.8 (Comparing objects for equality) 123 | 124 | ```{r } 125 | try(ifelse(all.equal(x, DF), "T", "F")) # error 126 | ifelse(isTRUE(all.equal(x, DF)), "T", "F") # correct 127 | 128 | x <- 1 - 10e-4 129 | y <- x + 2 130 | x == (y - 2) # equal mathematically but 131 | isTRUE(all.equal(x, y - 2)) #gives expected result 132 | 133 | ## does not necessarily evaluate to TRUE or FALSE 134 | try(ifelse(all.equal(x, y), "T", "F")) 135 | ## returns TRUE or FALSE 136 | ifelse(isTRUE(all.equal(x, y)), "T", "F") 137 | ``` 138 | 139 | ## Example 15.9 (Display R function code) 140 | 141 | ```{r } 142 | nclass.scott 143 | ``` 144 | 145 | ## Example 15.10 (RSiteSearch) 146 | 147 | ```{r } 148 | if (interactive()) 149 | RSiteSearch("ggcorr") 150 | ``` 151 | 152 | ## Example 15.11 (UseMethod) 153 | 154 | ```{r } 155 | body(density) 156 | args(density.default) 157 | body(density.default) 158 | ``` 159 | 160 | ## Example 15.12 (Show methods) 161 | 162 | ```{r } 163 | methods(t.test) 164 | getAnywhere(t.test.formula) 165 | body(stats:::t.test.formula) 166 | ``` 167 | 168 | ## Example 15.13 (Object not found or not an exported object) 169 | 170 | ```{r } 171 | try(perc.ci) 172 | try(boot::perc.ci) 173 | getAnywhere(perc.ci) 174 | args(boot:::perc.ci) 175 | body(boot:::perc.ci) 176 | boot:::perc.ci 177 | getFromNamespace("perc.ci", "boot") 178 | ``` 179 | 180 | ## Example 15.14 (getS3method) 181 | 182 | ```{r } 183 | library(microbenchmark) 184 | library(ggplot2) 185 | getAnywhere(autoplot) 186 | getS3method("autoplot", class = "microbenchmark") 187 | getAnywhere(autoplot.microbenchmark) 188 | ``` 189 | 190 | ## Example 15.15 (.Primitive or .Internal) 191 | 192 | ```{r } 193 | if (interactive()) 194 | pryr::show_c_source(.Primitive(cumsum(x))) 195 | ``` 196 | 197 | ## Example 15.16 (.Call, .External, .C or .Fortran) 198 | 199 | ```{r } 200 | #### dist is implemented by a .Call to C_Cdist 201 | 202 | body(dist) 203 | ``` 204 | 205 | ## Example 15.17 (A first Rcpp experiment) 206 | ### In RStudio use the menu 207 | ### File > New File > C++ file to display this code 208 | ## Example 15.18 (cppFunction) 209 | 210 | ```{r } 211 | library(Rcpp) 212 | 213 | set.seed(1) 214 | x <- matrix(rnorm(20), nrow = 5, ncol = 4) 215 | 216 | cppFunction('double vecnorm(NumericVector v) { 217 | // compute the Euclidean norm of vector v 218 | int d = v.size(); 219 | double s = 0.0; 220 | for (int i = 0; i < d; i++) 221 | s += v(i) * v(i); 222 | return sqrt(s); 223 | }') 224 | 225 | print(vecnorm(x[, 1])) 226 | print(apply(x, MARGIN = 2, FUN = "vecnorm")) 227 | ``` 228 | 229 | ## Example 15.19 (sourceCpp) 230 | ### Create the C++ source file "printme.cpp" by editing the template 231 | ### in RStudio menu: File > New File > C++ 232 | 233 | ```{r } 234 | library(Rcpp) 235 | sourceCpp("..//examples-cpp//printme.cpp") 236 | x <- sample(1:5) 237 | print_me(x) 238 | ``` 239 | 240 | ## Example 15.20 (Lahman baseball data) 241 | 242 | ```{r } 243 | library(Lahman) 244 | str(Batting) 245 | # method 1 246 | S <- subset(Batting, Batting$yearID == 1999, 247 | select = c("playerID", "AB", "H")) 248 | 249 | # method 2 (dplyr) 250 | library(dplyr) 251 | Batting %>% filter(yearID == 1999) -> b 252 | 253 | # method 1 254 | AB <- as.vector(by(S$AB, S$playerID, FUN = sum)) 255 | H <- as.vector(by(S$H, S$playerID, FUN = sum)) 256 | S <- data.frame(playerID = unique(S$playerID), 257 | AB = AB, H = H, AVG 
= round(H / AB, 3), 258 | stringsAsFactors = FALSE) 259 | S400 <- S[S$AB >= 400, ] 260 | 261 | # method 2 (dplyr) 262 | b %>% group_by(playerID) %>% 263 | summarize(AB = sum(AB), H = sum(H)) -> S 264 | S %>% mutate(AVG = round(H / AB, 3)) -> S 265 | S %>% filter(AB >= 400) -> S400 266 | 267 | # method 1 268 | o <- order(S400$AVG, decreasing = TRUE) 269 | S400 <- S400[o, ] 270 | top <- S400[1:10, ] 271 | 272 | # method 2 (dplyr) 273 | S400 %>% arrange(desc(AVG)) -> S400 274 | slice(S400, 1:10) -> top 275 | top 276 | 277 | People %>% select(playerID, nameFirst, nameLast) -> m 278 | top %>% inner_join(m) %>% 279 | select(nameFirst, nameLast, AVG) 280 | ``` 281 | 282 | ## Example 15.21 (Comparison with microbenchmark) 283 | ### The source for this example is in R Markdown format 284 | ### Open "Lahman.Rmd" in RStudio and knit to see report 285 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-ResampAppl.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 9: Resampling Applications 14 | 15 | ## Example 9.1 (Jackknife-after-bootstrap) 16 | 17 | ```{r } 18 | library(boot) 19 | library(bootstrap) 20 | set.seed(1111) 21 | 22 | theta.boot <- function(patch, i) { 23 | # function to compute the patch ratio statistic 24 | y <- patch[i, "y"] 25 | z <- patch[i, "z"] 26 | mean(y) / mean(z) 27 | } 28 | 29 | boot.out <- boot(bootstrap::patch, 30 | statistic = theta.boot, R=2000) 31 | A <- boot.array(boot.out) 32 | head(A, 3) 33 | mean(A[, 1] == 0) 34 | ``` 35 | 36 | jackknife-after-bootstrap to est. se(se) 37 | 38 | ```{r } 39 | A <- boot.array(boot.out) 40 | theta.b <- boot.out$t 41 | n <- NROW(patch) 42 | jack.se <- numeric(n) 43 | 44 | for (i in 1:n) { 45 | #in i-th replicate omit all samples with x[i] 46 | keep <- which(A[, i] == 0) 47 | jack.se[i] <- sd(theta.b[keep]) 48 | } 49 | 50 | print(boot.out) #for se_boot 51 | se.bar <- mean(jack.se) 52 | se.se <- sqrt((n-1) * mean((jack.se - se.bar)^2)) 53 | print(paste("Jackknife-after-bootstrap est. se(se)=", se.se)) 54 | ``` 55 | 56 | ## Example 9.2 (Jackknife-after-bootstrap) 57 | initialize 58 | 59 | ```{r } 60 | data(patch, package = "bootstrap") 61 | y <- patch$y 62 | z <- patch$z 63 | dat <- cbind(y, z) 64 | n <- NROW(dat) 65 | B <- 2000 66 | ``` 67 | 68 | jackknife-after-bootstrap step 1: run the bootstrap 69 | 70 | ```{r } 71 | theta_boot <- function(dat, ind) { 72 | # function to compute the statistic 73 | y <- dat[ind, 1] 74 | z <- dat[ind, 2] 75 | mean(y) / mean(z) 76 | } 77 | 78 | boot.obj <- boot(dat, statistic = theta_boot, R=2000) 79 | theta.hat <- boot.obj$t0 80 | theta.b <- boot.obj$t 81 | se.boot <- sd(theta.b) 82 | ``` 83 | 84 | jackknife-after-bootstrap to est. 
se(se) 85 | 86 | ```{r } 87 | sample.freq <- boot.array(boot.obj) 88 | se.se.reps <- numeric(n) 89 | N <- 1:n 90 | 91 | for (i in N) { 92 | # jackknife-after-bootstrap 93 | # omit all bootstrap samples that contain obs i 94 | keep <- which(sample.freq[ ,i] == 0) 95 | se.se.reps[i] <- sd(theta.b[keep]) 96 | } 97 | 98 | print(boot.obj) 99 | se.bar <- mean(se.se.reps) 100 | se.se <- sqrt((n-1) * mean((se.se.reps - se.bar)^2)) 101 | se.se 102 | ``` 103 | 104 | ## Example 9.3 (ironslag linear model) 105 | 106 | ```{r } 107 | library(ggplot2) 108 | library(DAAG) 109 | L1 <- lm(magnetic ~ chemical, data=ironslag) 110 | cf3 <- round(L1$coeff, 3) 111 | cap <- paste("Fit: magnetic =", cf3[1], "+", cf3[2], "chemical") 112 | 113 | ggplot(data=ironslag, aes(chemical, magnetic)) + 114 | geom_point() + geom_smooth(method="lm") + 115 | ggtitle(cap) 116 | 117 | plot(L1, which=1:2, ask=FALSE) #residual plots 118 | ``` 119 | 120 | ## Example 9.4 (mammals data) 121 | 122 | ```{r, mammals1} 123 | library(MASS) 124 | cor(log(mammals$body), log(mammals$brain)) 125 | summary(mammals) 126 | 127 | y <- log(mammals$brain) 128 | x <- log(mammals$body) 129 | L <- lm(y ~ x) 130 | L 131 | 132 | cap <- paste("Fit: log(brain) =", round(L$coeff[1],3), 133 | "+", round(L$coeff[2],3), "log(body)") 134 | ggplot(data=mammals, aes(x, y)) + 135 | geom_point() + geom_smooth(method="lm") + 136 | labs(x = "log(body)", y = "log(brain)", title = cap) 137 | 138 | summary(L)$r.squared 139 | ``` 140 | 141 | ## Example 9.5 (ironslag data, resampling cases) 142 | 143 | ```{r } 144 | x <- ironslag$chemical 145 | y <- ironslag$magnetic 146 | m <- 2000 147 | n <- NROW(x) 148 | L1 <- lm(y ~ x) #estimate the model 149 | b0 <- L1$coeff[1]; b1 <- L1$coeff[2] 150 | ``` 151 | 152 | #### run bootstrap of cases 153 | 154 | ```{r } 155 | out <- replicate(m, expr={ 156 | i <- sample(1:n, replace=TRUE, size=n) 157 | xstar <- x[i] 158 | ystar <- y[i] 159 | Lb <- lm(ystar ~ xstar) 160 | s <- summary(Lb)$sigma 161 | c(Lb$coeff[1], slope=Lb$coeff[2], s=s) 162 | }) 163 | 164 | bootCases <- t(out) 165 | meanCases <- colMeans(bootCases) 166 | sdCases <- apply(bootCases, 2, "sd") 167 | meanCases 168 | sdCases 169 | 170 | biasInt <- mean(bootCases[,1] - b0) #bias for intercept 171 | biasSlope <- mean(bootCases[,2] - b1) #bias for slope 172 | 173 | rbind(estimate=c(b0, b1), bias=c(biasInt, biasSlope), 174 | se=sdCases[1:2], cv=c(biasInt, cv=biasSlope)/sdCases[1:2]) 175 | ``` 176 | 177 | ## Example 9.6 (Resampling cases using the boot function) 178 | 179 | 180 | ```{r } 181 | set.seed(1104) 182 | library(boot) 183 | m <- 2000 184 | stats <- function(dat, i) { 185 | x <- dat$chemical[i] 186 | y <- dat$magnetic[i] 187 | Lb <- lm(y ~ x) 188 | s <- summary(Lb)$sigma 189 | c(Lb$coeff[1], slope=Lb$coeff[2], s=s) 190 | } 191 | 192 | boot.out <- boot(ironslag, statistic=stats, R=2000) 193 | boot.out 194 | boot.out$t0 195 | 196 | sd(boot.out$t[,2]) 197 | boottbl <- broom::tidy(boot.out) 198 | boottbl$std.error[2] 199 | 200 | MASS::truehist(boot.out$t[ ,2], main="", xlab="slopes") 201 | abline(v = boot.out$t0[2], lwd=2) 202 | 203 | boot.ci(boot.out, index=2, type=c("norm","perc","basic","bca")) 204 | ``` 205 | 206 | ## Example 9.7 (Resampling errors: mammals data) 207 | 208 | Resampling errors: 209 | 210 | ```{r} 211 | x <- log(mammals$brain) 212 | y <- log(mammals$body) 213 | L <- lm(y ~ x) 214 | ``` 215 | 216 | ```{r} 217 | m.resid <- rstandard(L, sd = 1) 218 | r <- m.resid - mean(m.resid) 219 | m <- 1000; n <- NROW(x) 220 | estsErr <- replicate(m, expr={ 221 | estar <- 
sample(r, replace=TRUE, size=n) 222 | ystar <- L$fitted.values + estar 223 | Lb <- lm(ystar ~ x) 224 | s <- summary(Lb)$sigma 225 | c(b0=Lb$coeff[1], b1=Lb$coeff[2], s=s) 226 | }) 227 | ests <- t(estsErr) 228 | summary(ests) 229 | ``` 230 | 231 | ## Example 9.8 (Resampling errors, continued) 232 | 233 | ```{r } 234 | sd(ests[,2]) 235 | s <- summary(L)$sigma 236 | SSx <- (n - 1) * var(x) 237 | se.beta1 <- sqrt(s^2 / SSx) 238 | se.beta1 239 | s * sqrt(1/n + mean(x)^2 / SSx) 240 | sd(ests[,1]) 241 | 242 | betas <- summary(L)$coeff 243 | betas 244 | betas[, "Std. Error"] 245 | 246 | broom::tidy(summary(L)) 247 | broom::tidy(summary(L))$std.error 248 | ``` 249 | 250 | ## Example 9.9 (Model based resampling with the boot function) 251 | 252 | ```{r } 253 | regstats <- function(dat, i) { 254 | #dat is a data frame (r, x, yhat) 255 | #r are the modified centered residuals, yhat are the fits 256 | ystar <- dat$yhat + dat$r[i] 257 | xstar <- dat$x 258 | Lnew <- lm(ystar ~ xstar) 259 | Lnew$coefficients 260 | } 261 | 262 | y <- log(mammals$brain) 263 | x <- log(mammals$body) 264 | L <- lm(y ~ x) 265 | r <- rstandard(L, sd=1) 266 | r <- r - mean(r) 267 | df <- data.frame(r=r, x=x, yhat=L$fitted) 268 | head(df) 269 | boot.obj <- boot(data=df, statistic=regstats, R=2000) 270 | broom::tidy(boot.obj) 271 | ``` 272 | 273 | ## Example 9.10 (Empirical influence values for the patch ratio statistic) 274 | 275 | ```{r } 276 | library(boot) 277 | library(bootstrap) 278 | theta_boot <- function(dat, ind) { 279 | # function to compute the patch ratio statistic 280 | mean(dat[ind, ]$y) / mean(dat[ind, ]$z) 281 | } 282 | boot.out <- boot(patch, theta_boot, R = 2000) 283 | infl <- empinf(boot.out, type = "jack") 284 | theta.hat <- boot.out$t0 285 | jack <- theta.hat - infl / (nrow(patch) - 1) 286 | rbind(infl, jack) 287 | ``` 288 | 289 | ## Example 9.11 (Jackknife-after-bootstrap plot) 290 | 291 | ```{r } 292 | jack.after.boot(boot.out, useJ=TRUE, stinf=FALSE) 293 | 294 | n <- NROW(patch) 295 | J <- numeric(n) 296 | b.freq <- boot.array(boot.out) 297 | theta.b <- boot.out$t 298 | 299 | for (i in 1:n) { 300 | keep <- which(b.freq[ ,i] == 0) 301 | J[i] <- mean(theta.b[keep]) 302 | } 303 | ``` 304 | 305 | the jackknife influence values 306 | 307 | ```{r } 308 | (n - 1) * (mean(J) - J) 309 | 310 | jack.after.boot(boot.out, useJ=TRUE, stinf=TRUE) 311 | ``` 312 | 313 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chBoot.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. 
https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 8: Bootstrap and Jackknife 14 | 15 | ## Example 8.2 (Bootstrap estimate of standard error) 16 | 17 | ```{r } 18 | library(bootstrap) #for the law data 19 | print(cor(law$LSAT, law$GPA)) 20 | print(cor(law82$LSAT, law82$GPA)) 21 | 22 | #set up the bootstrap 23 | B <- 200 #number of replicates 24 | n <- nrow(law) #sample size 25 | R <- numeric(B) #storage for replicates 26 | 27 | #bootstrap estimate of standard error of R 28 | for (b in 1:B) { 29 | #randomly select the indices 30 | i <- sample(1:n, size = n, replace = TRUE) 31 | LSAT <- law$LSAT[i] #i is a vector of indices 32 | GPA <- law$GPA[i] 33 | R[b] <- cor(LSAT, GPA) 34 | } 35 | #output 36 | print(se.R <- sd(R)) 37 | hist(R, prob = TRUE) 38 | ``` 39 | 40 | ## Example 8.3 (Bootstrap estimate of standard error: boot function) 41 | 42 | ```{r } 43 | r <- function(x, i) { 44 | #want correlation of columns 1 and 2 45 | cor(x[i,1], x[i,2]) 46 | } 47 | 48 | library(boot) #for boot function 49 | obj <- boot(data = law, statistic = r, R = 2000) 50 | obj 51 | y <- obj$t 52 | sd(y) 53 | ``` 54 | 55 | ## Example 8.4 (Bootstrap estimate of bias) 56 | 57 | ```{r } 58 | #sample estimate for n=15 59 | theta.hat <- cor(law$LSAT, law$GPA) 60 | 61 | #bootstrap estimate of bias 62 | B <- 2000 #larger for estimating bias 63 | n <- nrow(law) 64 | theta.b <- numeric(B) 65 | 66 | for (b in 1:B) { 67 | i <- sample(1:n, size = n, replace = TRUE) 68 | LSAT <- law$LSAT[i] 69 | GPA <- law$GPA[i] 70 | theta.b[b] <- cor(LSAT, GPA) 71 | } 72 | bias <- mean(theta.b - theta.hat) 73 | bias 74 | ``` 75 | 76 | ## Example 8.5 (Bootstrap estimate of bias of a ratio estimate) 77 | 78 | ```{r } 79 | data(patch, package = "bootstrap") 80 | patch 81 | 82 | n <- nrow(patch) #in bootstrap package 83 | B <- 2000 84 | theta.b <- numeric(B) 85 | theta.hat <- mean(patch$y) / mean(patch$z) 86 | 87 | #bootstrap 88 | for (b in 1:B) { 89 | i <- sample(1:n, size = n, replace = TRUE) 90 | y <- patch$y[i] 91 | z <- patch$z[i] 92 | theta.b[b] <- mean(y) / mean(z) 93 | } 94 | bias <- mean(theta.b) - theta.hat 95 | se <- sd(theta.b) 96 | print(list(est=theta.hat, bias = bias, 97 | se = se, cv = bias/se)) 98 | ``` 99 | 100 | ## Example 8.6 (Jackknife estimate of bias) 101 | 102 | ```{r } 103 | data(patch, package = "bootstrap") 104 | n <- nrow(patch) 105 | y <- patch$y 106 | z <- patch$z 107 | theta.hat <- mean(y) / mean(z) 108 | print (theta.hat) 109 | 110 | #compute the jackknife replicates, leave-one-out estimates 111 | theta.jack <- numeric(n) 112 | for (i in 1:n) 113 | theta.jack[i] <- mean(y[-i]) / mean(z[-i]) 114 | bias <- (n - 1) * (mean(theta.jack) - theta.hat) 115 | 116 | print(bias) #jackknife estimate of bias 117 | ``` 118 | 119 | ## Example 8.7 (Jackknife estimate of standard error) 120 | 121 | ```{r } 122 | se <- sqrt((n-1) * 123 | mean((theta.jack - mean(theta.jack))^2)) 124 | print(se) 125 | ``` 126 | 127 | ## Example 8.8 (Failure of jackknife) 128 | 129 | ```{r } 130 | set.seed(123) #for the specific example given 131 | #change the seed to see other examples 132 | 133 | n <- 10 134 | x <- sample(1:100, size = n) 135 | 136 | #jackknife estimate of se 137 | M <- numeric(n) 138 | for (i in 1:n) { #leave one out 139 | y <- x[-i] 140 | M[i] <- median(y) 141 | } 142 | Mbar <- mean(M) 143 | print(sqrt((n-1)/n * sum((M - Mbar)^2))) 144 | 145 | #bootstrap estimate of se 146 | Mb <- replicate(1000, 
expr = { 147 | y <- sample(x, size = n, replace = TRUE) 148 | median(y) }) 149 | print(sd(Mb)) 150 | print(x) 151 | print(M) 152 | print(Mb) 153 | ``` 154 | 155 | ## Example 8.9 (Bootstrap confidence intervals for patch ratio statistic) 156 | 157 | ```{r } 158 | library(boot) #for boot and boot.ci 159 | data(patch, package = "bootstrap") 160 | 161 | theta.boot <- function(dat, ind) { 162 | #function to compute the statistic 163 | y <- dat[ind, 1] 164 | z <- dat[ind, 2] 165 | mean(y) / mean(z) 166 | } 167 | 168 | y <- patch$y 169 | z <- patch$z 170 | dat <- cbind(y, z) 171 | boot.obj <- boot(dat, statistic = theta.boot, R = 2000) 172 | 173 | print(boot.obj) 174 | print(boot.ci(boot.obj, 175 | type = c("basic", "norm", "perc"))) 176 | 177 | 178 | #calculations for bootstrap confidence intervals 179 | alpha <- c(.025, .975) 180 | 181 | #normal 182 | print(boot.obj$t0 + qnorm(alpha) * sd(boot.obj$t)) 183 | 184 | #basic 185 | print(2*boot.obj$t0 - 186 | quantile(boot.obj$t, rev(alpha), type=1)) 187 | 188 | #percentile 189 | print(quantile(boot.obj$t, alpha, type=6)) 190 | ``` 191 | 192 | ## Example 8.10 (Bootstrap confidence intervals for the correlation statistic) 193 | 194 | ```{r } 195 | library(boot) 196 | data(law, package = "bootstrap") 197 | boot.obj <- boot(law, R = 2000, 198 | statistic = function(x, i){cor(x[i,1], x[i,2])}) 199 | print(boot.ci(boot.obj, type=c("basic","norm","perc"))) 200 | ``` 201 | 202 | ## Example 8.11 (Bootstrap t confidence interval) 203 | 204 | ```{r } 205 | boot.t.ci <- 206 | function(x, B = 500, R = 100, level = .95, statistic){ 207 | #compute the bootstrap t CI 208 | x <- as.matrix(x); n <- nrow(x) 209 | stat <- numeric(B); se <- numeric(B) 210 | 211 | boot.se <- function(x, R, f) { 212 | #local function to compute the bootstrap 213 | #estimate of standard error for statistic f(x) 214 | x <- as.matrix(x); m <- nrow(x) 215 | th <- replicate(R, expr = { 216 | i <- sample(1:m, size = m, replace = TRUE) 217 | f(x[i, ]) 218 | }) 219 | return(sd(th)) 220 | } 221 | 222 | for (b in 1:B) { 223 | j <- sample(1:n, size = n, replace = TRUE) 224 | y <- x[j, ] 225 | stat[b] <- statistic(y) 226 | se[b] <- boot.se(y, R = R, f = statistic) 227 | } 228 | stat0 <- statistic(x) 229 | t.stats <- (stat - stat0) / se 230 | se0 <- sd(stat) 231 | alpha <- 1 - level 232 | Qt <- quantile(t.stats, c(alpha/2, 1-alpha/2), type = 1) 233 | names(Qt) <- rev(names(Qt)) 234 | CI <- rev(stat0 - Qt * se0) 235 | } 236 | ``` 237 | 238 | ## Example 8.12 (Bootstrap t confidence interval for patch ratio statistic) 239 | 240 | ```{r } 241 | #boot package and patch data were loaded in Example 8.10 242 | #library(boot) #for boot and boot.ci 243 | #data(patch, package = "bootstrap") 244 | 245 | dat <- cbind(patch$y, patch$z) 246 | stat <- function(dat) { 247 | mean(dat[, 1]) / mean(dat[, 2]) } 248 | ci <- boot.t.ci(dat, statistic = stat, B=2000, R=200) 249 | print(ci) 250 | ``` 251 | 252 | ## Example 8.13 (BCa bootstrap confidence interval) 253 | 254 | ```{r } 255 | boot.BCa <- 256 | function(x, th0, th, stat, conf = .95) { 257 | # bootstrap with BCa bootstrap confidence interval 258 | # th0 is the observed statistic 259 | # th is the vector of bootstrap replicates 260 | # stat is the function to compute the statistic 261 | 262 | x <- as.matrix(x) 263 | n <- nrow(x) #observations in rows 264 | N <- 1:n 265 | alpha <- (1 + c(-conf, conf))/2 266 | zalpha <- qnorm(alpha) 267 | 268 | # the bias correction factor 269 | z0 <- qnorm(sum(th < th0) / length(th)) 270 | 271 | # the acceleration factor (jackknife 
est.) 272 | th.jack <- numeric(n) 273 | for (i in 1:n) { 274 | J <- N[1:(n-1)] 275 | th.jack[i] <- stat(x[-i, ], J) 276 | } 277 | L <- mean(th.jack) - th.jack 278 | a <- sum(L^3)/(6 * sum(L^2)^1.5) 279 | 280 | # BCa conf. limits 281 | adj.alpha <- pnorm(z0 + (z0+zalpha)/(1-a*(z0+zalpha))) 282 | limits <- quantile(th, adj.alpha, type=6) 283 | return(list("est"=th0, "BCa"=limits)) 284 | } 285 | ``` 286 | 287 | ## Example 8.14 (BCa bootstrap confidence interval) 288 | 289 | ```{r } 290 | #boot package and patch data were loaded in Example 8.10 291 | #library(boot) #for boot and boot.ci 292 | #data(patch, package = "bootstrap") 293 | 294 | n <- nrow(patch) 295 | B <- 2000 296 | y <- patch$y 297 | z <- patch$z 298 | x <- cbind(y, z) 299 | theta.b <- numeric(B) 300 | theta.hat <- mean(y) / mean(z) 301 | 302 | #bootstrap 303 | for (b in 1:B) { 304 | i <- sample(1:n, size = n, replace = TRUE) 305 | y <- patch$y[i] 306 | z <- patch$z[i] 307 | theta.b[b] <- mean(y) / mean(z) 308 | } 309 | #compute the BCa interval 310 | stat <- function(dat, index) { 311 | mean(dat[index, 1]) / mean(dat[index, 2]) } 312 | 313 | boot.BCa(x, th0 = theta.hat, th = theta.b, stat = stat) 314 | ``` 315 | 316 | ## Example 8.15 (BCa bootstrap confidence interval using boot.ci) 317 | 318 | ```{r } 319 | #using x from Example 8.15 320 | boot.obj <- boot(x, statistic = stat, R=2000) 321 | boot.ci(boot.obj, type=c("perc", "bca")) 322 | ``` 323 | 324 | ## Example 8.16 (Model selection) 325 | 326 | ```{r } 327 | #to prompt for next graph, uncomment line below 328 | #par(ask = TRUE) 329 | 330 | library(DAAG); attach(ironslag) 331 | a <- seq(10, 40, .1) #sequence for plotting fits 332 | 333 | L1 <- lm(magnetic ~ chemical) 334 | plot(chemical, magnetic, main="Linear", pch=16) 335 | yhat1 <- L1$coef[1] + L1$coef[2] * a 336 | lines(a, yhat1, lwd=2) 337 | 338 | L2 <- lm(magnetic ~ chemical + I(chemical^2)) 339 | plot(chemical, magnetic, main="Quadratic", pch=16) 340 | yhat2 <- L2$coef[1] + L2$coef[2] * a + L2$coef[3] * a^2 341 | lines(a, yhat2, lwd=2) 342 | 343 | L3 <- lm(log(magnetic) ~ chemical) 344 | plot(chemical, magnetic, main="Exponential", pch=16) 345 | logyhat3 <- L3$coef[1] + L3$coef[2] * a 346 | yhat3 <- exp(logyhat3) 347 | lines(a, yhat3, lwd=2) 348 | 349 | L4 <- lm(log(magnetic) ~ log(chemical)) 350 | plot(log(chemical), log(magnetic), main="Log-Log", pch=16) 351 | logyhat4 <- L4$coef[1] + L4$coef[2] * log(a) 352 | lines(log(a), logyhat4, lwd=2) 353 | ``` 354 | 355 | ## Example 8.17 (Model selection: Cross validation) 356 | 357 | ```{r } 358 | # Example 8.16, cont. 
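# leave-one-out ("n-fold") cross validation: each observation k is
# held out once, the four candidate models are refit on the remaining
# n - 1 points, and the held-out prediction errors e1[k], ..., e4[k]
# are recorded; the model minimizing the mean squared prediction
# error is selected (here L2, the quadratic model).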
359 | n <- length(magnetic) #in DAAG ironslag 360 | e1 <- e2 <- e3 <- e4 <- numeric(n) 361 | 362 | # for n-fold cross validation 363 | # fit models on leave-one-out samples 364 | for (k in 1:n) { 365 | y <- magnetic[-k] 366 | x <- chemical[-k] 367 | 368 | J1 <- lm(y ~ x) 369 | yhat1 <- J1$coef[1] + J1$coef[2] * chemical[k] 370 | e1[k] <- magnetic[k] - yhat1 371 | 372 | J2 <- lm(y ~ x + I(x^2)) 373 | yhat2 <- J2$coef[1] + J2$coef[2] * chemical[k] + 374 | J2$coef[3] * chemical[k]^2 375 | e2[k] <- magnetic[k] - yhat2 376 | 377 | J3 <- lm(log(y) ~ x) 378 | logyhat3 <- J3$coef[1] + J3$coef[2] * chemical[k] 379 | yhat3 <- exp(logyhat3) 380 | e3[k] <- magnetic[k] - yhat3 381 | 382 | J4 <- lm(log(y) ~ log(x)) 383 | logyhat4 <- J4$coef[1] + J4$coef[2] * log(chemical[k]) 384 | yhat4 <- exp(logyhat4) 385 | e4[k] <- magnetic[k] - yhat4 386 | } 387 | 388 | 389 | c(mean(e1^2), mean(e2^2), mean(e3^2), mean(e4^2)) 390 | 391 | #selected model, fitted in Example 8.16 392 | L2 393 | 394 | par(mfrow = c(2, 2)) #layout for graphs 395 | plot(L2$fit, L2$res) #residuals vs fitted values 396 | abline(0, 0) #reference line 397 | qqnorm(L2$res) #normal probability plot 398 | qqline(L2$res) #reference line 399 | par(mfrow = c(1, 1)) #restore display 400 | ``` 401 | 402 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chMCint.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 6: Monte Carlo Integration and Variance Reduction 14 | 15 | ## Example 6.1 (Simple Monte Carlo integration) 16 | 17 | ```{r } 18 | m <- 10000 19 | x <- runif(m) 20 | theta.hat <- mean(exp(-x)) 21 | print(theta.hat) 22 | print(1 - exp(-1)) 23 | ``` 24 | 25 | ## Example 6.2 (Simple Monte Carlo integration, cont.) 26 | 27 | ```{r } 28 | m <- 10000 29 | x <- runif(m, min=2, max=4) 30 | theta.hat <- mean(exp(-x)) * 2 31 | print(theta.hat) 32 | print(exp(-2) - exp(-4)) 33 | ``` 34 | 35 | ## Example 6.3 (Monte Carlo integration, unbounded interval) 36 | 37 | ```{r } 38 | x <- seq(.1, 2.5, length = 10) 39 | m <- 10000 40 | u <- runif(m) 41 | cdf <- numeric(length(x)) 42 | for (i in 1:length(x)) { 43 | g <- x[i] * exp(-(u * x[i])^2 / 2) 44 | cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5 45 | } 46 | 47 | Phi <- pnorm(x) 48 | print(round(rbind(x, cdf, Phi), 3)) 49 | ``` 50 | 51 | ## Example 6.4 (Example 6.3, cont.) 
52 | 53 | ```{r } 54 | x <- seq(.1, 2.5, length = 10) 55 | m <- 10000 56 | z <- rnorm(m) 57 | dim(x) <- length(x) 58 | p <- apply(x, MARGIN = 1, 59 | FUN = function(x, z) {mean(z < x)}, z = z) 60 | 61 | Phi <- pnorm(x) 62 | print(round(rbind(x, p, Phi), 3)) 63 | ``` 64 | 65 | ## Example 6.5 (Error bounds for MC integration) 66 | 67 | ```{r } 68 | x <- 2 69 | m <- 10000 70 | z <- rnorm(m) 71 | g <- (z < x) #the indicator function 72 | v <- mean((g - mean(g))^2) / m 73 | cdf <- mean(g) 74 | c(cdf, v) 75 | c(cdf - 1.96 * sqrt(v), cdf + 1.96 * sqrt(v)) 76 | ``` 77 | 78 | ## Example 6.6 (Antithetic variables) 79 | 80 | ```{r } 81 | MC.Phi <- function(x, R = 10000, antithetic = TRUE) { 82 | u <- runif(R/2) 83 | if (!antithetic) v <- runif(R/2) else 84 | v <- 1 - u 85 | u <- c(u, v) 86 | cdf <- numeric(length(x)) 87 | for (i in 1:length(x)) { 88 | g <- x[i] * exp(-(u * x[i])^2 / 2) 89 | cdf[i] <- mean(g) / sqrt(2 * pi) + 0.5 90 | } 91 | cdf 92 | } 93 | 94 | 95 | x <- seq(.1, 2.5, length=5) 96 | Phi <- pnorm(x) 97 | set.seed(123) 98 | MC1 <- MC.Phi(x, anti = FALSE) 99 | set.seed(123) 100 | MC2 <- MC.Phi(x) 101 | print(round(rbind(x, MC1, MC2, Phi), 5)) 102 | 103 | 104 | m <- 1000 105 | MC1 <- MC2 <- numeric(m) 106 | x <- 1.95 107 | for (i in 1:m) { 108 | MC1[i] <- MC.Phi(x, R = 1000, anti = FALSE) 109 | MC2[i] <- MC.Phi(x, R = 1000) 110 | } 111 | 112 | print(sd(MC1)) 113 | print(sd(MC2)) 114 | print((var(MC1) - var(MC2))/var(MC1)) 115 | ``` 116 | 117 | ## Example 6.7 (Control variate) 118 | 119 | ```{r } 120 | m <- 10000 121 | a <- - 12 + 6 * (exp(1) - 1) 122 | U <- runif(m) 123 | T1 <- exp(U) #simple MC 124 | T2 <- exp(U) + a * (U - 1/2) #controlled 125 | 126 | mean(T1) 127 | mean(T2) 128 | (var(T1) - var(T2)) / var(T1) 129 | ``` 130 | 131 | ## Example 6.8 (MC integration using control variates) 132 | 133 | ```{r } 134 | f <- function(u) 135 | exp(-.5)/(1+u^2) 136 | 137 | g <- function(u) 138 | exp(-u)/(1+u^2) 139 | 140 | set.seed(510) #needed later 141 | u <- runif(10000) 142 | B <- f(u) 143 | A <- g(u) 144 | 145 | cor(A, B) 146 | a <- -cov(A,B) / var(B) #est of c* 147 | a 148 | 149 | m <- 100000 150 | u <- runif(m) 151 | T1 <- g(u) 152 | T2 <- T1 + a * (f(u) - exp(-.5)*pi/4) 153 | 154 | c(mean(T1), mean(T2)) 155 | c(var(T1), var(T2)) 156 | (var(T1) - var(T2)) / var(T1) 157 | ``` 158 | 159 | ## Example 6.9 (Control variate and regression) 160 | 161 | ```{r } 162 | set.seed(510) 163 | mu <- exp(-.5)*pi/4 164 | u <- runif(10000) 165 | f <- exp(-.5)/(1+u^2) 166 | g <- exp(-u)/(1+u^2) 167 | L <- lm(g ~ f) 168 | L 169 | c.star <- - L$coeff[2] 170 | c.star 171 | 172 | theta.hat <- sum(L$coeff * c(1, mu)) #pred. value at mu 173 | theta.hat 174 | summary(L)$sigma^2 175 | summary(L)$r.squared 176 | ``` 177 | 178 | ## Example 6.10 (Control variates and multiple regression) 179 | 180 | ```{r } 181 | # Example 6.9 continued with a second control variate 182 | # and multiple regression to estimate vector c* 183 | 184 | u <- runif(10000) 185 | f1 <- exp(-.5) / (1+u^2) 186 | f2 <- exp(-u) / (1-exp(-1)) 187 | g <- exp(-u) / (1+u^2) 188 | 189 | L2 <- lm(g ~ f1 + f2) 190 | 191 | L2$coeff 192 | c.star <- - L2$coeff[2:3] 193 | c.star 194 | mu1 <- exp(-.5)*pi/4 195 | mu2 <- 1 196 | mu <- c(mu1, mu2) 197 | 198 | # theta.hat is the predicted response at mu 199 | # alternately can use predict.lm method 200 | 201 | theta.hat <- sum(L2$coeff * c(1, mu)) #pred. value at mu 202 | theta.hat 203 | 204 | ## alternately 205 | df <- data.frame(f1=mu1, f2=mu2) 206 | theta.hat <-predict(L2, df) 207 | 208 | # MSE / n is the est. 
variance of the control estimator 209 | MSE <- summary(L2)$sigma^2 210 | MSE 211 | sqrt(MSE / 10000) 212 | 213 | 214 | # compare with the previous estimates using 215 | # naive MC and control variate f1(u) 216 | # var1=.060231423 var2=.003124814 217 | var0 <- 0.060231423 #naive MC 218 | var1 <- 0.003117644 #controlled estimator with f1 219 | var2 <- MSE #new estimator 220 | 221 | # percent reduction in variance 222 | # it is a weighted average of R^2 values 223 | # so easier to compute directly 224 | 225 | 100 * (var0 - var1) / var0 226 | 100 * (var1 - var2) / var1 227 | 100 * (var0 - var2) / var0 228 | ``` 229 | 230 | ## Example 6.11 (Choice of the importance function) 231 | 232 | ```{r } 233 | #code for plot is at the end of the file 234 | 235 | m <- 10000 236 | theta.hat <- se <- numeric(5) 237 | g <- function(x) { 238 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) 239 | } 240 | 241 | x <- runif(m) #using f0 242 | fg <- g(x) 243 | theta.hat[1] <- mean(fg) 244 | se[1] <- sd(fg) 245 | 246 | x <- rexp(m, 1) #using f1 247 | fg <- g(x) / exp(-x) 248 | theta.hat[2] <- mean(fg) 249 | se[2] <- sd(fg) 250 | 251 | x <- rcauchy(m) #using f2 252 | i <- c(which(x > 1), which(x < 0)) 253 | x[i] <- 2 #to catch overflow errors in g(x) 254 | fg <- g(x) / dcauchy(x) 255 | theta.hat[3] <- mean(fg) 256 | se[3] <- sd(fg) 257 | 258 | u <- runif(m) #f3, inverse transform method 259 | x <- - log(1 - u * (1 - exp(-1))) 260 | fg <- g(x) / (exp(-x) / (1 - exp(-1))) 261 | theta.hat[4] <- mean(fg) 262 | se[4] <- sd(fg) 263 | 264 | u <- runif(m) #f4, inverse transform method 265 | x <- tan(pi * u / 4) 266 | fg <- g(x) / (4 / ((1 + x^2) * pi)) 267 | theta.hat[5] <- mean(fg) 268 | se[5] <- sd(fg) 269 | 270 | rbind(theta.hat, se / sqrt(m)) 271 | ``` 272 | 273 | ## Example 6.12 (Example 6.11, cont.) 274 | 275 | ```{r } 276 | M <- 20 #number of replicates 277 | T2 <- numeric(4) 278 | estimates <- matrix(0, 10, 2) 279 | 280 | g <- function(x) { 281 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) } 282 | 283 | for (i in 1:10) { 284 | estimates[i, 1] <- mean(g(runif(M))) 285 | T2[1] <- mean(g(runif(M/4, 0, .25))) 286 | T2[2] <- mean(g(runif(M/4, .25, .5))) 287 | T2[3] <- mean(g(runif(M/4, .5, .75))) 288 | T2[4] <- mean(g(runif(M/4, .75, 1))) 289 | estimates[i, 2] <- mean(T2) 290 | } 291 | 292 | estimates 293 | apply(estimates, 2, mean) 294 | apply(estimates, 2, var) 295 | ``` 296 | 297 | ## Example 6.13 (Examples 6.11-6.12, cont.) 
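Example 6.13 scales Example 6.12 up to M = 10000 replicates split across k = 10 strata, repeating the whole estimation N = 50 times to compare variances. Because the strata ((j-1)/k, j/k) have equal width, the stratified estimate is simply the average of the per-stratum sample means. The pattern generalizes to a small helper; a sketch (the function name strat_mc is mine, not the book's):

```{r }
# stratified Monte Carlo estimate of the mean of g over (0,1),
# using k equal-width strata with M/k replicates each
strat_mc <- function(g, M = 10000, k = 10) {
  r <- M / k                 # replicates per stratum
  t <- numeric(k)
  for (j in 1:k)
    t[j] <- mean(g(runif(r, (j - 1) / k, j / k)))
  mean(t)                    # strata carry equal weight 1/k
}
```

With g as defined in the example below, strat_mc(g) reproduces the second column of estimates up to Monte Carlo error.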
298 | 299 | ```{r } 300 | M <- 10000 #number of replicates 301 | k <- 10 #number of strata 302 | r <- M / k #replicates per stratum 303 | N <- 50 #number of times to repeat the estimation 304 | T2 <- numeric(k) 305 | estimates <- matrix(0, N, 2) 306 | 307 | g <- function(x) { 308 | exp(-x - log(1+x^2)) * (x > 0) * (x < 1) 309 | } 310 | 311 | for (i in 1:N) { 312 | estimates[i, 1] <- mean(g(runif(M))) 313 | for (j in 1:k) 314 | T2[j] <- mean(g(runif(M/k, (j-1)/k, j/k))) 315 | estimates[i, 2] <- mean(T2) 316 | } 317 | 318 | apply(estimates, 2, mean) 319 | apply(estimates, 2, var) 320 | ``` 321 | 322 | ## Plot importance functions in Figures 6.1(a) and 6.1.(b) 323 | 324 | ```{r } 325 | #par(ask = TRUE) #uncomment to pause between graphs 326 | 327 | x <- seq(0, 1, .01) 328 | w <- 2 329 | f1 <- exp(-x) 330 | f2 <- (1 / pi) / (1 + x^2) 331 | f3 <- exp(-x) / (1 - exp(-1)) 332 | f4 <- 4 / ((1 + x^2) * pi) 333 | g <- exp(-x) / (1 + x^2) 334 | 335 | #for color change lty to col 336 | 337 | #figure (a) 338 | plot(x, g, type = "l", main = "", ylab = "", 339 | ylim = c(0,2), lwd = w) 340 | lines(x, g/g, lty = 2, lwd = w) 341 | lines(x, f1, lty = 3, lwd = w) 342 | lines(x, f2, lty = 4, lwd = w) 343 | lines(x, f3, lty = 5, lwd = w) 344 | lines(x, f4, lty = 6, lwd = w) 345 | legend("topright", legend = c("g", 0:4), 346 | lty = 1:6, lwd = w, inset = 0.02) 347 | 348 | #figure (b) 349 | plot(x, g, type = "l", main = "", ylab = "", 350 | ylim = c(0,3.2), lwd = w, lty = 2) 351 | lines(x, g/f1, lty = 3, lwd = w) 352 | lines(x, g/f2, lty = 4, lwd = w) 353 | lines(x, g/f3, lty = 5, lwd = w) 354 | lines(x, g/f4, lty = 6, lwd = w) 355 | legend("topright", legend = c(0:4), 356 | lty = 2:6, lwd = w, inset = 0.02) 357 | ``` 358 | 359 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chMonteCarlo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 7: Monte Carlo Methods in Inference 14 | 15 | Note: Example 7.11 may take a few minutes. 16 | 17 | 18 | ## Example 7.1 (Basic Monte Carlo estimation) 19 | 20 | ```{r 7.1} 21 | m <- 1000 22 | g <- numeric(m) 23 | for (i in 1:m) { 24 | x <- rnorm(2) 25 | g[i] <- abs(x[1] - x[2]) 26 | } 27 | est <- mean(g) 28 | est 29 | ``` 30 | 31 | ## Example 7.2 (Estimating the MSE of a trimmed mean) 32 | 33 | ```{r 7.2} 34 | n <- 20 35 | m <- 1000 36 | tmean <- numeric(m) 37 | for (i in 1:m) { 38 | x <- sort(rnorm(n)) 39 | tmean[i] <- sum(x[2:(n-1)]) / (n-2) 40 | } 41 | mse <- mean(tmean^2) 42 | mse 43 | sqrt(sum((tmean - mean(tmean))^2)) / m #se 44 | 45 | n <- 20 46 | m <- 1000 47 | tmean <- numeric(m) 48 | for (i in 1:m) { 49 | x <- sort(rnorm(n)) 50 | tmean[i] <- median(x) 51 | } 52 | mse <- mean(tmean^2) 53 | mse 54 | sqrt(sum((tmean - mean(tmean))^2)) / m #se 55 | ``` 56 | 57 | ## Example 7.3 (MSE of a trimmed mean, cont.) 
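Example 7.3 repeats the trimmed-mean study for samples from the contaminated normal mixture pN(0,1) + (1-p)N(0,100): each observation is drawn with standard deviation 1 with probability p and 10 with probability 1 - p, which inflates the tails. A quick sketch of the two densities for p = 0.9 (an added illustration, not book code):

```{r }
# contaminated normal 0.9 N(0,1) + 0.1 N(0,100) vs the standard normal;
# dnorm takes the standard deviation, so N(0,100) is dnorm(., 0, 10)
u <- seq(-6, 6, .01)
fc <- .9 * dnorm(u) + .1 * dnorm(u, 0, 10)
plot(u, dnorm(u), type = "l", lty = 2, ylab = "density")
lines(u, fc)
```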
58 | 59 | ```{r 7.3} 60 | set.seed(522) 61 | n <- 20 62 | K <- n/2 - 1 63 | m <- 1000 64 | mse <- matrix(0, n/2, 6) 65 | 66 | trimmed.mse <- function(n, m, k, p) { 67 | #MC est of mse for k-level trimmed mean of 68 | #contaminated normal pN(0,1) + (1-p)N(0,100) 69 | tmean <- numeric(m) 70 | for (i in 1:m) { 71 | sigma <- sample(c(1, 10), size = n, 72 | replace = TRUE, prob = c(p, 1-p)) 73 | x <- sort(rnorm(n, 0, sigma)) 74 | tmean[i] <- sum(x[(k+1):(n-k)]) / (n-2*k) 75 | } 76 | mse.est <- mean(tmean^2) 77 | se.mse <- sqrt(mean((tmean-mean(tmean))^2)) / sqrt(m) 78 | return(c(mse.est, se.mse)) 79 | } 80 | 81 | for (k in 0:K) { 82 | mse[k+1, 1:2] <- trimmed.mse(n=n, m=m, k=k, p=1.0) 83 | mse[k+1, 3:4] <- trimmed.mse(n=n, m=m, k=k, p=.95) 84 | mse[k+1, 5:6] <- trimmed.mse(n=n, m=m, k=k, p=.9) 85 | } 86 | ``` 87 | 88 | ## Example 7.4 (Confidence interval for variance) 89 | 90 | ```{r 7.4} 91 | n <- 20 92 | alpha <- .05 93 | x <- rnorm(n, mean=0, sd=2) 94 | UCL <- (n-1) * var(x) / qchisq(alpha, df=n-1) 95 | ``` 96 | 97 | ## Example 7.5 (MC estimate of confidence level) 98 | 99 | ```{r 7.5} 100 | n <- 20 101 | alpha <- .05 102 | UCL <- replicate(1000, expr = { 103 | x <- rnorm(n, mean = 0, sd = 2) 104 | (n-1) * var(x) / qchisq(alpha, df = n-1) 105 | } ) 106 | #count the number of intervals that contain sigma^2=4 107 | sum(UCL > 4) 108 | #or compute the mean to get the confidence level 109 | mean(UCL > 4) 110 | ``` 111 | 112 | ## Example 7.6 (Empirical confidence level) 113 | 114 | ```{r 7.6} 115 | n <- 20 116 | alpha <- .05 117 | UCL <- replicate(1000, expr = { 118 | x <- rchisq(n, df = 2) 119 | (n-1) * var(x) / qchisq(alpha, df = n-1) 120 | } ) 121 | sum(UCL > 4) 122 | mean(UCL > 4) 123 | ``` 124 | 125 | ## Example 7.7 (Empirical Type I error rate) 126 | 127 | ```{r 7.7} 128 | n <- 20 129 | alpha <- .05 130 | mu0 <- 500 131 | sigma <- 100 132 | 133 | m <- 10000 #number of replicates 134 | p <- numeric(m) #storage for p-values 135 | for (j in 1:m) { 136 | x <- rnorm(n, mu0, sigma) 137 | ttest <- t.test(x, alternative = "greater", mu = mu0) 138 | p[j] <- ttest$p.value 139 | } 140 | 141 | p.hat <- mean(p < alpha) 142 | se.hat <- sqrt(p.hat * (1 - p.hat) / m) 143 | print(c(p.hat, se.hat)) 144 | ``` 145 | 146 | ## Example 7.8 (Skewness test of normality) 147 | 148 | ```{r 7.8} 149 | n <- c(10, 20, 30, 50, 100, 500) #sample sizes 150 | cv <- qnorm(.975, 0, sqrt(6/n)) #crit. values for each n 151 | 152 | sk <- function(x) { 153 | #computes the sample skewness coeff. 154 | xbar <- mean(x) 155 | m3 <- mean((x - xbar)^3) 156 | m2 <- mean((x - xbar)^2) 157 | return( m3 / m2^1.5 ) 158 | } 159 | 160 | #n is a vector of sample sizes 161 | #we are doing length(n) different simulations 162 | 163 | p.reject <- numeric(length(n)) #to store sim. results 164 | m <- 10000 #num. repl. each sim. 
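# outer loop: one simulation for each sample size n[i]; inner loop:
# m samples from N(0,1), rejecting normality when |sk(x)| exceeds
# cv[i], the critical value from the asymptotic N(0, 6/n) null
# distribution of the skewness statistic. p.reject estimates the
# empirical Type I error rate, which should approach the nominal
# 0.05 as n grows.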
165 | 166 | for (i in 1:length(n)) { 167 | sktests <- numeric(m) #test decisions 168 | for (j in 1:m) { 169 | x <- rnorm(n[i]) 170 | #test decision is 1 (reject) or 0 171 | sktests[j] <- as.integer(abs(sk(x)) >= cv[i] ) 172 | } 173 | p.reject[i] <- mean(sktests) #proportion rejected 174 | } 175 | 176 | p.reject 177 | ``` 178 | 179 | ## Example 7.9 (Empirical power) 180 | 181 | ```{r 7.9} 182 | #set.seed(521) 183 | n <- 20 184 | m <- 1000 185 | mu0 <- 500 186 | sigma <- 100 187 | mu <- c(seq(450, 650, 10)) #alternatives 188 | M <- length(mu) 189 | power <- numeric(M) 190 | for (i in 1:M) { 191 | mu1 <- mu[i] 192 | pvalues <- replicate(m, expr = { 193 | #simulate under alternative mu1 194 | x <- rnorm(n, mean = mu1, sd = sigma) 195 | ttest <- t.test(x, 196 | alternative = "greater", mu = mu0) 197 | ttest$p.value } ) 198 | power[i] <- mean(pvalues <= .05) 199 | } 200 | 201 | se <- sqrt(power * (1-power) / m) 202 | df <- data.frame(mean=mu, power=power, upper=power+2*se, lower=power-2*se) 203 | 204 | library(ggplot2) 205 | ggplot(df, aes(x=mean, y=power)) + 206 | geom_line() + 207 | geom_vline(xintercept=500, lty=2) + 208 | geom_hline(yintercept=c(0,.05), lty=1:2) + 209 | geom_errorbar(aes(ymin=lower, ymax=upper), width = 0.2, lwd=1.5) 210 | ``` 211 | 212 | ## Example 7.10 (Power of the skewness test of normality) 213 | 214 | ```{r 7.10} 215 | #set.seed(111) 216 | alpha <- .1 217 | n <- 30 218 | m <- 2500 219 | epsilon <- c(seq(0, .15, .01), seq(.15, 1, .05)) 220 | N <- length(epsilon) 221 | pwr <- numeric(N) 222 | #critical value for the skewness test 223 | cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) 224 | 225 | for (j in 1:N) { #for each epsilon 226 | e <- epsilon[j] 227 | sktests <- numeric(m) 228 | for (i in 1:m) { #for each replicate 229 | sigma <- sample(c(1, 10), replace = TRUE, 230 | size = n, prob = c(1-e, e)) 231 | x <- rnorm(n, 0, sigma) 232 | sktests[i] <- as.integer(abs(sk(x)) >= cv) 233 | } 234 | pwr[j] <- mean(sktests) 235 | } 236 | 237 | se <- sqrt(pwr * (1-pwr) / m) 238 | df <- data.frame(epsilon=epsilon, power=pwr, upper=pwr+2*se, lower=pwr-2*se) 239 | 240 | #plot power vs epsilon 241 | library(ggplot2) 242 | ggplot(df, aes(x=epsilon, y=power)) + 243 | geom_line() + labs(x=bquote(epsilon)) + 244 | geom_hline(yintercept=.1, lty=2) + 245 | geom_pointrange(aes(ymin=lower, ymax=upper)) 246 | ``` 247 | 248 | ## Example 7.11 (Power comparison of tests of normality) 249 | 250 | ```{r 7.11} 251 | #only one loop, for epsilon=0.1, was shown in the text 252 | #the simulation below takes several minutes to run 253 | 254 | # initialize input and output 255 | library(energy) 256 | alpha <- .1 257 | n <- 30 258 | m <- 500 #try small m for a trial run 259 | test1 <- test2 <- test3 <- numeric(m) 260 | 261 | #critical value for the skewness test 262 | cv <- qnorm(1-alpha/2, 0, sqrt(6*(n-2) / ((n+1)*(n+3)))) 263 | sim <- matrix(0, 11, 4) 264 | 265 | # estimate power 266 | for (i in 0:10) { 267 | epsilon <- i * .1 268 | for (j in 1:m) { 269 | e <- epsilon 270 | sigma <- sample(c(1, 10), replace = TRUE, 271 | size = n, prob = c(1-e, e)) 272 | x <- rnorm(n, 0, sigma) 273 | test1[j] <- as.integer(abs(sk(x)) >= cv) 274 | test2[j] <- as.integer( 275 | shapiro.test(x)$p.value <= alpha) 276 | test3[j] <- as.integer( 277 | mvnorm.etest(x, R=200)$p.value <= alpha) 278 | } 279 | print(c(epsilon, mean(test1), mean(test2), mean(test3))) 280 | sim[i+1, ] <- c(epsilon, mean(test1), mean(test2), mean(test3)) 281 | } 282 | detach(package:energy) 283 | 284 | # plot the empirical estimates of power 
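# columns of sim: epsilon, then the empirical power of the skewness,
# Shapiro-Wilk, and energy tests; power is near alpha at both
# endpoints because epsilon = 0 and epsilon = 1 give exactly normal
# samples (sd 1 and sd 10, respectively).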
285 | plot(sim[,1], sim[,2], ylim = c(0, 1), type = "l", 286 | xlab = bquote(epsilon), ylab = "power") 287 | lines(sim[,1], sim[,3], lty = 2) 288 | lines(sim[,1], sim[,4], lty = 4) 289 | abline(h = alpha, lty = 3) 290 | legend("topright", 1, c("skewness", "S-W", "energy"), 291 | lty = c(1,2,4), inset = .02) 292 | ``` 293 | 294 | ## Example 7.12 (Count Five test statistic) 295 | 296 | ```{r 7.12} 297 | x1 <- rnorm(20, 0, sd = 1) 298 | x2 <- rnorm(20, 0, sd = 1.5) 299 | y <- c(x1, x2) 300 | 301 | group <- rep(1:2, each = length(x1)) 302 | boxplot(y ~ group, boxwex = .3, xlim = c(.5, 2.5), main = "") 303 | points(group, y) 304 | 305 | # now identify the extreme points 306 | range(x1) 307 | range(x2) 308 | 309 | i <- which(x1 < min(x2)) 310 | j <- which(x2 > max(x1)) 311 | 312 | x1[i] 313 | x2[j] 314 | 315 | out1 <- sum(x1 > max(x2)) + sum(x1 < min(x2)) 316 | out2 <- sum(x2 > max(x1)) + sum(x2 < min(x1)) 317 | max(c(out1, out2)) 318 | ``` 319 | 320 | ## Example 7.13 (Count Five test statistic, cont.) 321 | 322 | ```{r 7.13} 323 | maxout <- function(x, y) { 324 | X <- x - mean(x) 325 | Y <- y - mean(y) 326 | outx <- sum(X > max(Y)) + sum(X < min(Y)) 327 | outy <- sum(Y > max(X)) + sum(Y < min(X)) 328 | return(max(c(outx, outy))) 329 | } 330 | 331 | n1 <- n2 <- 20 332 | mu1 <- mu2 <- 0 333 | sigma1 <- sigma2 <- 1 334 | m <- 1000 335 | 336 | # generate samples under H0 337 | stat <- replicate(m, expr={ 338 | x <- rnorm(n1, mu1, sigma1) 339 | y <- rnorm(n2, mu2, sigma2) 340 | maxout(x, y) 341 | }) 342 | print(cumsum(table(stat)) / m) 343 | print(quantile(stat, c(.8, .9, .95))) 344 | ``` 345 | 346 | ## Example 7.14 (Count Five test) 347 | 348 | ```{r 7.14} 349 | count5test <- function(x, y) { 350 | X <- x - mean(x) 351 | Y <- y - mean(y) 352 | outx <- sum(X > max(Y)) + sum(X < min(Y)) 353 | outy <- sum(Y > max(X)) + sum(Y < min(X)) 354 | # return 1 (reject) or 0 (do not reject H0) 355 | return(as.integer(max(c(outx, outy)) > 5)) 356 | } 357 | 358 | n1 <- n2 <- 20 359 | mu1 <- mu2 <- 0 360 | sigma1 <- sigma2 <- 1 361 | m <- 10000 362 | tests <- replicate(m, expr = { 363 | x <- rnorm(n1, mu1, sigma1) 364 | y <- rnorm(n2, mu2, sigma2) 365 | x <- x - mean(x) #centered by sample mean 366 | y <- y - mean(y) 367 | count5test(x, y) 368 | } ) 369 | 370 | alphahat <- mean(tests) 371 | print(alphahat) 372 | ``` 373 | 374 | ## Example 7.15 (Count Five test, cont.) 375 | 376 | ```{r 7.15} 377 | n1 <- 20 378 | n2 <- 30 379 | mu1 <- mu2 <- 0 380 | sigma1 <- sigma2 <- 1 381 | m <- 10000 382 | 383 | alphahat <- mean(replicate(m, expr={ 384 | x <- rnorm(n1, mu1, sigma1) 385 | y <- rnorm(n2, mu2, sigma2) 386 | x <- x - mean(x) #centered by sample mean 387 | y <- y - mean(y) 388 | count5test(x, y) 389 | })) 390 | 391 | print(alphahat) 392 | ``` 393 | 394 | ## Example 7.16 (Count Five, cont.) 395 | 396 | ```{r 7.16} 397 | # generate samples under H1 to estimate power 398 | sigma1 <- 1 399 | sigma2 <- 1.5 400 | 401 | power <- mean(replicate(m, expr={ 402 | x <- rnorm(20, 0, sigma1) 403 | y <- rnorm(20, 0, sigma2) 404 | count5test(x, y) 405 | })) 406 | 407 | print(power) 408 | ``` 409 | 410 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chNumerical.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. 
https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 13: Introduction to Numerical Methods in R 14 | 15 | ## Example 13.1 (Identical and nearly equal) 16 | 17 | ```{r } 18 | isTRUE(all.equal(.2, .3 - .1)) 19 | all.equal(.2, .3) #not a logical value 20 | isTRUE(all.equal(.2, .3)) #always a logical value 21 | 22 | x <- 1:4 23 | y <- 2 24 | y == 2 25 | x == y #not necessarily a single logical value 26 | identical(x, y) #always a single logical value 27 | identical(y, 2) 28 | ``` 29 | 30 | ## Example 13.2 (Ratio of two large numbers) 31 | 32 | ```{r } 33 | n <- 400 34 | (gamma((n-1)/2) / (sqrt(pi) * gamma((n-2)/2))) 35 | exp(lgamma((n-1)/2) - lgamma((n-2)/2)) / sqrt(pi) 36 | ``` 37 | 38 | ## Example 13.3 (Taylor expansion) 39 | 40 | ```{r } 41 | system.time({ 42 | for (i in 1:1000) { 43 | a <- rep(0, 24) 44 | a0 <- pi / 6 45 | a2 <- a0 * a0 46 | a[1] <- -a0^3 / 6 47 | for (i in 2:24) 48 | a[i] <- - a2 * a[i-1] / ((2*i+1)*(2*i)) 49 | a0 + sum(a)} 50 | }) 51 | 52 | system.time({ 53 | for (i in 1:1000) { 54 | K <- 2 * (0:24) + 1 55 | i <- rep(c(1, -1), length=25) 56 | sum(i * (pi/6)^K / factorial(K))} 57 | }) 58 | ``` 59 | 60 | ## Example 13.4 (Derivative of zeta function) 61 | 62 | ```{r } 63 | zeta.deriv <- function(a) { 64 | z <- a - 1 65 | # Stieltjes constants gamma_k for k=1:5 66 | g <- c( 67 | -.7281584548367672e-1, 68 | -.9690363192872318e-2, 69 | .2053834420303346e-2, 70 | .2325370065467300e-2, 71 | .7933238173010627e-3) 72 | i <- c(-1, 1, -1, 1, -1) 73 | n <- 0:4 74 | -1/z^2 + sum(i * g * z^n / factorial(n)) 75 | } 76 | ``` 77 | 78 | ## Example 13.5 (Derivative of zeta function, cont.) 79 | 80 | ```{r } 81 | library(gsl) #for zeta function 82 | z <- c(1.001, 1.01, 1.5, 2, 3, 5) 83 | h <- .Machine$double.eps^0.5 84 | dz <- dq <- rep(0, length(z)) 85 | for (i in 1:length(z)) { 86 | v <- z[i] + h 87 | h <- v - z[i] 88 | a0 <- z[i] - h 89 | if (a0 < 1) a0 <- (1 + z[i])/2 90 | a1 <- z[i] + h 91 | dq[i] <- (zeta(a1) - zeta(a0)) / (a1 - a0) 92 | dz[i] <- zeta.deriv(z[i]) 93 | } 94 | 95 | h 96 | 97 | cbind(z, dz, dq) 98 | ``` 99 | 100 | ## Example 13.6 (Solving f(x)=0) 101 | 102 | ```{r } 103 | f <- function(y, a, n) { 104 | a^2 + y^2 + 2*a*y/(n-1) - (n-2) 105 | } 106 | 107 | a <- 0.5 108 | n <- 20 109 | b0 <- 0 110 | b1 <- 5*n 111 | 112 | #solve using bisection 113 | it <- 0 114 | eps <- .Machine$double.eps^0.25 115 | r <- seq(b0, b1, length=3) 116 | y <- c(f(r[1], a, n), f(r[2], a, n), f(r[3], a, n)) 117 | if (y[1] * y[3] > 0) 118 | stop("f does not have opposite sign at endpoints") 119 | 120 | while(it < 1000 && abs(y[2]) > eps) { 121 | it <- it + 1 122 | if (y[1]*y[2] < 0) { 123 | r[3] <- r[2] 124 | y[3] <- y[2] 125 | } else { 126 | r[1] <- r[2] 127 | y[1] <- y[2] 128 | } 129 | r[2] <- (r[1] + r[3]) / 2 130 | y[2] <- f(r[2], a=a, n=n) 131 | print(c(r[1], y[1], y[3]-y[2])) 132 | } 133 | ``` 134 | 135 | ## Example 13.7 (Solving f(x)=0 with Brent's method: uniroot) 136 | 137 | ```{r } 138 | a <- 0.5 139 | n <- 20 140 | out <- uniroot(function(y) { 141 | a^2 + y^2 + 2*a*y/(n-1) - (n-2) }, 142 | lower = 0, upper = n*5) 143 | unlist(out) 144 | uniroot(function(y) {a^2 + y^2 + 2*a*y/(n-1) - (n-2)}, 145 | interval = c(-n*5, 0))$root 146 | ``` 147 | 148 | ## Example 13.8 (Numerical integration with integrate) 149 | 150 | ```{r } 151 | f <- function(y, N, r, rho) { 152 | (cosh(y) - rho * r)^(1 - N) 153 | } 154 | integrate(f, lower=0, upper=Inf, 155 | 
rel.tol=.Machine$double.eps^0.25, 156 | N=10, r=0.5, rho=0.2) 157 | 158 | ro <- seq(-.99, .99, .01) 159 | v <- rep(0, length(ro)) 160 | for (i in 1:length(ro)) { 161 | v[i] <- integrate(f, lower=0, upper=Inf, 162 | rel.tol=.Machine$double.eps^0.25, 163 | N=10, r=0.5, rho=ro[i])$value 164 | } 165 | plot(ro, v, type="l", xlab=expression(rho), 166 | ylab="Integral Value (n=10, r=0.5)") 167 | ``` 168 | 169 | ## Example 13.9 (Density of sample correlation coefficient) 170 | 171 | ```{r } 172 | .dcorr <- function(r, N, rho=0) { 173 | # compute the density function of sample correlation 174 | if (abs(r) > 1 || abs(rho) > 1) return (0) 175 | if (N < 4) return (NA) 176 | 177 | if (isTRUE(all.equal(rho, 0.0))) { 178 | a <- exp(lgamma((N - 1)/2) - lgamma((N - 2)/2)) / 179 | sqrt(pi) 180 | return (a * (1 - r^2)^((N - 4)/2)) 181 | } 182 | 183 | # if rho not 0, need to integrate 184 | f <- function(w, R, N, rho) 185 | (cosh(w) - rho * R)^(1 - N) 186 | 187 | #need to insert some error checking here 188 | i <- integrate(f, lower=0, upper=Inf, 189 | R=r, N=N, rho=rho)$value 190 | c1 <- (N - 2) * (1 - rho^2)^((N - 1)/2) 191 | c2 <- (1 - r^2)^((N - 4) / 2) / pi 192 | return(c1 * c2 * i) 193 | } 194 | 195 | r <- as.matrix(seq(-1, 1, .01)) 196 | d1 <- apply(r, 1, .dcorr, N=10, rho=.0) 197 | d2 <- apply(r, 1, .dcorr, N=10, rho=.5) 198 | d3 <- apply(r, 1, .dcorr, N=10, rho=-.5) 199 | plot(r, d2, type="l", lty=2, lwd=2, ylab="density") 200 | lines(r, d1, lwd=2) 201 | lines(r, d3, lty=4, lwd=2) 202 | legend("top", inset=.02, 203 | c("rho = 0", "rho = 0.5", "rho = -0.5"), lty=c(1,2,4), lwd=2) 204 | ``` 205 | 206 | ## Example 13.10 (MLE using mle) 207 | 208 | ```{r } 209 | #the observed sample 210 | y <- c(0.04304550, 0.50263474) 211 | 212 | mlogL <- function(theta=1) { 213 | #minus log-likelihood of exp. 
density, rate theta 214 | return( - (length(y) * log(theta) - theta * sum(y))) 215 | } 216 | 217 | library(stats4) 218 | fit <- mle(mlogL) 219 | summary(fit) 220 | 221 | # Alternatively, the initial value for the optimizer could 222 | # be supplied in the call to mle; two examples are 223 | 224 | mle(mlogL, start=list(theta=1)) 225 | mle(mlogL, start=list(theta=mean(y))) 226 | ``` 227 | 228 | ## Application: Evaluating an Expected Value 229 | 230 | ```{r } 231 | y <- VGAM::rpareto(1000, scale=1, shape=3) 232 | hist(y, prob=TRUE, breaks="scott", main="", ylim=c(0,3)) 233 | curve(VGAM::dpareto(x, scale=1, shape=3), add=TRUE) 234 | 235 | a <- 1 236 | s <- 1 237 | b <- 0.5 238 | ``` 239 | 240 | ## Example 13.11 (Numerical integration) 241 | 242 | ```{r } 243 | f1 <- function(x, y, s, a, b) { 244 | # the integrand function: |y-x|^b f(x) 245 | (abs(y-x))^b * a * s^a / x^(a+1) 246 | } 247 | 248 | integrate(f1, lower=s, upper=Inf, y=2, s=1, a=1, b=b) 249 | ``` 250 | 251 | ## Example 13.12 (Direct evaluation) 252 | 253 | ```{r } 254 | g2 <- function(y, s, b) { 255 | # Compute E|y-X|^b for Pareto(I) with a=1 256 | y0 <- (y - s)/y 257 | c0 <- gsl::hyperg_2F1(b+1, 1, b+2, y0) 258 | ((y - s)^b) - s*b*(y^(b-1)) * 259 | ((y0^b)/b + c0*(y0^(b+1))/(b+1)) + 260 | s*y^(b-1) * beta(b+1,1-b) 261 | } 262 | 263 | g2(2, s, b) 264 | ``` 265 | 266 | ## Example 13.13 (Plot of expected distance function) 267 | 268 | ```{r } 269 | p <- c(ppoints(50)/100, ppoints(50)) 270 | x <- VGAM::qpareto(p, scale=s, shape=1) 271 | ex <- g2(x, s=s, b=b) 272 | plot(log(x), log(ex), cex=.25, type="l") 273 | for (i in 1:length(x)) { 274 | y <- x[i] 275 | zi <- integrate(f1, lower=s, upper=Inf, 276 | subdivisions=200, rel.tol=.Machine$double.eps^.5, 277 | stop.on.error=FALSE, y=y, s=s, a=1, b=b) 278 | if (zi$message == "OK") 279 | points(log(y), log(zi$value), col=2, cex=.25) 280 | else print(paste("y=", y, zi$message)) 281 | } 282 | 283 | integrate(f1, lower=s, upper=Inf, y=s, s=s, a=1, b=b) 284 | g2(s, s, b) 285 | ``` 286 | 287 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chOptim.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. 
https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 14: Optimization 14 | 15 | ## Example 14.1 (One-dimensional optimization with optimize) 16 | 17 | ```{r } 18 | f <- function(x) 19 | log(x + log(x))/log(1+x) 20 | 21 | curve(f(x), from = 2, to = 15, ylab = "f(x)") 22 | 23 | optimize(f, lower = 4, upper = 8, maximum = TRUE) 24 | ``` 25 | 26 | ## Example 14.2 (MLE: Gamma distribution) 27 | 28 | ```{r } 29 | m <- 20000 30 | est <- matrix(0, m, 2) 31 | n <- 200 32 | r <- 5 33 | lambda <- 2 34 | 35 | obj <- function(lambda, xbar, logx.bar) { 36 | digamma(lambda * xbar) - logx.bar - log(lambda) 37 | } 38 | 39 | for (i in 1:m) { 40 | x <- rgamma(n, shape=r, rate=lambda) 41 | xbar <- mean(x) 42 | u <- uniroot(obj, lower = .001, upper = 10e5, 43 | xbar = xbar, logx.bar = mean(log(x))) 44 | lambda.hat <- u$root 45 | r.hat <- xbar * lambda.hat 46 | est[i, ] <- c(r.hat, lambda.hat) 47 | } 48 | 49 | ML <- colMeans(est) 50 | 51 | hist(est[, 1], breaks="scott", freq=FALSE, 52 | xlab="r", main="") 53 | points(ML[1], 0, cex=1.5, pch=20) 54 | hist(est[, 2], breaks="scott", freq=FALSE, 55 | xlab=bquote(lambda), main="") 56 | points(ML[2], 0, cex=1.5, pch=20) 57 | ``` 58 | 59 | ## Example 14.3 (Two-dimensional optimization with optim) 60 | 61 | ```{r } 62 | LL <- function(theta, sx, slogx, n) { 63 | r <- theta[1] 64 | lambda <- theta[2] 65 | loglik <- n * r * log(lambda) + (r - 1) * slogx - 66 | lambda * sx - n * log(gamma(r)) 67 | - loglik 68 | } 69 | 70 | n <- 200 71 | r <- 5; lambda <- 2 72 | x <- rgamma(n, shape=r, rate=lambda) 73 | 74 | optim(c(1,1), LL, sx=sum(x), slogx=sum(log(x)), n=n) 75 | 76 | mlests <- replicate(20000, expr = { 77 | x <- rgamma(200, shape = 5, rate = 2) 78 | optim(c(1,1), LL, sx=sum(x), slogx=sum(log(x)), n=n)$par 79 | }) 80 | colMeans(t(mlests)) 81 | ``` 82 | 83 | ## Example 14.4 (MLE for a quadratic form) 84 | 85 | ```{r } 86 | LL <- function(lambda, y) { 87 | lambda3 <- 1 - sum(lambda) 88 | f1 <- dgamma(y, shape=1/2, rate=1/(2*lambda[1])) 89 | f2 <- dgamma(y, shape=1/2, rate=1/(2*lambda[2])) 90 | f3 <- dgamma(y, shape=1/2, rate=1/(2*lambda3)) 91 | f <- f1/3 + f2/3 + f3/3 #density of mixture 92 | #returning -loglikelihood 93 | return( -sum(log(f))) 94 | } 95 | 96 | set.seed(543) 97 | m <- 2000 98 | lambda <- c(.6, .25, .15) #rate is 1/(2lambda) 99 | lam <- sample(lambda, size = 2000, replace = TRUE) 100 | y <- rgamma(m, shape = .5, rate = 1/(2*lam)) 101 | 102 | opt <- optim(c(.5,.3), LL, y=y) 103 | theta <- c(opt$par, 1 - sum(opt$par)) 104 | 105 | as.data.frame(unlist(opt)) 106 | 107 | theta 108 | ``` 109 | 110 | ## Example 14.5 (EM algorithm for a mixture model) 111 | 112 | ```{r } 113 | set.seed(543) 114 | lambda <- c(.6, .25, .15) #rate is 1/(2lambda) 115 | lam <- sample(lambda, size = 2000, replace = TRUE) 116 | y <- rgamma(m, shape = .5, rate = 1/(2*lam)) 117 | 118 | N <- 10000 #max. number of iterations 119 | L <- c(.5, .4, .1) #initial est. 
for lambdas 120 | tol <- .Machine$double.eps^0.5 121 | L.old <- L + 1 122 | 123 | for (j in 1:N) { 124 | f1 <- dgamma(y, shape=1/2, rate=1/(2*L[1])) 125 | f2 <- dgamma(y, shape=1/2, rate=1/(2*L[2])) 126 | f3 <- dgamma(y, shape=1/2, rate=1/(2*L[3])) 127 | py <- f1 / (f1 + f2 + f3) #posterior prob y from 1 128 | qy <- f2 / (f1 + f2 + f3) #posterior prob y from 2 129 | ry <- f3 / (f1 + f2 + f3) #posterior prob y from 3 130 | 131 | mu1 <- sum(y * py) / sum(py) #update means 132 | mu2 <- sum(y * qy) / sum(qy) 133 | mu3 <- sum(y * ry) / sum(ry) 134 | L <- c(mu1, mu2, mu3) #update lambdas 135 | L <- L / sum(L) 136 | 137 | if (sum(abs(L - L.old)/L.old) < tol) break 138 | L.old <- L 139 | } 140 | 141 | print(list(lambda = L/sum(L), iter = j, tol = tol)) 142 | ``` 143 | 144 | ## Example 14.6 (Simplex algorithm) 145 | 146 | ```{r } 147 | library(boot) #for simplex function 148 | A1 <- rbind(c(-2, 1, 1), c(4, -1, 3)) 149 | b1 <- c(1, 3) 150 | a <- c(2, 2, 3) 151 | simplex(a = a, A1 = A1, b1 = b1, maxi = TRUE) 152 | detach(package:boot) 153 | ``` 154 | 155 | ## Example 14.7 (Solving the Morra game) 156 | 157 | ```{r } 158 | solve.game <- function(A) { 159 | #solve the two player zero-sum game by simplex method 160 | #optimize for player 1, then player 2 161 | #maximize v subject to ... 162 | #let x strategies 1:m, and put v as extra variable 163 | #A1, the <= constraints 164 | # 165 | min.A <- min(A) 166 | A <- A - min.A #so that v >= 0 167 | max.A <- max(A) 168 | A <- A / max(A) 169 | m <- nrow(A) 170 | n <- ncol(A) 171 | it <- n^3 172 | a <- c(rep(0, m), 1) #objective function 173 | A1 <- -cbind(t(A), rep(-1, n)) #constraints <= 174 | b1 <- rep(0, n) 175 | A3 <- t(as.matrix(c(rep(1, m), 0))) #constraints sum(x)=1 176 | b3 <- 1 177 | sx <- simplex(a=a, A1=A1, b1=b1, A3=A3, b3=b3, 178 | maxi=TRUE, n.iter=it) 179 | #the 'solution' is [x1,x2,...,xm | value of game] 180 | # 181 | #minimize v subject to ... 182 | #let y strategies 1:n, with v as extra variable 183 | a <- c(rep(0, n), 1) #objective function 184 | A1 <- cbind(A, rep(-1, m)) #constraints <= 185 | b1 <- rep(0, m) 186 | A3 <- t(as.matrix(c(rep(1, n), 0))) #constraints sum(y)=1 187 | b3 <- 1 188 | sy <- simplex(a=a, A1=A1, b1=b1, A3=A3, b3=b3, 189 | maxi=FALSE, n.iter=it) 190 | 191 | soln <- list("A" = A * max.A + min.A, 192 | "x" = sx$soln[1:m], 193 | "y" = sy$soln[1:n], 194 | "v" = sx$soln[m+1] * max.A + min.A) 195 | soln 196 | } 197 | 198 | 199 | #enter the payoff matrix 200 | A <- matrix(c( 0,-2,-2,3,0,0,4,0,0, 201 | 2,0,0,0,-3,-3,4,0,0, 202 | 2,0,0,3,0,0,0,-4,-4, 203 | -3,0,-3,0,4,0,0,5,0, 204 | 0,3,0,-4,0,-4,0,5,0, 205 | 0,3,0,0,4,0,-5,0,-5, 206 | -4,-4,0,0,0,5,0,0,6, 207 | 0,0,4,-5,-5,0,0,0,6, 208 | 0,0,4,0,0,5,-6,-6,0), 9, 9) 209 | 210 | library(boot) #needed for simplex function 211 | 212 | s <- solve.game(A) 213 | round(cbind(s$x, s$y), 7) 214 | ``` 215 | 216 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chPerm.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. 
https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 10: Permutation Tests 14 | 15 | ## Example 10.1 (Permutation distribution of a statistic) 16 | 17 | ```{r } 18 | attach(chickwts) 19 | x <- sort(weight[feed == "soybean"]) 20 | y <- sort(weight[feed == "linseed"]) 21 | detach(chickwts) 22 | 23 | R <- 999 #number of replicates 24 | z <- c(x, y) #pooled sample 25 | K <- 1:26 26 | reps <- numeric(R) #storage for replicates 27 | t0 <- t.test(x, y)$statistic 28 | 29 | for (i in 1:R) { 30 | #generate indices k for the first sample 31 | k <- sample(K, size = 14, replace = FALSE) 32 | x1 <- z[k] 33 | y1 <- z[-k] #complement of x1 34 | reps[i] <- t.test(x1, y1)$statistic 35 | } 36 | p <- mean(c(t0, reps) >= t0) 37 | p 38 | 39 | hist(reps, main = "", freq = FALSE, xlab = "T (p = 0.202)", 40 | breaks = "scott") 41 | points(t0, 0, cex = 1, pch = 16) #observed T 42 | ``` 43 | 44 | ## Example 10.2 (Permutation distribution of the K-S statistic) 45 | 46 | ```{r } 47 | # continues Example 10.1 48 | R <- 999 #number of replicates 49 | z <- c(x, y) #pooled sample 50 | K <- 1:26 51 | D <- numeric(R) #storage for replicates 52 | options(warn = -1) 53 | D0 <- ks.test(x, y, exact = FALSE)$statistic 54 | for (i in 1:R) { 55 | #generate indices k for the first sample 56 | k <- sample(K, size = 14, replace = FALSE) 57 | x1 <- z[k] 58 | y1 <- z[-k] #complement of x1 59 | D[i] <- ks.test(x1, y1, exact = FALSE)$statistic 60 | } 61 | p <- mean(c(D0, D) >= D0) 62 | options(warn = 0) 63 | p 64 | 65 | hist(D, main = "", freq = FALSE, xlab = "D (p = 0.46)", 66 | breaks = "scott") 67 | points(D0, 0, cex = 1, pch = 16) #observed D 68 | ``` 69 | 70 | ## Example 10.3 (Example 10.2, cont.) 
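This chunk reuses `R`, `K`, `D`, and the pooled vector `z` from Example 10.2, while `x` and `y` are redrawn below as the sunflower and linseed groups (12 observations each in `chickwts`). For the permutation replicates to be computed from the newly drawn samples, the pooled vector should be re-formed after the new `x` and `y` are created; a minimal sketch of that setup (not run here; the sizes come from the `chickwts` group counts):

```{r eval=FALSE}
z <- c(x, y)   #re-pool the new samples: 12 sunflower + 12 linseed
K <- 1:24      #indices of the pooled sample
k <- sample(K, size = 12, replace = FALSE)   #first sample has 12 obs.
```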
71 | 72 | ```{r } 73 | attach(chickwts) 74 | x <- sort(weight[feed == "sunflower"]) 75 | y <- sort(weight[feed == "linseed"]) 76 | detach(chickwts) 77 | 78 | summary(cbind(x, y)) 79 | options(warn = -1) 80 | D0 <- ks.test(x, y, exact = FALSE)$statistic 81 | for (i in 1:R) { 82 | #generate indices k for the first sample 83 | k <- sample(K, size = 14, replace = FALSE) 84 | x1 <- z[k] 85 | y1 <- z[-k] #complement of x1 86 | D[i] <- ks.test(x1, y1, exact = FALSE)$statistic 87 | } 88 | p <- mean(c(D0, D) >= D0) 89 | options(warn = 0) 90 | p 91 | ``` 92 | 93 | ## Example 10.4 (Finding nearest neighbors) 94 | ## using yaImpute::ann 95 | 96 | ```{r } 97 | set.seed(439) 98 | library(yaImpute) #for ann function 99 | 100 | #generate a small multivariate data set 101 | x <- matrix(rnorm(12), 3, 4) 102 | y <- matrix(rnorm(12), 3, 4) 103 | z <- rbind(x, y) 104 | k <- nrow(z) #number of nearest neighbors desired 105 | 106 | ## Do an exact kd-tree search 107 | kd.exact <- ann(ref=z, target=z, 108 | tree.type="kd", k=k, verbose=FALSE) 109 | kd.exact$knnIndexDist[,1:k] #NN indices 110 | round(sqrt(kd.exact$knnIndexDist[,-(1:k)]),2) #Euclidean distances 111 | 112 | ## Do an approximate kd-tree search 113 | kd.approx <- ann(ref=z, target=z, 114 | tree.type="kd", k=k, eps=100, verbose=FALSE) 115 | kd.approx$knnIndexDist[,1:k] #NN indices 116 | detach(package:yaImpute) 117 | 118 | 119 | ### Example 10.5 (Nearest neighbor statistic) 120 | ### using yaImpute::ann 121 | 122 | library(yaImpute) 123 | attach(chickwts) 124 | x <- weight[feed == "sunflower"] 125 | y <- weight[feed == "linseed"] 126 | z <- as.matrix(c(x, y)) 127 | detach(chickwts) 128 | 129 | k <- 4 #want first 3 nearest neighbors 130 | NN <- ann(ref=z, target=z, tree.type="kd", k=k, verbose=FALSE) 131 | idx <- NN$knnIndexDist[,1:k] 132 | nn.idx <- idx[,-1] #first NN is in column 2 133 | 134 | block1 <- nn.idx[1:12, ] 135 | block2 <- nn.idx[13:24, ] 136 | i1 <- sum(block1 < 12.5) 137 | i2 <- sum(block2 > 12.5) 138 | 139 | c(i1, i2) 140 | 141 | detach(package:yaImpute) 142 | ``` 143 | 144 | ## Example 10.6 (Nearest neighbor test) 145 | ## using yaImpute::ann 146 | 147 | ```{r } 148 | library(boot) 149 | #continues the previous example 150 | 151 | ## function to return the matrix of indices NN_j of nearest neighbors 152 | NN.idx <- function(x, tree.type="kd", k=NROW(x)) { 153 | x <- as.matrix(x) 154 | k <- min(c(k+1, NROW(x))) 155 | NN <- yaImpute::ann(ref=x, target=x, 156 | tree.type="kd", k=k, verbose=FALSE) 157 | idx <- NN$knnIndexDist[,1:k] 158 | nn.idx <- idx[,-1] #first NN is in column 2 159 | row.names(nn.idx) <- idx[,1] 160 | nn.idx 161 | } 162 | 163 | ## function to compute the NN statistic T(n,3) 164 | Tn3 <- function(z, ix=1:NROW(z), sizes) { 165 | z <- as.matrix(z) 166 | n1 <- sizes[1] 167 | n2 <- sizes[2] 168 | n <- n1 + n2 169 | z <- as.matrix(z[ix, ]) 170 | nn.idx <- NN.idx(z, k=3) 171 | block1 <- nn.idx[1:n1, ] 172 | block2 <- nn.idx[(n1+1):n, ] 173 | i1 <- sum(block1 < n1 + .5) 174 | i2 <- sum(block2 > n1 + .5) 175 | return((i1 + i2) / (3 * n)) 176 | } 177 | 178 | attach(chickwts) 179 | x <- weight[feed == "sunflower"] 180 | y <- weight[feed == "linseed"] 181 | z <- c(x, y) 182 | detach(chickwts) 183 | 184 | N <- c(NROW(x), NROW(y)) 185 | 186 | boot.obj <- boot(data = z, statistic = Tn3, 187 | sim = "permutation", R = 999, sizes = N) 188 | boot.obj 189 | 190 | tb <- c(boot.obj$t, boot.obj$t0) 191 | mean(tb >= boot.obj$t0) 192 | 193 | hist(tb, freq=FALSE, main="", 194 | xlab="replicates of T(n,3) statistic") 195 | points(boot.obj$t0, 0, 
cex=1, pch=16) 196 | ``` 197 | 198 | ## Example 10.7 (Two-sample energy statistic) 199 | 200 | ```{r } 201 | edist.2 <- function(x, ix, sizes) { 202 | # computes the e-statistic between 2 samples 203 | # x: Euclidean distances of pooled sample 204 | # sizes: vector of sample sizes 205 | # ix: a permutation of row indices of x 206 | 207 | dst <- x 208 | n1 <- sizes[1] 209 | n2 <- sizes[2] 210 | ii <- ix[1:n1] 211 | jj <- ix[(n1+1):(n1+n2)] 212 | w <- n1 * n2 / (n1 + n2) 213 | 214 | # permutation applied to rows & cols of dist. matrix 215 | m11 <- sum(dst[ii, ii]) / (n1 * n1) 216 | m22 <- sum(dst[jj, jj]) / (n2 * n2) 217 | m12 <- sum(dst[ii, jj]) / (n1 * n2) 218 | e <- w * ((m12 + m12) - (m11 + m22)) 219 | return (e) 220 | } 221 | 222 | d <- 3 223 | a <- 2 / sqrt(d) 224 | x <- matrix(rnorm(20 * d), nrow = 20, ncol = d) 225 | y <- matrix(rnorm(10 * d, a, 1), nrow = 10, ncol = d) 226 | z <- rbind(x, y) 227 | dst <- as.matrix(dist(z)) 228 | 229 | edist.2(dst, 1:30, sizes = c(20, 10)) 230 | ``` 231 | 232 | ## Example 10.8 (Two-sample energy test) 233 | 234 | ```{r } 235 | library(boot) #for boot function 236 | dst <- as.matrix(dist(z)) 237 | N <- c(20, 10) 238 | 239 | boot.obj <- boot(data = dst, statistic = edist.2, 240 | sim = "permutation", R = 999, sizes = N) 241 | boot.obj 242 | 243 | #calculate the ASL 244 | e <- boot.obj$t0 245 | tb <- c(e, boot.obj$t) 246 | mean(tb >= e) 247 | 248 | hist(tb, main = "", breaks="scott", freq=FALSE, 249 | xlab="Replicates of e") 250 | points(e, 0, cex=1, pch=16) 251 | 252 | 253 | #energy test applied under F=G 254 | d <- 3 255 | a <- 0 256 | x <- matrix(rnorm(20 * d), nrow = 20, ncol = d) 257 | y <- matrix(rnorm(10 * d, a, 1), nrow = 10, ncol = d) 258 | z <- rbind(x, y) 259 | dst <- as.matrix(dist(z)) 260 | 261 | N <- c(20, 10) 262 | dst <- as.matrix(dist(z)) 263 | boot.obj <- boot(data = dst, statistic = edist.2, 264 | sim="permutation", R=999, sizes=N) 265 | boot.obj 266 | 267 | #calculate the ASL 268 | e <- boot.obj$t0 269 | E <- c(boot.obj$t, e) 270 | mean(E >= e) 271 | 272 | hist(E, main = "", breaks="scott", 273 | xlab="Replicates of e", freq=FALSE) 274 | points(e, 0, cex=1, pch=16) 275 | ``` 276 | 277 | ## Example 10.9 (k-sample energy distances) 278 | 279 | ```{r } 280 | z <- iris[ , 1:4] 281 | dst <- dist(z) 282 | energy::edist(dst, sizes = c(50, 50, 50), distance = TRUE) 283 | ``` 284 | 285 | ## Example 10.10 (Distance Components (disco)) 286 | 287 | ```{r } 288 | set.seed(413) 289 | energy::disco(iris[ , 1:4], factors = iris$Species, R = 999) 290 | ``` 291 | 292 | ## Example 10.11 (Power Comparison) 293 | 294 | ```{r } 295 | # results of several simulations summarized in Table 296 | ``` 297 | 298 | ## Example 10.12 (Distance covariance statistic) 299 | 300 | ```{r } 301 | dCov <- function(x, y) { 302 | x <- as.matrix(x) 303 | y <- as.matrix(y) 304 | n <- nrow(x) 305 | m <- nrow(y) 306 | if (n != m || n < 2) stop("Sample sizes must agree") 307 | if (! 
(all(is.finite(c(x, y))))) 308 | stop("Data contains missing or infinite values") 309 | 310 | Akl <- function(x) { 311 | d <- as.matrix(dist(x)) 312 | m <- rowMeans(d) 313 | M <- mean(d) 314 | a <- sweep(d, 1, m) 315 | b <- sweep(a, 2, m) 316 | return(b + M) 317 | } 318 | A <- Akl(x) 319 | B <- Akl(y) 320 | dCov <- sqrt(mean(A * B)) 321 | dCov 322 | } 323 | 324 | z <- as.matrix(iris[1:50, 1:4]) 325 | x <- z[ , 1:2] 326 | y <- z[ , 3:4] 327 | # compute the observed statistic 328 | dCov(x, y) 329 | ``` 330 | 331 | ## Example 10.13 (Distance correlation statistic) 332 | 333 | ```{r } 334 | DCOR <- function(x, y) { 335 | x <- as.matrix(x) 336 | y <- as.matrix(y) 337 | n <- nrow(x) 338 | m <- nrow(y) 339 | if (n != m || n < 2) stop("Sample sizes must agree") 340 | if (! (all(is.finite(c(x, y))))) 341 | stop("Data contains missing or infinite values") 342 | Akl <- function(x) { 343 | d <- as.matrix(dist(x)) 344 | m <- rowMeans(d) 345 | M <- mean(d) 346 | a <- sweep(d, 1, m) 347 | b <- sweep(a, 2, m) 348 | return(b + M) 349 | } 350 | A <- Akl(x) 351 | B <- Akl(y) 352 | dCov <- sqrt(mean(A * B)) 353 | dVarX <- sqrt(mean(A * A)) 354 | dVarY <- sqrt(mean(B * B)) 355 | dCor <- sqrt(dCov / sqrt(dVarX * dVarY)) 356 | list(dCov=dCov, dCor=dCor, dVarX=dVarX, dVarY=dVarY) 357 | } 358 | 359 | z <- as.matrix(iris[1:50, 1:4]) 360 | x <- z[ , 1:2] 361 | y <- z[ , 3:4] 362 | 363 | DCOR(x, y) 364 | ``` 365 | 366 | ## Example 10.14 (Distance covariance test) 367 | 368 | ```{r } 369 | ndCov2 <- function(z, ix, dims) { 370 | p <- dims[1] 371 | q1 <- p + 1 372 | d <- p + dims[2] 373 | x <- z[ , 1:p] #leave x as is 374 | y <- z[ix, q1:d] #permute rows of y 375 | return(nrow(z) * dCov(x, y)^2) 376 | } 377 | 378 | library(boot) 379 | z <- as.matrix(iris[1:50, 1:4]) 380 | boot.obj <- boot(data = z, statistic = ndCov2, R = 999, 381 | sim = "permutation", dims = c(2, 2)) 382 | 383 | tb <- c(boot.obj$t0, boot.obj$t) 384 | hist(tb, nclass="scott", xlab="", main="", 385 | freq=FALSE) 386 | points(boot.obj$t0, 0, cex=1, pch=16) 387 | 388 | mean(tb >= boot.obj$t0) 389 | boot.obj 390 | ``` 391 | 392 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chRandomProcess.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | 14 | # Chapter 4: Generating Random Processes 15 | 16 | ## Example 4.1 (Poisson process) 17 | 18 | ```{r } 19 | lambda <- 2 20 | t0 <- 3 21 | Tn <- rexp(100, lambda) #interarrival times 22 | Sn <- cumsum(Tn) #arrival times 23 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 24 | ``` 25 | 26 | ## Example 4.2 (Poisson process, cont.) 
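This simulation uses the fact that, conditional on the number of arrivals N in [0, upper], the arrival times of a homogeneous Poisson process are distributed as the order statistics of N i.i.d. Uniform(0, upper) variables. As a quick reference point (this check is not part of the book's code), N(t0) has a Poisson(lambda*t0) distribution, so the simulated mean and variance below should both be close to lambda*t0 = 6:

```{r}
# sanity check: moments of N(t0) when lambda = 2, t0 = 3
sim <- rpois(10000, 2 * 3)
c(mean(sim), var(sim))   #both approximately 6
```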
27 | 28 | ```{r } 29 | lambda <- 2 30 | t0 <- 3 31 | upper <- 100 32 | pp <- numeric(10000) 33 | for (i in 1:10000) { 34 | N <- rpois(1, lambda * upper) 35 | Un <- runif(N, 0, upper) #unordered arrival times 36 | Sn <- sort(Un) #arrival times 37 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 38 | pp[i] <- n - 1 #arrivals in [0, t0] 39 | } 40 | 41 | #alternately, the loop can be replaced by replicate function 42 | pp <- replicate(10000, expr = { 43 | N <- rpois(1, lambda * upper) 44 | Un <- runif(N, 0, upper) #unordered arrival times 45 | Sn <- sort(Un) #arrival times 46 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 47 | n - 1 }) #arrivals in [0, t0] 48 | 49 | c(mean(pp), var(pp)) 50 | ``` 51 | 52 | ## Example 4.3 (Nonhomogeneous Poisson process) 53 | 54 | ```{r } 55 | lambda <- 3 56 | upper <- 100 57 | N <- rpois(1, lambda * upper) 58 | Tn <- rexp(N, lambda) 59 | Sn <- cumsum(Tn) 60 | Un <- runif(N) 61 | keep <- (Un <= cos(Sn)^2) #indicator, as logical vector 62 | Sn[keep] 63 | 64 | round(Sn[keep], 4) 65 | ``` 66 | 67 | ## Example 4.4 (Renewal process) 68 | 69 | ```{r } 70 | t0 <- 5 71 | Tn <- rgeom(100, prob = .2) #interarrival times 72 | Sn <- cumsum(Tn) #arrival times 73 | n <- min(which(Sn > t0)) #arrivals+1 in [0, t0] 74 | 75 | Nt0 <- replicate(1000, expr = { 76 | Sn <- cumsum(rgeom(100, prob = .2)) 77 | min(which(Sn > t0)) - 1 78 | }) 79 | table(Nt0)/1000 80 | Nt0 81 | 82 | t0 <- seq(0.1, 30, .1) 83 | mt <- numeric(length(t0)) 84 | 85 | for (i in 1:length(t0)) { 86 | mt[i] <- mean(replicate(1000, 87 | { 88 | Sn <- cumsum(rgeom(100, prob = .2)) 89 | min(which(Sn > t0[i])) - 1 90 | })) 91 | } 92 | plot(t0, mt, type = "l", xlab = "t", ylab = "mean") 93 | abline(0, .25) 94 | ``` 95 | 96 | ## Example 4.5 (Symmetric random walk) 97 | 98 | ```{r } 99 | n <- 400 100 | incr <- sample(c(-1, 1), size = n, replace = TRUE) 101 | S <- as.integer(c(0, cumsum(incr))) 102 | plot(0:n, S, type = "l", main = "", xlab = "i") 103 | ``` 104 | 105 | ## Example 4.6 (Generator for the time until return to origin) 106 | 107 | ```{r } 108 | set.seed(12345) 109 | 110 | #compute the probabilities directly 111 | n <- 1:10000 112 | p2n <- exp(lgamma(2*n-1) 113 | - log(n) - (2*n-1)*log(2) - 2*lgamma(n)) 114 | #or compute using dbinom 115 | P2n <- (.5/n) * dbinom(n-1, size = 2*n-2, prob = 0.5) 116 | pP2n <- cumsum(P2n) 117 | 118 | #given n compute the time of the last return to 0 in (0,n] 119 | n <- 200 120 | sumT <- 0 121 | while (sumT <= n) { 122 | u <- runif(1) 123 | s <- sum(u > pP2n) 124 | if (s == length(pP2n)) 125 | warning("T is truncated") 126 | Tj <- 2 * (1 + s) 127 | #print(c(Tj, sumT)) 128 | sumT <- sumT + Tj 129 | } 130 | sumT - Tj 131 | ``` 132 | 133 | ## Example 4.7 (Brownian motion) 134 | 135 | ```{r } 136 | simBM <- function(n, T) { 137 | times <- seq(0, T, length = n+1) 138 | z <- rnorm(n) 139 | w <- rep(0, n) 140 | s <- sqrt(diff(times)) 141 | for (k in 2:n) { 142 | w[k] <- w[k-1] + s[k] * z[k] 143 | } 144 | return (list(w=w, t=times)) 145 | } 146 | 147 | set.seed(1) 148 | n <- 200 149 | x1 <- simBM(n, 1) 150 | x2 <- simBM(n, 1) 151 | x3 <- simBM(n, 1) 152 | r <- range(c(x1$w, x2$w, x3$w)) 153 | plot(x1$w, type="l", main="", xlab="t", ylab="W", ylim=r) 154 | lines(x2$w, lty=2) 155 | lines(x3$w, lty=3) 156 | 157 | interpBM <- function(w, t0, times) { 158 | k1 <- sum(times < t0) 159 | k <- k1 + 1 160 | b <- (t0 - times[k1]) / (times[k] - times[k1]) 161 | return (w[k1] + b * (w[k] - w[k1])) 162 | } 163 | 164 | plot(x1$t[1:10], x1$w[1:10], type="b", main="", xlab="t", ylab="W") 165 | tmids <- 
x1$t + 0.0025 166 | for (i in 1:10) { 167 | w <- interpBM(x1$w, tmids[i], x1$t) 168 | points(tmids[i], w, pch=2) 169 | } 170 | 171 | legend("topleft", c("Generated W", "Interpolated W"), pch=c(1,2), lty=1, bty="n") 172 | ``` 173 | 174 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-chVis.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Code for SCR 2e Examples" 3 | subtitle: "M. L. Rizzo (2019). *Statistical Computing with R, 2nd ed.*, Chapman & Hall/CRC. https://github.com/mariarizzo/SCR2e" 4 | output: 5 | html_document: 6 | code_folding: show 7 | theme: lumen 8 | toc: yes 9 | toc_float: yes 10 | df_print: paged 11 | --- 12 | 13 | # Chapter 5: Visualization of Multivariate Data 14 | 15 | ## Example 5.1 (Scatterplot matrix) 16 | 17 | ```{r } 18 | data(iris) 19 | #virginica data in first 4 columns of the last 50 obs. 20 | 21 | # not shown in text 22 | pairs(iris[101:150, 1:4]) 23 | 24 | panel.d <- function(x, ...) { 25 | usr <- par("usr") 26 | on.exit(par(usr=usr)) 27 | par(usr = c(usr[1:2], 0, .5)) 28 | lines(density(x)) 29 | } 30 | 31 | # Fig. 5.1 32 | x <- scale(iris[101:150, 1:4]) 33 | r <- range(x) 34 | pairs(x, diag.panel = panel.d, xlim = r, ylim = r) 35 | 36 | library(lattice) 37 | splom(iris[101:150, 1:4]) #plot 1 38 | 39 | #for all 3 at once, in color, plot 2 40 | splom(iris[,1:4], groups = iris$Species) 41 | 42 | # Fig. 5.2 43 | #for all 3 at once, black and white, plot 3 44 | splom(~iris[1:4], groups = Species, data = iris, 45 | col = 1, pch = c(1, 2, 3), cex = c(.5,.5,.5)) 46 | ``` 47 | 48 | ## Example 5.2 (Correlation plots: Decathlon data) 49 | 50 | ```{r } 51 | library(FactoMineR) #decathlon data 52 | library(corrplot) 53 | data("decathlon") 54 | str(decathlon) 55 | 56 | corrMat <- cor(decathlon[, 1:10]) 57 | corrplot(corrMat, type="upper", tl.col="black", tl.srt=45) 58 | corrplot(corrMat, type = "upper", method = "square", 59 | addCoef.col = "black", diag=FALSE) 60 | ``` 61 | 62 | ## Example 5.3 (Plot bivariate normal density) 63 | 64 | ```{r } 65 | #the standard BVN density 66 | f <- function(x,y) { 67 | z <- (1/(2*pi)) * exp(-.5 * (x^2 + y^2)) 68 | } 69 | 70 | y <- x <- seq(-3, 3, length= 50) 71 | z <- outer(x, y, f) #compute density for all (x,y) 72 | 73 | persp(x, y, z) #the default plot 74 | 75 | persp(x, y, z, theta = 45, phi = 30, expand = 0.6, 76 | ltheta = 120, shade = 0.75, ticktype = "detailed", 77 | xlab = "X", ylab = "Y", zlab = "f(x, y)") 78 | ``` 79 | 80 | ## Example 5.4 (Add elements to perspective plot) 81 | 82 | ```{r } 83 | #store viewing transformation in M 84 | persp(x, y, z, theta = 45, phi = 30, 85 | expand = .4, box = FALSE) -> M 86 | 87 | #add some points along a circle 88 | a <- seq(-pi, pi, pi/16) 89 | newpts <- cbind(cos(a), sin(a)) * 2 90 | newpts <- cbind(newpts, 0, 1) #z=0, t=1 91 | N <- newpts %*% M 92 | points(N[,1]/N[,4], N[,2]/N[,4], col=2) 93 | 94 | #add lines 95 | x2 <- seq(-3, 3, .1) 96 | y2 <- -x2^2 / 3 97 | z2 <- dnorm(x2) * dnorm(y2) 98 | N <- cbind(x2, y2, z2, 1) %*% M 99 | lines(N[,1]/N[,4], N[,2]/N[,4], col=4) 100 | 101 | #add text 102 | x3 <- c(0, 3.1) 103 | y3 <- c(0, -3.1) 104 | z3 <- dnorm(x3) * dnorm(y3) * 1.1 105 | N <- cbind(x3, y3, z3, 1) %*% M 106 | text(N[1,1]/N[1,4], N[1,2]/N[1,4], "f(x,y)") 107 | text(N[2,1]/N[2,4], N[2,2]/N[2,4], bquote(y==-x^2/3)) 108 | ``` 109 | 110 | ## Example 5.5 (Surface plot using wireframe(lattice)) 111 | 112 | ```{r } 113 | library(lattice) 114 | x <- y <- seq(-3, 3, length= 50) 115 
| 116 | xy <- expand.grid(x, y) 117 | z <- (1/(2*pi)) * exp(-.5 * (xy[,1]^2 + xy[,2]^2)) 118 | wireframe(z ~ xy[,1] * xy[,2]) 119 | ``` 120 | 121 | ## Example 5.6 (3D scatterplot) 122 | 123 | ```{r } 124 | library(lattice) 125 | attach(iris) 126 | #basic 3 color plot with arrows along axes 127 | print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width, 128 | data=iris, groups=Species)) 129 | 130 | print(cloud(Sepal.Length ~ Petal.Length * Petal.Width, 131 | data = iris, groups = Species, main = "1", pch=1:3, 132 | scales = list(draw = FALSE), zlab = "SL", 133 | screen = list(z = 30, x = -75, y = 0)), 134 | split = c(1, 1, 2, 2), more = TRUE) 135 | 136 | print(cloud(Sepal.Width ~ Petal.Length * Petal.Width, 137 | data = iris, groups = Species, main = "2", pch=1:3, 138 | scales = list(draw = FALSE), zlab = "SW", 139 | screen = list(z = 30, x = -75, y = 0)), 140 | split = c(2, 1, 2, 2), more = TRUE) 141 | 142 | print(cloud(Petal.Length ~ Sepal.Length * Sepal.Width, 143 | data = iris, groups = Species, main = "3", pch=1:3, 144 | scales = list(draw = FALSE), zlab = "PL", 145 | screen = list(z = 30, x = -55, y = 0)), 146 | split = c(1, 2, 2, 2), more = TRUE) 147 | 148 | print(cloud(Petal.Width ~ Sepal.Length * Sepal.Width, 149 | data = iris, groups = Species, main = "4", pch=1:3, 150 | scales = list(draw = FALSE), zlab = "PW", 151 | screen = list(z = 30, x = -55, y = 0)), 152 | split = c(2, 2, 2, 2)) 153 | detach(iris) 154 | ``` 155 | 156 | ## Example 5.7 (Contour plot) 157 | 158 | ```{r } 159 | #contour plot with labels 160 | contour(volcano, asp = 1, labcex = 1) 161 | 162 | #another version from lattice package 163 | library(lattice) 164 | contourplot(volcano) #similar to above 165 | ``` 166 | 167 | ## Example 5.8 (Filled contour plots) 168 | 169 | ```{r } 170 | image(volcano, col = terrain.colors(100), axes = FALSE) 171 | contour(volcano, levels = seq(100,200,by = 10), add = TRUE) 172 | 173 | filled.contour(volcano, color = terrain.colors, asp = 1) 174 | levelplot(volcano, scales = list(draw = FALSE), 175 | xlab = "", ylab = "") 176 | ``` 177 | 178 | ## Example 5.9 (2D histogram) 179 | 180 | ```{r } 181 | library(hexbin) 182 | x <- matrix(rnorm(4000), 2000, 2) 183 | plot(hexbin(x[,1], x[,2])) 184 | 185 | # ggplot version 186 | library(ggplot2) 187 | x <- data.frame(x) 188 | ggplot(x, aes(x[,1], x[,2])) + geom_hex() 189 | ``` 190 | 191 | ## Example 5.10 (Andrews curves) 192 | 193 | ```{r } 194 | library(DAAG) 195 | attach(leafshape17) 196 | 197 | f <- function(a, v) { 198 | #Andrews curve f(a) for a data vector v in R^3 199 | v[1]/sqrt(2) + v[2]*sin(a) + v[3]*cos(a) 200 | } 201 | 202 | 203 | #scale data to range [-1, 1] 204 | x <- cbind(bladelen, petiole, bladewid) 205 | n <- nrow(x) 206 | mins <- apply(x, 2, min) #column minimums 207 | maxs <- apply(x, 2, max) #column maximums 208 | r <- maxs - mins #column ranges 209 | y <- sweep(x, 2, mins) #subtract column mins 210 | y <- sweep(y, 2, r, "/") #divide by range 211 | x <- 2 * y - 1 #now has range [-1, 1] 212 | 213 | #set up plot window, but plot nothing yet 214 | plot(0, 0, xlim = c(-pi, pi), ylim = c(-3,3), 215 | xlab = "t", ylab = "Andrews Curves", 216 | main = "", type = "n") 217 | 218 | #now add the Andrews curves for each observation 219 | #line type corresponds to leaf architecture 220 | #0=orthotropic, 1=plagiotropic 221 | a <- seq(-pi, pi, len=101) 222 | dim(a) <- length(a) 223 | for (i in 1:n) { 224 | g <- arch[i] + 1 225 | y <- apply(a, MARGIN = 1, FUN = f, v = x[i,]) 226 | lines(a, y, lty = g) 227 | } 228 | legend(3, c("Orthotropic", 
"Plagiotropic"), lty = 1:2) 229 | detach(leafshape17) 230 | ``` 231 | 232 | ## Example 5.11 (Parallel coordinates) 233 | 234 | ```{r } 235 | library(MASS) 236 | library(lattice) 237 | #trellis.device(color = FALSE) #black and white display 238 | x <- crabs[seq(5, 200, 5), ] #get every fifth obs. 239 | parallelplot(~x[4:8] | sp*sex, x) 240 | 241 | #trellis.device(color = FALSE) #black and white display 242 | x <- crabs[seq(5, 200, 5), ] #get every fifth obs. 243 | a <- x$CW * x$CL #area of carapace 244 | x[4:8] <- x[4:8] / sqrt(a) #adjust for size 245 | parallelplot(~x[4:8] | sp*sex, x) 246 | ``` 247 | 248 | ## Example 5.12 (Segment plot) 249 | 250 | ```{r } 251 | #segment plot 252 | x <- MASS::crabs[seq(5, 200, 5), ] #get every fifth obs. 253 | x <- subset(x, sex == "M") #keep just the males 254 | a <- x$CW * x$CL #area of carapace 255 | x[4:8] <- x[4:8] / sqrt(a) #adjust for size 256 | 257 | #use default color palette or other colors 258 | #palette(gray(seq(.4, .95, len = 5))) #use gray scale 259 | palette(rainbow(6)) #or use color 260 | stars(x[4:8], draw.segments = TRUE, 261 | labels =levels(x$sp), nrow = 4, 262 | ylim = c(-2,10), key.loc = c(3,-1)) 263 | 264 | #after viewing, restore the default colors 265 | palette("default") 266 | ``` 267 | 268 | ## Example 5.13 (PCA for open and closed book exams) 269 | 270 | ```{r } 271 | library(bootstrap) 272 | str(scor) 273 | pairs(scor) 274 | cor(scor) 275 | 276 | n <- nrow(scor) 277 | x <- scale(scor) #center and scale 278 | s <- cov(x) 279 | e <- eigen(s) 280 | lam <- e$values #vector of eigenvalues 281 | P <- e$vectors #matrix of eigenvectors 282 | 283 | plot(lam, type = "b", xlab = "eigenvalues", main = "") 284 | barplot(lam, xlab = "eigenvalues") 285 | 286 | tab <- rbind(lam / sum(lam), cumsum(lam) / sum(lam)) 287 | tab 288 | 289 | z <- x %*% P 290 | dim(z) 291 | head(z) 292 | 293 | pc <- prcomp(scor, center = TRUE, scale = TRUE) 294 | summary(pc) 295 | 296 | df <- scor[1:5, ] 297 | predict(pc, newdata = df) #same as z above 298 | 299 | head(x %*% pc$rotation, 5) 300 | head(pc$rotation) 301 | head(P) 302 | ``` 303 | 304 | ## Example 5.14 (PC Biplot) 305 | 306 | ```{r } 307 | ## plot scor data in the (PC1, PC2) coordinate system 308 | biplot(pc, pc.biplot = TRUE) 309 | round(cor(x, z), 3) 310 | ``` 311 | 312 | ## Example 5.15 (PCA: Decathlon data) 313 | 314 | ```{r } 315 | library(FactoMineR) 316 | data(decathlon) 317 | pc <- princomp(decathlon[,1:10], cor = TRUE, scores = TRUE) 318 | plot(pc) # screeplot 319 | biplot(pc) 320 | summary(pc) 321 | ``` 322 | 323 | -------------------------------------------------------------------------------- /examples-Rmd/SCR2e-examples-functions.R: -------------------------------------------------------------------------------- 1 | rlogarithmic <- function(n, theta) { 2 | # generate random sample from Logarithmic(theta) 3 | stopifnot(all(theta > 0 & theta < 1)) 4 | th <- rep(theta, length=n) 5 | u <- runif(n) 6 | v <- runif(n) 7 | x <- floor(1 + log(v) / log(1 - (1 - th)^u)) 8 | return(x) 9 | } 10 | 11 | # generate MVN by spectral decomposition method 12 | 13 | rmvn.eigen <- 14 | function(n, mu, Sigma) { 15 | # generate n random vectors from MVN(mu, Sigma) 16 | # dimension is inferred from mu and Sigma 17 | d <- length(mu) 18 | ev <- eigen(Sigma, symmetric = TRUE) 19 | lambda <- ev$values 20 | V <- ev$vectors 21 | R <- V %*% diag(sqrt(lambda)) %*% t(V) 22 | Z <- matrix(rnorm(n*d), nrow = n, ncol = d) 23 | X <- Z %*% R + matrix(mu, n, d, byrow = TRUE) 24 | X 25 | } 26 | 27 | 28 | # generate MVN by singular 
value decomposition method 29 | 30 | rmvn.svd <- 31 | function(n, mu, Sigma) { 32 | # generate n random vectors from MVN(mu, Sigma) 33 | # dimension is inferred from mu and Sigma 34 | d <- length(mu) 35 | S <- svd(Sigma) 36 | R <- S$u %*% diag(sqrt(S$d)) %*% t(S$v) #sq. root Sigma 37 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 38 | X <- Z %*% R + matrix(mu, n, d, byrow=TRUE) 39 | X 40 | } 41 | 42 | 43 | # generate MVN by Choleski factorization method 44 | 45 | rmvn.Choleski <- 46 | function(n, mu, Sigma) { 47 | # generate n random vectors from MVN(mu, Sigma) 48 | # dimension is inferred from mu and Sigma 49 | d <- length(mu) 50 | Q <- chol(Sigma) # Choleski factorization of Sigma 51 | Z <- matrix(rnorm(n*d), nrow=n, ncol=d) 52 | X <- Z %*% Q + matrix(mu, n, d, byrow=TRUE) 53 | X 54 | } 55 | 56 | # Bootstrap t confidence interval 57 | 58 | boot.t.ci <- 59 | function(x, B = 500, R = 100, level = .95, statistic){ 60 | #compute the bootstrap t CI 61 | x <- as.matrix(x); n <- nrow(x) 62 | stat <- numeric(B); se <- numeric(B) 63 | 64 | boot.se <- function(x, R, f) { 65 | #local function to compute the bootstrap 66 | #estimate of standard error for statistic f(x) 67 | x <- as.matrix(x); m <- nrow(x) 68 | th <- replicate(R, expr = { 69 | i <- sample(1:m, size = m, replace = TRUE) 70 | f(x[i, ]) 71 | }) 72 | return(sd(th)) 73 | } 74 | 75 | for (b in 1:B) { 76 | j <- sample(1:n, size = n, replace = TRUE) 77 | y <- x[j, ] 78 | stat[b] <- statistic(y) 79 | se[b] <- boot.se(y, R = R, f = statistic) 80 | } 81 | stat0 <- statistic(x) 82 | t.stats <- (stat - stat0) / se 83 | se0 <- sd(stat) 84 | alpha <- 1 - level 85 | Qt <- quantile(t.stats, c(alpha/2, 1-alpha/2), type = 1) 86 | names(Qt) <- rev(names(Qt)) 87 | CI <- rev(stat0 - Qt * se0) 88 | } 89 | 90 | 91 | # BCa bootstrap confidence interval 92 | 93 | boot.BCa <- 94 | function(x, th0, th, stat, conf = .95) { 95 | # bootstrap with BCa bootstrap confidence interval 96 | # th0 is the observed statistic 97 | # th is the vector of bootstrap replicates 98 | # stat is the function to compute the statistic 99 | 100 | x <- as.matrix(x) 101 | n <- nrow(x) #observations in rows 102 | N <- 1:n 103 | alpha <- (1 + c(-conf, conf))/2 104 | zalpha <- qnorm(alpha) 105 | 106 | # the bias correction factor 107 | z0 <- qnorm(sum(th < th0) / length(th)) 108 | 109 | # the acceleration factor (jackknife est.) 110 | th.jack <- numeric(n) 111 | for (i in 1:n) { 112 | J <- N[1:(n-1)] 113 | th.jack[i] <- stat(x[-i, ], J) 114 | } 115 | L <- mean(th.jack) - th.jack 116 | a <- sum(L^3)/(6 * sum(L^2)^1.5) 117 | 118 | # BCa conf. limits 119 | adj.alpha <- pnorm(z0 + (z0+zalpha)/(1-a*(z0+zalpha))) 120 | limits <- quantile(th, adj.alpha, type=6) 121 | return(list("est"=th0, "BCa"=limits)) 122 | } 123 | 124 | # Gelman-Rubin statistic for MCMC convergence 125 | 126 | Gelman.Rubin <- function(psi) { 127 | # psi[i,j] is the statistic psi(X[i,1:j]) 128 | # for chain in i-th row of X 129 | psi <- as.matrix(psi) 130 | n <- ncol(psi) 131 | k <- nrow(psi) 132 | 133 | psi.means <- rowMeans(psi) #row means 134 | B <- n * var(psi.means) #between variance est. 135 | psi.w <- apply(psi, 1, "var") #within variances 136 | W <- mean(psi.w) #within est. 137 | v.hat <- W*(n-1)/n + (B/n) #upper variance est. 
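  # note: B/n estimates the between-chain variance of psi and W the
  # within-chain variance; v.hat pools them into an (upper) estimate of
  # var(psi), so the ratio r.hat below shrinks toward 1 as the chains
  # converge to the target distribution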
138 | r.hat <- v.hat / W #G-R statistic 139 | return(r.hat) 140 | } 141 | 142 | # binning bivariate data 143 | 144 | bin2d <- 145 | function(x, breaks1 = "Sturges", breaks2 = "Sturges"){ 146 | # Data matrix x is n by 2 147 | # breaks1, breaks2: any valid breaks for hist function 148 | # using same defaults as hist 149 | histg1 <- hist(x[,1], breaks = breaks1, plot = FALSE) 150 | histg2 <- hist(x[,2], breaks = breaks2, plot = FALSE) 151 | brx <- histg1$breaks 152 | bry <- histg2$breaks 153 | 154 | # bin frequencies 155 | freq <- table(cut(x[,1], brx), cut(x[,2], bry)) 156 | 157 | return(list(call = match.call(), freq = freq, 158 | breaks1 = brx, breaks2 = bry, 159 | mids1 = histg1$mids, mids2 = histg2$mids)) 160 | } 161 | -------------------------------------------------------------------------------- /examples-Rmd/readme.md: -------------------------------------------------------------------------------- 1 | Code for the textbook examples in "Statistical Computing with R, 2nd ed." in R Markdown format. 2 | -------------------------------------------------------------------------------- /examples-cpp/printme.cpp: -------------------------------------------------------------------------------- 1 | #include <Rcpp.h> 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | void print_me(IntegerVector x) { 6 | // several ways to print in Rcpp 7 | Rprintf("my vector is (%d, %d, %d)\n", 8 | x[0], x[1], x[2]); 9 | Rcpp::Rcout << "my vector is " << x << std::endl; 10 | Rf_PrintValue(x); 11 | } 12 | -------------------------------------------------------------------------------- /examples-cpp/readme.md: -------------------------------------------------------------------------------- 1 | C++ example code 2 | --------------------------------------------------------------------------------
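For reference, a minimal way to compile and call `print_me()` from an R session (assuming the `Rcpp` package is installed and the repository root is the working directory; the path is illustrative):

```r
# compile printme.cpp and expose print_me() to the R session
Rcpp::sourceCpp("examples-cpp/printme.cpp")
print_me(c(1L, 2L, 3L))   #prints the vector in three ways
```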