├── .gitattributes
├── .gitignore
├── Chapter 10
    └── object-oriented-programming.R
├── Chapter 11
    └── working-with-databases.R
├── Chapter 12
    └── data-manipulation.R
├── Chapter 13
    ├── high-performance-computing.txt
    ├── my_cumsum1.R
    ├── rcpp-algo1.cpp
    ├── rcpp-demo.cpp
    ├── rcpp-diff-openmp.cpp
    ├── rcpp-diff.cpp
    └── rcpp-parallel.cpp
├── Chapter 14
    ├── data
    │   ├── new-products.html
    │   ├── products.html
    │   ├── simple-page.html
    │   ├── simple-products.html
    │   └── single-table.html
    └── web-scrapping.R
├── Chapter 15
    └── boosting-productivity.R
├── Chapter 2
    └── basic-objects.R
├── Chapter 3
    └── managing-your-workspace.R
├── Chapter 4
    └── basic-expressions.R
├── Chapter 5
    └── working-with-basic-objects.R
├── Chapter 6
    └── working-with-strings.R
├── Chapter 7
    └── working-with-data.R
├── Chapter 8
    └── inside-r.R
├── Chapter 9
    └── meta-programming.R
├── License
└── README.md


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 | 
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 | 


--------------------------------------------------------------------------------
/Chapter 10/object-oriented-programming.R:
--------------------------------------------------------------------------------
  1 | vec1 <- c(1, 2, 3)
  2 | typeof(vec1)
  3 | class(vec1)
  4 | 
  5 | 
  6 | data1 <- data.frame(x = 1:3, y = rnorm(3))
  7 | typeof(data1)
  8 | class(data1)
  9 | 
 10 | 
 11 | head
 12 | 
 13 | 
 14 | num_vec <- c(1, 2, 3, 4, 5)
 15 | data_frame <- data.frame(x = 1:5, y = rnorm(5))
 16 | 
 17 | 
 18 | head(num_vec, 3)
 19 | 
 20 | 
 21 | head(data_frame, 3)
 22 | 
 23 | 
 24 | simple_head <- function(x, n) {
 25 |   x[1:n]
 26 | }
 27 | 
 28 | 
 29 | simple_head(num_vec, 3)
 30 | 
 31 | 
 32 | simple_head(data_frame, 3)
 33 | 
 34 | 
 35 | simple_head2 <- function(x, n) {
 36 |   if (is.data.frame(x)) {
 37 |     x[1:n,]
 38 |   } else {
 39 |     x[1:n]
 40 |   }
 41 | }
 42 | 
 43 | 
 44 | simple_head2(num_vec, 3)
 45 | simple_head2(data_frame, 3)
 46 | 
 47 | 
 48 | methods("head")
 49 | 
 50 | 
 51 | lm1 <- lm(mpg ~ cyl + vs, data = mtcars)
 52 | 
 53 | 
 54 | typeof(lm1)
 55 | class(lm1)
 56 | 
 57 | 
 58 | lm1
 59 | 
 60 | 
 61 | print(lm1)
 62 | 
 63 | 
 64 | identical(getS3method("print", "lm"), stats:::print.lm)
 65 | 
 66 | 
 67 | length(methods("print"))
 68 | 
 69 | 
 70 | summary(lm1)
 71 | 
 72 | 
 73 | lm1summary <- summary(lm1)
 74 | typeof(lm1summary)
 75 | class(lm1summary)
 76 | 
 77 | 
 78 | names(lm1summary)
 79 | 
 80 | 
 81 | coef(lm1)
 82 | 
 83 | 
 84 | coef(lm1summary)
 85 | 
 86 | 
 87 | oldpar <- par(mfrow = c(2, 2))
 88 | plot(lm1)
 89 | par(oldpar)
 90 | 
 91 | 
 92 | predict(lm1, data.frame(cyl = c(6, 8), vs = c(1, 1)))
 93 | 
 94 | 
 95 | plot(mtcars$mpg, fitted(lm1))
 96 | 
 97 | 
 98 | plot(density(residuals(lm1)), 
 99 |   main = "Density of lm1 residuals")
100 | 
101 | 
102 | install.packages("rpart")
103 | 
104 | 
105 | library(rpart)
106 | tree_model <- rpart(mpg ~ cyl + vs, data = mtcars)
107 | 
108 | 
109 | typeof(tree_model)
110 | class(tree_model)
111 | 
112 | 
113 | print(tree_model)
114 | 
115 | 
116 | summary(tree_model)
117 | 
118 | 
119 | oldpar <- par(xpd = NA)
120 | plot(tree_model)
121 | text(tree_model, use.n = TRUE)
122 | par(oldpar)
123 | 
124 | 
125 | predict(tree_model, data.frame(cyl = c(6, 8), vs = c(1, 1)))
126 | 
127 | 
128 | coef(tree_model)
129 | 
130 | 
131 | generic_head <- function(x, n) 
132 |   UseMethod("generic_head")
133 | 
134 | 
135 | generic_head.default <- function(x, n) {
136 |   x[1:n]
137 | }
138 | 
139 | 
140 | generic_head(num_vec, 3)
141 | 
142 | 
143 | generic_head(data_frame, 3)
144 | 
145 | 
146 | generic_head.data.frame <- function(x, n) {
147 |   x[1:n,]
148 | }
149 | 
150 | 
151 | generic_head(data_frame, 3)
152 | 
153 | 
154 | product <- function(name, price, inventory) {
155 |   obj <- list(name = name, 
156 |     price = price, 
157 |     inventory = inventory)
158 |   class(obj) <- "product"
159 |   obj
160 | }
161 | 
162 | 
163 | product <- function(name, price, inventory) {
164 |   structure(list(name = name, 
165 |     price = price, 
166 |     inventory = inventory),
167 |     class = "product")
168 | }
169 | 
170 | 
171 | laptop <- product("Laptop", 499, 300)
172 | 
173 | 
174 | typeof(laptop)
175 | class(laptop)
176 | 
177 | 
178 | laptop
179 | 
180 | 
181 | print.product <- function(x, ...) {
182 |   cat("<product>\n")
183 |   cat("name:", x$name, "\n")
184 |   cat("price:", x$price, "\n")
185 |   cat("inventory:", x$inventory, "\n")
186 |   invisible(x)
187 | }
188 | 
189 | 
190 | laptop
191 | 
192 | 
193 | laptop$name
194 | laptop$price
195 | laptop$inventory
196 | 
197 | 
198 | cellphone <- product("Phone", 249, 12000)
199 | products <- list(laptop, cellphone)
200 | products
201 | 
202 | 
203 | product("Basket", 150, -0.5)
204 | 
205 | 
206 | product <- function(name, price, inventory) {
207 |   stopifnot(is.character(name), length(name) == 1,
208 |     is.numeric(price), length(price) == 1, 
209 |     is.numeric(inventory), length(inventory) == 1, 
210 |     price > 0, inventory >= 0)
211 |   structure(list(name = name, 
212 |     price = as.numeric(price), 
213 |     inventory = as.integer(inventory)),
214 |     class = "product")
215 | }
216 | 
217 | 
218 | product("Basket", 150, -0.5)
219 | 
220 | 
221 | value <- function(x, ...) 
222 |   UseMethod("value")
223 | 
224 | value.default <- function(x, ...) {
225 |   stop("Value is undefined")
226 | }
227 | 
228 | value.product <- function(x, ...) {
229 |   x$price * x$inventory
230 | }
231 | 
232 | 
233 | value(laptop)
234 | value(cellphone)
235 | 
236 | 
237 | sapply(products, value)
238 | 
239 | 
240 | laptop$price <- laptop$price * 0.85
241 | 
242 | 
243 | laptop$value <- laptop$price * laptop$inventory
244 | 
245 | 
246 | laptop
247 | 
248 | 
249 | percent <- function(x) {
250 |   stopifnot(is.numeric(x))
251 |   class(x) <- c("percent", "numeric")
252 |   x
253 | }
254 | 
255 | 
256 | pct <- percent(c(0.1, 0.05, 0.25, 0.23))
257 | pct
258 | 
259 | 
260 | as.character.percent <- function(x, ...) {
261 |   paste0(as.numeric(x) * 100, "%")
262 | }
263 | 
264 | 
265 | as.character(pct)
266 | 
267 | 
268 | format.percent <- function(x, ...) {
269 |   as.character(x, ...)
270 | }
271 | 
272 | 
273 | format(pct)
274 | 
275 | 
276 | print.percent <- function(x, ...) {
277 |   print(format.percent(x), quote = FALSE)
278 | }
279 | 
280 | 
281 | pct
282 | 
283 | 
284 | pct + 0.2
285 | pct * 0.5
286 | 
287 | 
288 | sum(pct)
289 | mean(pct)
290 | max(pct)
291 | min(pct)
292 | 
293 | 
294 | sum.percent <- function(...) {
295 |   percent(NextMethod("sum"))
296 | }
297 | mean.percent <- function(x, ...) {
298 |   percent(NextMethod("mean"))
299 | }
300 | max.percent <- function(...) {
301 |   percent(NextMethod("max"))
302 | }
303 | min.percent <- function(...) {
304 |   percent(NextMethod("max"))
305 | }
306 | 
307 | 
308 | sum(pct)
309 | mean(pct)
310 | max(pct)
311 | min(pct)
312 | 
313 | 
314 | c(pct, 0.12)
315 | 
316 | 
317 | c.percent <- function(x, ...) {
318 |   percent(NextMethod("c"))
319 | }
320 | 
321 | 
322 | c(pct, 0.12, -0.145)
323 | 
324 | 
325 | pct[1:3]
326 | pct[[2]]
327 | 
328 | 
329 | `[.percent` <- function(x, i) {
330 |   percent(NextMethod("["))
331 | }
332 | `[[.percent` <- function(x, i) {
333 |   percent(NextMethod("[["))
334 | }
335 | 
336 | 
337 | pct[1:3]
338 | pct[[2]]
339 | 
340 | 
341 | data.frame(id = 1:4, pct)
342 | 
343 | 
344 | Vehicle <- function(class, name, speed) {
345 |   obj <- new.env(parent = emptyenv())
346 |   obj$name <- name
347 |   obj$speed <- speed
348 |   obj$position <- c(0, 0, 0)
349 |   class(obj) <- c(class, "vehicle")
350 |   obj
351 | }
352 | 
353 | 
354 | Car <- function(...) {
355 |   Vehicle(class = "car", ...)
356 | }
357 | Bus <- function(...) {
358 |   Vehicle(class = "bus", ...)
359 | }
360 | Airplane <- function(...) {
361 |   Vehicle(class = "airplane", ...)
362 | }
363 | 
364 | 
365 | car <- Car("Model-A", 80)
366 | bus <- Bus("Medium-Bus", 45)
367 | airplane <- Airplane("Big-Plane", 800)
368 | 
369 | 
370 | print.vehicle <- function(x, ...) {
371 |   cat(sprintf("<vehicle: %s>\n", class(x)[[1]]))
372 |   cat("name:", x$name, "\n")
373 |   cat("speed:", x$speed, "km/h\n")
374 |   cat("position:", paste(x$position, collapse = ", "))
375 | }
376 | 
377 | 
378 | car
379 | bus
380 | airplane
381 | 
382 | 
383 | move <- function(vehicle, x, y, z) {
384 |   UseMethod("move")
385 | }
386 | move.vehicle <- function(vehicle, movement) {
387 |   if (length(movement) != 3) {
388 |     stop("All three dimensions must be specified to move a vehicle")
389 |   }
390 |   vehicle$position <- vehicle$position + movement
391 |   vehicle
392 | }
393 | 
394 | 
395 | move.bus <- move.car <- function(vehicle, movement) {
396 |   if (length(movement) != 2) {
397 |     stop("This vehicle only supports 2d movement")
398 |   }
399 |   movement <- c(movement, 0)
400 |   NextMethod("move")
401 | }
402 | 
403 | 
404 | move.airplane <- function(vehicle, movement) {
405 |   if (length(movement) == 2) {
406 |     movement <- c(movement, 0)
407 |   }
408 |   NextMethod("move")
409 | }
410 | 
411 | 
412 | move(car, c(1, 2, 3))
413 | 
414 | 
415 | move(car, c(1, 2))
416 | 
417 | 
418 | move(airplane, c(1, 2))
419 | 
420 | 
421 | move(airplane, c(20, 50, 80))
422 | 
423 | 
424 | setClass("Product", 
425 |   representation(name = "character", 
426 |     price = "numeric", 
427 |     inventory = "integer"))
428 | 
429 | 
430 | getSlots("Product")
431 | 
432 | 
433 | laptop <- new("Product", name = "Laptop-A", price = 299, inventory = 100)
434 | 
435 | 
436 | laptop <- new("Product", name = "Laptop-A", price = 299, inventory = 100L)
437 | laptop
438 | 
439 | 
440 | typeof(laptop)
441 | class(laptop)
442 | 
443 | 
444 | isS4(laptop)
445 | 
446 | 
447 | laptop@price * laptop@inventory
448 | 
449 | 
450 | slot(laptop, "price")
451 | 
452 | 
453 | laptop@price <- 289
454 | 
455 | 
456 | laptop@inventory <- 200
457 | 
458 | 
459 | laptop@value <- laptop@price * laptop@inventory
460 | 
461 | 
462 | toy <- new("Product", name = "Toys", price = 10)
463 | toy
464 | 
465 | 
466 | setClass("Product", 
467 |   representation(name = "character", 
468 |     price = "numeric", 
469 |     inventory = "integer"),
470 |   prototype(name = "Unnamed", price = NA_real_, inventory = 0L))
471 | 
472 | 
473 | toy <- new("Product", name = "Toys", price = 5)
474 | toy
475 | 
476 | 
477 | bottle <- new("Product", name = "Bottle", price = 1.5, inventory = -2L)
478 | bottle
479 | 
480 | 
481 | validate_product <- function(object) {
482 |   errors <- c(
483 |     if (length(object@name) != 1)  
484 |       "Length of name should be 1" 
485 |     else if (is.na(object@name)) 
486 |       "name should not be missing value",
487 |     
488 |     if (length(object@price) != 1) 
489 |       "Length of price should be 1"
490 |     else if (is.na(object@price)) 
491 |       "price should not be missing value"
492 |     else if (object@price <= 0) 
493 |       "price must be positive",
494 |     
495 |     if (length(object@inventory) != 1) 
496 |       "Length of inventory should be 1"
497 |     else if (is.na(object@inventory))
498 |       "inventory should not be missing value"
499 |     else if (object@inventory < 0) 
500 |       "inventory must be non-negative")
501 |   if (length(errors) == 0) TRUE else errors
502 | }
503 | 
504 | 
505 | validate_product(bottle)
506 | 
507 | 
508 | setClass("Product", 
509 |   representation(name = "character", 
510 |     price = "numeric", 
511 |     inventory = "integer"),
512 |   prototype(name = "Unnamed", price = NA_real_, inventory = 0L),
513 |   validity = validate_product)
514 | 
515 | 
516 | bottle <- new("Product", name = "Bottle")
517 | 
518 | 
519 | bottle <- new("Product", name = "Bottle", price = 3, inventory = -2L)
520 | 
521 | 
522 | bottle <- new("Product", name = "Bottle", 
523 |   price = 3, inventory = 100L, volume = 15)
524 | 
525 | 
526 | setClass("Container", 
527 |   representation(volume = "numeric"), 
528 |   contains = "Product")
529 | 
530 | 
531 | getSlots("Container")
532 | 
533 | 
534 | bottle <- new("Container", name = "Bottle",
535 |   price = 3, inventory = 100L, volume = 15)
536 | 
537 | 
538 | bottle <- new("Container", name = "Bottle",
539 |   price = 3, inventory = -10L, volume = 15)
540 | 
541 | 
542 | bottle <- new("Container", name = "Bottle",
543 |   price = 3, inventory = 100L, volume = -2)
544 | 
545 | 
546 | validate_container <- function(object) {
547 |   errors <- c(
548 |     if (length(object@volume) != 1)
549 |       "Length of volume must be 1",
550 |     if (object@volume <= 0)
551 |       "volume must be positive"
552 |   )
553 |   if (length(errors) == 0) TRUE else errors
554 | }
555 | 
556 | 
557 | setClass("Container", 
558 |   representation(volume = "numeric"), 
559 |   contains = "Product",
560 |   validity = validate_container)
561 | 
562 | 
563 | bottle <- new("Container", name = "Bottle",
564 |   price = 3, inventory = 100L, volume = -2)
565 | bottle <- new("Container", name = "Bottle",
566 |   price = 3, inventory = -5L, volume = 10)
567 | 
568 | 
569 | setClass("Shape")
570 | setClass("Polygon", 
571 |   representation(sides = "integer"), 
572 |   contains = "Shape")
573 | setClass("Triangle", 
574 |   representation(a = "numeric", b = "numeric", c = "numeric"), 
575 |   prototype(a = 1, b = 1, c = 1, sides = 3L),
576 |   contains = "Polygon")
577 | setClass("Rectangle",
578 |   representation(a = "numeric", b = "numeric"),
579 |   prototype(a = 1, b = 1, sides = 4L),
580 |   contains = "Polygon")
581 | setClass("Circle",
582 |   representation(r = "numeric"),
583 |   prototype(r = 1, sides = Inf),
584 |   contains = "Shape")
585 | 
586 | 
587 | setGeneric("area", function(object) {
588 |   standardGeneric("area")
589 | }, valueClass = "numeric")
590 | 
591 | 
592 | setMethod("area", signature("Triangle"), function(object) {
593 |   a <- object@a
594 |   b <- object@b
595 |   c <- object@c
596 |   s <- (a + b + c) / 2
597 |   sqrt(s * (s - a) * (s - b) * (s - c))
598 | })
599 | 
600 | 
601 | setMethod("area", signature("Rectangle"), function(object) {
602 |   object@a * object@b
603 | })
604 | setMethod("area", signature("Circle"), function(object) {
605 |   pi * object@r ^ 2
606 | })
607 | 
608 | 
609 | triangle <- new("Triangle", a = 3, b = 4, c = 5)
610 | area(triangle)
611 | 
612 | 
613 | circle <- new("Circle", r = 3)
614 | area(circle)
615 | 
616 | 
617 | setClass("Object", representation(height = "numeric"))
618 | setClass("Cylinder", contains = "Object")
619 | setClass("Cone", contains = "Object")
620 | 
621 | 
622 | setGeneric("volume", 
623 |   function(shape, object) standardGeneric("volume"))
624 | 
625 | 
626 | setMethod("volume", signature("Rectangle", "Cylinder"), 
627 |   function(shape, object) {
628 |     shape@a * shape@b * object@height
629 |   })
630 | setMethod("volume", signature("Rectangle", "Cone"),
631 |   function(shape, object) {
632 |     shape@a * shape@b * object@height / 3
633 |   })
634 | 
635 | 
636 | rectangle <- new("Rectangle", a = 2, b = 3)
637 | cylinder <- new("Cylinder", height = 3)
638 | volume(rectangle, cylinder)
639 | 
640 | 
641 | setMethod("volume", signature("Shape", "Cylinder"), 
642 |   function(shape, object) {
643 |     area(shape) * object@height
644 |   })
645 | setMethod("volume", signature("Shape", "Cone"),
646 |   function(shape, object) {
647 |     area(shape) * object@height / 3
648 |   })
649 | 
650 | 
651 | circle <- new("Circle", r = 2)
652 | cone <- new("Cone", height = 3)
653 | volume(circle, cone)
654 | 
655 | 
656 | setMethod("volume", signature("Shape", "numeric"),
657 |   function(shape, object) {
658 |     area(shape) * object
659 |   })
660 | 
661 | 
662 | volume(rectangle, 3)
663 | 
664 | 
665 | setMethod("*", signature("Shape", "Object"), 
666 |   function(e1, e2) {
667 |     volume(e1, e2)
668 |   })
669 | 
670 | 
671 | rectangle * cone
672 | 
673 | 
674 | lengthen <- function(object, factor) {
675 |   object@height <- object@height * factor
676 |   object
677 | }
678 | 
679 | 
680 | cylinder
681 | lengthen(cylinder, 2)
682 | cylinder
683 | 
684 | 
685 | Vehicle <- setRefClass("Vehicle", 
686 |   fields = list(position = "numeric", distance = "numeric"))
687 | 
688 | 
689 | car <- Vehicle$new(position = 0, distance = 0)
690 | 
691 | 
692 | car$position
693 | 
694 | 
695 | move <- function(vehicle, movement) {
696 |   vehicle$position <- vehicle$position + movement
697 |   vehicle$distance <- vehicle$distance + abs(movement)
698 | }
699 | 
700 | 
701 | move(car, 10)
702 | car
703 | 
704 | 
705 | Vehicle <- setRefClass("Vehicle", 
706 |   fields = list(position = "numeric", distance = "numeric"),
707 |   methods = list(move = function(x) {
708 |     stopifnot(is.numeric(x))
709 |     position <<- position + x
710 |     distance <<- distance + abs(x)
711 |   }))
712 | 
713 | 
714 | bus <- Vehicle(position = 0, distance = 0)
715 | bus$move(5)
716 | bus
717 | 
718 | 
719 | install.packages("R6")
720 | 
721 | 
722 | library(R6)
723 | Vehicle <- R6Class("Vehicle", 
724 |   public = list(
725 |     name = NA,
726 |     model = NA,
727 |     initialize = function(name, model) {
728 |       if (!missing(name)) self$name <- name
729 |       if (!missing(model)) self$model <- model
730 |     },
731 |     move = function(movement) {
732 |       private$start()
733 |       private$position <- private$position + movement
734 |       private$stop()
735 |     },
736 |     get_position = function() {
737 |       private$position
738 |     }
739 |   ),
740 |   private = list(
741 |     position = 0,
742 |     speed = 0,
743 |     start = function() {
744 |       cat(self$name, "is starting\n")
745 |       private$speed <- 50
746 |     },
747 |     stop = function() {
748 |       cat(self$name, "is stopping\n")
749 |       private$speed <- 0
750 |     }
751 |   ))
752 | 
753 | 
754 | car <- Vehicle$new(name = "Car", model = "A")
755 | car
756 | 
757 | 
758 | car$move(10)
759 | car$get_position()
760 | 
761 | 
762 | MeteredVehicle <- R6Class("MeteredVehicle",
763 |   inherit = Vehicle,
764 |   public = list(
765 |     move = function(movement) {
766 |       super$move(movement)
767 |       private$distance <<- private$distance + abs(movement)
768 |     },
769 |     get_distance = function() {
770 |       private$distance
771 |     }
772 |   ),
773 |   private = list(
774 |     distance = 0
775 |   ))
776 | 
777 | 
778 | bus <- MeteredVehicle$new(name = "Bus", model = "B")
779 | bus
780 | 
781 | 
782 | bus$move(10)
783 | bus$get_position()
784 | bus$get_distance()
785 | 
786 | 
787 | bus$move(-5)
788 | bus$get_position()
789 | bus$get_distance()
790 | 


--------------------------------------------------------------------------------
/Chapter 11/working-with-databases.R:
--------------------------------------------------------------------------------
  1 | install.packages("RSQLite")
  2 | 
  3 | 
  4 | if (file.exists("data/example.sqlite")) 
  5 |   file.remove("data/example.sqlite")
  6 | 
  7 | 
  8 | if (!dir.exists("data")) dir.create("data")
  9 | 
 10 | 
 11 | library(RSQLite)
 12 | con <- dbConnect(SQLite(), "data/example.sqlite")
 13 | 
 14 | 
 15 | example1 <- data.frame(
 16 |   id = 1:5, 
 17 |   type = c("A", "A", "B", "B", "C"),
 18 |   score = c(8, 9, 8, 10, 9), 
 19 |   stringsAsFactors = FALSE)
 20 | example1
 21 | 
 22 | 
 23 | dbWriteTable(con, "example1", example1)
 24 | 
 25 | 
 26 | dbDisconnect(con)
 27 | 
 28 | 
 29 | if (file.exists("data/datasets.sqlite")) 
 30 |   file.remove("data/datasets.sqlite")
 31 | 
 32 | 
 33 | install.packages(c("ggplot2", "nycflights13"))
 34 | 
 35 | 
 36 | data("diamonds", package = "ggplot2")
 37 | data("flights", package = "nycflights13")
 38 | 
 39 | 
 40 | con <- dbConnect(SQLite(), "data/datasets.sqlite")
 41 | dbWriteTable(con, "diamonds", diamonds, row.names = FALSE)
 42 | dbWriteTable(con, "flights", flights, row.names = FALSE)
 43 | dbDisconnect(con)
 44 | 
 45 | 
 46 | class(diamonds)
 47 | class(flights)
 48 | 
 49 | 
 50 | con <- dbConnect(SQLite(), "data/datasets.sqlite")
 51 | dbWriteTable(con, "diamonds", as.data.frame(diamonds), row.names = FALSE)
 52 | dbWriteTable(con, "flights", as.data.frame(flights), row.names = FALSE)
 53 | dbDisconnect(con)
 54 | 
 55 | 
 56 | if (file.exists("data/example2.sqlite")) 
 57 |   file.remove("data/example2.sqlite")
 58 | 
 59 | 
 60 | con <- dbConnect(SQLite(), "data/example2.sqlite")
 61 | chunk_size <- 10
 62 | id <- 0
 63 | for (i in 1:6) {
 64 |   chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 
 65 |     type = LETTERS[[i]],
 66 |     score = rbinom(chunk_size, 10, (10 - i) / 10),
 67 |     stringsAsFactors = FALSE)
 68 |   dbWriteTable(con, "products", chunk, 
 69 |     append = i > 1, row.names = FALSE)
 70 | }
 71 | dbDisconnect(con)
 72 | 
 73 | 
 74 | con <- dbConnect(SQLite(), "data/datasets.sqlite")
 75 | 
 76 | 
 77 | dbExistsTable(con, "diamonds")
 78 | dbExistsTable(con, "mtcars")
 79 | 
 80 | 
 81 | dbListTables(con)
 82 | 
 83 | 
 84 | dbListFields(con, "diamonds")
 85 | 
 86 | 
 87 | db_diamonds <- dbReadTable(con, "diamonds")
 88 | dbDisconnect(con)
 89 | 
 90 | 
 91 | head(db_diamonds, 3)
 92 | head(diamonds, 3)
 93 | 
 94 | 
 95 | identical(diamonds, db_diamonds)
 96 | 
 97 | 
 98 | str(db_diamonds)
 99 | 
100 | 
101 | str(diamonds)
102 | 
103 | 
104 | con <- dbConnect(SQLite(), "data/datasets.sqlite")
105 | dbListTables(con)
106 | 
107 | 
108 | db_diamonds <- dbGetQuery(con, 
109 |   "select * from diamonds")
110 | head(db_diamonds, 3)
111 | 
112 | 
113 | db_diamonds <- dbGetQuery(con, 
114 |   "select carat, cut, color, clarity, depth, price 
115 |   from diamonds")
116 | head(db_diamonds, 3)
117 | 
118 | 
119 | dbGetQuery(con, "select distinct cut from diamonds")
120 | 
121 | 
122 | dbGetQuery(con, "select distinct clarity from diamonds")[[1]]
123 | 
124 | 
125 | db_diamonds <- dbGetQuery(con, 
126 |   "select carat, price, clarity as clarity_level from diamonds")
127 | head(db_diamonds, 3)
128 | 
129 | 
130 | db_diamonds <- dbGetQuery(con,
131 |   "select carat, price, x * y * z as size from diamonds")
132 | head(db_diamonds, 3)
133 | 
134 | 
135 | db_diamonds <- dbGetQuery(con,
136 |   "select carat, price, x * y * z as size,
137 |   price / size as value_density
138 |   from diamonds")
139 | 
140 | 
141 | db_diamonds <- dbGetQuery(con,
142 |   "select *, price / size as value_density from
143 |   (select carat, price, x * y * z as size from diamonds)")
144 | head(db_diamonds, 3)
145 | 
146 | 
147 | good_diamonds <- dbGetQuery(con, 
148 |   "select carat, cut, price from diamonds where cut = 'Good'")
149 | head(good_diamonds, 3)
150 | 
151 | 
152 | nrow(good_diamonds) / nrow(diamonds)
153 | 
154 | 
155 | good_e_diamonds <- dbGetQuery(con, 
156 |   "select carat, cut, color, price from diamonds 
157 |   where cut = 'Good' and color = 'E'")
158 | head(good_e_diamonds, 3)
159 | nrow(good_e_diamonds) / nrow(diamonds)
160 | 
161 | 
162 | color_ef_diamonds <- dbGetQuery(con,
163 |   "select carat, cut, color, price from diamonds
164 |   where color in ('E','F')")
165 | nrow(color_ef_diamonds)
166 | 
167 | 
168 | table(diamonds$color)
169 | 
170 | 
171 | some_price_diamonds <- dbGetQuery(con,
172 |   "select carat, cut, color, price from diamonds
173 |   where price between 5000 and 5500")
174 | nrow(some_price_diamonds) / nrow(diamonds)
175 | 
176 | 
177 | good_cut_diamonds <- dbGetQuery(con, 
178 |   "select carat, cut, color, price from diamonds
179 |   where cut like '%Good'")
180 | nrow(good_cut_diamonds) / nrow(diamonds)
181 | 
182 | 
183 | cheapest_diamonds <- dbGetQuery(con,
184 |   "select carat, price from diamonds
185 |   order by price")
186 | 
187 | 
188 | head(cheapest_diamonds)
189 | 
190 | 
191 | most_expensive_diamonds <- dbGetQuery(con,
192 |   "select carat, price from diamonds
193 |   order by price desc")
194 | head(most_expensive_diamonds)
195 | 
196 | 
197 | cheapest_diamonds <- dbGetQuery(con, 
198 |   "select carat, price from diamonds
199 |   order by price, carat desc")
200 | head(cheapest_diamonds)
201 | 
202 | 
203 | dense_diamonds <- dbGetQuery(con,
204 |   "select carat, price, x * y * z as size from diamonds
205 |   order by carat / size desc")
206 | head(dense_diamonds)
207 | 
208 | 
209 | head(dbGetQuery(con, 
210 |   "select carat, price from diamonds
211 |    where cut = 'Ideal' and clarity = 'IF' and color = 'J'
212 |    order by price"))
213 | 
214 | 
215 | dbGetQuery(con, 
216 |   "select carat, price from diamonds
217 |   order by carat desc limit 3")
218 | 
219 | 
220 | dbGetQuery(con,
221 |   "select color, count(*) as number from diamonds
222 |   group by color")
223 | 
224 | 
225 | table(diamonds$color)
226 | 
227 | 
228 | dbGetQuery(con,
229 |   "select clarity, avg(price) as avg_price 
230 |    from diamonds
231 |    group by clarity 
232 |    order by avg_price desc")
233 | 
234 | 
235 | dbGetQuery(con,
236 |   "select price, max(carat) as max_carat 
237 |    from diamonds
238 |    group by price
239 |    order by price
240 |    limit 5")
241 | 
242 | 
243 | dbGetQuery(con,
244 |   "select clarity, 
245 |      min(price) as min_price, 
246 |      max(price) as max_price,
247 |      avg(price) as avg_price
248 |    from diamonds
249 |    group by clarity 
250 |    order by avg_price desc")
251 | 
252 | 
253 | dbGetQuery(con,
254 |   "select clarity,
255 |      sum(price * carat) / sum(carat) as wprice
256 |    from diamonds
257 |    group by clarity 
258 |    order by wprice desc")
259 | 
260 | 
261 | dbGetQuery(con,
262 |   "select clarity, color,
263 |      avg(price) as avg_price
264 |    from diamonds
265 |    group by clarity, color 
266 |    order by avg_price desc 
267 |    limit 5")
268 | 
269 | 
270 | diamond_selector <- data.frame(
271 |   cut = c("Ideal", "Good", "Fair"),
272 |   color = c("E", "I", "D"),
273 |   clarity = c("VS1", "I1", "IF"),
274 |   stringsAsFactors = FALSE
275 | )
276 | diamond_selector
277 | 
278 | 
279 | dbWriteTable(con, "diamond_selector", diamond_selector, 
280 |   row.names = FALSE, overwrite = TRUE)
281 | 
282 | 
283 | subset_diamonds <- dbGetQuery(con, 
284 |   "select cut, color, clarity, carat, price
285 |    from diamonds
286 |    join diamond_selector using (cut, color, clarity)")
287 | head(subset_diamonds)
288 | 
289 | 
290 | nrow(subset_diamonds) / nrow(diamonds)
291 | 
292 | 
293 | dbDisconnect(con)
294 | 
295 | 
296 | con <- dbConnect(SQLite(), "data/datasets.sqlite")
297 | res <- dbSendQuery(con, 
298 |   "select carat, cut, color, price from diamonds
299 |   where cut = 'Ideal' and color = 'E'")
300 | while (!dbHasCompleted(res)) {
301 |   chunk <- dbFetch(res, 800)
302 |   cat(nrow(chunk), "records fetched\n")
303 |   # do something with chunk
304 | }
305 | dbClearResult(res)
306 | dbDisconnect(con)
307 | 
308 | 
309 | file.remove("data/products.sqlite")
310 | 
311 | 
312 | set.seed(123)
313 | con <- dbConnect(SQLite(), "data/products.sqlite")
314 | chunk_size <- 10
315 | for (i in 1:6) {
316 |   cat("Processing chunk", i, "\n")
317 |   if (runif(1) <= 0.2) stop("Data error")
318 |   chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 
319 |     type = LETTERS[[i]],
320 |     score = rbinom(chunk_size, 10, (10 - i) / 10),
321 |     stringsAsFactors = FALSE)
322 |   dbWriteTable(con, "products", chunk, 
323 |     append = i > 1, row.names = FALSE)
324 | }
325 | 
326 | 
327 | dbGetQuery(con, "select COUNT(*) from products")
328 | dbDisconnect(con)
329 | 
330 | 
331 | set.seed(123)
332 | file.remove("data/products.sqlite")
333 | con <- dbConnect(SQLite(), "data/products.sqlite")
334 | chunk_size <- 10
335 | dbBegin(con)
336 | res <- tryCatch({
337 |   for (i in 1:6) {
338 |     cat("Processing chunk", i, "\n")
339 |     if (runif(1) <= 0.2) stop("Data error")
340 |     chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 
341 |       type = LETTERS[[i]],
342 |       score = rbinom(chunk_size, 10, (10 - i) / 10),
343 |       stringsAsFactors = FALSE)
344 |     dbWriteTable(con, "products", chunk, 
345 |       append = i > 1, row.names = FALSE)
346 |   }
347 |   dbCommit(con)
348 | }, error = function(e) {
349 |   warning("An error occurs: ", e, "\nRolling back", immediate. = TRUE)
350 |   dbRollback(con)
351 | })
352 | 
353 | 
354 | dbGetQuery(con, "select COUNT(*) from products")
355 | dbDisconnect(con)
356 | 
357 | 
358 | file.remove("data/bank.sqlite")
359 | 
360 | 
361 | create_bank <- function(dbfile) {
362 |   if (file.exists(dbfile)) file.remove(dbfile)
363 |   con <- dbConnect(SQLite(), dbfile)
364 |   dbSendQuery(con, 
365 |     "create table accounts 
366 |     (name text primary key, balance real)")
367 |   dbSendQuery(con,
368 |     "create table transactions 
369 |     (time text, account_from text, account_to text, value real)")
370 |   con
371 | }
372 | 
373 | 
374 | create_account <- function(con, name, balance) {
375 |   dbSendQuery(con, 
376 |     sprintf("insert into accounts (name, balance) values ('%s',%.2f)", name, balance))
377 |   TRUE
378 | }
379 | 
380 | 
381 | transfer <- function(con, from, to, value) {
382 |   get_account <- function(name) {
383 |     account <- dbGetQuery(con, 
384 |       sprintf("select * from accounts where name = '%s'", name))
385 |     if (nrow(account) == 0) stop(sprintf("Account '%s' does not exist", name))
386 |     account
387 |   }
388 |   account_from <- get_account(from)
389 |   account_to <- get_account(to)
390 |   if (account_from$balance < value) {
391 |     stop(sprintf("Insufficient money to transfer from '%s'", from))
392 |   } else {
393 |     dbSendQuery(con, 
394 |       sprintf("update accounts set balance = %.2f where name = '%s'",
395 |         account_from$balance - value, from))
396 |     dbSendQuery(con,
397 |       sprintf("update accounts set balance = %.2f where name = '%s'",
398 |         account_to$balance + value, to))
399 |     dbSendQuery(con,
400 |       sprintf("insert into transactions (time, account_from, account_to, value) values
401 |         ('%s', '%s', '%s', %.2f)", format(Sys.time(), "%Y-%m-%d %H:%M:%S"),
402 |         from, to, value))
403 |   }
404 |   TRUE
405 | }
406 | 
407 | 
408 | safe_transfer <- function(con, ...) {
409 |   dbBegin(con)
410 |   tryCatch({
411 |     transfer(con, ...)
412 |     dbCommit(con)
413 |   }, error = function(e) {
414 |     message("An error occurs in the transaction. Rollback...")
415 |     dbRollback(con)
416 |     stop(e)
417 |   })
418 | }
419 | 
420 | 
421 | get_balance <- function(con, name) {
422 |   res <- dbGetQuery(con, 
423 |     sprintf("select balance from accounts where name = '%s'", name))
424 |   res$balance
425 | }
426 | get_transactions <- function(con, from, to) {
427 |   dbGetQuery(con,
428 |     sprintf("select * from transactions 
429 |       where account_from = '%s' and account_to = '%s'", from, to))
430 | }
431 | 
432 | 
433 | con <- create_bank("data/bank.sqlite")
434 | create_account(con, "David", 5000)
435 | create_account(con, "Jenny", 6500)
436 | get_balance(con, "David")
437 | get_balance(con, "Jenny")
438 | 
439 | 
440 | safe_transfer(con, "David", "Jenny", 1500)
441 | get_balance(con, "David")
442 | get_balance(con, "Jenny")
443 | 
444 | 
445 | safe_transfer(con, "David", "Jenny", 6500)
446 | get_balance(con, "David")
447 | get_balance(con, "Jenny")
448 | 
449 | 
450 | get_transactions(con, "David", "Jenny")
451 | 
452 | 
453 | dbDisconnect(con)
454 | 
455 | 
456 | chunk_rw <- function(input, output, table, chunk_size = 10000) {
457 |   first_row <- read.csv(input, nrows = 1, header = TRUE)
458 |   header <- colnames(first_row)
459 |   n <- 0
460 |   con <- dbConnect(SQLite(), output)
461 |   on.exit(dbDisconnect(con))
462 |   while (TRUE) {
463 |     df <- read.csv(input, 
464 |       skip = 1 + n * chunk_size, nrows = chunk_size, 
465 |       header = FALSE, col.names = header,
466 |       stringsAsFactors = FALSE)
467 |     if (nrow(df) == 0) break;
468 |     dbWriteTable(con, table, df, row.names = FALSE, append = n > 0)
469 |     n <- n + 1
470 |     cat(sprintf("%d records written\n", nrow(df)))
471 |   }
472 | }
473 | 
474 | 
475 | file.remove("data/diamonds.csv", "data/diamonds.sqlite")
476 | 
477 | 
478 | write.csv(diamonds, "data/diamonds.csv", quote = FALSE, row.names = FALSE)
479 | chunk_rw("data/diamonds.csv", "data/diamonds.sqlite", "diamonds")
480 | 
481 | 
482 | batch_rw <- function(dir, output, table, overwrite = TRUE) {
483 |   files <- list.files(dir, "\\.csv$", full.names = TRUE)
484 |   con <- dbConnect(SQLite(), output)
485 |   on.exit(dbDisconnect(con))
486 |   exist <- dbExistsTable(con, table)
487 |   if (exist) {
488 |     if (overwrite) dbRemoveTable(con, table)
489 |     else stop(sprintf("Table '%s' already exists", table))
490 |   }
491 |   exist <- FALSE
492 |   for (file in files) {
493 |     cat(file, "... ")
494 |     df <- read.csv(file, header = TRUE, stringsAsFactors = FALSE)
495 |     dbWriteTable(con, table, df, row.names = FALSE, 
496 |       append = exist)
497 |     exist <- TRUE
498 |     cat("done\n")
499 |   }
500 | }
501 | 
502 | 
503 | file.remove("data/groups.sqlite")
504 | 
505 | 
506 | batch_rw("data/groups", "data/groups.sqlite", "groups")
507 | 
508 | 
509 | con <- dbConnect(SQLite(), "data/groups.sqlite")
510 | dbReadTable(con, "groups")
511 | dbDisconnect(con)
512 | 
513 | 
514 | install.packages("mongolite")
515 | 
516 | 
517 | library(mongolite)
518 | m <- mongo("products", "test", "mongodb://localhost")
519 | 
520 | 
521 | m$count()
522 | 
523 | 
524 | m$insert('
525 | {
526 |   "code": "A0000001",
527 |   "name": "Product-A",
528 |   "type": "Type-I",
529 |   "price": 29.5,
530 |   "amount": 500,
531 |   "comments": [
532 |     {
533 |       "user": "david",
534 |       "score": 8,
535 |       "text": "This is a good product"
536 |     },
537 |     {
538 |       "user": "jenny",
539 |       "score": 5,
540 |       "text": "Just so so"
541 |     }
542 |   ]
543 | }')
544 | 
545 | 
546 | m$count()
547 | 
548 | 
549 | m$insert(list(
550 |   code = "A0000002",
551 |   name = "Product-B",
552 |   type = "Type-II",
553 |   price = 59.9,
554 |   amount = 200L,
555 |   comments = list(
556 |     list(user = "tom", score = 6L,
557 |       text = "Just fine"),
558 |     list(user = "mike", score = 9L,
559 |       text = "great product!")
560 |   )
561 | ), auto_unbox = TRUE)
562 | 
563 | 
564 | m$count()
565 | 
566 | 
567 | products <- m$find()
568 | str(products)
569 | 
570 | 
571 | iter <- m$iterate()
572 | products <- iter$batch(2)
573 | str(products)
574 | 
575 | 
576 | m$find('{ "code": "A0000001" }', 
577 |   '{ "_id": 0, "name": 1, "price": 1, "amount": 1 }')
578 | 
579 | 
580 | m$find('{ "price": { "$gte": 40 } }',
581 |   '{ "_id": 0, "name": 1, "price": 1, "amount": 1 }')
582 | 
583 | 
584 | m$find('{ "comments.score": 9 }', 
585 |   '{ "_id": 0, "code": 1, "name": 1}')
586 | 
587 | 
588 | m$find('{ "comments.score": { "$lt": 6 }}',
589 |   '{ "_id": 0, "code": 1, "name": 1}')
590 | 
591 | 
592 | m$drop()
593 | 
594 | 
595 | m <- mongo("students", "test", "mongodb://localhost")
596 | 
597 | 
598 | m$count()
599 | 
600 | 
601 | students <- data.frame(
602 |   name = c("David", "Jenny", "Sara", "John"),
603 |   age = c(25, 23, 26, 23),
604 |   major = c("Statistics", "Physics", "Computer Science", "Statistics"),
605 |   projects = c(2, 1, 3, 1),
606 |   stringsAsFactors = FALSE
607 | )
608 | students
609 | 
610 | 
611 | m$insert(students)
612 | 
613 | 
614 | m$count()
615 | 
616 | 
617 | m$find()
618 | 
619 | 
620 | m$find('{ "name": "Jenny" }')
621 | 
622 | 
623 | m$find('{ "projects": { "$gte": 2 }}')
624 | 
625 | 
626 | m$find('{ "projects": { "$gte": 2 }}', 
627 |   '{ "_id": 0, "name": 1, "major": 1 }')
628 | 
629 | 
630 | m$find('{ "projects": { "$gte": 2 }}', 
631 |   fields = '{ "_id": 0, "name": 1, "age": 1 }',
632 |   sort = '{ "age": -1 }')
633 | 
634 | 
635 | m$find('{ "projects": { "$gte": 2 }}', 
636 |   fields = '{ "_id": 0, "name": 1, "age": 1 }',
637 |   sort = '{ "age": -1 }',
638 |   limit = 1)
639 | 
640 | 
641 | m$distinct("major")
642 | 
643 | 
644 | m$distinct("major", '{ "projects": { "$gte": 2 } }')
645 | 
646 | 
647 | m$update('{ "name": "Jenny" }', '{ "$set": { "age": 24 } }')
648 | m$find()
649 | 
650 | 
651 | m$index('{ "name": 1 }')
652 | 
653 | 
654 | m$find('{ "name": "Sara" }')
655 | 
656 | 
657 | m$find('{ "name": "Jane" }')
658 | 
659 | 
660 | m$drop()
661 | 
662 | 
663 | set.seed(123)
664 | m <- mongo("simulation", "test")
665 | sim_data <- expand.grid(
666 |   type = c("A", "B", "C", "D", "E"), 
667 |   category = c("P-1", "P-2", "P-3"),
668 |   group = 1:20000, 
669 |   stringsAsFactors = FALSE)
670 | head(sim_data)
671 | 
672 | 
673 | sim_data$score1 <- rnorm(nrow(sim_data), 10, 3)
674 | sim_data$test1 <- rbinom(nrow(sim_data), 100, 0.8)
675 | 
676 | 
677 | head(sim_data)
678 | 
679 | 
680 | m$insert(sim_data)
681 | 
682 | 
683 | system.time(rec <- m$find('{ "type": "C", "category": "P-3", "group": 87 }'))
684 | rec
685 | 
686 | 
687 | system.time({
688 |   recs <- m$find('{ "type": { "$in": ["B", "D"]  }, 
689 |     "category": { "$in": ["P-1", "P-2"] }, 
690 |     "group": { "$gte": 25, "$lte": 75 } }')
691 | })
692 | 
693 | 
694 | head(recs)
695 | 
696 | 
697 | system.time(recs2 <- m$find('{ "score1": { "$gte": 20 } }'))
698 | 
699 | 
700 | head(recs2)
701 | 
702 | 
703 | m$index('{ "type": 1, "category": 1, "group": 1 }')
704 | 
705 | 
706 | system.time({
707 |   rec <- m$find('{ "type": "C", "category": "P-3", "group": 87 }')
708 | })
709 | 
710 | 
711 | system.time({
712 |   recs <- m$find('{ "type": { "$in": ["B", "D"]  }, 
713 |     "category": { "$in": ["P-1", "P-2"] }, 
714 |     "group": { "$gte": 25, "$lte": 75 } }')
715 | })
716 | 
717 | 
718 | system.time({
719 |   recs2 <- m$find('{ "score1": { "$gte": 20 } }')
720 | })
721 | 
722 | 
723 | m$aggregate('[
724 |   { "$group": { 
725 |       "_id": "$type", 
726 |       "count": { "$sum": 1 },
727 |       "avg_score": { "$avg": "$score1" },
728 |       "min_test": { "$min": "$test1" },
729 |       "max_test": { "$max": "$test1" }
730 |     }
731 |   }
732 | ]')
733 | 
734 | 
735 | m$aggregate('[
736 |   { "$group": { 
737 |       "_id": { "type": "$type", "category": "$category" }, 
738 |       "count": { "$sum": 1 },
739 |       "avg_score": { "$avg": "$score1" },
740 |       "min_test": { "$min": "$test1" },
741 |       "max_test": { "$max": "$test1" }
742 |     }
743 |   }
744 | ]')
745 | 
746 | 
747 | m$aggregate('[
748 |   { "$group": { 
749 |       "_id": { "type": "$type", "category": "$category" }, 
750 |       "count": { "$sum": 1 },
751 |       "avg_score": { "$avg": "$score1" },
752 |       "min_test": { "$min": "$test1" },
753 |       "max_test": { "$max": "$test1" }
754 |     }
755 |   }, 
756 |   {
757 |     "$sort": { "_id.type": 1, "avg_score": -1 }
758 |   }
759 | ]')
760 | 
761 | 
762 | m$aggregate('[
763 |   { "$group": { 
764 |       "_id": { "type": "$type", "category": "$category" }, 
765 |       "count": { "$sum": 1 },
766 |       "avg_score": { "$avg": "$score1" },
767 |       "min_test": { "$min": "$test1" },
768 |       "max_test": { "$max": "$test1" }
769 |     }
770 |   }, 
771 |   {
772 |     "$sort": { "avg_score": -1 }
773 |   }, 
774 |   {
775 |     "$limit": 3
776 |   }, 
777 |   {
778 |     "$project": { 
779 |       "_id.type": 1, 
780 |       "_id.category": 1, 
781 |       "avg_score": 1, 
782 |       "test_range": { "$subtract": ["$max_test", "$min_test"] }
783 |     }
784 |   }
785 | ]')
786 | 
787 | 
788 | bins <- m$mapreduce(
789 |   map = 'function() {
790 |     emit(Math.floor(this.score1 / 2.5) * 2.5, 1);
791 |   }',
792 |   reduce = 'function(id, counts) {
793 |     return Array.sum(counts);
794 |   }'
795 | )
796 | 
797 | 
798 | bins
799 | 
800 | 
801 | with(bins, barplot(value / sum(value), names.arg = `_id`,
802 |   main = "Histogram of scores", 
803 |   xlab = "score1", ylab = "Percentage"))
804 | 
805 | 
806 | m$drop()
807 | 
808 | 
809 | install.packages("rredis")
810 | 
811 | 
812 | library(rredis)
813 | redisConnect()
814 | 
815 | 
816 | library(rredis)
817 | redisConnect(nodelay = FALSE)
818 | redisCmd("flushall")
819 | 
820 | 
821 | redisSet("num1", 100)
822 | 
823 | 
824 | redisGet("num1")
825 | 
826 | 
827 | redisSet("vec1", 1:5)
828 | redisGet("vec1")
829 | 
830 | 
831 | redisSet("mtcars_head", head(mtcars, 3))
832 | redisGet("mtcars_head")
833 | 
834 | 
835 | redisGet("something")
836 | 
837 | 
838 | redisExists("something")
839 | redisExists("num1")
840 | 
841 | 
842 | redisDelete("num1")
843 | redisExists("num1")
844 | 
845 | 
846 | redisHSet("fruits", "apple", 5)
847 | redisHSet("fruits", "pear", 2)
848 | redisHSet("fruits", "banana", 9)
849 | 
850 | 
851 | redisHGet("fruits", "banana")
852 | 
853 | 
854 | redisHGetAll("fruits")
855 | 
856 | 
857 | redisHKeys("fruits")
858 | 
859 | 
860 | redisHVals("fruits")
861 | 
862 | 
863 | redisHLen("fruits")
864 | 
865 | 
866 | redisHMGet("fruits", c("apple", "banana"))
867 | 
868 | 
869 | redisHMSet("fruits", list(apple = 4, pear = 1))
870 | 
871 | 
872 | redisHGetAll("fruits")
873 | 
874 | 
875 | for (qi in 1:3) {
876 |   redisRPush("queue", qi)  
877 | }
878 | 
879 | 
880 | redisLLen("queue")
881 | 
882 | 
883 | redisLPop("queue")
884 | redisLPop("queue")
885 | redisLPop("queue")
886 | redisLPop("queue")
887 | 
888 | 
889 | redisClose()
890 | 
891 | 
892 | 


--------------------------------------------------------------------------------
/Chapter 12/data-manipulation.R:
--------------------------------------------------------------------------------
  1 | library(readr)
  2 | product_info <- read_csv("data/product-info.csv")
  3 | product_info
  4 | 
  5 | 
  6 | sapply(product_info, class)
  7 | 
  8 | 
  9 | product_info[product_info$type == "toy", ]
 10 | 
 11 | 
 12 | product_info[product_info$released == "no", ]
 13 | 
 14 | 
 15 | product_info[, c("id", "name", "type")]
 16 | 
 17 | 
 18 | product_info[c("id", "name", "class")]
 19 | 
 20 | 
 21 | product_info[product_info$type == "toy", c("name", "class", "released")]
 22 | 
 23 | 
 24 | subset(product_info, 
 25 |   subset = type == "model" & released == "yes",
 26 |   select = name:class)
 27 | 
 28 | 
 29 | with(product_info, name[released == "no"])
 30 | 
 31 | 
 32 | with(product_info, table(type[released == "yes"]))
 33 | 
 34 | 
 35 | product_stats <- read_csv("data/product-stats.csv") 
 36 | product_stats 
 37 | 
 38 | 
 39 | top_3_id <- product_stats[order(product_stats$size, decreasing = TRUE), "id"][1:3]
 40 | product_info[product_info$id %in% top_3_id, ]
 41 | 
 42 | 
 43 | product_table <- merge(product_info, product_stats, by = "id")
 44 | product_table
 45 | 
 46 | 
 47 | product_table[order(product_table$size), ]
 48 | 
 49 | 
 50 | product_table[order(product_table$size, decreasing = TRUE), "name"][1:3]
 51 | 
 52 | 
 53 | product_table[order(product_table$weight, decreasing = TRUE), ][
 54 |   product_table$type == "model",]
 55 | 
 56 | 
 57 | transform(product_table, 
 58 |   released = ifelse(released == "yes", TRUE, FALSE),
 59 |   density = weight / size)
 60 | 
 61 | 
 62 | product_tests <- read_csv("data/product-tests.csv") 
 63 | product_tests
 64 | 
 65 | 
 66 | na.omit(product_tests)
 67 | 
 68 | 
 69 | complete.cases(product_tests)
 70 | 
 71 | 
 72 | product_tests[complete.cases(product_tests), "id"]
 73 | 
 74 | 
 75 | product_tests[!complete.cases(product_tests), "id"]
 76 | 
 77 | 
 78 | product_full <- merge(product_table, product_tests, by = "id")
 79 | product_full
 80 | 
 81 | 
 82 | mean_quality1 <- tapply(product_full$quality,
 83 |   list(product_full$type),
 84 |   mean, na.rm = TRUE)
 85 | mean_quality1
 86 | 
 87 | 
 88 | str(mean_quality1)
 89 | 
 90 | 
 91 | is.array(mean_quality1)
 92 | 
 93 | 
 94 | mean_quality2 <- tapply(product_full$quality, 
 95 |   list(product_full$type, product_full$class), 
 96 |   mean, na.rm = TRUE)
 97 | mean_quality2
 98 | 
 99 | 
100 | mean_quality2["model", "vehicle"]
101 | 
102 | 
103 | mean_quality3 <- with(product_full,
104 |   tapply(quality, list(type, material, released),
105 |     mean, na.rm = TRUE))
106 | mean_quality3
107 | 
108 | 
109 | str(mean_quality3)
110 | 
111 | 
112 | mean_quality3["model", "Wood", "yes"]
113 | 
114 | 
115 | toy_tests <- read_csv("data/product-toy-tests.csv")
116 | toy_tests
117 | 
118 | 
119 | install.packages("reshape2")
120 | 
121 | 
122 | library(reshape2)
123 | toy_quality <- dcast(toy_tests, date ~ id, value.var = "quality")
124 | toy_quality
125 | 
126 | 
127 | install.packages("zoo")
128 | 
129 | 
130 | zoo::na.locf(c(1, 2, NA, NA, 3, 1, NA, 2, NA))
131 | 
132 | 
133 | toy_quality$T01 <- zoo::na.locf(toy_quality$T01)
134 | toy_quality$T02 <- zoo::na.locf(toy_quality$T02)
135 | 
136 | 
137 | toy_quality[-1] <- lapply(toy_quality[-1], zoo::na.locf)
138 | toy_quality
139 | 
140 | 
141 | toy_tests$ym <- substr(toy_tests$date, 1, 6)
142 | toy_tests
143 | 
144 | 
145 | toy_quality <- dcast(toy_tests, ym ~ id, value.var = "quality")
146 | toy_quality
147 | 
148 | 
149 | toy_tests2 <- melt(toy_tests, id.vars = c("id", "ym"), 
150 |   measure.vars = c("quality", "durability"),
151 |   variable.name = "measure")
152 | toy_tests2
153 | 
154 | 
155 | library(ggplot2)
156 | ggplot(toy_tests2, aes(x = ym, y = value)) + 
157 |   geom_point() +
158 |   facet_grid(id ~ measure)
159 | 
160 | 
161 | ggplot(toy_tests2, aes(x = ym, y = value, color = id)) + 
162 |   geom_point() +
163 |   facet_grid(. ~ measure)
164 | 
165 | 
166 | install.packages("sqldf")
167 | 
168 | 
169 | library(sqldf)
170 | 
171 | 
172 | product_info <- read_csv("data/product-info.csv")
173 | product_stats <- read_csv("data/product-stats.csv")
174 | product_tests <- read_csv("data/product-tests.csv")
175 | toy_tests <- read_csv("data/product-toy-tests.csv")
176 | 
177 | 
178 | sqldf("select * from product_info")
179 | 
180 | 
181 | sqldf("select id, name, class from product_info")
182 | 
183 | 
184 | sqldf("select id, name from product_info where released = 'yes'")
185 | 
186 | 
187 | sqldf("select id, material, size / weight as density from product_stats")
188 | 
189 | 
190 | sqldf("select * from product_stats order by size desc")
191 | 
192 | 
193 | sqldf("select * from product_info join product_stats using (id)")
194 | 
195 | 
196 | sqldf("select * from product_info where id in 
197 |   (select id from product_stats where material = 'Wood')")
198 | 
199 | 
200 | sqldf("select * from product_info join product_stats using (id) 
201 |   where material = 'Wood'")
202 | 
203 | 
204 | sqldf("select waterproof, avg(quality), avg(durability) from product_tests
205 |   group by waterproof")
206 | 
207 | 
208 | sqldf("select id, avg(quality), avg(durability) from toy_tests 
209 |   group by id")
210 | 
211 | 
212 | sqldf("select * from product_info join 
213 |   (select id, avg(quality), avg(durability) from toy_tests 
214 |     group by id) using (id)")
215 | 
216 | 
217 | install.packages("plyr")
218 | 
219 | 
220 | plyr::ddply(product_stats, "material", 
221 |   function(x) {
222 |     head(x[order(x$size, decreasing = TRUE),], 1L)
223 |   })
224 | 
225 | 
226 | plyr::ddply(toy_tests, "id", 
227 |   function(x) {
228 |     head(x[order(x$sample, decreasing = TRUE), ], 2)
229 |   })
230 | 
231 | 
232 | install.packages("data.table")
233 | 
234 | 
235 | library(data.table)
236 | 
237 | 
238 | dt <- data.table(x = 1:3, y = rnorm(3), z = letters[1:3])
239 | dt
240 | 
241 | 
242 | str(dt)
243 | 
244 | 
245 | product_info <- fread("data/product-info.csv")
246 | product_stats <- fread("data/product-stats.csv")
247 | product_tests <- fread("data/product-tests.csv")
248 | toy_tests <- fread("data/product-toy-tests.csv")
249 | 
250 | 
251 | product_info
252 | 
253 | 
254 | str(product_info)
255 | 
256 | 
257 | product_info[1]
258 | 
259 | 
260 | product_info[1:3]
261 | 
262 | 
263 | product_info[-1]
264 | 
265 | 
266 | product_info[.N]
267 | 
268 | 
269 | product_info[c(1, .N)]
270 | 
271 | 
272 | product_info[released == "yes"]
273 | 
274 | 
275 | product_info[released == "yes", id]
276 | 
277 | 
278 | product_info[released == "yes", "id"]
279 | 
280 | 
281 | product_info[released == "yes", "id", with = FALSE]
282 | 
283 | 
284 | product_info[released == "yes", c("id", "name"), with = FALSE]
285 | 
286 | 
287 | product_info[released == "yes", table(type, class)]
288 | 
289 | 
290 | product_info[released == "yes", list(id, name)]
291 | 
292 | 
293 | product_info[, list(id, name, released = released == "yes")]
294 | 
295 | 
296 | product_stats[, list(id, material, size, weight, 
297 |   density = size / weight)]
298 | 
299 | 
300 | product_info[, .(id, name, type, class)]
301 | 
302 | 
303 | product_info[released == "yes", .(id, name)]
304 | 
305 | 
306 | product_stats[order(size, decreasing = TRUE)]
307 | 
308 | 
309 | product_stats
310 | 
311 | 
312 | product_stats[, density := size / weight]
313 | 
314 | 
315 | product_stats
316 | 
317 | 
318 | product_info[, released := released == "yes"]
319 | product_info
320 | 
321 | 
322 | setkey(product_info, id)
323 | 
324 | 
325 | product_info
326 | 
327 | 
328 | key(product_info)
329 | 
330 | 
331 | product_info["M01"]
332 | 
333 | 
334 | product_stats["M01"]
335 | 
336 | 
337 | setkeyv(product_stats, "id")
338 | 
339 | 
340 | product_stats["M02"]
341 | 
342 | 
343 | product_info[product_stats]
344 | 
345 | 
346 | setkey(toy_tests, id, date)
347 | 
348 | 
349 | toy_tests[.("T01", 20160201)]
350 | 
351 | 
352 | toy_tests["T01"]
353 | 
354 | 
355 | toy_tests[.(20160201)]
356 | 
357 | 
358 | toy_tests[.(20160201, "T01")]
359 | 
360 | 
361 | product_info[, .N, by = released]
362 | 
363 | 
364 | product_info[, .N, by = .(type, class)]
365 | 
366 | 
367 | product_tests[, mean(quality, na.rm = TRUE), 
368 |   by = .(waterproof)]
369 | 
370 | 
371 | product_tests[, .(mean_quality = mean(quality, na.rm = TRUE)),
372 |   by = .(waterproof)]
373 | 
374 | 
375 | product_info[product_tests][released == TRUE,
376 |   .(mean_quality = mean(quality, na.rm = TRUE),
377 |     mean_durability = mean(durability, na.rm = TRUE)),
378 |   by = .(type, class)]
379 | 
380 | 
381 | type_class_tests <- product_info[product_tests][released == TRUE,
382 |   .(mean_quality = mean(quality, na.rm = TRUE),
383 |     mean_durability = mean(durability, na.rm = TRUE)),
384 |   keyby = .(type, class)]
385 | type_class_tests
386 | 
387 | 
388 | key(type_class_tests)
389 | 
390 | 
391 | type_class_tests[.("model", "vehicle"), mean_quality]
392 | 
393 | 
394 | n <- 10000000
395 | test1 <- data.frame(id = 1:n, x = rnorm(n), y = rnorm(n))
396 | 
397 | 
398 | system.time(row <- test1[test1$id == 876543, ])
399 | row
400 | 
401 | 
402 | setDT(test1, key = "id")
403 | class(test1)
404 | 
405 | 
406 | system.time(row <- test1[.(8765432)])
407 | row
408 | 
409 | 
410 | toy_tests[, ym := substr(date, 1, 6)]
411 | toy_quality <- dcast(toy_tests, ym ~ id, value.var = "quality")
412 | toy_quality
413 | 
414 | 
415 | toy_tests2 <- dcast(toy_tests, ym ~ id, value.var = c("quality", "durability"))
416 | toy_tests2
417 | 
418 | 
419 | key(toy_tests2)
420 | 
421 | 
422 | toy_tests2[.(201602)]
423 | 
424 | 
425 | sapply(toy_tests2, class)
426 | 
427 | 
428 | toy_tests2["201602"]
429 | 
430 | 
431 | class(20160101)
432 | class(substr(20160101, 1, 6))
433 | 
434 | 
435 | product_stats
436 | setDF(product_stats)
437 | class(product_stats)
438 | 
439 | 
440 | setDT(product_stats, key = "id")
441 | class(product_stats)
442 | 
443 | 
444 | setnames(product_stats, "size", "volume")
445 | product_stats
446 | 
447 | 
448 | product_stats[, i := .I]
449 | product_stats
450 | 
451 | 
452 | setcolorder(product_stats, 
453 |   c("i", "id", "material", "weight", "volume", "density"))
454 | product_stats
455 | 
456 | 
457 | market_data <- data.table(date = as.Date("2015-05-01") + 0:299)
458 | head(market_data)
459 | 
460 | 
461 | set.seed(123)
462 | market_data[, `:=`(
463 |   price = round(30 * cumprod(1 + rnorm(300, 0.001, 0.05)), 2),
464 |   volume = rbinom(300, 5000, 0.8)
465 | )]
466 | 
467 | 
468 | head(market_data)
469 | 
470 | 
471 | plot(price ~ date, data = market_data, 
472 |   type = "l",
473 |   main = "Market data")
474 | 
475 | 
476 | market_data[, range(date)]
477 | 
478 | 
479 | monthly <- market_data[, 
480 |   .(open = price[[1]], high = max(price), 
481 |     low = min(price), close = price[[.N]]),
482 |   keyby = .(year = year(date), month = month(date))]
483 | head(monthly)
484 | 
485 | 
486 | oldpar <- par(mfrow = c(1, 2))
487 | market_data[, {
488 |   plot(price ~ date, type = "l",
489 |     main = sprintf("Market data (%d)", year))
490 | }, by = .(year = year(date))]
491 | par(oldpar)
492 | 
493 | 
494 | oldpar <- par(mfrow = c(1, 2))
495 | market_data[year(date) == 2015, {
496 |   plot(price ~ date, type = "l",
497 |     main = sprintf("Market data (%d)", 2015))
498 | }]
499 | market_data[year(date) == 2016, {
500 |   plot(price ~ date, type = "l",
501 |     main = sprintf("Market data (%d)", 2016))
502 | }]
503 | par(oldpar)
504 | 
505 | 
506 | data("diamonds", package = "ggplot2")
507 | setDT(diamonds)
508 | head(diamonds)
509 | 
510 | 
511 | diamonds[, {
512 |   m <- lm(log(price) ~ carat + depth)
513 |   as.list(coef(m))
514 | }, keyby = .(cut)]
515 | 
516 | 
517 | average <- function(column) {
518 |   market_data[, .(average = mean(.SD[[column]])),
519 |     by = .(year = year(date))]
520 | }
521 | 
522 | 
523 | average("price")
524 | 
525 | 
526 | average("volume")
527 | 
528 | 
529 | price_cols <- paste0("price", 1:3)
530 | market_data[, (price_cols) := lapply(1:3, 
531 |   function(i) round(price + rnorm(.N, 0, 5), 2))]
532 | head(market_data)
533 | 
534 | 
535 | cols <- colnames(market_data)
536 | price_cols <- cols[grep("^price", cols)]
537 | price_cols
538 | 
539 | 
540 | market_data[, (price_cols) := lapply(.SD, zoo::na.locf),
541 |   .SDcols = price_cols]
542 | 
543 | 
544 | install.packages("dplyr")
545 | 
546 | 
547 | library(readr)
548 | product_info <- read_csv("data/product-info.csv")
549 | product_stats <- read_csv("data/product-stats.csv")
550 | product_tests <- read_csv("data/product-tests.csv")
551 | toy_tests <- read_csv("data/product-toy-tests.csv")
552 | 
553 | 
554 | library(dplyr)
555 | 
556 | 
557 | select(product_info, id, name, type, class)
558 | 
559 | 
560 | filter(product_info, released == "yes")
561 | 
562 | 
563 | filter(product_info,
564 |   released == "yes", type == "model")
565 | 
566 | 
567 | mutate(product_stats, density = size / weight)
568 | 
569 | 
570 | arrange(product_stats, material, desc(size), desc(weight))
571 | 
572 | 
573 | product_info_tests <- left_join(product_info, product_tests, by = "id")
574 | product_info_tests
575 | 
576 | 
577 | summarize(group_by(product_info_tests, type, class),
578 |   mean_quality = mean(quality, na.rm = TRUE),
579 |   mean_durability = mean(durability, na.rm = TRUE))
580 | 
581 | 
582 | product_info %>%
583 |   filter(released == "yes") %>%
584 |   inner_join(product_tests, by = "id") %>%
585 |   group_by(type, class) %>%
586 |   summarize(
587 |     mean_quality = mean(quality, na.rm = TRUE),
588 |     mean_durability = mean(durability, na.rm = TRUE)) %>%
589 |   arrange(desc(mean_quality))
590 | 
591 | 
592 | d1 <- f1(d0, arg1)
593 | d2 <- f2(d1, arg2)
594 | d3 <- f3(d2, arg3)
595 | 
596 | 
597 | f3(f2(f1(d0, arg1), arg2), arg3)
598 | 
599 | 
600 | d0 %>%
601 |   f1(arg1) %>%
602 |   f2(arg2) %>%
603 |   f3(arg3)
604 | 
605 | 
606 | data(diamonds, package = "ggplot2")
607 | plot(density(diamonds$price, from = 0),
608 |   main = "Density plot of diamond prices")
609 | 
610 | 
611 | diamonds$price %>%
612 |   density(from = 0) %>%
613 |   plot(main = "Density plot of diamonds prices")
614 | 
615 | 
616 | models <- diamonds %>%
617 |   group_by(cut) %>%
618 |   do(lmod = lm(log(price) ~ carat, data = .))
619 | models
620 | 
621 | 
622 | models$lmod[[1]]
623 | 
624 | 
625 | toy_tests %>%
626 |   group_by(id) %>%
627 |   arrange(desc(sample)) %>%
628 |   do(head(., 3)) %>%
629 |   summarize(
630 |     quality = sum(quality * sample) / sum(sample), 
631 |     durability = sum(durability * sample) / sum(sample))
632 | 
633 | 
634 | toy_tests %>%
635 |   group_by(id) %>%
636 |   arrange(desc(sample))
637 | 
638 | 
639 | toy_tests %>%
640 |   group_by(id) %>%
641 |   arrange(desc(sample)) %>%
642 |   do(head(., 3))
643 | 
644 | 
645 | install.packages("rlist")
646 | 
647 | 
648 | library(rlist)
649 | 
650 | 
651 | products <- list.load("data/products.json")
652 | str(products[[1]])
653 | 
654 | 
655 | str(list.map(products, id))
656 | 
657 | 
658 | list.mapv(products, name)
659 | 
660 | 
661 | released_products <- list.filter(products, released)
662 | list.mapv(released_products, name)
663 | 
664 | 
665 | products %>%
666 |   list.filter(released) %>%
667 |   list.mapv(name)
668 | 
669 | 
670 | products %>%
671 |   list.filter(released, tests$waterproof) %>%
672 |   list.select(id, name, scores) %>%
673 |   str()
674 | 
675 | 
676 | products %>%
677 |   list.filter(mean(scores) >= 8) %>%
678 |   list.select(name, scores, mean_score = mean(scores)) %>%
679 |   str()
680 | 
681 | 
682 | products %>%
683 |   list.select(name, mean_score = mean(scores)) %>%
684 |   list.sort(-mean_score) %>%
685 |   list.stack()
686 | 
687 | 
688 | products %>%
689 |   list.select(name, type, released) %>%
690 |   list.group(type) %>%
691 |   str()
692 | 
693 | 
694 | products %>%
695 |   list.table(type, class)
696 | 
697 | 
698 | products %>%
699 |   list.filter(released) %>%
700 |   list.table(type, waterproof = tests$waterproof)
701 | 
702 | 
703 | products %>%
704 |   list.filter(length(scores) >= 5) %>%
705 |   list.sort(-mean(scores)) %>%
706 |   list.take(2) %>%
707 |   list.select(name, 
708 |     mean_score = mean(scores),
709 |     n_score = length(scores)) %>%
710 |   list.stack()
711 | 
712 | 
713 | 


--------------------------------------------------------------------------------
/Chapter 13/high-performance-computing.txt:
--------------------------------------------------------------------------------
  1 | x <- c(1, 2, 3, 4, 5)
  2 | y <- numeric()
  3 | sum_x <- 0
  4 | for (xi in x) {
  5 |   sum_x <- sum_x + xi
  6 |   y <- c(y, sum_x)
  7 | }
  8 | y
  9 | 
 10 | 
 11 | my_cumsum1 <- function(x) {
 12 |   y <- numeric()
 13 |   sum_x <- 0
 14 |   for (xi in x) {
 15 |     sum_x <- sum_x + xi
 16 |     y <- c(y, sum_x)
 17 |   }
 18 |   y
 19 | }
 20 | 
 21 | 
 22 | my_cumsum2 <- function(x) {
 23 |   y <- numeric(length(x))
 24 |   if (length(y)) {
 25 |     y[[1]] <- x[[1]]
 26 |     for (i in 2:length(x)) {
 27 |       y[[i]] <- y[[i-1]] + x[[i]]
 28 |     }
 29 |   }
 30 |   y
 31 | }
 32 | 
 33 | 
 34 | x <- rnorm(100)
 35 | all.equal(cumsum(x), my_cumsum1(x))
 36 | all.equal(cumsum(x), my_cumsum2(x))
 37 | 
 38 | 
 39 | x <- rnorm(100)
 40 | system.time(my_cumsum1(x))
 41 | 
 42 | 
 43 | system.time(my_cumsum2(x))
 44 | 
 45 | 
 46 | system.time(cumsum(x))
 47 | 
 48 | 
 49 | x <- rnorm(1000)
 50 | system.time(my_cumsum1(x))
 51 | system.time(my_cumsum2(x))
 52 | system.time(cumsum(x))
 53 | 
 54 | 
 55 | x <- rnorm(10000)
 56 | system.time(my_cumsum1(x))
 57 | system.time(my_cumsum2(x))
 58 | system.time(cumsum(x))
 59 | 
 60 | 
 61 | x <- rnorm(100000)
 62 | system.time(my_cumsum1(x))
 63 | system.time(my_cumsum2(x))
 64 | system.time(cumsum(x))
 65 | 
 66 | 
 67 | install.packages("microbenchmark")
 68 | 
 69 | 
 70 | library(microbenchmark)
 71 | x <- rnorm(100)
 72 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x))
 73 | 
 74 | 
 75 | x <- rnorm(1000)
 76 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x))
 77 | 
 78 | 
 79 | x <- rnorm(5000)
 80 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x))
 81 | 
 82 | 
 83 | x <- rnorm(10000)
 84 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x), times = 10)
 85 | 
 86 | 
 87 | library(data.table)
 88 | benchmark <- function(ns, times = 30) {
 89 |   results <- lapply(ns, function(n) {
 90 |     x <- rnorm(n)
 91 |     result <- microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x), 
 92 |       times = times, unit = "ms")
 93 |     data <- setDT(summary(result))
 94 |     data[, n := n]
 95 |     data
 96 |   })
 97 |   rbindlist(results)
 98 | }
 99 | 
100 | 
101 | benchmarks <- benchmark(seq(100, 3000, 100))
102 | 
103 | 
104 | library(ggplot2)
105 | ggplot(benchmarks, aes(x = n, color = expr)) +
106 |   ggtitle("Microbenchmark on cumsum functions") +
107 |   geom_point(aes(y = median)) +
108 |   geom_errorbar(aes(ymin = lq, ymax = uq))
109 | 
110 | 
111 | benchmarks2 <- benchmark(seq(2, 600, 10), times = 50)
112 | 
113 | 
114 | ggplot(benchmarks2, aes(x = n, color = expr)) +
115 |   ggtitle("Microbenchmark on cumsum functions over small input") +
116 |   geom_point(aes(y = median)) +
117 |   geom_errorbar(aes(ymin = lq, ymax = uq))
118 | 
119 | 
120 | benchmarks3 <- benchmark(seq(10, 800, 10), times = 50)
121 | ggplot(benchmarks3, aes(x = n, color = expr)) +
122 |   ggtitle("Microbenchmark on cumsum functions with break even") +
123 |   geom_point(aes(y = median)) +
124 |   geom_errorbar(aes(ymin = lq, ymax = uq))
125 | 
126 | 
127 | x <- rnorm(1000)
128 | tmp <- tempfile(fileext = ".out")
129 | Rprof(tmp)
130 | for (i in 1:1000) {
131 |   my_cumsum1(x)
132 | }
133 | Rprof(NULL)
134 | summaryRprof(tmp)
135 | 
136 | 
137 | tmp <- tempfile(fileext = ".out")
138 | Rprof(tmp)
139 | for (i in 1:1000) {
140 |   my_cumsum2(x)
141 | }
142 | Rprof(NULL)
143 | summaryRprof(tmp)
144 | 
145 | 
146 | my_cumsum1 <- function(x) {
147 |   y <- numeric()
148 |   sum_x <- 0
149 |   for (xi in x) {
150 |     sum_x <- sum_x + xi
151 |     y <- c(y, sum_x)
152 |   }
153 |   y
154 | }
155 | 
156 | x <- rnorm(1000)
157 | 
158 | for (i in 1:1000) {
159 |   my_cumsum1(x)
160 | }
161 | 
162 | 
163 | tmp <- tempfile(fileext = ".out")
164 | Rprof(tmp, line.profiling = TRUE)
165 | source("code/my_cumsum1.R", keep.source = TRUE)
166 | Rprof(NULL)
167 | summaryRprof(tmp, lines = "show")
168 | 
169 | 
170 | install.packages("profvis")
171 | 
172 | 
173 | library(profvis)
174 | profvis({
175 |   my_cumsum1 <- function(x) {
176 |     y <- numeric()
177 |     sum_x <- 0
178 |     for (xi in x) {
179 |       sum_x <- sum_x + xi
180 |       y <- c(y, sum_x)
181 |     }
182 |     y
183 |   }
184 |   
185 |   x <- rnorm(1000)
186 |   
187 |   for (i in 1:1000) {
188 |     my_cumsum1(x)
189 |   }
190 | })
191 | 
192 | 
193 | 
194 | 
195 | profvis({
196 |   my_cumsum2 <- function(x) {
197 |     y <- numeric(length(x))
198 |     y[[1]] <- x[[1]]
199 |     for (i in 2:length(x)) {
200 |       y[[i]] <- y[[i-1]] + x[[i]]
201 |     }
202 |     y
203 |   }
204 |   
205 |   x <- rnorm(1000)
206 |   
207 |   for (i in 1:1000) {
208 |     my_cumsum2(x)
209 |   }
210 | })
211 | 
212 | 
213 | 
214 | 
215 | n <- 10000
216 | microbenchmark(grow_by_index = {
217 |   x <- list()
218 |   for (i in 1:n) x[[i]] <- i
219 | }, preallocated = {
220 |   x <- vector("list", n)
221 |   for (i in 1:n) x[[i]] <- i
222 | }, times = 20)
223 | 
224 | 
225 | cumsum
226 | 
227 | 
228 | diff_for <- function(x) {
229 |   n <- length(x) - 1
230 |   res <- numeric(n)
231 |   for (i in seq_len(n)) {
232 |     res[[i]] <- x[[i + 1]] - x[[i]]
233 |   }
234 |   res
235 | }
236 | 
237 | 
238 | diff_for(c(2, 3, 1, 5))
239 | 
240 | 
241 | x <- rnorm(1000)
242 | all.equal(diff_for(x), diff(x))
243 | 
244 | 
245 | microbenchmark(diff_for(x), diff(x))
246 | 
247 | 
248 | mat <- matrix(1:12, nrow = 3)
249 | mat
250 | 
251 | 
252 | my_transpose <- function(x) {
253 |   stopifnot(is.matrix(x))
254 |   res <- matrix(vector(mode(x), length(x)), 
255 |     nrow = ncol(x), ncol = nrow(x), 
256 |     dimnames = dimnames(x)[c(2, 1)])
257 |   for (i in seq_len(ncol(x))) {
258 |     for (j in seq_len(nrow(x))) {
259 |       res[i, j] <- x[j, i]
260 |     }
261 |   }
262 |   res
263 | }
264 | 
265 | 
266 | my_transpose(mat)
267 | 
268 | 
269 | all.equal(my_transpose(mat), t(mat))
270 | 
271 | 
272 | microbenchmark(my_transpose(mat), t(mat))
273 | 
274 | 
275 | mat <- matrix(rnorm(25000), nrow = 1000)
276 | all.equal(my_transpose(mat), t(mat))
277 | microbenchmark(my_transpose(mat), t(mat))
278 | 
279 | 
280 | microbenchmark(my_transpose(mat), t(mat), t.default(mat))
281 | 
282 | 
283 | add <- function(x, y) {
284 |   stopifnot(length(x) == length(y),
285 |     is.numeric(x), is.numeric(y))
286 |   z <- numeric(length(x))
287 |   for (i in seq_along(x)) {
288 |     z[[i]] <- x[[i]] + y[[i]]
289 |   }
290 |   z
291 | }
292 | 
293 | 
294 | x <- rnorm(10000)
295 | y <- rnorm(10000)
296 | all.equal(add(x, y), x + y)
297 | 
298 | 
299 | microbenchmark(add(x, y), x + y)
300 | 
301 | 
302 | algo1_for <- function(n) {
303 |   res <- 0
304 |   for (i in seq_len(n)) {
305 |     res <- res + 1 / i ^ 2
306 |   }
307 |   res
308 | }
309 | 
310 | 
311 | algo1_vec <- function(n) {
312 |   sum(1 / seq_len(n) ^ 2)
313 | }
314 | 
315 | 
316 | algo1_for(10)
317 | 
318 | 
319 | algo1_vec(10)
320 | 
321 | 
322 | microbenchmark(algo1_for(200), algo1_vec(200))
323 | 
324 | 
325 | microbenchmark(algo1_for(1000), algo1_vec(1000))
326 | 
327 | 
328 | library(compiler)
329 | diff_cmp <- cmpfun(diff_for)
330 | diff_cmp
331 | 
332 | 
333 | x <- rnorm(10000)
334 | microbenchmark(diff_for(x), diff_cmp(x), diff(x))
335 | 
336 | 
337 | algo1_cmp <- cmpfun(algo1_for)
338 | algo1_cmp
339 | 
340 | 
341 | n <- 1000
342 | microbenchmark(algo1_for(n), algo1_cmp(n), algo1_vec(n))
343 | 
344 | 
345 | algo1_vec_cmp <- cmpfun(algo1_vec)
346 | microbenchmark(algo1_vec(n), algo1_vec_cmp(n), times = 10000)
347 | 
348 | 
349 | set.seed(1)
350 | sim_data <- 100 * cumprod(1 + rnorm(500, 0, 0.006))
351 | plot(sim_data, type = "s", ylim = c(85, 115),
352 |   main = "A simulated random path")
353 | abline(h = 100, lty = 2, col = "blue")
354 | abline(h = 100 * (1 + 0.1 * c(1, -1)), lty = 3, col = "red")
355 | 
356 | 
357 | simulate <- function(i, p = 100, n = 10000, 
358 |   r = 0, sigma = 0.0005, margin = 0.1) {
359 |   ps <- p * cumprod(1 + rnorm(n, r, sigma))
360 |   list(id = i, 
361 |     first = ps[[1]], 
362 |     high = max(ps), 
363 |     low = min(ps), 
364 |     last = ps[[n]],
365 |     signal = any(ps > p * (1 + margin) | ps < p * (1 - margin)))
366 | }
367 | 
368 | 
369 | simulate(1)
370 | 
371 | 
372 | system.time(res <- lapply(1:10000, simulate))
373 | 
374 | 
375 | library(data.table)
376 | res_table <- rbindlist(res)
377 | head(res_table)
378 | 
379 | 
380 | res_table[, sum(signal) / .N]
381 | 
382 | 
383 | library(parallel)
384 | cl <- makeCluster(detectCores())
385 | 
386 | 
387 | system.time(res <- parLapply(cl, 1:10000, simulate))
388 | 
389 | 
390 | stopCluster(cl)
391 | 
392 | 
393 | length(res)
394 | res_table <- rbindlist(res)
395 | res_table[, sum(signal) / .N]
396 | 
397 | 
398 | cl <- makeCluster(detectCores())
399 | n <- 1
400 | parLapply(cl, 1:3, function(x) x + n)
401 | stopCluster(cl)
402 | 
403 | 
404 | n <- 100
405 | data <- data.frame(id = 1:n,  x = rnorm(n), y = rnorm(n))
406 | 
407 | take_sample <- function(n) {
408 |   data[sample(seq_len(nrow(data)),
409 |     size = n, replace = FALSE), ]
410 | }
411 | 
412 | 
413 | cl <- makeCluster(detectCores())
414 | 
415 | 
416 | clusterEvalQ(cl, Sys.getpid())
417 | 
418 | 
419 | clusterEvalQ(cl, ls())
420 | 
421 | 
422 | clusterExport(cl, c("data", "take_sample"))
423 | clusterEvalQ(cl, ls())
424 | 
425 | 
426 | clusterEvalQ(cl, take_sample(2))
427 | 
428 | 
429 | invisible(clusterCall(cl, function() {
430 |   local_var <- 10
431 |   global_var <<- 100
432 | }))
433 | clusterEvalQ(cl, ls())
434 | 
435 | 
436 | clusterExport(cl, "simulate")
437 | invisible(clusterEvalQ(cl, {
438 |   library(data.table)
439 | }))
440 | res <- parLapply(cl, 1:3, function(i) {
441 |   res_table <- rbindlist(lapply(1:1000, simulate))
442 |   res_table[, id := NULL]
443 |   summary(res_table)
444 | })
445 | 
446 | 
447 | res
448 | 
449 | 
450 | stopCluster(cl)
451 | 
452 | 
453 | system.time(res <- mclapply(1:10000, simulate, 
454 |   mc.cores = detectCores()))
455 | 
456 | 
457 | mclapply(1:3, take_sample, mc.cores = detectCores())
458 | 
459 | 
460 | job1 <- mcparallel(rnorm(10), "job1")
461 | 
462 | 
463 | mccollect(job1)
464 | 
465 | 
466 | jobs <- lapply(1:8, function(i) {
467 |   mcparallel({
468 |     t <- rbinom(1, 5, 0.6)
469 |     Sys.sleep(t)
470 |     t
471 |   }, paste0("job", i))
472 | })
473 | system.time(res <- mccollect(jobs))
474 | 
475 | 
476 | install.packages("Rcpp")
477 | 
478 | 
479 | #include <Rcpp.h>
480 | using namespace Rcpp;
481 | 
482 | // [[Rcpp::export]]
483 | NumericVector timesTwo(NumericVector x) {
484 |   return x * 2;
485 | }
486 | 
487 | 
488 | Rcpp::sourceCpp("code/rcpp-demo.cpp")
489 | 
490 | 
491 | timesTwo
492 | 
493 | 
494 | timesTwo(10)
495 | 
496 | 
497 | timesTwo(c(1, 2, 3))
498 | 
499 | 
500 | #include <Rcpp.h>
501 | using namespace Rcpp;
502 | 
503 | // [[Rcpp::export]]
504 | double algo1_cpp(int n) {
505 |   double res = 0;
506 |   for (double i = 1; i < n; i++) {
507 |     res += 1 / (i * i);
508 |   }
509 |   return res;
510 | }
511 | 
512 | 
513 | Rcpp::sourceCpp("code/rcpp-algo1.cpp")
514 | 
515 | 
516 | algo1_cpp(10)
517 | 
518 | 
519 | algo1_cpp(c(10, 15))
520 | 
521 | 
522 | n <- 1000
523 | microbenchmark(
524 |   algo1_for(n), 
525 |   algo1_cmp(n), 
526 |   algo1_vec(n), 
527 |   algo1_cpp(n))
528 | 
529 | 
530 | #include <Rcpp.h>
531 | using namespace Rcpp;
532 | 
533 | // [[Rcpp::export]]
534 | NumericVector diff_cpp(NumericVector x) {
535 |   NumericVector res(x.size() - 1);
536 |   for (int i = 0; i < x.size() - 1; i++) {
537 |     res[i] = x[i + 1] - x[i];
538 |   }
539 |   return res;
540 | }
541 | 
542 | 
543 | Rcpp::sourceCpp("code/rcpp-diff.cpp")
544 | 
545 | 
546 | diff_cpp(c(1, 2, 3, 5))
547 | 
548 | 
549 | x <- rnorm(1000)
550 | microbenchmark(
551 |   diff_for(x), 
552 |   diff_cmp(x), 
553 |   diff(x), 
554 |   diff.default(x), 
555 |   diff_cpp(x))
556 | 
557 | 
558 | // [[Rcpp::plugins(openmp)]]
559 | #include <omp.h>
560 | #include <Rcpp.h>
561 | using namespace Rcpp;
562 | 
563 | // [[Rcpp::export]]
564 | NumericVector diff_cpp_omp(NumericVector x) {
565 |   omp_set_num_threads(3);
566 |   NumericVector res(x.size() - 1);
567 | #pragma omp parallel for 
568 |   for (int i = 0; i < x.size() - 1; i++) {
569 |     res[i] = x[i + 1] - x[i];
570 |   }
571 |   return res;
572 | }
573 | 
574 | 
575 | Rcpp::sourceCpp("code/rcpp-diff-openmp.cpp")
576 | 
577 | 
578 | diff_cpp_omp(c(1, 2, 4, 8))
579 | 
580 | 
581 | x <- rnorm(1000)
582 | microbenchmark(
583 |   diff_for(x), 
584 |   diff_cmp(x), 
585 |   diff(x), 
586 |   diff.default(x), 
587 |   diff_cpp(x),
588 |   diff_cpp_omp(x))
589 | 
590 | 
591 | x <- rnorm(100000)
592 | microbenchmark(
593 |   diff_for(x), 
594 |   diff_cmp(x), 
595 |   diff(x), 
596 |   diff.default(x), 
597 |   diff_cpp(x),
598 |   diff_cpp_omp(x))
599 | 
600 | 
601 | // [[Rcpp::plugins(cpp11)]]
602 | // [[Rcpp::depends(RcppParallel)]]
603 | #include <Rcpp.h>
604 | #include <RcppParallel.h>
605 | 
606 | using namespace Rcpp;
607 | using namespace RcppParallel;
608 | 
609 | struct Transformer : public Worker {
610 |   const RMatrix<double> input;
611 |   RMatrix<double> output;
612 |   Transformer(const NumericMatrix input, NumericMatrix output)
613 |     : input(input), output(output) {}
614 |   void operator()(std::size_t begin, std::size_t end) {
615 |     std::transform(input.begin() + begin, input.begin() + end,
616 |       output.begin() + begin, [](double x) {
617 |         return 1 / (1 + x * x);
618 |       });
619 |   }
620 | };
621 | 
622 | // [[Rcpp::export]]
623 | NumericMatrix par_transform (NumericMatrix x) {
624 |   NumericMatrix output(x.nrow(), x.ncol());
625 |   Transformer transformer(x, output);
626 |   parallelFor(0, x.length(), transformer);
627 |   return output;
628 | }
629 | 
630 | 
631 | Rcpp::sourceCpp("code/rcpp-parallel.cpp")
632 | 
633 | 
634 | mat <- matrix(1:12, nrow = 3)
635 | mat
636 | 
637 | 
638 | par_transform(mat)
639 | 
640 | 
641 | all.equal(par_transform(mat), 1 / (1 + mat ^ 2))
642 | 
643 | 
644 | mat <- matrix(rnorm(1000 * 2000), nrow = 1000)
645 | 
646 | 
647 | microbenchmark(1 / (1 + mat ^ 2), par_transform(mat))
648 | 
649 | 
650 | 


--------------------------------------------------------------------------------
/Chapter 13/my_cumsum1.R:
--------------------------------------------------------------------------------
 1 | my_cumsum1 <- function(x) {
 2 |   y <- numeric()
 3 |   sum_x <- 0
 4 |   for (xi in x) {
 5 |     sum_x <- sum_x + xi
 6 |     y <- c(y, sum_x)
 7 |   }
 8 |   y
 9 | }
10 | 
11 | x <- rnorm(1000)
12 | 
13 | for (i in 1:1000) {
14 |   my_cumsum1(x)
15 | }
16 | 


--------------------------------------------------------------------------------
/Chapter 13/rcpp-algo1.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | double algo1_cpp(int n) {
 6 |   double res = 0;
 7 |   for (double i = 1; i < n; i++) {
 8 |     res += 1 / (i * i);
 9 |   }
10 |   return res;
11 | }
12 | 


--------------------------------------------------------------------------------
/Chapter 13/rcpp-demo.cpp:
--------------------------------------------------------------------------------
1 | #include <Rcpp.h>
2 | using namespace Rcpp;
3 | 
4 | // [[Rcpp::export]]
5 | NumericVector timesTwo(NumericVector x) {
6 |   return x * 2;
7 | }
8 | 


--------------------------------------------------------------------------------
/Chapter 13/rcpp-diff-openmp.cpp:
--------------------------------------------------------------------------------
 1 | // [[Rcpp::plugins(openmp)]]
 2 | #include <omp.h>
 3 | #include <Rcpp.h>
 4 | using namespace Rcpp;
 5 | 
 6 | // [[Rcpp::export]]
 7 | NumericVector diff_cpp_omp(NumericVector x) {
 8 |   omp_set_num_threads(3);
 9 |   NumericVector res(x.size() - 1);
10 | #pragma omp parallel for 
11 |   for (int i = 0; i < x.size() - 1; i++) {
12 |     res[i] = x[i + 1] - x[i];
13 |   }
14 |   return res;
15 | }
16 | 


--------------------------------------------------------------------------------
/Chapter 13/rcpp-diff.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | // [[Rcpp::export]]
 5 | NumericVector diff_cpp(NumericVector x) {
 6 |   NumericVector res(x.size() - 1);
 7 |   for (int i = 0; i < x.size() - 1; i++) {
 8 |     res[i] = x[i + 1] - x[i];
 9 |   }
10 |   return res;
11 | }
12 | 


--------------------------------------------------------------------------------
/Chapter 13/rcpp-parallel.cpp:
--------------------------------------------------------------------------------
 1 | // [[Rcpp::plugins(cpp11)]]
 2 | // [[Rcpp::depends(RcppParallel)]]
 3 | #include <Rcpp.h>
 4 | #include <RcppParallel.h>
 5 | 
 6 | using namespace Rcpp;
 7 | using namespace RcppParallel;
 8 | 
 9 | struct Transformer : public Worker {
10 |   const RMatrix<double> input;
11 |   RMatrix<double> output;
12 |   Transformer(const NumericMatrix input, NumericMatrix output)
13 |     : input(input), output(output) {}
14 |   void operator()(std::size_t begin, std::size_t end) {
15 |     std::transform(input.begin() + begin, input.begin() + end,
16 |       output.begin() + begin, [](double x) {
17 |         return 1 / (1 + x * x);
18 |       });
19 |   }
20 | };
21 | 
22 | // [[Rcpp::export]]
23 | NumericMatrix par_transform (NumericMatrix x) {
24 |   NumericMatrix output(x.nrow(), x.ncol());
25 |   Transformer transformer(x, output);
26 |   parallelFor(0, x.length(), transformer);
27 |   return output;
28 | }
29 | 


--------------------------------------------------------------------------------
/Chapter 14/data/new-products.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>New Products</title>
 5 |   <style>
 6 |     p {
 7 |       margin: 2px;
 8 |     }
 9 |     .product-list {
10 |       width: 80%;
11 |     }
12 |     .product-list ul {
13 |       padding-left: 8px;
14 |     }
15 |     .product-list ul li {
16 |       padding-left: 2px;
17 |       list-style: none;
18 |     }
19 |     .product-list > ul > li {
20 |       margin-top: 16px;
21 |       list-style: none;
22 |     }
23 |     .product-list .name {
24 |       font-weight: bold;
25 |       font-size: 1.25em;
26 |     }
27 |     .product-list .price {
28 |       color: green;
29 |     }
30 |     .product-list .info {
31 |       background-color: #efefef;
32 |       border-radius: 4px;
33 |     }
34 |     .info-key {
35 |       font-weight: bold;
36 |     }
37 |     .info-value {
38 |       
39 |     }
40 |     .unit {
41 |       padding-left: 4px;
42 |       color: #818181;
43 |     }
44 |     .bordered {
45 |       border: 1px gray dashed;
46 |       border-radius: 4px;
47 |     }
48 |   </style>
49 | </head>
50 | <body>
51 |   <h1>New Products</h1>
52 |   <p>The following is a list of products</p>
53 |   <div id="list" class="product-list">
54 |     <ul>
55 |       <li>
56 |         <span class="name">Product-A</span>
57 |         <span class="price">$199.95</span>
58 |         <div class="info bordered">
59 |           <p>Description for Product-A</p>
60 |           <ul>
61 |             <li><span class="info-key">Quality</span> <span class="info-value">Good</span></li>
62 |             <li><span class="info-key">Duration</span> <span class="info-value">5</span><span class="unit">years</span></li>
63 |           </ul>
64 |         </div>
65 |       </li>
66 |       <li class="selected">
67 |         <span class="name">Product-B</span>
68 |         <span class="price">$129.95</span>
69 |         <div class="info">
70 |           <p>Description for Product-B</p>
71 |           <ul>
72 |             <li><span class="info-key">Quality</span> <span class="info-value">Medium</span></li>
73 |             <li><span class="info-key">Duration</span> <span class="info-value">2</span><span class="unit">years</span></li>
74 |           </ul>
75 |         </div>
76 |       </li>
77 |       <li>
78 |         <span class="name">Product-C</span>
79 |         <span class="price">$99.95</span>
80 |         <div class="info">
81 |           <p>Description for Product-C</p>
82 |           <ul>
83 |             <li><span class="info-key">Quality</span> <span class="info-value">Good</span></li>
84 |             <li><span class="info-key">Duration</span> <span class="info-value">4</span><span class="unit">years</span></li>
85 |           </ul>
86 |         </div>
87 |       </li>
88 |     </ul>
89 |   </div>
90 |   <p>All products are available for sale!</p>
91 | </body>
92 | </html>
93 | 


--------------------------------------------------------------------------------
/Chapter 14/data/products.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>Products</title>
 5 |   <style>
 6 |     h1 {
 7 |       color: darkblue;
 8 |     }
 9 |     .product-list {
10 |       width: 50px;
11 |     }
12 |     .product-list li.selected .name {
13 |       color: blue;
14 |     }
15 |     .product-list .name {
16 |       font-weight: bold;
17 |     }
18 |     .product-list .price {
19 |       color: green;
20 |     }
21 |   </style>
22 | </head>
23 | <body>
24 |   <h1>Products</h1>
25 |   <p>The following lists some products</p>
26 |   <div id="table1" class="product-list">
27 |     <ul>
28 |       <li>
29 |         <span class="name">Product-A</span>
30 |         <span class="price">$199.95</span>
31 |       </li>
32 |       <li class="selected">
33 |         <span class="name">Product-B</span>
34 |         <span class="price">$129.95</span>
35 |       </li>
36 |       <li>
37 |         <span class="name">Product-C</span>
38 |         <span class="price">$99.95</span>
39 |       </li>
40 |     </ul>
41 |   </div>
42 | </body>
43 | </html>
44 | 


--------------------------------------------------------------------------------
/Chapter 14/data/simple-page.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>Simple page</title>
 5 | </head>
 6 | <body>
 7 |   <h1>Heading 1</h1>
 8 |   <p>This is a paragraph.</p>
 9 | </body>
10 | </html>
11 | 


--------------------------------------------------------------------------------
/Chapter 14/data/simple-products.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>Products</title>
 5 | </head>
 6 | <body>
 7 |   <h1 style="color: blue;">Products</h1>
 8 |   <p>The following lists some products</p>
 9 |   <div id="table1" style="width: 50px;">
10 |     <ul>
11 |       <li>
12 |         <span style="font-weight: bold;">Product-A</span>
13 |         <span style="color: green;">$199.95</span>
14 |       </li>
15 |       <li>
16 |         <span style="font-weight: bold;">Product-B</span>
17 |         <span style="color: green;">$129.95</span>
18 |       </li>
19 |       <li>
20 |         <span style="font-weight: bold;">Product-C</span>
21 |         <span style="color: green;">$99.95</span>
22 |       </li>
23 |     </ul>
24 |   </div>
25 | </body>
26 | </html>
27 | 


--------------------------------------------------------------------------------
/Chapter 14/data/single-table.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |   <title>Single table</title>
 5 | </head>
 6 | <body>
 7 |   <p>The following is a table</p>
 8 |   <table id="table1" border="1">
 9 |     <thead>
10 |       <tr>
11 |         <th>Name</th>
12 |         <th>Age</th>
13 |       </tr>
14 |     </thead>
15 |     <tbody>
16 |       <tr>
17 |         <td>Jenny</td>
18 |         <td>18</td>
19 |       </tr>
20 |       <tr>
21 |         <td>James</td>
22 |         <td>19</td>
23 |       </tr>
24 |     </tbody>
25 |   </table>
26 | </body>
27 | </html>
28 | 


--------------------------------------------------------------------------------
/Chapter 14/web-scrapping.R:
--------------------------------------------------------------------------------
  1 | install.packages("rvest")
  2 | 
  3 | 
  4 | library(rvest)
  5 | single_table_page <- read_html("data/single-table.html")
  6 | single_table_page
  7 | 
  8 | 
  9 | html_table(single_table_page)
 10 | 
 11 | 
 12 | html_table(html_node(single_table_page, "table"))
 13 | 
 14 | 
 15 | single_table_page %>%
 16 |   html_node("table") %>%
 17 |   html_table()
 18 | 
 19 | 
 20 | products_page <- read_html("data/products.html")
 21 | products_page %>%
 22 |   html_nodes(".product-list li .name")
 23 | 
 24 | 
 25 | products_page %>%
 26 |   html_nodes(".product-list li .name") %>%
 27 |   html_text()
 28 | 
 29 | 
 30 | products_page %>%
 31 |   html_nodes(".product-list li .price") %>%
 32 |   html_text()
 33 | 
 34 | 
 35 | product_items <- products_page %>%
 36 |   html_nodes(".product-list li")
 37 | products <- data.frame(
 38 |   name = product_items %>%
 39 |     html_nodes(".name") %>%
 40 |     html_text(),
 41 |   price = product_items %>%
 42 |     html_nodes(".price") %>%
 43 |     html_text() %>%
 44 |     gsub("$", "", ., fixed = TRUE) %>%
 45 |     as.numeric(),
 46 |   stringsAsFactors = FALSE
 47 | )
 48 | products
 49 | 
 50 | 
 51 | 
 52 | 
 53 | page <- read_html("data/new-products.html")
 54 | 
 55 | 
 56 | page %>% html_nodes(xpath = "//p")
 57 | 
 58 | 
 59 | page %>% html_nodes(xpath = "//li[@class]")
 60 | 
 61 | 
 62 | page %>% html_nodes(xpath = "//div[@id='list']/ul/li")
 63 | 
 64 | 
 65 | page %>% html_nodes(xpath = "//div[@id='list']//li/span[@class='name']")
 66 | 
 67 | 
 68 | page %>%
 69 |   html_nodes(xpath = "//li[@class='selected']/span[@class='name']")
 70 | 
 71 | 
 72 | page %>% html_nodes(xpath = "//div[p]")
 73 | 
 74 | 
 75 | page %>% 
 76 |   html_nodes(xpath = "//span[@class='info-value' and text()='Good']")
 77 | 
 78 | 
 79 | page %>%
 80 |   html_nodes(xpath = "//li[div/ul/li[1]/span[@class='info-value' and text()='Good']]/span[@class='name']")
 81 | 
 82 | 
 83 | page %>%
 84 |   html_nodes(xpath = "//li[div/ul/li[2]/span[@class='info-value' and text()>3]]/span[@class='name']")
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | page <- read_html("https://cran.rstudio.com/web/packages/available_packages_by_name.html")
 94 | pkg_table <- page %>%
 95 |   html_node("table") %>% 
 96 |   html_table(fill = TRUE)
 97 | head(pkg_table, 5)
 98 | 
 99 | 
100 | pkg_table <- pkg_table[complete.cases(pkg_table), ]
101 | colnames(pkg_table) <- c("name", "title")
102 | head(pkg_table, 3)
103 | 
104 | 
105 | 
106 | 
107 | page <- read_html("https://finance.yahoo.com/quote/MSFT")
108 | page %>%
109 |   html_node("div#quote-header-info > section > span") %>%
110 |   html_text() %>%
111 |   as.numeric()
112 | 
113 | 
114 | 
115 | 
116 | 
117 | 
118 | page %>%
119 |   html_node("#key-statistics table") %>%
120 |   html_table()
121 | 
122 | 
123 | get_price <- function(symbol) {
124 |   page <- read_html(sprintf("https://finance.yahoo.com/quote/%s", symbol))
125 |   list(symbol = symbol,
126 |     company = page %>% 
127 |       html_node("div#quote-header-info > div:nth-child(1) > h6") %>%
128 |       html_text(),
129 |     price = page %>% 
130 |       html_node("div#quote-header-info > section > span:nth-child(1)") %>%
131 |       html_text() %>%
132 |       as.numeric())
133 | }
134 | 
135 | 
136 | get_price("AAPL")
137 | 
138 | 
139 | 
140 | 
141 | page <- read_html("https://stackoverflow.com/questions/tagged/r?sort=votes&pageSize=5")
142 | questions <- page %>% 
143 |   html_node("#questions")
144 | 
145 | 
146 | 
147 | 
148 | questions %>%
149 |   html_nodes(".summary h3") %>%
150 |   html_text()
151 | 
152 | 
153 | questions %>%
154 |   html_nodes(".question-hyperlink") %>%
155 |   html_text()
156 | 
157 | 
158 | 
159 | 
160 | questions %>%
161 |   html_nodes(".question-summary .vote-count-post") %>%
162 |   html_text() %>%
163 |   as.integer()
164 | 
165 | 
166 | questions %>%
167 |   html_nodes(".question-summary .status strong") %>%
168 |   html_text() %>%
169 |   as.integer()
170 | 
171 | 
172 | questions %>%
173 |   html_nodes(".question-summary .tags") %>%
174 |   lapply(function(node) {
175 |     node %>%
176 |       html_nodes(".post-tag") %>%
177 |       html_text()
178 |   }) %>%
179 |   str
180 | 
181 | 
182 | 
183 | 
184 | questions %>%
185 |   html_nodes(".question-hyperlink") %>%
186 |   html_attr("href") %>%
187 |   lapply(function(link) {
188 |     paste0("https://stackoverflow.com", link) %>%
189 |       read_html() %>%
190 |       html_node("#qinfo") %>%
191 |       html_table() %>%
192 |       setNames(c("item", "value"))
193 |   })
194 | 


--------------------------------------------------------------------------------
/Chapter 15/boosting-productivity.R:
--------------------------------------------------------------------------------
  1 | n <- 100
  2 | x <- rnorm(n)
  3 | y <- 2 * x + rnorm(n)
  4 | m <- lm(y ~ x)
  5 | coef(m)
  6 | 
  7 | toys <- data.frame(
  8 |   id = 1:3,
  9 |   name = c("Car", "Plane", "Motocycle"),
 10 |   price = c(15, 25, 14),
 11 |   share = c(0.3, 0.1, 0.2),
 12 |   stringsAsFactors = FALSE
 13 | )
 14 | 
 15 | 
 16 | toys
 17 | 
 18 | 
 19 | knitr::kable(toys)
 20 | 
 21 | 
 22 | xtable::xtable(lm(mpg ~ cyl + vs, data = mtcars))
 23 | 
 24 | 
 25 | 
 26 | 
 27 | library(formattable)
 28 | formattable(toys, 
 29 |   list(price = color_bar("lightpink"), share = percent))
 30 | 
 31 | 
 32 | 
 33 | 
 34 | library(DT)
 35 | datatable(mtcars)
 36 | 
 37 | 
 38 | 
 39 | 
 40 | set.seed(123)
 41 | x <- rnorm(1000)
 42 | y <- 2 * x + rnorm(1000)
 43 | m <- lm(y ~ x)
 44 | plot(x, y, main = "Linear regression", col = "darkgray")
 45 | abline(coef(m))
 46 | 
 47 | 
 48 | library(DiagrammeR)
 49 | grViz("
 50 | digraph rmarkdown {
 51 |   A -> B;
 52 |   B -> C;
 53 |   C -> A;
 54 | }")
 55 | 
 56 | 
 57 | 
 58 | 
 59 | library(ggvis)
 60 | mtcars %>% 
 61 |   ggvis(~mpg, ~disp, opacity := 0.6) %>% 
 62 |   layer_points(size := input_slider(1, 100, value = 50, label = "size")) %>%
 63 |   layer_smooths(span = input_slider(0.5, 1, value = 1, label = "span"))
 64 | 
 65 | 
 66 | 
 67 | 
 68 | library(dygraphs)
 69 | library(xts)
 70 | library(dplyr)
 71 | library(reshape2)
 72 | data(weather, package = "nycflights13")
 73 | temp <- weather %>%
 74 |   group_by(origin, year, month, day) %>%
 75 |   summarize(temp = mean(temp)) %>%
 76 |   ungroup() %>%
 77 |   mutate(date = as.Date(sprintf("%d-%02d-%02d", year, month, day))) %>%
 78 |   select(origin, date, temp) %>%
 79 |   dcast(date ~ origin, value.var = "temp")
 80 | 
 81 | temp_xts <- as.xts(temp[-1], order.by = temp[[1]])
 82 | head(temp_xts)
 83 | 
 84 | 
 85 | dygraph(temp_xts, main = "Airport Temperature") %>%
 86 |   dyRangeSelector() %>%
 87 |   dyHighlight(highlightCircleSize = 3, 
 88 |     highlightSeriesBackgroundAlpha = 0.3,
 89 |     hideOnMouseOut = FALSE)
 90 | 
 91 | 
 92 | 
 93 | 
 94 | library(shiny)
 95 | 
 96 | ui <- bootstrapPage(
 97 |   numericInput("n", label = "Sample size", value = 10, min = 10, max = 100),
 98 |   textOutput("mean")
 99 | )
100 | 
101 | server <- function(input, output) {
102 |   output$mean <- renderText(mean(rnorm(input$n)))
103 | }
104 | 
105 | app <- shinyApp(ui, server)
106 | runApp(app)
107 | 
108 | 
109 | 
110 | 
111 | shiny_vars <- ls(getNamespace("shiny"))
112 | shiny_vars[grep("Input$", shiny_vars)]
113 | 
114 | 
115 | shiny_vars[grep("Output$", shiny_vars)]
116 | 
117 | 
118 | library(shiny)
119 | ui <- fluidPage(
120 |   titlePanel("Random walk"),
121 |   sidebarLayout(
122 |     sidebarPanel(
123 |       numericInput("seed", "Random seed", 123),
124 |       sliderInput("paths", "Paths", 1, 100, 1),
125 |       sliderInput("start", "Starting value", 1, 10, 1, 1),
126 |       sliderInput("r", "Expected return", -0.1, 0.1, 0, 0.001),
127 |       sliderInput("sigma", "Sigma", 0.001, 1, 0.01, 0.001),
128 |       sliderInput("periods", "Periods", 10, 1000, 200, 10)),
129 |   mainPanel(
130 |     plotOutput("plot", width = "100%", height = "600px")
131 |   ))
132 | )
133 | 
134 | 
135 | shiny_vars[grep("^render", shiny_vars)]
136 | 
137 | 
138 | server <- function(input, output) {
139 |   output$plot <- renderPlot({
140 |     set.seed(input$seed)
141 |     mat <- sapply(seq_len(input$paths), function(i) {
142 |       sde::GBM(input$start, input$r, input$sigma, 1, input$periods)
143 |     })
144 |     matplot(mat, type = "l", lty = 1,
145 |       main = "Geometric Brownian motions")
146 |   })
147 | }
148 | 
149 | 
150 | app <- shinyApp(ui, server)
151 | runApp(app)
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | install_packages(c("shinydashboard", "cranlogs"))
159 | 
160 | 
161 | library(cranlogs)
162 | cran_top_downloads()
163 | cran_top_downloads("last-week")
164 | 
165 | 
166 | library(shiny)
167 | library(shinydashboard)
168 | library(formattable)
169 | library(cranlogs)
170 | 
171 | ui <- dashboardPage(
172 |   dashboardHeader(title = "CRAN Downloads"),
173 |   dashboardSidebar(sidebarMenu(
174 |     menuItem("Last week", tabName = "last_week", icon = icon("list")),
175 |     menuItem("Last month", tabName = "last_month", icon = icon("list"))
176 |   )),
177 |   dashboardBody(tabItems(
178 |     tabItem(tabName = "last_week", 
179 |       fluidRow(tabBox(title = "Total downloads",
180 |         tabPanel("Total", formattableOutput("last_week_table"))),
181 |         tabBox(title = "Top downloads",
182 |           tabPanel("Top", formattableOutput("last_week_top_table"))))),
183 |     tabItem(tabName = "last_month", 
184 |       fluidRow(tabBox(title = "Total downloads",
185 |         tabPanel("Total", plotOutput("last_month_barplot"))),
186 |         tabBox(title = "Top downloads",
187 |           tabPanel("Top", formattableOutput("last_month_top_table")))))
188 |   ))
189 | )
190 | 
191 | 
192 | server <- function(input, output) {
193 |   output$last_week_table <- renderFormattable({
194 |     data <- cran_downloads(when = "last-week")
195 |     formattable(data, list(count = color_bar("lightblue")))
196 |   })
197 |   
198 |   output$last_week_top_table <- renderFormattable({
199 |     data <- cran_top_downloads("last-week")
200 |     formattable(data, list(count = color_bar("lightblue"),
201 |       package = formatter("span", style = "font-family: monospace;")))
202 |   })
203 |   
204 |   output$last_month_barplot <- renderPlot({
205 |     data <- subset(cran_downloads(when = "last-month"), count > 0)
206 |     with(data, barplot(count, names.arg = date),
207 |       main = "Last month downloads")
208 |   })
209 |   
210 |   output$last_month_top_table <- renderFormattable({
211 |     data <- cran_top_downloads("last-month")
212 |     formattable(data, list(count = color_bar("lightblue"),
213 |       package = formatter("span", style = "font-family: monospace;")))
214 |   })
215 | }
216 | 
217 | 
218 | runApp(shinyApp(ui, server))
219 | 


--------------------------------------------------------------------------------
/Chapter 2/basic-objects.R:
--------------------------------------------------------------------------------
  1 | 1.5
  2 | 
  3 | 
  4 | x <- 1.5
  5 | 
  6 | 
  7 | x
  8 | 
  9 | 
 10 | numeric(10)
 11 | 
 12 | 
 13 | c(1, 2, 3, 4, 5)
 14 | 
 15 | 
 16 | c(1, 2, c(3, 4, 5))
 17 | 
 18 | 
 19 | 1:5
 20 | 
 21 | 
 22 | seq(1, 10, 2)
 23 | 
 24 | 
 25 | seq(3, length.out = 10)
 26 | 
 27 | 
 28 | 1 + 1:5
 29 | 
 30 | 
 31 | TRUE
 32 | 
 33 | 
 34 | 1 > 2
 35 | 
 36 | 
 37 | c(1, 2) > 2
 38 | 
 39 | 
 40 | c(1, 2) > c(2, 1)
 41 | 
 42 | 
 43 | c(2, 3) > c(1, 2, -1, 3)
 44 | 
 45 | 
 46 | 1 %in% c(1, 2, 3)
 47 | c(1, 4) %in% c(1, 2, 3)
 48 | 
 49 | 
 50 | "hello, world!"
 51 | 
 52 | 
 53 | 'hello, world!'
 54 | 
 55 | 
 56 | c("Hello", "World")
 57 | 
 58 | 
 59 | c("Hello", "World") == c('Hello', 'World')
 60 | 
 61 | 
 62 | c("Hello", "World") == "Hello, World"
 63 | 
 64 | 
 65 | cat("Is \"You\" a Chinese name?")
 66 | 
 67 | 
 68 | cat('Is "You" a Chinese name?')
 69 | 
 70 | 
 71 | v1 <- c(1, 2, 3, 4)
 72 | 
 73 | 
 74 | v1[2]
 75 | 
 76 | 
 77 | v1[2:4]
 78 | 
 79 | 
 80 | v1[-3]
 81 | 
 82 | 
 83 | a <- c(1, 3)
 84 | v1[a]
 85 | 
 86 | 
 87 | v1[c(1, 2, -3)]
 88 | 
 89 | 
 90 | v1[3:6]
 91 | 
 92 | 
 93 | v1[c(TRUE, FALSE, TRUE, FALSE)]
 94 | 
 95 | 
 96 | v1[2] <- 0
 97 | 
 98 | 
 99 | v1
100 | 
101 | 
102 | v1[2:4] <- c(0, 1, 3)
103 | 
104 | 
105 | v1
106 | 
107 | 
108 | v1[c(TRUE, FALSE, TRUE, FALSE)] <- c(3, 2)
109 | 
110 | 
111 | v1
112 | 
113 | 
114 | v1[v1 <= 2]
115 | 
116 | 
117 | v1[v1 ^ 2 - v1 + 1 >= 0]
118 | 
119 | 
120 | v1[v1 <= 2] <- 0
121 | 
122 | 
123 | v1
124 | 
125 | 
126 | v1[10] <- 8
127 | v1
128 | 
129 | 
130 | x <- c(a = 1, b = 2, c = 3)
131 | x
132 | 
133 | 
134 | x["a"]
135 | 
136 | 
137 | x[c("a", "c")]
138 | 
139 | 
140 | x[c("a", "a", "c")]
141 | 
142 | 
143 | names(x)
144 | 
145 | 
146 | names(x) <- c("x", "y", "z")
147 | x["z"]
148 | 
149 | 
150 | names(x) <- NULL
151 | x
152 | 
153 | 
154 | x <- c(a = 1, b = 2, c = 3)
155 | x["d"]
156 | 
157 | 
158 | names(x["d"])
159 | 
160 | 
161 | x[c("a", "d")]
162 | 
163 | 
164 | x <- c(a = 1, b = 2, c = 3)
165 | x["a"]
166 | x[["a"]]
167 | 
168 | 
169 | x[[c(1, 2)]]
170 | 
171 | 
172 | x[[-1]]
173 | 
174 | 
175 | x[["d"]]
176 | 
177 | 
178 | class(c(1, 2, 3))
179 | class(c(TRUE, TRUE, FALSE))
180 | class(c("Hello", "World"))
181 | 
182 | 
183 | is.numeric(c(1, 2, 3))
184 | is.numeric(c(TRUE, TRUE, FALSE))
185 | is.numeric(c("Hello", "World"))
186 | 
187 | 
188 | strings <- c("1", "2", "3")
189 | class(strings)
190 | 
191 | 
192 | strings + 10
193 | 
194 | 
195 | numbers <- as.numeric(strings)
196 | numbers
197 | class(numbers)
198 | 
199 | 
200 | numbers + 10
201 | 
202 | 
203 | as.numeric(c("1", "2", "3", "a"))
204 | as.logical(c(-1, 0, 1, 2))
205 | as.character(c(1, 2, 3))
206 | as.character(c(TRUE, FALSE))
207 | 
208 | 
209 | as.character(c(1, 2)) + c(2, 3)
210 | 
211 | 
212 | c(1, 2, 3, 4) + 2
213 | c(1, 2, 3) - c(2, 3, 4)
214 | c(1, 2, 3) * c(2, 3, 4)
215 | c(1, 2, 3) / c(2, 3, 4)
216 | c(1, 2, 3) ^ 2
217 | c(1, 2, 3) ^ c(2, 3, 4)
218 | c(1, 2, 3, 14) %% 2
219 | 
220 | 
221 | c(a = 1, b = 2, c = 3) + c(b = 2, c = 3, d = 4)
222 | c(a = 1, b = 2, 3) + c(b = 2, c = 3, d = 4)
223 | 
224 | 
225 | matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5), ncol = 3)
226 | 
227 | 
228 | matrix(c(1, 2, 3, 
229 |          4, 5, 6, 
230 |          7, 8, 9), nrow = 3, byrow = FALSE)
231 | 
232 | matrix(c(1, 2, 3, 
233 |          4, 5, 6, 
234 |          7, 8, 9), nrow = 3, byrow = TRUE)
235 | 
236 | 
237 | diag(1, nrow = 5)
238 | 
239 | 
240 | matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3, byrow = TRUE,
241 |   dimnames = list(c("r1", "r2", "r3"), c("c1", "c2", "c3")))
242 | 
243 | 
244 | m1 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
245 | rownames(m1) <- c("r1", "r2", "r3")
246 | colnames(m1) <- c("c1", "c2", "c3")
247 | 
248 | 
249 | m1
250 | 
251 | 
252 | m1[1, 2]
253 | 
254 | 
255 | m1[1:2, 2:3]
256 | 
257 | 
258 | m1[1,]
259 | m1[,2]
260 | m1[1:2,]
261 | m1[, 2:3]
262 | 
263 | 
264 | m1[-1,]
265 | m1[,-2]
266 | 
267 | 
268 | m1[c("r1", "r3"), c("c1", "c3")]
269 | 
270 | 
271 | m1[1]
272 | m1[9]
273 | m1[3:7]
274 | 
275 | 
276 | m1 > 3
277 | 
278 | 
279 | m1[m1 > 3]
280 | 
281 | 
282 | m1 + m1
283 | m1 - 2*m1
284 | m1 * m1
285 | m1 / m1
286 | m1 ^ 2
287 | m1 %*% m1
288 | 
289 | 
290 | t(m1)
291 | 
292 | 
293 | a1 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), dim = c(1, 5, 2))
294 | a1
295 | 
296 | 
297 | a1 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), dim = c(1, 5, 2),
298 |   dimnames = list(c("r1"), c("c1", "c2", "c3", "c4", "c5"), c("k1", "k2")))
299 | a1
300 | 
301 | 
302 | a0 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), dim = c(1, 5, 2))
303 | dimnames(a0) <- list(c("r1"), c("c1", "c2", "c3", "c4", "c5"), c("k1", "k2"))
304 | a0
305 | 
306 | 
307 | a1[1,,]
308 | a1[, 2,]
309 | a1[,,1]
310 | a1[1, 1, 1]
311 | a1[1, 2:4, 1:2]
312 | a1[c("r1"), c("c1", "c3"), "k1"]
313 | 
314 | 
315 | l0 <- list(1, c(TRUE, FALSE), c("a", "b", "c"))
316 | l0
317 | 
318 | 
319 | l1 <- list(x = 1, y = c(TRUE, FALSE), z = c("a", "b", "c"))
320 | l1
321 | 
322 | 
323 | l1$x
324 | l1$y
325 | l1$z
326 | l1$m
327 | 
328 | 
329 | l1[[2]]
330 | 
331 | 
332 | l1[["y"]]
333 | 
334 | 
335 | member <- "z" # you can dynamically determine which member to extract
336 | l1[[member]]
337 | 
338 | 
339 | l1["x"]
340 | l1[c("x", "y")]
341 | l1[1]
342 | l1[c(1, 2)]
343 | l1[c(TRUE, FALSE, TRUE)]
344 | 
345 | 
346 | names(l1) <- c("A","B","C")
347 | l1
348 | 
349 | 
350 | names(l1) <- NULL
351 | l1
352 | 
353 | 
354 | l1 <- list(x = 1, y = c(TRUE, FALSE), z = c("a", "b", "c"))
355 | l1$x <- 0
356 | 
357 | 
358 | l1$m <- 4
359 | l1
360 | 
361 | 
362 | l1[c("y", "z")] <- list(y = "new value for y", z = c(1, 2))
363 | l1
364 | 
365 | 
366 | l1$x <- NULL
367 | l1
368 | 
369 | 
370 | l1[c("z", "m")] <- NULL
371 | l1
372 | 
373 | 
374 | l2 <- list(a = c(1, 2, 3), b = c("x", "y", "z", "w"))
375 | is.list(l2)
376 | is.list(l2$a)
377 | 
378 | 
379 | l3 <- as.list(c(a = 1, b = 2, c = 3))
380 | l3
381 | 
382 | 
383 | l4 <- list(a = 1, b = 2, c = 3)
384 | unlist(l4)
385 | 
386 | 
387 | l4 <- list(a = 1, b = 2, c = "hello")
388 | unlist(l4)
389 | 
390 | 
391 | persons <- data.frame(Name = c("Ken", "Ashley", "Jennifer"),
392 |   Gender = c("Male", "Female", "Female"),
393 |   Age = c(24, 25, 23),
394 |   Major = c("Finance", "Statistics", "Computer Science"))
395 | persons
396 | 
397 | 
398 | l1 <- list(x = c(1, 2, 3), y = c("a", "b", "c"))
399 | data.frame(l1)
400 | as.data.frame(l1)
401 | 
402 | 
403 | m1 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3, byrow = FALSE)
404 | data.frame(m1)
405 | as.data.frame(m1)
406 | 
407 | 
408 | df1 <- data.frame(id = 1:5, x = c(0, 2, 1, -1, -3), y = c(0.5, 0.2, 0.1, 0.5, 0.9))
409 | df1
410 | 
411 | 
412 | colnames(df1) <- c("id", "level", "score")
413 | rownames(df1) <- letters[1:5]
414 | df1
415 | 
416 | 
417 | df1$id
418 | df1[[1]]
419 | 
420 | 
421 | df1[1]
422 | df1[1:2]
423 | df1["level"]
424 | df1[c("id", "score")]
425 | df1[c(TRUE, FALSE, TRUE)]
426 | 
427 | 
428 | df1[, "level"]
429 | df1[, c("id", "level")]
430 | df1[, 1:2]
431 | 
432 | 
433 | df1[1:4,]
434 | df1[c("c", "e"),]
435 | 
436 | 
437 | df1[1:4, "id"]
438 | df1[1:3, c("id", "score")]
439 | 
440 | 
441 | df1[1:4,]["id"]
442 | 
443 | 
444 | df1[1:4, "id", drop = FALSE]
445 | 
446 | 
447 | df1$score >= 0.5
448 | df1[df1$score >= 0.5, c("id", "level")]
449 | 
450 | 
451 | rownames(df1) %in% c("a", "d", "e")
452 | df1[rownames(df1) %in% c("a", "d", "e"), c("id", "score")]
453 | 
454 | 
455 | df1$score <- c(0.6, 0.3, 0.2, 0.4, 0.8)
456 | df1
457 | 
458 | 
459 | df1["score"] <- c(0.8, 0.5, 0.2, 0.4, 0.8)
460 | df1
461 | df1[["score"]] <- c(0.4, 0.5, 0.2, 0.8, 0.4)
462 | df1
463 | df1[c("level", "score")] <- list(level = c(1, 2, 1, 0, 0), score = c(0.1, 0.2, 0.3, 0.4, 0.5))
464 | df1
465 | 
466 | 
467 | df1[1:3, "level"] <- c(-1, 0, 1)
468 | df1
469 | df1[1:2, c("level", "score")] <- list(level = c(0, 0), score = c(0.9, 1.0))
470 | df1
471 | 
472 | 
473 | str(persons)
474 | 
475 | 
476 | persons[1, "Name"] <- "John"
477 | persons
478 | 
479 | 
480 | persons <- data.frame(Name = c("Ken", "Ashley", "Jennifer"),
481 |   Gender = factor(c("Male", "Female", "Female")),
482 |   Age = c(24, 25, 23),
483 |   Major = c("Finance", "Statistics", "Computer Science"),
484 |   stringsAsFactors = FALSE)
485 | str(persons)
486 | 
487 | 
488 | summary(persons)
489 | 
490 | 
491 | rbind(persons, data.frame(Name = "John", Gender = "Male", Age = 25, Major = "Statistics"))
492 | 
493 | 
494 | cbind(persons, Registered = c(TRUE, TRUE, FALSE), Projects = c(3, 2, 3))
495 | 
496 | 
497 | expand.grid(type = c("A", "B"), class = c("M", "L", "XL"))
498 | 
499 | 
500 | read.csv("data/persons.csv")
501 | 
502 | 
503 | write.csv(persons, "data/persons.csv", row.names = FALSE, quote = FALSE)
504 | 
505 | 
506 | add <- function(x, y) {
507 |   x + y
508 | }
509 | 
510 | 
511 | add(2, 3)
512 | 
513 | 
514 | add(c(2, 3), 4)
515 | 
516 | 
517 | add(as.Date("2014-06-01"), 1)
518 | 
519 | 
520 | add(list(a = 1), list(a = 2))
521 | 
522 | 
523 | calc <- function(x, y, type) {
524 |   if (type == "add") {
525 |     x + y
526 |   } else if (type == "minus") {
527 |     x - y
528 |   } else if (type == "multiply") {
529 |     x * y
530 |   } else if (type == "divide") {
531 |     x / y
532 |   } else {
533 |     stop("Unknown type of operation")
534 |   }
535 | }
536 | 
537 | 
538 | calc(2, 3, "minus")
539 | 
540 | 
541 | calc(c(2, 5), c(3, 6), "divide")
542 | 
543 | 
544 | calc(as.Date("2014-06-01"), 3, "add")
545 | 
546 | 
547 | calc(1, 2, "what")
548 | 
549 | 
550 | calc(1, 2, c("add", "minue"))
551 | 
552 | 
553 | calc <- function(x, y, type) {
554 |   if (length(type) > 1L) stop("Only a single type is accepted")
555 |   if (type == "add") {
556 |     x + y
557 |   } else if (type == "minus") {
558 |     x - y
559 |   } else if (type == "multiply") {
560 |     x*y
561 |   } else if (type == "divide") {
562 |     x / y
563 |   } else {
564 |     stop("Unknown type of operation")
565 |   }
566 | }
567 | 
568 | 
569 | calc(1, 2, c("add", "minue"))
570 | 
571 | 
572 | increase <- function(x, y = 1) {
573 |   x + y
574 | }
575 | 
576 | 
577 | increase(1)
578 | increase(c(1, 2, 3))
579 | 
580 | 
581 | 


--------------------------------------------------------------------------------
/Chapter 3/managing-your-workspace.R:
--------------------------------------------------------------------------------
  1 | 
  2 | getwd()
  3 | 
  4 | 
  5 | "Hello\nWorld"
  6 | 
  7 | 
  8 | cat("Hello\nWorld")
  9 | 
 10 | 
 11 | cat("The string with '\\' is translated")
 12 | 
 13 | 
 14 | filename <- "d:\data\test.csv"
 15 | ## Error: '\d' is an unrecognized escape in character string starting ""d:\d"
 16 | 
 17 | 
 18 | filename <- "d:\\data\\test.csv"
 19 | 
 20 | 
 21 | absolute_filename <- "d:/data/test.csv"
 22 | relative_filename <- "data/test.csv"
 23 | 
 24 | 
 25 | objects()
 26 | 
 27 | 
 28 | e <- new.env()
 29 | objects(pos = e)
 30 | 
 31 | 
 32 | x <- c(1, 2, 3)
 33 | y <- c("a", "b", "c")
 34 | z <- list(m = 1:5, n = c("x", "y", "z"))
 35 | 
 36 | 
 37 | objects()
 38 | 
 39 | 
 40 | with(e, {
 41 |   x <- c(1, 2, 3)
 42 |   y <- c("a", "b", "c")
 43 |   z <- list(m = 1:5, n = c("x", "y", "z"))
 44 |   objects()
 45 | })
 46 | 
 47 | 
 48 | ls()
 49 | 
 50 | 
 51 | with(e, ls())
 52 | 
 53 | 
 54 | x
 55 | str(x)
 56 | 
 57 | 
 58 | str(1:30)
 59 | 
 60 | 
 61 | z
 62 | 
 63 | 
 64 | str(z)
 65 | 
 66 | 
 67 | nested_list <- list(m = 1:15, n = list("a", c(1, 2, 3)), 
 68 |   p = list(x = 1:10, y = c("a", "b")), 
 69 |   q = list(x = 0:9, y = c("c", "d")))
 70 | 
 71 | 
 72 | with(e, nested_list <- nested_list)
 73 | 
 74 | 
 75 | nested_list
 76 | 
 77 | 
 78 | str(nested_list)
 79 | 
 80 | 
 81 | ls.str()
 82 | 
 83 | 
 84 | ls.str(pos = e)
 85 | 
 86 | 
 87 | ls.str(mode = "list")
 88 | 
 89 | 
 90 | ls.str(e, mode = "list")
 91 | 
 92 | 
 93 | ls.str(pattern = "^\\w$")
 94 | 
 95 | 
 96 | ls.str(e, pattern = "^\\w$")
 97 | 
 98 | 
 99 | ls.str(pattern = "^\\w$", mode = "list")
100 | 
101 | 
102 | ls.str(e, pattern = "^\\w$", mode = "list")
103 | 
104 | 
105 | ls()
106 | 
107 | 
108 | with(e, ls())
109 | 
110 | 
111 | rm(x)
112 | ls()
113 | 
114 | 
115 | with(e, {
116 |   rm(x)
117 |   ls()
118 | })
119 | 
120 | 
121 | rm(y, z)
122 | ls()
123 | 
124 | 
125 | with(e, {
126 |   rm(y, z)
127 |   ls()
128 | })
129 | 
130 | 
131 | rm(x)
132 | 
133 | 
134 | with(e, rm(x))
135 | 
136 | 
137 | p <- 1:10
138 | q <- seq(1, 20, 5)
139 | v <- c("p", "q")
140 | rm(list = v)
141 | 
142 | 
143 | with(e, {
144 |   p <- 1:10
145 |   q <- seq(1, 20, 5)
146 |   v <- c("p", "q")
147 |   rm(list = v)
148 |   ls()
149 | })
150 | 
151 | 
152 | rm(list = ls())
153 | ls()
154 | 
155 | 
156 | with(e, {
157 |   rm(list = ls())
158 |   ls()
159 | })
160 | 
161 | 
162 | 
163 | 
164 | 123.12345678
165 | 
166 | 
167 | 0.10000002
168 | 0.10000002 - 0.1
169 | 
170 | 
171 | 1234567.12345678
172 | 
173 | 
174 | getOption("digits")
175 | 1e10 + 0.5
176 | options(digits = 15)
177 | 1e10 + 0.5
178 | 
179 | 
180 | options(digits = 7)
181 | 1e10 + 0.5
182 | 
183 | 
184 | getOption("warn")
185 | 
186 | 
187 | as.numeric("hello")
188 | 
189 | 
190 | options(warn = -1)
191 | as.numeric("hello")
192 | 
193 | 
194 | f <- function(x, y) {
195 |   as.numeric(x) + as.numeric(y)
196 | }
197 | 
198 | 
199 | options(warn = 0)
200 | f("hello", "world")
201 | 
202 | 
203 | options(warn = 1)
204 | f("hello", "world")
205 | 
206 | 
207 | options(warn = 2)
208 | f("hello", "world")
209 | 
210 | 
211 | options(warn = 0)
212 | 
213 | 
214 | 
215 | 
216 | install.packages("ggplot2")
217 | 
218 | 
219 | 
220 | 
221 | install.packages(c("ggplot2", "shiny", "knitr", "dplyr", "data.table"))
222 | 
223 | 
224 | update.packages()
225 | 
226 | 
227 | install.packages("devtools")
228 | 
229 | 
230 | devtools::install_github("hadley/ggplot2")
231 | 
232 | 
233 | install.packages("ggplot2")
234 | 
235 | 
236 | library(moments)
237 | skewness(x)
238 | 
239 | 
240 | moments::skewness(x)
241 | 
242 | 
243 | sessionInfo()
244 | 
245 | 
246 | moments::skewness(c(1, 2, 3, 2, 1))
247 | sessionInfo()
248 | 
249 | 
250 | library(moments)
251 | sessionInfo()
252 | skewness(c(1, 2, 3, 2, 1))
253 | 
254 | 
255 | search()
256 | 
257 | 
258 | unloadNamespace("moments")
259 | 
260 | 
261 | loaded <- require(moments)
262 | loaded
263 | 
264 | 
265 | if (!require(moments)) {
266 |   install.packages("moments")
267 |   library(moments)
268 | }
269 | 
270 | 
271 | require(moments)
272 | 
273 | 
274 | require(testPkg)
275 | 
276 | 
277 | library(testPkg)
278 | 
279 | 
280 | library(dplyr)
281 | 
282 | 
283 | fun1 <- package1::some_function
284 | fun2 <- pacakge2::some_function
285 | 
286 | 
287 | unloadNamespace("moments")
288 | 
289 | 
290 | skewness(c(1, 2, 3, 2, 1))
291 | 
292 | 
293 | moments::skewness(c(1, 2, 3, 2, 1))
294 | 
295 | 
296 | pkgs <- installed.packages()
297 | colnames(pkgs)
298 | 
299 | 
300 | c("moments", "testPkg") %in% installed.packages()[, "Package"]
301 | 
302 | 
303 | installed.packages()["moments", "Version"]
304 | 
305 | 
306 | packageVersion("moments")
307 | 
308 | 
309 | packageVersion("moments") >= package_version("0.14")
310 | 
311 | 
312 | packageVersion("moments") >= "0.14"
313 | 
314 | 
315 | 


--------------------------------------------------------------------------------
/Chapter 4/basic-expressions.R:
--------------------------------------------------------------------------------
  1 | x <- 1
  2 | y <- c(1, 2, 3)
  3 | z <- list(x, y)
  4 | 
  5 | 
  6 | 2 -> x1
  7 | 
  8 | 
  9 | x3 <- x2 <- x1 <- 0
 10 | 
 11 | 
 12 | x3 <- x2 <- x1 <- rnorm(1)
 13 | c(x1, x2, x3)
 14 | 
 15 | 
 16 | x2 = c(1, 2, 3)
 17 | 
 18 | 
 19 | f <- function(input, data = NULL) {
 20 |   cat("input:\n")
 21 |   print(input)
 22 |   cat("data:\n")
 23 |   print(data)
 24 | }
 25 | 
 26 | 
 27 | x <- c(1, 2, 3)
 28 | y <- c("some", "text")
 29 | f(input = x)
 30 | 
 31 | 
 32 | x = c(1, 2, 3)
 33 | y = c("some", "text")
 34 | f(input = x)
 35 | 
 36 | 
 37 | x <- c(1, 2, 3)
 38 | y <- c("some", "text")
 39 | f(input <- x)
 40 | 
 41 | 
 42 | input
 43 | 
 44 | 
 45 | f(input = x, data = y)
 46 | 
 47 | 
 48 | f(input <- x, data <- y)
 49 | 
 50 | 
 51 | f(data = y, input = x)
 52 | 
 53 | 
 54 | f(data <- y, input <- x)
 55 | 
 56 | 
 57 | data <- y
 58 | input <- x
 59 | f(y, x)
 60 | 
 61 | 
 62 | students <- data.frame()
 63 | us_population <- data.frame()
 64 | sales.2015 <- data.frame()
 65 | 
 66 | 
 67 | some data <- data.frame()
 68 | ## Error: unexpected symbol in "some data"
 69 | 
 70 | _data <- data.frame()
 71 | ## Error: unexpected input in "_"
 72 | 
 73 | Population(Millions) <- data.frame()
 74 | ## Error in Population(Millions) <- data.frame() : 
 75 | ##  object 'Millions' not found
 76 | 
 77 | 
 78 | `some data` <- c(1, 2, 3)
 79 | `_data` <- c(4, 5, 6)
 80 | `Population(Millions)` <- c(city1 = 50, city2 = 60)
 81 | 
 82 | 
 83 | `some data`
 84 | `_data`
 85 | `Population(Millions)`
 86 | 
 87 | 
 88 | `Tom's secret function` <- function(a, d) {
 89 |   (a ^ 2 - d ^ 2) / (a ^ 2 + d ^ 2) 
 90 | }
 91 | 
 92 | 
 93 | l1 <- list(`Group(A)` = rnorm(10), `Group(B)` = rnorm(10))
 94 | 
 95 | 
 96 | `Tom's secret function`(1,2)
 97 | l1$`Group(A)`
 98 | 
 99 | 
100 | results <- data.frame(`Group(A)` = rnorm(10), `Group(B)` = rnorm(10))
101 | results
102 | 
103 | 
104 | colnames(results)
105 | 
106 | 
107 | make.names(c("Population(before)", "Population(after)"))
108 | 
109 | 
110 | results <- data.frame(
111 |   ID = c(0, 1, 2, 3, 4),
112 |   Category = c("A", "A", "A", "B", "C"),
113 |   `Population(before)` = c(10, 12, 13, 11, 13), 
114 |   `Population(after)` = c(12, 13, 16, 12, 12),
115 |   stringsAsFactors = FALSE,
116 |   check.names = FALSE)
117 | results
118 | colnames(results)
119 | 
120 | 
121 | results$`Population(before)`
122 | 
123 | 
124 | check_positive <- function(x) {
125 |   if (x > 0) {
126 |     return(1)
127 |   }
128 | }
129 | 
130 | 
131 | check_positive(1)
132 | check_positive(0)
133 | 
134 | 
135 | check_sign <- function(x) {
136 |   if (x > 0) {
137 |     return(1)
138 |   } else if (x < 0) {
139 |     return(-1)
140 |   } else {
141 |     return(0)
142 |   }
143 | }
144 | 
145 | 
146 | check_sign(15)
147 | check_sign(-3.5)
148 | check_sign(0)
149 | 
150 | 
151 | say_sign <- function(x) {
152 |   if (x > 0) {
153 |     cat("The number is greater than 0")
154 |   } else if (x < 0) {
155 |     cat("The number is less than 0")
156 |   } else {
157 |     cat("The number is 0")
158 |   }
159 | }
160 | 
161 | 
162 | say_sign(0)
163 | say_sign(3)
164 | say_sign(-9)
165 | 
166 | 
167 | if (cond1) {
168 |   # do something
169 | }
170 | 
171 | 
172 | if (cond1) {
173 |   # do something
174 | } else {
175 |   # do something else
176 | }
177 | 
178 | 
179 | if (cond1) {
180 |   expr1
181 | } else if (cond2) {
182 |   expr2
183 | } else if (cond3) {
184 |   expr3
185 | } else {
186 |   expr4
187 | }
188 | 
189 | 
190 | grade <- function(score) {
191 |   if (score >= 90) {
192 |     return("A")
193 |   } else if (score >= 80) {
194 |     return("B")
195 |   } else if (score >= 70) {
196 |     return("C")
197 |   } else if (score >= 60) {
198 |     return("D")
199 |   } else {
200 |     return("F")
201 |   }
202 | }
203 | 
204 | c(grade(65), grade(59), grade(87), grade(96))
205 | 
206 | 
207 | grade2 <- function(score) {
208 |   if (score >= 60) {
209 |     return("D")
210 |   } else if (score >= 70) {
211 |     return("C")
212 |   } else if (score >= 80) {
213 |     return("B")
214 |   } else if (score >= 90) {
215 |     return("A")
216 |   } else {
217 |     return("F")
218 |   }
219 | }
220 | 
221 | c(grade2(65), grade2(59), grade2(87), grade2(96))
222 | 
223 | 
224 | grade2 <- function(score) {
225 |   if (score >= 60 && score < 70) {
226 |     return("D")
227 |   } else if (score >= 70 && score < 80) {
228 |     return("C")
229 |   } else if (score >= 80 && score < 90) {
230 |     return("B")
231 |   } else if (score >= 90) {
232 |     return("A")
233 |   } else {
234 |     return("F")
235 |   }
236 | }
237 | 
238 | c(grade2(65), grade2(59), grade2(87), grade2(96))
239 | 
240 | 
241 | check_positive <- function(x) {
242 |   return(if (x > 0) {
243 |     1
244 |   })
245 | }
246 | 
247 | 
248 | check_positive <- function(x) {
249 |   return(if (x > 0) 1)
250 | }
251 | 
252 | 
253 | check_positive <- function(x) {
254 |   if (x > 0) 1
255 | }
256 | 
257 | 
258 | check_sign <- function(x) {
259 |   if (x > 0) 1 else if (x < 0) -1 else 0
260 | }
261 | 
262 | 
263 | say_grade <- function(name, score) {
264 |   grade <- if (score >= 90) "A" 
265 |     else if (score >= 80) "B"
266 |     else if (score >= 70) "C"
267 |     else if (score >= 60) "D"
268 |     else "F"
269 |   cat("The grade of", name, "is", grade)
270 | }
271 | 
272 | say_grade("Betty", 86)
273 | 
274 | 
275 | say_grade <- function(name, score) {
276 |   if (score >= 90) grade <- "A"
277 |   cat("Congratulations!\n")
278 |   else if (score >= 80) grade <- "B"
279 |   else if (score >= 70) grade <- "C"
280 |   else if (score >= 60) grade <- "D"
281 |   else grade <- "F"
282 |   cat("What a pity!\n")
283 |   cat("The grade of", name, "is", grade)
284 | }
285 | 
286 | # code with errors
287 | say_grade <- function(name, score) {
288 |   if (score >= 90) grade <- "A"
289 |   cat("Congratulations!\n")
290 |   else if (score >= 80) grade <- "B"
291 |   else if (score >= 70) grade <- "C"
292 |   else if (score >= 60) grade <- "D"
293 |   else grade <- "F"
294 |   cat("What a pity!\n")
295 |   cat("The grade of", name, "is", grade)
296 | }
297 | 
298 | 
299 | say_grade <- function(name, score) {
300 |   if (score >= 90) {
301 |     grade <- "A"
302 |     cat("Congratulations!\n")
303 |   } else if (score >= 80) {
304 |     grade <- "B"
305 |   }
306 |   else if (score >= 70) {
307 |     grade <- "C"
308 |   }
309 |   else if (score >= 60) {
310 |     grade <- "D"
311 |   } else {
312 |     grade <- "F"
313 |     cat("What a pity!\n")
314 |   }
315 |   
316 |   cat("The grade of", name, "is", grade)
317 | }
318 | 
319 | say_grade("James", 93)
320 | 
321 | 
322 | check_positive(c(1, -1, 0))
323 | 
324 | 
325 | num <- c(1, 2, 3)
326 | if (num > 2) {
327 |   cat("num > 2!")
328 | }
329 | 
330 | 
331 | any(c(TRUE, FALSE, FALSE))
332 | any(c(FALSE, FALSE))
333 | 
334 | 
335 | if (any(num > 2)) {
336 |   cat("num > 2!")
337 | }
338 | 
339 | 
340 | if (all(num > 2)) {
341 |   cat("num > 2!")
342 | } else {
343 |   cat("Not all values are greater than 2!")
344 | }
345 | 
346 | 
347 | check <- function(x) {
348 |   if (all(x > 0)) {
349 |     cat("All input values are positive!")
350 |   } else {
351 |     cat("Some values are not positive!")
352 |   }
353 | }
354 | 
355 | 
356 | check(c(1, 2, 3))
357 | check(c(1, 2, NA, -1))
358 | check(c(1, 2, NA))
359 | 
360 | 
361 | ifelse(c(TRUE, FALSE, FALSE), c(1, 2, 3), c(4, 5, 6))
362 | 
363 | 
364 | check_positive2 <- function(x) {
365 |   ifelse(x, 1, 0)
366 | }
367 | 
368 | 
369 | ifelse(TRUE, c(1,2), c(2,3))
370 | 
371 | 
372 | if (TRUE) c(1,2) else c(2,3)
373 | 
374 | 
375 | ifelse(c(TRUE, FALSE), c(1, 2), c("a", "b"))
376 | 
377 | 
378 | switch(1, "x", "y")
379 | switch(2, "x", "y")
380 | 
381 | 
382 | switch(3, "x", "y")
383 | 
384 | 
385 | switch("a", a = 1, b = 2)
386 | switch("b", a = 1, b = 2)
387 | 
388 | 
389 | switch("c", a = 1, b = 2)
390 | 
391 | 
392 | switch("c", a = 1, b = 2, 3)
393 | 
394 | 
395 | switch_test <- function(x) {
396 |   switch(x, 
397 |     a = c(1, 2, 3), 
398 |     b = list(x = 0, y = 1), 
399 |     c = {
400 |       cat("You choose c!\n")
401 |       list(name = "c", value = "something")
402 |     })
403 | }
404 | 
405 | switch_test("a")
406 | switch_test("b")
407 | switch_test("c")
408 | 
409 | 
410 | for (var in vector) {
411 |   expr
412 | }
413 | 
414 | 
415 | var <- vector[[1]]
416 | expr
417 | var <- vector[[2]]
418 | expr
419 | var <- vector[[n]]
420 | expr
421 | 
422 | 
423 | for (i in 1:3) {
424 |   cat("The value of i is", i, "\n")
425 | }
426 | 
427 | 
428 | for (word in c("hello","new", "world")) {
429 |   cat("The current word is", word, "\n")
430 | }
431 | 
432 | 
433 | loop_list <- list(
434 |   a = c(1, 2, 3), 
435 |   b = c("a", "b", "c", "d"))
436 | 
437 | for (item in loop_list) {
438 |   cat("item:\n  length:", length(item), 
439 |     "\n  class: ", class(item), "\n")
440 | }
441 | 
442 | 
443 | df <- data.frame(
444 |   x = c(1, 2, 3), 
445 |   y = c("A", "B", "C"), 
446 |   stringsAsFactors = FALSE)
447 | 
448 | for (col in df) {
449 |   str(col)
450 | }
451 | 
452 | 
453 | for (i in 1:nrow(df)) {
454 |   row <- df[i,]
455 |   cat("row", i, "\n")
456 |   str(row)
457 |   cat("\n")
458 | }
459 | 
460 | 
461 | s <- 0
462 | for (i in 1:100) {
463 |   s <- s + i
464 | }
465 | s
466 | 
467 | 
468 | set.seed(123)
469 | x <- numeric(1000)
470 | for (t in 1:(length(x) - 1)) {
471 |   x[[t + 1]] <- x[[t]] + rnorm(1, 0, 0.1)
472 | }
473 | plot(x, type = "s", main = "Random walk", xlab = "t")
474 | 
475 | 
476 | sum100 <- sum(1:100)
477 | random_walk <- cumsum(rnorm(1000, 0, 0.1))
478 | 
479 | 
480 | for (i in 1:5) {
481 |   if (i == 3) break
482 |   cat("message ", i, "\n")
483 | }
484 | 
485 | 
486 | m <- integer()
487 | for (i in 1000:1100) {
488 |   if ((i ^ 2) %% 11 == (i ^ 3) %% 17) {
489 |     m <- c(m, i)
490 |   }
491 | }
492 | m
493 | 
494 | 
495 | for (i in 1000:1100) {
496 |   if ((i ^ 2) %% 11 == (i ^ 3) %% 17) break
497 | }
498 | i
499 | 
500 | 
501 | for (i in 1:5) {
502 |   if (i == 3) next
503 |   cat("message ", i, "\n")
504 | }
505 | 
506 | 
507 | x <- c("a", "b", "c")
508 | combx <- character()
509 | for (c1 in x) {
510 |   for (c2 in x) {
511 |     combx <- c(combx, paste(c1, c2, sep = ",", collapse = ""))
512 |   }
513 | }
514 | combx
515 | 
516 | 
517 | combx2 <- character()
518 | for (c1 in x) {
519 |   for (c2 in x) {
520 |     if (c1 == c2) next
521 |     combx2 <- c(combx2, paste(c1, c2, sep = ",", collapse = ""))
522 |   }
523 | }
524 | combx2
525 | 
526 | 
527 | if (c1 != c2) {
528 |   combx2 <- c(combx2, paste(c1, c2, sep = ",", collapse = ""))
529 | }
530 | 
531 | 
532 | combn(c("a", "b", "c"), 2)
533 | 
534 | 
535 | expand.grid(n = c(1, 2, 3), x = c("a", "b"))
536 | 
537 | 
538 | x <- 0
539 | while (x <= 5) {
540 |   cat(x, " ", sep = "")
541 |   x <- x + 1
542 | }
543 | 
544 | 
545 | x <- 0
546 | while (TRUE) {
547 |   x <- x + 1
548 |   if (x == 4) break
549 |   else if (x == 2) next
550 |   else cat(x, '\n')
551 | }
552 | 
553 | 
554 | res <- dbSendQuery(con, "SELECT * FROM table1 WHERE type = 1")
555 | while (!dbHasCompleted(res)) {
556 |   chunk <- dbFetch(res, 10000)
557 |   process(chunk)
558 | }
559 | 
560 | 
561 | x <- 0
562 | repeat {
563 |   x <- x + 1
564 |   if (x == 4) break
565 |   else if (x == 2) next
566 |   else cat(x, '\n')
567 | }
568 | 
569 | 


--------------------------------------------------------------------------------
/Chapter 5/working-with-basic-objects.R:
--------------------------------------------------------------------------------
  1 | take_it <- function(x) {
  2 |   if (is.atomic(x)) {
  3 |     x[[1]]
  4 |   } else if (is.list(x)) {
  5 |     x$data[[x$index]]
  6 |   } else {
  7 |     stop("Not supported input type")
  8 |   }
  9 | }
 10 | 
 11 | 
 12 | take_it(c(1, 2, 3))
 13 | take_it(list(data = c("a", "b", "c"), index = 3))
 14 | 
 15 | 
 16 | take_it(mean)
 17 | 
 18 | 
 19 | take_it(list(input = c("a", "b", "c")))
 20 | 
 21 | 
 22 | NULL[[1]]
 23 | NULL[[NULL]]
 24 | 
 25 | 
 26 | take_it(list(data = c("a", "b", "c")))
 27 | 
 28 | 
 29 | c("a", "b", "c")[[NULL]]
 30 | 
 31 | 
 32 | take_it(list(index = 2))
 33 | 
 34 | 
 35 | take_it2 <- function(x) {
 36 |   if (is.atomic(x)) {
 37 |     x[[1]]
 38 |   } else if (is.list(x)) {
 39 |     if (!is.null(x$data) && is.atomic(x$data)) {
 40 |       if (is.numeric(x$index) && length(x) == 1) {
 41 |         x$data[[x$index]]    
 42 |       } else {
 43 |         stop("Invalid index")  
 44 |       }
 45 |     } else {
 46 |       stop("Invalid data")
 47 |     }
 48 |   } else {
 49 |     stop("Not supported input type")
 50 |   }
 51 | }
 52 | 
 53 | 
 54 | take_it2(list(data = c("a", "b", "c")))
 55 | take_it2(list(index = 2))
 56 | 
 57 | 
 58 | x <- c(1, 2, 3)
 59 | class(x)
 60 | typeof(x)
 61 | str(x)
 62 | 
 63 | 
 64 | x <- 1:3
 65 | class(x)
 66 | typeof(x)
 67 | str(x)
 68 | 
 69 | 
 70 | x <- c("a", "b", "c")
 71 | class(x)
 72 | typeof(x)
 73 | str(x)
 74 | 
 75 | 
 76 | x <- list(a = c(1, 2), b = c(TRUE, FALSE))
 77 | class(x)
 78 | typeof(x)
 79 | str(x)
 80 | 
 81 | 
 82 | x <- data.frame(a = c(1, 2), b = c(TRUE, FALSE))
 83 | class(x)
 84 | typeof(x)
 85 | str(x)
 86 | 
 87 | 
 88 | vec <- c(1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6)
 89 | class(vec)
 90 | typeof(vec)
 91 | 
 92 | 
 93 | sample_matrix <- matrix(vec, ncol = 4)
 94 | sample_matrix
 95 | class(sample_matrix)
 96 | typeof(sample_matrix)
 97 | dim(sample_matrix)
 98 | nrow(sample_matrix)
 99 | ncol(sample_matrix)
100 | 
101 | 
102 | sample_array <- array(vec, dim = c(2, 3, 2))
103 | sample_array
104 | class(sample_array)
105 | typeof(sample_array)
106 | dim(sample_array)
107 | nrow(sample_array)
108 | ncol(sample_array)
109 | 
110 | 
111 | sample_data_frame <- data.frame(a = c(1, 2, 3), b = c(2, 3, 4))
112 | class(sample_data_frame)
113 | typeof(sample_data_frame)
114 | dim(sample_data_frame)
115 | nrow(sample_data_frame)
116 | ncol(sample_data_frame)
117 | 
118 | 
119 | sample_data <- vec
120 | dim(sample_data) <- c(3, 4)
121 | sample_data
122 | class(sample_data)
123 | typeof(sample_data)
124 | 
125 | 
126 | dim(sample_data) <- c(4, 3)
127 | sample_data
128 | 
129 | 
130 | dim(sample_data) <- c(3, 2, 2)
131 | sample_data
132 | class(sample_data)
133 | 
134 | 
135 | dim(sample_data) <- c(2, 3, 4)
136 | 
137 | 
138 | sample_data_frame
139 | 
140 | 
141 | for (i in 1:nrow(sample_data_frame)) {
142 |   # sample text:
143 |   # row #1, a: 1, b: 2
144 |   cat("row #", i, ", ",
145 |     "a: ", sample_data_frame[i, "a"], 
146 |     ", b: ", sample_data_frame[i, "b"],
147 |     "\n", sep = "")
148 | }
149 | 
150 | 
151 | test_direction <- function(x, y, z) {
152 |   if (x < y & y < z) 1
153 |   else if (x > y & y > z) -1
154 |   else 0
155 | }
156 | 
157 | 
158 | test_direction(1, 2, 3)
159 | 
160 | 
161 | test_direction(c(1, 2), c(2, 3), c(3, 4))
162 | 
163 | 
164 | test_direction2 <- function(x, y, z) {
165 |   if (x < y && y < z) 1
166 |   else if (x > y && y > z) -1
167 |   else 0
168 | }
169 | 
170 | 
171 | test_direction2(1, 2, 3)
172 | 
173 | 
174 | test_direction2(c(1, 2), c(2, 3), c(3, 4))
175 | 
176 | 
177 | x <- c(-2, -3, 2, 3, 1, 0, 0, 1, 2)
178 | any(x > 1)
179 | all(x <= 1)
180 | 
181 | 
182 | test_all_direction <- function(x, y, z) {
183 |   if (all(x < y & y < z)) 1
184 |   else if (all(x > y & y > z)) -1
185 |   else 0
186 | }
187 | 
188 | 
189 | test_all_direction(1, 2, 3)
190 | 
191 | 
192 | test_all_direction(c(1, 2), c(2, 3), c(3, 4))
193 | 
194 | 
195 | test_all_direction(c(1, 2), c(2, 4), c(3, 4))
196 | 
197 | 
198 | test_any_direction <- function(x, y, z) {
199 |   if (any(x < y & y < z)) 1
200 |   else if (any(x > y & y > z)) -1
201 |   else 0
202 | }
203 | test_all_direction2 <- function(x, y, z) {
204 |   if (all(x < y) && all(y < z)) 1 
205 |   else if (all(x > y) && all(y > z)) -1
206 |   else 0
207 | }
208 | test_any_direction2 <- function(x, y, z) {
209 |   if (any(x < y) && any(y < z)) 1 
210 |   else if (any(x > y) && any(y > z)) -1
211 |   else 0
212 | }
213 | 
214 | 
215 | x
216 | abs(x) >= 1.5
217 | which(abs(x) >= 1.5)
218 | 
219 | 
220 | x[x >= 1.5]
221 | 
222 | 
223 | x[x >= 100]
224 | 
225 | 
226 | x <- c(-2, -3, NA, 2, 3, 1, NA, 0, 1, NA, 2)
227 | 
228 | 
229 | x + 2
230 | 
231 | 
232 | x > 2
233 | 
234 | 
235 | x
236 | any(x > 2)
237 | any(x < -2)
238 | any(x < -3)
239 | 
240 | 
241 | any(c(TRUE, FALSE, NA))
242 | any(c(FALSE, FALSE, NA))
243 | any(c(FALSE, FALSE))
244 | 
245 | 
246 | any(x < -3, na.rm = TRUE)
247 | 
248 | 
249 | x
250 | all(x > -3)
251 | all(x >= -3)
252 | all(x < 4)
253 | 
254 | 
255 | all(c(TRUE, FALSE, NA))
256 | all(c(TRUE, TRUE, NA))
257 | all(c(TRUE, TRUE))
258 | 
259 | 
260 | all(x >= -3, na.rm = TRUE)
261 | 
262 | 
263 | x
264 | x[x >= 0]
265 | 
266 | 
267 | which(x >= 0)
268 | 
269 | 
270 | x[which(x >= 0)]
271 | 
272 | 
273 | if (2) 3
274 | if (0) 0 else 1
275 | 
276 | 
277 | if ("a") 1 else 2
278 | 
279 | 
280 | sqrt(-1)
281 | 
282 | 
283 | 1 / 0
284 | 
285 | 
286 | log(0)
287 | 
288 | 
289 | is.finite(1 / 0)
290 | is.infinite(log(0))
291 | 
292 | 
293 | 1 / 0 < 0
294 | 1 / 0 > 0
295 | log(0) < 0
296 | log(0) > 0
297 | 
298 | 
299 | is.pos.infinite <- function(x) {
300 |   is.infinite(x) & x > 0
301 | }
302 | is.neg.infinite <- function(x) {
303 |   is.infinite(x) & x < 0
304 | }
305 | is.pos.infinite(1/0)
306 | is.neg.infinite(log(0))
307 | 
308 | 
309 | log(-1)
310 | 
311 | 
312 | pi
313 | 
314 | 
315 | sin(pi)
316 | 
317 | 
318 | max(c(1, 2, 3))
319 | 
320 | 
321 | max(c(1, 2, 3),
322 |      c(2, 1, 2),
323 |      c(1, 3, 4))
324 | min(c(1, 2, 3),
325 |      c(2, 1, 2),
326 |      c(1, 3, 4))
327 | 
328 | 
329 | pmax(c(1, 2, 3),
330 |      c(2, 1, 2),
331 |      c(1, 3, 4))
332 | 
333 | 
334 | x <- list(c(1, 2, 3),
335 |           c(2, 1, 2),
336 |           c(1, 3, 4))
337 | c(max(x[[1]][[1]], x[[2]][[1]], x[[3]][[1]]),
338 |   max(x[[1]][[2]], x[[2]][[2]], x[[3]][[2]]),
339 |   max(x[[1]][[3]], x[[2]][[3]], x[[3]][[3]]))
340 | 
341 | 
342 | pmin(c(1, 2, 3),
343 |      c(2, 1, 2),
344 |      c(1, 3, 4))
345 | 
346 | 
347 | spread <- function(x) {
348 |   if (x < -5) -5
349 |   else if (x > 5) 5
350 |   else x
351 | }
352 | 
353 | 
354 | spread(1)
355 | spread(seq(-8, 8))
356 | 
357 | 
358 | spread2 <- function(x) {
359 |   pmin(5, pmax(-5, x))
360 | }
361 | spread2(seq(-8, 8))
362 | 
363 | 
364 | spread3 <- function(x) {
365 |   ifelse(x < -5, -5, ifelse(x > 5, 5, x))
366 | }
367 | spread3(seq(-8, 8))
368 | 
369 | 
370 | curve(spread2, -8, 8, xlab = "x", ylab = "spread", main = "spread")
371 | 
372 | 
373 | polyroot(c(-2, 1, 1))
374 | 
375 | 
376 | Re(polyroot(c(-2, 1, 1)))
377 | 
378 | 
379 | polyroot(c(1, 0, 1))
380 | 
381 | 
382 | r <- polyroot(c(-1, -2, -1, 1))
383 | r
384 | 
385 | 
386 | r ^ 3 - r ^ 2 - 2 * r - 1
387 | 
388 | 
389 | round(r ^ 3 - r ^ 2 - 2 * r - 1, 8)
390 | 
391 | 
392 | curve(x ^ 2 - exp(x), -2, 1, 
393 |   main = quote(x ^ 2 - e ^ x),
394 |   xlab = "x", ylab = "y")
395 | abline(h = 0, col = "red", lty = 2)
396 | 
397 | 
398 | uniroot(function(x) x ^ 2 - exp(x), c(-2, 1))
399 | 
400 | 
401 | curve(exp(x) - 3 * exp(-x ^ 2 + x) + 1, -2, 2, 
402 |   main = quote(e ^ x - 3 * e ^ (-x ^ 2 + x) + 1),
403 |   xlab = "x", ylab = "y")
404 | abline(h = 0, col = "red", lty = 2)
405 | 
406 | 
407 | f <- function(x) exp(x) - 3 * exp(-x ^ 2 + x) + 1
408 | uniroot(f, c(-2, 2))
409 | 
410 | 
411 | uniroot(f, c(-2, 0))$root
412 | uniroot(f, c(0, 2))$root
413 | 
414 | 
415 | curve(x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3, -5, 5, 
416 |   main = quote(x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3),
417 |   xlab = "x", ylab = "y")
418 | abline(h = 0, col = "red", lty = 2)
419 | 
420 | 
421 | uniroot(function(x) x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3, c(0, 1))$root
422 | 
423 | 
424 | D(quote(x ^ 2), "x")
425 | 
426 | 
427 | D(quote(sin(x) * cos(x * y)), "x")
428 | 
429 | 
430 | z <- D(quote(sin(x) * cos(x * y)), "x")
431 | z
432 | eval(z, list(x = 1, y = 2))
433 | 
434 | 
435 | result <- integrate(function(x) sin(x), 0, pi / 2)
436 | result
437 | 
438 | 
439 | str(result)
440 | 
441 | 
442 | sample(1:6, size = 5)
443 | 
444 | 
445 | sample(1:6, size = 5, replace = TRUE)
446 | 
447 | 
448 | sample(letters, size = 3)
449 | 
450 | 
451 | sample(list(a = 1, b = c(2, 3), c = c(3, 4, 5)), size = 2)
452 | 
453 | 
454 | grades <- sample(c("A", "B", "C"), size = 20, replace = TRUE, 
455 |   prob = c(0.25, 0.5, 0.25))
456 | grades
457 | 
458 | 
459 | table(grades)
460 | 
461 | 
462 | runif(5)
463 | 
464 | 
465 | runif(5, min = -1, max = 1)
466 | 
467 | 
468 | withr::with_par(list(mfrow = c(1, 2)), {
469 |   x <- runif(1000)
470 |   plot(x, main = "runif(1000)")
471 |   hist(x, main = "Histogram of runif(1000)")
472 | })
473 | 
474 | 
475 | rnorm(5)
476 | 
477 | 
478 | rnorm(5, mean = 2, sd = 0.5)
479 | 
480 | 
481 | withr::with_par(list(mfrow = c(1, 2)), {
482 |   x <- rnorm(1000)
483 |   plot(x, main = "rnorm(1000)")
484 |   hist(x, main = "Histogram of rnorm(1000)")
485 | })
486 | 
487 | 
488 | x <- rnorm(50)
489 | 
490 | 
491 | mean(x)
492 | 
493 | 
494 | sum(x) / length(x)
495 | 
496 | 
497 | mean(x, trim = 0.05)
498 | 
499 | 
500 | median(x)
501 | 
502 | 
503 | sd(x)
504 | 
505 | 
506 | var(x)
507 | 
508 | 
509 | c(min = min(x), max = max(x))
510 | 
511 | 
512 | range(x)
513 | 
514 | 
515 | quantile(x)
516 | 
517 | 
518 | quantile(x, probs = seq(0, 1, 0.1))
519 | 
520 | 
521 | summary(x)
522 | 
523 | 
524 | df <- data.frame(score = round(rnorm(100, 80, 10)),
525 |   grade = sample(letters[1:3], 100, replace = TRUE))
526 | summary(df)
527 | 
528 | 
529 | y <- 2 * x + 0.5 * rnorm(length(x))
530 | 
531 | 
532 | cov(x, y)
533 | 
534 | 
535 | cor(x, y)
536 | 
537 | 
538 | z <- runif(length(x))
539 | m1 <- cbind(x, y, z)
540 | cov(m1)
541 | 
542 | 
543 | cor(m1)
544 | 
545 | 
546 | len <- c(3, 4, 5)
547 | # first, create a list in the environment.
548 | x <- list()
549 | # then use `for` to generate the random vector for each length
550 | for (i in 1:3) {
551 |   x[[i]] <- rnorm(len[i])
552 | }
553 | x
554 | 
555 | 
556 | lapply(len, rnorm)
557 | 
558 | 
559 | students <- list(
560 |   a1 = list(name = "James", age = 25, 
561 |     gender = "M", interest = c("reading", "writing")),
562 |   a2 = list(name = "Jenny", age = 23, 
563 |     gender = "F", interest = c("cooking")),
564 |   a3 = list(name = "David", age = 24, 
565 |     gender = "M", interest = c("running", "basketball")))
566 | 
567 | 
568 | sprintf("Hello, %s! Your number is %d.", "Tom", 3)
569 | 
570 | 
571 | lapply(students, function(s) {
572 |   type <- switch(s$gender, "M" = "man", "F" = "woman")
573 |   interest <- paste(s$interest, collapse = ", ")
574 |   sprintf("%s, %d year-old %s, loves %s.", s$name, s$age, type, interest)
575 | })
576 | 
577 | 
578 | sapply(1:10, function(i) i ^ 2)
579 | 
580 | 
581 | sapply(1:10, function(i) c(i, i ^ 2))
582 | 
583 | 
584 | x <- list(c(1, 2), c(2, 3), c(1, 3))
585 | 
586 | 
587 | sapply(x, function(x) x ^ 2)
588 | 
589 | 
590 | x1 <- list(c(1, 2), c(2, 3), c(1, 3, 3))
591 | 
592 | 
593 | sapply(x1, function(x) x ^ 2)
594 | 
595 | 
596 | vapply(x1, function(x) x ^ 2, numeric(2))
597 | 
598 | 
599 | vapply(x, function(x) x ^ 2, numeric(2))
600 | 
601 | 
602 | mapply(function(a, b, c) a * b + b * c + a * c, 
603 |   a = c(1, 2, 3), b = c(5, 6, 7), c = c(-1, -2, -3))
604 | 
605 | 
606 | df <- data.frame(x = c(1, 2, 3), y = c(3, 4, 5))
607 | df
608 | mapply(function(xi, yi) c(xi, yi, xi + yi), df$x, df$y)
609 | 
610 | 
611 | Map(function(xi, yi) c(xi, yi, xi + yi), df$x, df$y)
612 | 
613 | 
614 | mat <- matrix(c(1, 2, 3, 4), nrow = 2)
615 | mat
616 | apply(mat, 1, sum)
617 | 
618 | 
619 | apply(mat, 2, sum)
620 | 
621 | 
622 | mat2 <- matrix(1:16, nrow = 4)
623 | mat2
624 | 
625 | 
626 | apply(mat2, 2, function(col) c(min = min(col), max = max(col)))
627 | 
628 | 
629 | apply(mat2, 1, function(col) c(min = min(col), max = max(col)))
630 | 
631 | 
632 | 


--------------------------------------------------------------------------------
/Chapter 6/working-with-strings.R:
--------------------------------------------------------------------------------
  1 | "Hello"
  2 | 
  3 | 
  4 | str1 <- "Hello"
  5 | str1
  6 | 
  7 | 
  8 | for (i in 1:3) {
  9 |   "Hello"
 10 | }
 11 | 
 12 | 
 13 | test1 <- function(x) {
 14 |   "Hello"
 15 |   x
 16 | }
 17 | test1("World")
 18 | 
 19 | 
 20 | test2 <- function(x) {
 21 |   "Hello"
 22 | }
 23 | test2("World")
 24 | 
 25 | 
 26 | print(str1)
 27 | 
 28 | 
 29 | for (i in 1:3) {
 30 |   print(str1)
 31 | }
 32 | 
 33 | 
 34 | test3 <- function(x) {
 35 |   print("Hello")
 36 |   x
 37 | }
 38 | test3("World")
 39 | 
 40 | 
 41 | cat("Hello")
 42 | 
 43 | 
 44 | name <- "Ken"
 45 | language <- "R"
 46 | cat("Hello,", name, "- a user of", language)
 47 | 
 48 | 
 49 | cat("Hello, ", name, ", a user of ", language, ".")
 50 | 
 51 | 
 52 | cat("Hello, ", name, ", a user of ", language, ".", sep = "")
 53 | 
 54 | 
 55 | message("Hello, ", name, ", a user of ", language, ".")
 56 | 
 57 | 
 58 | for (i in 1:3) {
 59 |   cat(letters[[i]])
 60 | }
 61 | 
 62 | 
 63 | for (i in 1:3) {
 64 |   message(letters[[i]])
 65 | }
 66 | 
 67 | 
 68 | for (i in 1:3) {
 69 |   cat(letters[[i]], "\n", sep = "")
 70 | }
 71 | 
 72 | 
 73 | paste("Hello", "world")
 74 | paste("Hello", "world", sep = "-")
 75 | 
 76 | 
 77 | paste0("Hello", "world")
 78 | 
 79 | 
 80 | value1 <- cat("Hello", "world")
 81 | value1
 82 | 
 83 | 
 84 | paste(c("A", "B"), c("C", "D"))
 85 | 
 86 | 
 87 | paste(c("A", "B"), c("C", "D"),collapse = ", ")
 88 | 
 89 | 
 90 | result <- paste(c("A", "B"), c("C", "D"), collapse = "\n")
 91 | result
 92 | 
 93 | 
 94 | cat(result)
 95 | 
 96 | 
 97 | tolower("Hello")
 98 | toupper("Hello")
 99 | 
100 | 
101 | calc <- function(type, x, y) {
102 |   type <- tolower(type)
103 |   if (type == "add") {
104 |     x + y
105 |   } else if (type == "times") {
106 |     x * y
107 |   } else {
108 |     stop("Not supported type of command")
109 |   }
110 | }
111 | c(calc("add", 2, 3), calc("Add", 2, 3), calc("TIMES", 2, 3))
112 | 
113 | 
114 | toupper(c("Hello", "world"))
115 | 
116 | 
117 | nchar("Hello")
118 | 
119 | 
120 | nchar(c("Hello", "R", "User"))
121 | 
122 | 
123 | store_student <- function(name, age) {
124 |   stopifnot(length(name) == 1, nchar(name) >= 2, 
125 |     is.numeric(age), age > 0)
126 |   # store the information in the database
127 | }
128 | 
129 | 
130 | store_student("James", 20)
131 | store_student("P", 23)
132 | 
133 | 
134 | store_student("  P", 23)
135 | 
136 | 
137 | store_student2 <- function(name, age) {
138 |   stopifnot(length(name) == 1, nchar(trimws(name)) >= 2, 
139 |     is.numeric(age), age > 0)
140 |   # store the information in the database
141 | }
142 | 
143 | 
144 | store_student2("  P", 23)
145 | 
146 | 
147 | trimws(c("  Hello", "World   "), which = "left")
148 | 
149 | 
150 | dates <- c("Jan 3", "Feb 10", "Nov 15") 
151 | 
152 | 
153 | substr(dates, 1, 3)
154 | 
155 | 
156 | substr(dates, 5, nchar(dates))
157 | 
158 | 
159 | get_month_day <- function(x) {
160 |   months <- vapply(substr(tolower(x), 1, 3), function(md) {
161 |     switch(md, jan = 1, feb = 2, mar = 3, apr = 4, 
162 |     may = 5, jun = 6, jul = 7, aug = 8, 
163 |     sep = 9, oct = 10, nov = 11, dec = 12)
164 |   }, numeric(1), USE.NAMES = FALSE)
165 |   days <- as.numeric(substr(x, 5, nchar(x)))
166 |   data.frame(month = months, day = days)
167 | }
168 | get_month_day(dates)
169 | 
170 | 
171 | substr(dates, 1, 3) <- c("Feb", "Dec", "Mar")
172 | dates
173 | 
174 | 
175 | strsplit("a,bb,ccc", split = ",")
176 | 
177 | 
178 | students <- strsplit(c("Tony, 26, Physics", "James, 25, Economics"), split = ", ")
179 | students
180 | 
181 | 
182 | students_matrix <- do.call(rbind, students)
183 | colnames(students_matrix) <- c("name", "age", "major")
184 | students_matrix
185 | 
186 | 
187 | students_df <- data.frame(students_matrix, stringsAsFactors = FALSE)
188 | students_df$age <- as.numeric(students_df$age)
189 | students_df
190 | 
191 | 
192 | strsplit(c("hello", "world"), split = "")
193 | 
194 | 
195 | cat(paste("#", 1:nrow(students_df), ", name: ", students_df$name,
196 |   ", age: ", students_df$age, ", major: ", students_df$major, sep = ""), sep = "\n")
197 | 
198 | 
199 | cat(sprintf("#%d, name: %s, age: %d, major: %s", 
200 |   1:nrow(students_df), students_df$name, students_df$age, students_df$major), sep = "\n")
201 | 
202 | 
203 | sprintf("The length of the line is approximately %.1fmm", 12.295)
204 | 
205 | 
206 | sprintf("The ratio is %d%%", 10)
207 | 
208 | 
209 | sprintf("%s, %d years old, majors in %s and loves %s.",  
210 |   "James", 25, "Physics", "Physics")
211 | 
212 | 
213 | # install.packages("pystr")
214 | library(pystr)
215 | pystr_format("{1}, {2} years old, majors in {3} and loves {3}.",
216 |   "James", 25, "Physics", "Physics")
217 | 
218 | 
219 | pystr_format("{name}, {age} years old, majors in {major} and loves {major}.", 
220 |   name = "James", age = 25, major = "Physics")
221 | 
222 | 
223 | Sys.Date()
224 | 
225 | 
226 | Sys.time()
227 | 
228 | 
229 | current_date <- Sys.Date()
230 | as.numeric(current_date)
231 | 
232 | 
233 | current_time <- Sys.time()
234 | as.numeric(current_time)
235 | 
236 | 
237 | as.Date(1000, "1970-01-01")
238 | 
239 | 
240 | my_date <- as.Date("2016-02-10")
241 | my_date
242 | 
243 | 
244 | my_date + 3
245 | my_date + 80
246 | my_date - 65
247 | 
248 | 
249 | date1 <- as.Date("2014-09-28")
250 | date2 <- as.Date("2015-10-20")
251 | date2 - date1
252 | 
253 | 
254 | as.numeric(date2 - date1)
255 | 
256 | 
257 | my_time <- as.POSIXlt("2016-02-10 10:25:31")
258 | my_time
259 | 
260 | 
261 | my_time + 10
262 | my_time + 12345
263 | my_time - 1234567
264 | 
265 | 
266 | as.Date("2015.07.25")
267 | 
268 | 
269 | as.Date("2015.07.25", format = "%Y.%m.%d")
270 | 
271 | 
272 | as.POSIXlt("7/25/2015 09:30:25", format = "%m/%d/%Y %H:%M:%S")
273 | 
274 | 
275 | strptime("7/25/2015 09:30:25", "%m/%d/%Y %H:%M:%S")
276 | 
277 | 
278 | as.Date(c("2015-05-01", "2016-02-12"))
279 | 
280 | 
281 | as.Date("2015-01-01") + 0:2
282 | 
283 | 
284 | strptime("7/25/2015 09:30:25", "%m/%d/%Y %H:%M:%S") + 1:3
285 | 
286 | 
287 | as.Date("20150610", format = "%Y%m%d")
288 | 
289 | 
290 | strptime("20150610093215", "%Y%m%d%H%M%S")
291 | 
292 | 
293 | datetimes <- data.frame(
294 |   date = c(20150601, 20150603),
295 |   time = c(92325, 150621))
296 | 
297 | 
298 | dt_text <- paste0(datetimes$date, datetimes$time)
299 | dt_text
300 | strptime(dt_text, "%Y%m%d%H%M%S")
301 | 
302 | 
303 | dt_text2 <- paste0(datetimes$date, sprintf("%06d", datetimes$time))
304 | dt_text2
305 | strptime(dt_text2, "%Y%m%d%H%M%S")
306 | 
307 | 
308 | my_date
309 | 
310 | 
311 | date_text <- as.character(my_date)
312 | date_text
313 | 
314 | 
315 | date_text + 1
316 | 
317 | 
318 | as.character(my_date, format = "%Y.%m.%d")
319 | 
320 | 
321 | format(my_date, "%Y.%m.%d")
322 | 
323 | 
324 | my_time
325 | format(my_time, "date: %Y-%m-%d, time: %H:%M:%S")
326 | 
327 | 
328 | read.csv("data/messages.txt", header = FALSE)
329 | 
330 | 
331 | fruits <- readLines("data/fruits.txt")
332 | fruits
333 | matches <- grep("^\\w+:\\s\\d+$", fruits)
334 | matches
335 | 
336 | 
337 | fruits[matches]
338 | 
339 | 
340 | grep("\\d", c("abc", "a12", "123", "1"))
341 | grep("^\\d$", c("abc", "a12", "123", "1"))
342 | 
343 | 
344 | library(stringr)
345 | matches <- str_match(fruits, "^(\\w+):\\s(\\d+)$")
346 | matches
347 | 
348 | 
349 | # transform to data frame
350 | fruits_df <- data.frame(na.omit(matches[, -1]), stringsAsFactors = FALSE)
351 | # add a header
352 | colnames(fruits_df) <- c("fruit","quantity")
353 | # convert type of quantity from character to integer
354 | fruits_df$quantity <- as.integer(fruits_df$quantity)
355 | 
356 | 
357 | fruits_df
358 | 
359 | 
360 | telephone <- readLines("data/telephone.txt")
361 | telephone
362 | 
363 | 
364 | telephone[grep("^\\d{3}-\\d{5}$", telephone)]
365 | telephone[grep("^\\d{4}-\\d{4}$", telephone)]
366 | 
367 | 
368 | telephone[!grepl("^\\d{3}-\\d{5}$", telephone) & !grepl("^\\d{4}-\\d{4}$", telephone)]
369 | 
370 | 
371 | messages <- readLines("data/messages.txt")
372 | 
373 | 
374 | pattern <- "^(\\d+-\\d+-\\d+),(\\d+:\\d+:\\d+),(\\w+),(\\w+),\\s*(.+)$"
375 | matches <- str_match(messages, pattern)
376 | messages_df <- data.frame(matches[, -1])
377 | colnames(messages_df) <- c("Date", "Time", "Sender", "Receiver", "Message")
378 | 
379 | 
380 | messages_df
381 | 


--------------------------------------------------------------------------------
/Chapter 7/working-with-data.R:
--------------------------------------------------------------------------------
  1 | set.seed(100)
  2 | 
  3 | readLines("data/persons.csv")
  4 | 
  5 | 
  6 | readLines("data/persons.csv", n = 2)
  7 | 
  8 | 
  9 | persons1 <- read.csv("data/persons.csv", stringsAsFactors = FALSE)
 10 | str(persons1)
 11 | 
 12 | 
 13 | persons2 <- read.csv("data/persons.csv", 
 14 |   colClasses = c("character", "factor", "integer", "character"),
 15 |   col.names = c("name", "sex", "age", "major"))
 16 | str(persons2)
 17 | 
 18 | 
 19 | persons3 <- readr::read_csv("data/persons.csv")
 20 | str(persons3)
 21 | 
 22 | 
 23 | read.table("data/persons.txt", sep = " ")
 24 | 
 25 | # install.packages("readr")
 26 | readr::read_table("data/persons.txt")
 27 | 
 28 | 
 29 | some_data <- data.frame(
 30 |   id = 1:4, 
 31 |   grade = c("A", "A", "B", NA), 
 32 |   width = c(1.51, 1.52, 1.46, NA),
 33 |   check_date = as.Date(c("2016-03-05", "2016-03-06", "2016-03-10", "2016-03-11")))
 34 | some_data
 35 | write.csv(some_data, "data/some_data.csv")
 36 | 
 37 | 
 38 | cat(readLines("data/some_data.csv"), sep = "\n")
 39 | 
 40 | 
 41 | write.csv(some_data, "data/some_data.csv", 
 42 |   quote = FALSE, na = "-", row.names = FALSE)
 43 | 
 44 | 
 45 | cat(readLines("data/some_data.csv"), sep = "\n")
 46 | 
 47 | 
 48 | readr::read_csv("data/some_data.csv", na = "-")
 49 | 
 50 | 
 51 | file.remove("data/some_data.csv")
 52 | 
 53 | 
 54 | 
 55 | # install.packages("readxl")
 56 | readxl::read_excel("data/prices.xlsx")
 57 | 
 58 | # install.packages("openxlsx")
 59 | openxlsx::read.xlsx("data/prices.xlsx", detectDates = TRUE)
 60 | 
 61 | 
 62 | openxlsx::write.xlsx(mtcars, "data/mtcars.xlsx")
 63 | 
 64 | 
 65 | saveRDS(some_data, "data/some_data.rds")
 66 | 
 67 | 
 68 | some_data2 <- readRDS("data/some_data.rds")
 69 | 
 70 | 
 71 | identical(some_data, some_data2)
 72 | 
 73 | 
 74 | rows <- 200000
 75 | large_data <- data.frame(id = 1:rows, 
 76 |   x = rnorm(rows), y = rnorm(rows))
 77 | system.time(write.csv(large_data, "data/large_data.csv"))
 78 | system.time(saveRDS(large_data, "data/large_data.rds"))
 79 | 
 80 | 
 81 | fileinfo <- file.info("data/large_data.csv", "data/large_data.rds")
 82 | fileinfo[, "size", drop = FALSE]
 83 | 
 84 | 
 85 | system.time(read.csv("data/large_data.csv"))
 86 | system.time(readr::read_csv("data/large_data.csv"))
 87 | 
 88 | 
 89 | system.time(readRDS("data/large_data.rds"))
 90 | 
 91 | 
 92 | nums <- c(1.5, 2.5, NA, 3)
 93 | list1 <- list(x = c(1, 2, 3), 
 94 |   y = list(a = c("a", "b"),
 95 |     b = c(NA, 1, 2.5)))
 96 | saveRDS(nums, "data/nums.rds")
 97 | saveRDS(list1, "data/list1.rds")
 98 | 
 99 | 
100 | readRDS("data/nums.rds")
101 | readRDS("data/list1.rds")
102 | 
103 | 
104 | save(some_data, nums, list1, file = "data/bundle1.RData")
105 | 
106 | 
107 | rm(some_data, nums, list1)
108 | load("data/bundle1.RData")
109 | 
110 | 
111 | some_data
112 | nums
113 | list1
114 | 
115 | 
116 | file.remove("data/some_data.rds", 
117 |   "data/large_data.csv", 
118 |   "data/large_data.rds",
119 |   "data/nums.rds",
120 |   "data/list1.rds",
121 |   "data/bundle1.RData")
122 | 
123 | 
124 | head(iris)
125 | str(iris)
126 | 
127 | 
128 | head(mtcars)
129 | str(mtcars)
130 | 
131 | 
132 | data("diamonds", package = "ggplot2")
133 | dim(diamonds)
134 | 
135 | 
136 | head(diamonds)
137 | 
138 | 
139 | install.package(c("nycflights13", "babynames"))
140 | 
141 | 
142 | plot(1:10)
143 | 
144 | 
145 | x <- rnorm(100)
146 | y <- 2 * x + rnorm(100)
147 | plot(x, y)
148 | 
149 | 
150 | plot(x, y, 
151 |   main = "Linearly correlated random numbers",
152 |   xlab = "x", ylab = "2x + noise",
153 |   xlim = c(-4, 4), ylim = c(-4, 4))
154 | 
155 | 
156 | plot(x, y, 
157 |   xlim = c(-4, 4), ylim = c(-4, 4),
158 |   xlab = "x", ylab = "2x + noise")
159 | title("Linearly correlated random numbers")
160 | 
161 | 
162 | plot(0:25, 0:25, pch = 0:25, 
163 |   xlim = c(-1, 26), ylim = c(-1, 26),
164 |   main = "Point styles (pch)")
165 | text(0:25 + 1, 0:25, 0:25)
166 | 
167 | 
168 | x <- rnorm(100)
169 | y <- 2 * x + rnorm(100)
170 | plot(x, y, pch = 16,
171 |   main = "Scatter plot with customized point style")
172 | 
173 | 
174 | plot(x, y, 
175 |   pch = ifelse(x * y > 1, 16, 1),
176 |   main = "Scatter plot with conditional point styles")
177 | 
178 | 
179 | z <- sqrt(1 + x ^ 2) + rnorm(100)
180 | plot(x, y, pch = 1, 
181 |   xlim = range(x), ylim = range(y, z),
182 |   xlab = "x", ylab = "value")
183 | points(x, z, pch = 17)
184 | title("Scatter plot with two series")
185 | 
186 | 
187 | plot(x, y, pch = 16, col = "blue", 
188 |   main = "Scatter plot with blue points")
189 | 
190 | 
191 | plot(x, y, pch = 16, 
192 |   col = ifelse(y >= mean(y), "red", "green"),
193 |   main = "Scatter plot with conditional colors")
194 | 
195 | 
196 | plot(x, y, col = "blue", pch = 0,
197 |   xlim = range(x), ylim = range(y, z),
198 |   xlab = "x", ylab = "value")
199 | points(x, z, col = "red", pch = 1)
200 | title("Scatter plot with two series")
201 | 
202 | 
203 | t <- 1:50
204 | y <- 3 * sin(t * pi / 60) + rnorm(t)
205 | plot(t, y, type = "l",
206 |   main = "Simple line plot")
207 | 
208 | 
209 | lty_values <- 1:6
210 | plot(lty_values, type = "n", axes = FALSE, ann = FALSE)
211 | abline(h = lty_values, lty = lty_values, lwd = 2)
212 | mtext(lty_values, side = 2, at = lty_values)
213 | title("Line types (lty)")
214 | 
215 | 
216 | plot(t, y, type = "l", lwd = 2)
217 | abline(h = mean(y), lty = 2, col = "blue")
218 | abline(h = range(y), lty = 3, col = "red")
219 | abline(v = t[c(which.min(y), which.max(y))], 
220 |   lty = 3, col = "darkgray")
221 | title("Line plot with auxiliary lines")
222 | 
223 | 
224 | p <- 40
225 | plot(t[t <= p], y[t <= p], type = "l", 
226 |   xlim = range(t), xlab = "t")
227 | lines(t[t >= p], y[t >= p], lty = 2)
228 | title("Simple line plot with two periods")
229 | 
230 | 
231 | plot(y, type = "l")
232 | points(y, pch = 16)
233 | title("Lines with points")
234 | 
235 | 
236 | plot(y, pch = 16)
237 | lines(y)
238 | title("Lines with points")
239 | 
240 | 
241 | x <- 1:30
242 | y <- 2 * x + 6 * rnorm(30)
243 | z <- 3 * sqrt(x) + 8 * rnorm(30)
244 | 
245 | plot(x, y, type = "l", 
246 |   ylim = range(y, z), col = "black")
247 | points(y, pch = 15)
248 | lines(z, lty = 2, col = "blue")
249 | points(z, pch = 16, col = "blue")
250 | title("Plot of two series")
251 | legend("topleft", 
252 |   legend = c("y", "z"),
253 |   col = c("black", "blue"),
254 |   lty = c(1, 2), pch = c(15, 16), cex = 0.8, 
255 |   x.intersp = 0.5, y.intersp = 0.8)
256 | 
257 | 
258 | plot(x, y, type = "s",
259 |   main = "A simple step plot")
260 | 
261 | 
262 | barplot(1:10, names.arg = LETTERS[1:10])
263 | 
264 | 
265 | ints <- 1:10
266 | names(ints) <- LETTERS[1:10]
267 | barplot(ints)
268 | 
269 | # install.packages("nycflights13")
270 | data("flights", package = "nycflights13")
271 | carriers <- table(flights$carrier)
272 | carriers
273 | 
274 | 
275 | sorted_carriers <- sort(carriers, decreasing = TRUE)
276 | sorted_carriers
277 | 
278 | 
279 | barplot(head(sorted_carriers, 8), 
280 |   ylim = c(0, max(sorted_carriers) * 1.1),
281 |   xlab = "Carrier", ylab = "Flights",
282 |   main = "Top 8 carriers with the most flights in record")
283 | 
284 | 
285 | grades <- c(A = 2, B = 10, C = 12, D = 8)
286 | pie(grades, main = "Grades", radius = 1)
287 | 
288 | 
289 | random_normal <- rnorm(10000)
290 | hist(random_normal)
291 | 
292 | 
293 | hist(random_normal, probability = TRUE, col = "lightgray")
294 | curve(dnorm, add = TRUE, lwd = 2, col = "blue")
295 | 
296 | 
297 | flight_speed <- flights$distance / flights$air_time
298 | hist(flight_speed, main = "Histogram of flight speed")
299 | 
300 | 
301 | plot(density(flight_speed, from = 2, na.rm = TRUE),
302 |   main = "Empirical distribution of flight speed")
303 | abline(v = mean(flight_speed, na.rm = TRUE), 
304 |   col = "blue", lty = 2)
305 | 
306 | 
307 | hist(flight_speed, 
308 |   probability = TRUE, ylim = c(0, 0.5),
309 |   main = "Histogram and empirical distribution of flight speed",
310 |   border = "gray", col = "lightgray")
311 | lines(density(flight_speed, from = 2, na.rm = TRUE),
312 |   col = "darkgray", lwd = 2)
313 | abline(v = mean(flight_speed, na.rm = TRUE), 
314 |   col = "blue", lty = 2)
315 | 
316 | 
317 | x <- rnorm(1000)
318 | boxplot(x)
319 | 
320 | 
321 | 
322 | 
323 | boxplot(distance / air_time ~ carrier, data = flights,
324 |   main = "Box plot of flight speed by carrier")
325 | 
326 | 
327 | f <- function(x) 3 + 2 * x
328 | x <- rnorm(100)
329 | y <- f(x) + 0.5 * rnorm(100)
330 | 
331 | 
332 | model1 <- lm(y ~ x)
333 | model1
334 | 
335 | 
336 | coef(model1)
337 | 
338 | 
339 | summary(model1)
340 | 
341 | 
342 | plot(x, y, main = "A simple linear regression")
343 | abline(coef(model1), col = "blue")
344 | 
345 | 
346 | predict(model1, list(x = c(-1, 0.5)), se.fit = TRUE)
347 | 
348 | 
349 | data("flights", package = "nycflights13")
350 | plot(air_time ~ distance, data = flights, 
351 |   pch = ".",
352 |   main = "flight speed plot")
353 | 
354 | 
355 | rows <- nrow(flights)
356 | rows_id <- 1:rows
357 | sample_id <- sample(rows_id, rows * 0.75, replace = FALSE)
358 | flights_train <- flights[sample_id,]
359 | flights_test <- flights[setdiff(rows_id, sample_id), ]
360 | 
361 | 
362 | model2 <- lm(air_time ~ distance, data = flights_train)
363 | predict2_train <- predict(model2, flights_train)
364 | error2_train <- flights_train$air_time - predict2_train
365 | 
366 | 
367 | evaluate_error <- function(x) {
368 |   c(abs_err = mean(abs(x), na.rm = TRUE),
369 |     std_dev = sd(x, na.rm = TRUE))
370 | }
371 | 
372 | 
373 | evaluate_error(error2_train)
374 | 
375 | 
376 | predict2_test <- predict(model2, flights_test)
377 | error2_test <- flights_test$air_time - predict2_test
378 | evaluate_error(error2_test)
379 | 
380 | 
381 | model3 <- lm(air_time ~ carrier + distance + month + dep_time,
382 |   data = flights_train)
383 | predict3_train <- predict(model3, flights_train)
384 | error3_train <- flights_train$air_time - predict3_train
385 | evaluate_error(error3_train)
386 | 
387 | 
388 | predict3_test <- predict(model3, flights_test)
389 | error3_test <- flights_test$air_time - predict3_test
390 | evaluate_error(error3_test)
391 | 
392 | 
393 | plot(density(error2_test, na.rm = TRUE),
394 |   main = "Empirical distributions of out-of-sample errors")
395 | lines(density(error3_test, na.rm = TRUE), lty = 2)
396 | legend("topright", legend = c("model2", "model3"),
397 |   lty = c(1, 2), cex = 0.8,
398 |   x.intersp = 0.6, y.intersp = 0.6)
399 | 
400 | # install.packages("party")
401 | airct <- party::ctree(Ozone ~ ., 
402 |   data = subset(airquality, !is.na(Ozone)), 
403 |   controls = party::ctree_control(maxsurrogate = 3))
404 | plot(airct, main = "Regression tree of air quality")
405 | 
406 | 
407 | model4 <- party::ctree(air_time ~ distance + month + dep_time, 
408 |   data = subset(flights_train, !is.na(air_time)))
409 | predict4_train <- predict(model4, flights_train)
410 | error4_train <- flights_train$air_time - predict4_train[, 1]
411 | evaluate_error(error4_train)
412 | 
413 | 
414 | predict4_test <- predict(model4, flights_test)
415 | error4_test <- flights_test$air_time - predict4_test[, 1]
416 | evaluate_error(error4_test)
417 | 
418 | 
419 | plot(density(error3_test, na.rm = TRUE), 
420 |   ylim = range(0, 0.06),
421 |   main = "Empirical distributions of out-of-sample errors")
422 | lines(density(error4_test, na.rm = TRUE), lty = 2)
423 | legend("topright", legend = c("model3", "model4"),
424 |   lty = c(1, 2), cex = 0.8,
425 |   x.intersp = 0.6, y.intersp = 0.6)
426 | 


--------------------------------------------------------------------------------
/Chapter 8/inside-r.R:
--------------------------------------------------------------------------------
  1 | test0 <- function(x, y) {
  2 |   if (x > 0) x else y
  3 | }
  4 | 
  5 | 
  6 | test0(1)
  7 | 
  8 | 
  9 | test0(-1)
 10 | 
 11 | 
 12 | test0(1, stop("Stop now"))
 13 | 
 14 | 
 15 | test0(-1, stop("Stop now"))
 16 | 
 17 | 
 18 | system.time(rnorm(10000000))
 19 | 
 20 | 
 21 | system.time(1)
 22 | 
 23 | 
 24 | system.time(test0(1, rnorm(10000000)))
 25 | 
 26 | 
 27 | test1 <- function(x, y = stop("Stop now")) {
 28 |   if (x > 0) x else y
 29 | }
 30 | 
 31 | 
 32 | test1(1)
 33 | 
 34 | 
 35 | test1(-1)
 36 | 
 37 | 
 38 | test2 <- function(x, n = floor(length(x) / 2)) {
 39 |   x[1:n]
 40 | }
 41 | 
 42 | 
 43 | test2(1:10)
 44 | 
 45 | 
 46 | test2(1:10, 3)
 47 | 
 48 | 
 49 | test3 <- function(x, n = floor(length(m) / 2)) {
 50 |   x[1:n]
 51 | }
 52 | 
 53 | 
 54 | test3(1:10)
 55 | 
 56 | 
 57 | m <- c(1, 2, 3)
 58 | test3(1:10)
 59 | 
 60 | 
 61 | test4 <- function(x, y = p) {
 62 |   p <- x + 1
 63 |   c(x, y)
 64 | }
 65 | 
 66 | 
 67 | test4(1)
 68 | 
 69 | 
 70 | check_input <- function(x) {
 71 |   switch(x, 
 72 |     y = message("yes"), 
 73 |     n = message("no"),
 74 |     stop("Invalid input"))
 75 | }
 76 | 
 77 | 
 78 | check_input("y")
 79 | 
 80 | 
 81 | check_input("n")
 82 | 
 83 | 
 84 | check_input("what")
 85 | 
 86 | 
 87 | x1 <- c(1, 2, 3)
 88 | 
 89 | 
 90 | x2 <- x1
 91 | 
 92 | 
 93 | x1[1] <- 0
 94 | x1
 95 | x2
 96 | 
 97 | 
 98 | x1 <- c(1, 2, 3)
 99 | x2 <- x1
100 | 
101 | 
102 | tracemem(x1)
103 | tracemem(x2)
104 | 
105 | 
106 | x1[1] <- 0
107 | 
108 | 
109 | untracemem(x1)
110 | untracemem(x2)
111 | 
112 | 
113 | modify_first <- function(x) {
114 |   x[1] <- 0
115 |   x
116 | }
117 | 
118 | 
119 | v1 <- c(1, 2, 3)
120 | modify_first(v1)
121 | v1
122 | 
123 | 
124 | v2 <- list(x = 1, y = 2)
125 | modify_first(v2)
126 | v2
127 | 
128 | 
129 | v1[1] <- 0
130 | v1
131 | 
132 | 
133 | v2[1] <- 0
134 | v2
135 | 
136 | 
137 | v3 <- 1:5
138 | v3 <- modify_first(v3)
139 | v3
140 | 
141 | 
142 | change_names <- function(x) {
143 |   if (is.data.frame(x)) {
144 |     rownames(x) <- NULL
145 |     if (ncol(x) <= length(LETTERS)) {
146 |       colnames(x) <- LETTERS[1:ncol(x)]
147 |     } else {
148 |       stop("Too many columns to rename")
149 |     }
150 |   } else {
151 |     stop("x must be a data frame")
152 |   }
153 |   x
154 | }
155 | 
156 | 
157 | small_df <- data.frame(
158 |   id = 1:3, 
159 |   width = runif(3, 5, 10), 
160 |   height = runif(3, 5, 10))
161 | small_df
162 | 
163 | 
164 | change_names(small_df)
165 | 
166 | 
167 | small_df
168 | 
169 | 
170 | x <- 0
171 | modify_x <- function(value) {
172 |   x <<- value
173 | }
174 | 
175 | 
176 | modify_x(3)
177 | x
178 | 
179 | 
180 | count <- 0
181 | lapply(1:3, function(x) {
182 |   result <- 1:x
183 |   count <<- count + length(result)
184 |   result
185 | })
186 | count
187 | 
188 | 
189 | nested_list <- list(
190 |   a = c(1, 2, 3), 
191 |   b = list(
192 |     x = c("a", "b", "c"), 
193 |     y = list(
194 |       z = c(TRUE, FALSE), 
195 |       w = c(2, 3, 4))
196 |   )
197 | )
198 | str(nested_list)
199 | 
200 | 
201 | flat_list <- list()
202 | i <- 1
203 | 
204 | 
205 | res <- rapply(nested_list, function(x) {
206 |   flat_list[[i]] <<- x
207 |   i <<- i + 1
208 | })
209 | 
210 | 
211 | res
212 | 
213 | 
214 | names(flat_list) <- names(res)
215 | str(flat_list)
216 | 
217 | 
218 | start_num <- 1
219 | end_num <- 10
220 | fun1 <- function(x) {
221 |   c(start_num, x, end_num)
222 | }
223 | 
224 | 
225 | fun1(c(4, 5, 6))
226 | 
227 | 
228 | rm(start_num, end_num)
229 | fun1(c(4, 5, 6))
230 | 
231 | 
232 | rm(fun1, start_num, end_num)
233 | fun1 <- function(x) {
234 |   c(start_num, x, end_num)
235 | }
236 | 
237 | 
238 | fun1(c(4, 5, 6))
239 | 
240 | 
241 | start_num <- 1
242 | end_num <- 10
243 | fun1(c(4, 5, 6))
244 | 
245 | 
246 | p <- 0
247 | fun2 <- function(x) {
248 |   p <- 1
249 |   x + p
250 | }
251 | 
252 | 
253 | fun2(1)
254 | 
255 | 
256 | f1 <- function(x) {
257 |   x + p
258 | }
259 | g1 <- function(x) {
260 |   p <- 1
261 |   f1(x)
262 | }
263 | 
264 | 
265 | g1(0)
266 | 
267 | 
268 | p <- 1
269 | g1(0)
270 | 
271 | 
272 | m <- 1
273 | f2 <- function(x) {
274 |   m <<- 2
275 |   x
276 | }
277 | g2 <- function(x) {
278 |   m <- 1
279 |   f2(x)
280 |   cat(sprintf("[g2] m: %d\n", m))
281 | }
282 | 
283 | 
284 | g2(1)
285 | 
286 | 
287 | m
288 | 
289 | 
290 | f <- function(x) {
291 |   p <- 1
292 |   q <- 2
293 |   cat(sprintf("1. [f1] p: %d, q: %d\n", p, q))
294 |   f2 <- function(x) {
295 |     p <- 3
296 |     cat(sprintf("2. [f2] p: %d, q: %d\n", p, q))
297 |     c(x = x, p = p, q = q)
298 |   }
299 |   cat(sprintf("3. [f1] p: %d, q: %d\n", p, q))
300 |   f2(x)
301 | }
302 | 
303 | 
304 | f(0)
305 | 
306 | 
307 | g <- function(x) {
308 |   p <- 1
309 |   q <- 2
310 |   cat(sprintf("1. [f1] p: %d, q: %d\n", p, q))
311 |   g2 <- function(x) {
312 |     p <<- 3
313 |     p <- 2
314 |     cat(sprintf("2. [f2] p: %d, q: %d\n", p, q))
315 |     c(x = x, p = p, q = q)
316 |   }
317 |   cat(sprintf("3. [f1] p: %d, q: %d\n", p, q))
318 |   result <- g2(x)
319 |   cat(sprintf("4. [f1] p: %d, q: %d\n", p, q))
320 |   result
321 | }
322 | 
323 | 
324 | g(0)
325 | 
326 | 
327 | e1 <- new.env()
328 | 
329 | 
330 | e1
331 | 
332 | 
333 | e1$x <- 1
334 | e1[["x"]]
335 | 
336 | 
337 | e1[1:3]
338 | 
339 | 
340 | e1[[1]]
341 | 
342 | 
343 | exists("x", e1)
344 | 
345 | 
346 | get("x", e1)
347 | 
348 | 
349 | ls(e1)
350 | 
351 | 
352 | e1$y
353 | e1[["y"]]
354 | 
355 | 
356 | get("y", e1)
357 | 
358 | 
359 | exists("y", e1)
360 | 
361 | 
362 | e2 <- new.env(parent = e1)
363 | 
364 | 
365 | e2
366 | e1
367 | 
368 | 
369 | parent.env(e2)
370 | 
371 | 
372 | e2$y <- 2
373 | 
374 | 
375 | ls(e2)
376 | 
377 | 
378 | e2$y
379 | e2[["y"]]
380 | exists("y", e2)
381 | get("y", e2)
382 | 
383 | 
384 | e2$x
385 | e2[["x"]]
386 | 
387 | 
388 | exists("x", e2)
389 | get("x", e1)
390 | 
391 | 
392 | exists("x", e2, inherits = FALSE)
393 | 
394 | 
395 | get("x", e2, inherits = FALSE)
396 | 
397 | 
398 | ls(e1)
399 | e3 <- e1
400 | 
401 | 
402 | e3$y
403 | e1$y <- 2
404 | e3$y
405 | 
406 | 
407 | modify <- function(e) {
408 |   e$z <- 10
409 | }
410 | 
411 | 
412 | list1 <- list(x = 1, y = 2)
413 | list1$z
414 | modify(list1)
415 | list1$z
416 | 
417 | 
418 | e1$z
419 | modify(e1)
420 | e1$z
421 | 
422 | 
423 | environment()
424 | 
425 | 
426 | global <- environment()
427 | global$some_obj <- 1
428 | 
429 | 
430 | some_obj
431 | 
432 | 
433 | globalenv()
434 | 
435 | 
436 | baseenv()
437 | 
438 | 
439 | parents <- function(env) {
440 |   while (TRUE) {
441 |     name <- environmentName(env)
442 |     txt <- if (nzchar(name)) name else format(env)
443 |     cat(txt, "\n")
444 |     env <- parent.env(env)
445 |   }
446 | }
447 | 
448 | 
449 | parents(globalenv())
450 | 
451 | 
452 | search()
453 | 
454 | 
455 | median(c(1, 2, 1 + 3))
456 | 
457 | 
458 | simple_fun <- function() {
459 |   cat("Executing environment: ")
460 |   print(environment())
461 |   cat("Enclosing environment: ")
462 |   print(parent.env(environment()))
463 | }
464 | 
465 | 
466 | simple_fun()
467 | simple_fun()
468 | simple_fun()
469 | 
470 | 
471 | environment(simple_fun)
472 | 
473 | 
474 | f1 <- function() {
475 |   cat("[f1] Executing in ")
476 |   print(environment())
477 |   cat("[f1] Enclosed by ")
478 |   print(parent.env(environment()))
479 |   cat("[f1] Calling from ")
480 |   print(parent.frame())
481 |   
482 |   f2 <- function() {
483 |     cat("[f2] Executing in ")
484 |     print(environment())
485 |     cat("[f2] Enclosed by ")
486 |     print(parent.env(environment()))
487 |     cat("[f2] Calling from ")
488 |     print(parent.frame())
489 |   }
490 |   
491 |   f3 <- function() {
492 |     cat("[f3] Executing in ")
493 |     print(environment())
494 |     cat("[f3] Enclosed by ")
495 |     print(parent.env(environment()))
496 |     cat("[f3] Calling from ")
497 |     print(parent.frame())
498 |     f2()
499 |   }
500 |   
501 |   f3()
502 | }
503 | 
504 | 
505 | f1()
506 | 


--------------------------------------------------------------------------------
/Chapter 9/meta-programming.R:
--------------------------------------------------------------------------------
  1 | add <- function(x, y) {
  2 |   x + y
  3 | }
  4 | 
  5 | 
  6 | addn <- function(y) {
  7 |   function(x) {
  8 |     x + y
  9 |   }
 10 | }
 11 | 
 12 | 
 13 | add1 <- addn(1)
 14 | add2 <- addn(2)
 15 | 
 16 | 
 17 | add1(10)
 18 | add2(10)
 19 | 
 20 | 
 21 | add1
 22 | 
 23 | 
 24 | environment(add1)$y
 25 | 
 26 | 
 27 | environment(add2)$y
 28 | 
 29 | 
 30 | color_line <- function(col) {
 31 |   function(...) {
 32 |     plot(..., type = "l", lty = 1, col = col)
 33 |   }
 34 | }
 35 | 
 36 | 
 37 | red_line <- color_line("red")
 38 | red_line(rnorm(30), main = "Red line plot")
 39 | 
 40 | 
 41 | plot(rnorm(30), type = "l", lty = 1, col = "red",
 42 |   main = "Red line plot")
 43 | 
 44 | 
 45 | nloglik <- function(x) {
 46 |   n <- length(x)
 47 |   function(mean, sd) {
 48 |     log(2 * pi) * n / 2 + log(sd ^ 2) * n / 2 + sum((x - mean) ^ 2) / (2 * sd ^ 2)
 49 |   }
 50 | }
 51 | 
 52 | 
 53 | data <- rnorm(10000, 1, 2)
 54 | 
 55 | 
 56 | fit <- stats4::mle(nloglik(data), 
 57 |   start = list(mean = 0, sd = 1), method = "L-BFGS-B", 
 58 |   lower = c(-5, 0.01), upper = c(5, 10))
 59 | 
 60 | 
 61 | fit@coef
 62 | 
 63 | 
 64 | (fit@coef - c(1, 2)) / c(1, 2)
 65 | 
 66 | 
 67 | hist(data, freq = FALSE, ylim = c(0, 0.25))
 68 | curve(dnorm(x, 1, 2), add = TRUE, col = rgb(1, 0, 0, 0.5), lwd = 6)
 69 | curve(dnorm(x, fit@coef[["mean"]], fit@coef[["sd"]]), 
 70 |     add = TRUE, col = "blue", lwd = 2)
 71 | 
 72 | 
 73 | f1 <- function() {
 74 |   cat("[f1] executing in ")
 75 |   print(environment())
 76 |   cat("[f1] enclosed by ")
 77 |   print(parent.env(environment()))
 78 |   cat("[f1] calling from ")
 79 |   print(parent.frame())
 80 | }
 81 | f2 <- function() {
 82 |   cat("[f2] executing in ")
 83 |   print(environment())
 84 |   cat("[f2] enclosed by ")
 85 |   print(parent.env(environment()))
 86 |   cat("[f2] calling from ")
 87 |   print(parent.frame())
 88 |   p <- f1
 89 |   p()
 90 | }
 91 | f1()
 92 | f2()
 93 | 
 94 | 
 95 | f1 <- function(x, y) {
 96 |   if (x > y) {
 97 |     x + y
 98 |   } else {
 99 |     x - y
100 |   }
101 | }
102 | 
103 | 
104 | f2 <- function(x, y) {
105 |   op <- if (x > y) `+` else `-`
106 |   op(x, y)
107 | }
108 | 
109 | 
110 | add <- function(x, y, z) {
111 |   x + y + z
112 | }
113 | product <- function(x, y, z) {
114 |   x * y * z
115 | }
116 | 
117 | 
118 | combine <- function(f, x, y, z) {
119 |   f(x, y, z)
120 | }
121 | 
122 | 
123 | combine(add, 3, 4, 5)
124 | combine(product, 3, 4, 5)
125 | 
126 | 
127 | result <- list()
128 | for (i in seq_along(x)) {
129 |     result[[i]] <- f(x[[i]])
130 | }
131 | result
132 | 
133 | 
134 | lapply(x, f)
135 | 
136 | 
137 | lapply <- function(x, f, ...) {
138 |   result <- list()
139 |   for (i in seq_along(x)) {
140 |       result[[i]] <- f(x[i], ...)
141 |   }
142 | }
143 | 
144 | 
145 | lapply(1:3, `+`, 3)
146 | 
147 | 
148 | list(1 + 3, 2 + 3, 3 + 3)
149 | 
150 | 
151 | lapply(1:3, addn(3))
152 | 
153 | 
154 | sapply(1:3, addn(3))
155 | 
156 | 
157 | vapply(1:3, addn(3), numeric(1))
158 | 
159 | 
160 | result <- list()
161 | for (i in seq_along(x)) {
162 |   # heavy computing task
163 |   result[[i]] <- f(x[[i]])
164 | }
165 | result
166 | 
167 | 
168 | result <- lapply(x, f)
169 | 
170 | 
171 | result <- parallel::mclapply(x, f)
172 | 
173 | 
174 | iris[iris$Sepal.Length > quantile(iris$Sepal.Length, 0.8) &
175 |     iris$Sepal.Width > quantile(iris$Sepal.Width, 0.8) &
176 |     iris$Petal.Length > quantile(iris$Petal.Length, 0.8) &
177 |     iris$Petal.Width > quantile(iris$Petal.Width, 0.8), ]
178 | 
179 | 
180 | subset(iris, 
181 |   Sepal.Length > quantile(Sepal.Length, 0.8) &
182 |     Sepal.Width > quantile(Sepal.Width, 0.8) &
183 |     Petal.Length > quantile(Petal.Length, 0.8) &
184 |     Petal.Width > quantile(Petal.Width, 0.8))
185 | 
186 | 
187 | iris[Sepal.Length > quantile(Sepal.Length, 0.8) &
188 |     Sepal.Width > quantile(Sepal.Width, 0.8) &
189 |     Petal.Length > quantile(Petal.Length, 0.8) &
190 |     Petal.Width > quantile(Petal.Width, 0.8), ]
191 | 
192 | 
193 | subset(iris, 
194 |   Sepal.Length > quantile(Sepal.Length, 0.8) &
195 |     Sepal.Width > quantile(Sepal.Width, 0.8) &
196 |     Petal.Length > quantile(Petal.Length, 0.8) &
197 |     Petal.Width > quantile(Petal.Width, 0.8),
198 |   select = c(Sepal.Length, Petal.Length, Species))
199 | 
200 | 
201 | rnorm(5)
202 | 
203 | 
204 | call1 <- quote(rnorm(5))
205 | call1
206 | 
207 | 
208 | typeof(call1)
209 | class(call1)
210 | 
211 | 
212 | name1 <- quote(rnorm)
213 | name1
214 | typeof(name1)
215 | class(name1)
216 | 
217 | 
218 | quote(pvar)
219 | quote(xfun(a = 1:n))
220 | 
221 | 
222 | as.list(call1)
223 | 
224 | 
225 | call1[[1]]
226 | typeof(call1[[1]])
227 | class(call1[[1]])
228 | 
229 | 
230 | call1[[2]]
231 | typeof(call1[[2]])
232 | class(call1[[2]])
233 | 
234 | 
235 | num1 <- 100
236 | num2 <- quote(100)
237 | 
238 | 
239 | num1
240 | num2
241 | 
242 | 
243 | identical(num1, num2)
244 | 
245 | 
246 | call2 <- quote(c("a", "b"))
247 | call2
248 | 
249 | 
250 | as.list(call2)
251 | 
252 | 
253 | str(as.list(call2))
254 | 
255 | 
256 | call3 <- quote(1 + 1)
257 | call3
258 | 
259 | 
260 | is.call(call3)
261 | str(as.list(call3))
262 | 
263 | 
264 | call4 <- quote(sqrt(1 + x ^ 2))
265 | call4
266 | 
267 | 
268 | pryr::call_tree(call4)
269 | 
270 | 
271 | call1
272 | call1[[1]] <- quote(runif)
273 | call1
274 | 
275 | 
276 | call1[[3]] <- -1
277 | names(call1)[[3]] <- "min"
278 | call1
279 | 
280 | 
281 | fun1 <- function(x) {
282 |   quote(x)
283 | }
284 | 
285 | 
286 | fun1(rnorm(5))
287 | 
288 | 
289 | fun2 <- function(x) {
290 |   substitute(x)
291 | }
292 | fun2(rnorm(5))
293 | 
294 | 
295 | substitute(x + y + x ^ 2, list(x = 1))
296 | 
297 | 
298 | substitute(f(x + f(y)), list(f = quote(sin)))
299 | 
300 | 
301 | call1 <- quote(rnorm(5, mean = 3))
302 | call1
303 | 
304 | 
305 | call2 <- call("rnorm", 5,  mean = 3)
306 | call2
307 | 
308 | 
309 | call3 <- as.call(list(quote(rnorm), 5, mean = 3))
310 | call3
311 | 
312 | 
313 | identical(call1, call2)
314 | identical(call2, call3)
315 | 
316 | 
317 | sin(1)
318 | 
319 | 
320 | call1 <- quote(sin(1))
321 | call1
322 | eval(call1)
323 | 
324 | 
325 | call2 <- quote(sin(x))
326 | call2
327 | 
328 | 
329 | eval(call2)
330 | 
331 | 
332 | sin(x)
333 | 
334 | 
335 | eval(call2, list(x = 1))
336 | 
337 | 
338 | e1 <- new.env()
339 | e1$x <- 1
340 | eval(call2, e1)
341 | 
342 | 
343 | call3 <- quote(x ^ 2 + y ^ 2)
344 | call3
345 | 
346 | 
347 | eval(call3)
348 | 
349 | 
350 | eval(call3, list(x = 2))
351 | 
352 | 
353 | eval(call3, list(x = 2, y = 3))
354 | 
355 | 
356 | e1 <- new.env()
357 | e1$x <- 2
358 | eval(call3, e1)
359 | 
360 | 
361 | e2 <- new.env(parent = e1)
362 | e2$y <- 3
363 | eval(call3, e2)
364 | 
365 | 
366 | pryr::call_tree(call3)
367 | 
368 | 
369 | e3 <- new.env()
370 | e3$y <- 3
371 | eval(call3, list(x = 2), e3)
372 | 
373 | 
374 | eval(quote(z <- x + y + 1), list(x = 1), e3)
375 | e3$z
376 | 
377 | 
378 | eval(quote(z <- y + 1), e3)
379 | e3$z
380 | 
381 | 
382 | eval(quote(1 + 1), list(`+` = `-`))
383 | 
384 | 
385 | x <- 1:10
386 | x[3:(length(x) - 5)]
387 | 
388 | 
389 | qs <- function(x, range) {
390 |   range <- substitute(range)
391 |   selector <- eval(range, list(. = length(x)))
392 |   x[selector]
393 | }
394 | 
395 | 
396 | qs(x, 3:(. - 5))
397 | 
398 | 
399 | qs(x, . - 1)
400 | 
401 | 
402 | trim_margin <- function(x, n) {
403 |   qs(x, (n + 1):(. - n - 1))
404 | }
405 | 
406 | 
407 | trim_margin(x, 3)
408 | 
409 | 
410 | eval
411 | 
412 | 
413 | qs <- function(x, range) {
414 |   range <- substitute(range)
415 |   selector <- eval(range, list(. = length(x)), parent.frame())
416 |   x[selector]
417 | }
418 | 
419 | 
420 | trim_margin(x, 3)
421 | 
422 | 
423 | formula1 <- z ~ x ^ 2 + y ^ 2
424 | 
425 | 
426 | typeof(formula1)
427 | class(formula1)
428 | 
429 | 
430 | str(as.list(formula1))
431 | 
432 | 
433 | is.call(formula1)
434 | length(formula1)
435 | 
436 | 
437 | formula1[[2]]
438 | formula1[[3]]
439 | 
440 | 
441 | environment(formula1)
442 | 
443 | 
444 | formula2 <- ~ x + y
445 | str(as.list(formula2))
446 | 
447 | 
448 | length(formula2)
449 | formula2[[2]]
450 | 
451 | 
452 | qs2 <- function(x, range) {
453 |   selector <- if (inherits(range, "formula")) {
454 |     eval(range[[2]], list(. = length(x)), environment(range))
455 |   } else range
456 |   x[selector]
457 | }
458 | 
459 | 
460 | qs2(1:10, ~ 3:(. - 2))
461 | 
462 | 
463 | qs2(1:10, 3)
464 | 
465 | 
466 | trim_margin2 <- function(x, n) {
467 |   qs2(x, ~ (n + 1):(. - n - 1))
468 | }
469 | 
470 | 
471 | trim_margin2(x, 3)
472 | 
473 | 
474 | subset2 <- function(x, subset = TRUE, select = TRUE) {
475 |   enclos <- parent.frame()
476 |   subset <- substitute(subset)
477 |   select <- substitute(select)
478 |   row_selector <- eval(subset, x, enclos)
479 |   col_envir <- as.list(seq_along(x))
480 |   names(col_envir) <- colnames(x)
481 |   col_selector <- eval(select, col_envir, enclos)
482 |   x[row_selector, col_selector]
483 | }
484 | 
485 | 
486 | subset2(mtcars, mpg >= quantile(mpg, 0.9), c(mpg, cyl, qsec))
487 | 
488 | 
489 | subset2(mtcars, mpg >= quantile(mpg, 0.9), mpg:drat)
490 | 
491 | 
492 | 


--------------------------------------------------------------------------------
/License:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Packt
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | # Learning R Programming
 5 | 
 6 | This is the code repository for the book Learning R Programming, Published by Packt.It contains all the supporting project files necessary to work through the book from start to finish.
 7 | 
 8 | 
 9 | ## Code snippets
10 | All of the code is organized into folders. Each folder starts with a number followed by the application name. For example, Chapter02
11 | ~~~
12 | class(c(TRUE, TRUE, FALSE))
13 | class(c("Hello", "World"))
14 | 
15 | 
16 | is.numeric(c(1, 2, 3))
17 | is.numeric(c(TRUE, TRUE, FALSE))
18 | is.numeric(c("Hello", "World"))
19 | 
20 | 
21 | strings <- c("1", "2", "3")
22 | class(strings)
23 | ~~~
24 | 
25 | ## Related R products
26 | 
27 | [Instant R Starter](https://www.packtpub.com/big-data-and-business-intelligence/instant-r-starter-instant?utm_source=github&utm_campaign=9781782163503&utm_medium=repository)
28 | 
29 | [R Object-oriented Programming](https://www.packtpub.com/big-data-and-business-intelligence/r-object-oriented-programming?utm_source=github&utm_medium=repository&utm_campaign=9781783986682)
30 | 
31 | [R for Data Science](https://www.packtpub.com/big-data-and-business-intelligence/r-data-science?utm_source=github&utm_medium=repository&utm_campaign=9781784390860)
32 | 
33 | ### Suggestion and Feedback 
34 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform)
35 | ### Download a free PDF
36 | 
37 |  <i>If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.<br>Simply click on the link to claim your free PDF.</i>
38 | <p align="center"> <a href="https://packt.link/free-ebook/9781785889776">https://packt.link/free-ebook/9781785889776 </a> </p>


--------------------------------------------------------------------------------