├── .gitattributes ├── .gitignore ├── Chapter 10 └── object-oriented-programming.R ├── Chapter 11 └── working-with-databases.R ├── Chapter 12 └── data-manipulation.R ├── Chapter 13 ├── high-performance-computing.txt ├── my_cumsum1.R ├── rcpp-algo1.cpp ├── rcpp-demo.cpp ├── rcpp-diff-openmp.cpp ├── rcpp-diff.cpp └── rcpp-parallel.cpp ├── Chapter 14 ├── data │ ├── new-products.html │ ├── products.html │ ├── simple-page.html │ ├── simple-products.html │ └── single-table.html └── web-scrapping.R ├── Chapter 15 └── boosting-productivity.R ├── Chapter 2 └── basic-objects.R ├── Chapter 3 └── managing-your-workspace.R ├── Chapter 4 └── basic-expressions.R ├── Chapter 5 └── working-with-basic-objects.R ├── Chapter 6 └── working-with-strings.R ├── Chapter 7 └── working-with-data.R ├── Chapter 8 └── inside-r.R ├── Chapter 9 └── meta-programming.R ├── License └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | -------------------------------------------------------------------------------- /Chapter 10/object-oriented-programming.R: -------------------------------------------------------------------------------- 1 | vec1 <- c(1, 2, 3) 2 | typeof(vec1) 3 | class(vec1) 4 | 5 | 6 | data1 <- data.frame(x = 1:3, y = rnorm(3)) 7 | typeof(data1) 8 | class(data1) 9 | 10 | 11 | head 12 | 13 | 14 | num_vec <- c(1, 2, 3, 4, 5) 15 | data_frame <- data.frame(x = 1:5, y = rnorm(5)) 16 | 17 | 18 | head(num_vec, 3) 19 | 20 | 21 | head(data_frame, 3) 22 | 23 | 24 | simple_head <- function(x, n) { 25 | x[1:n] 26 | } 27 | 28 | 29 | simple_head(num_vec, 3) 30 | 31 | 32 | simple_head(data_frame, 3) 33 | 34 | 35 | simple_head2 <- function(x, n) { 36 | if (is.data.frame(x)) { 37 | x[1:n,] 38 | } else { 39 | x[1:n] 40 | } 41 | } 42 | 43 | 44 | simple_head2(num_vec, 3) 45 | simple_head2(data_frame, 3) 46 | 47 | 48 | methods("head") 49 | 50 | 51 | lm1 <- lm(mpg ~ cyl + vs, data = mtcars) 52 | 53 | 54 | typeof(lm1) 55 | class(lm1) 56 | 57 | 58 | lm1 59 | 60 | 61 | print(lm1) 62 | 63 | 64 | identical(getS3method("print", "lm"), stats:::print.lm) 65 | 66 | 67 | length(methods("print")) 68 | 69 | 70 | summary(lm1) 71 | 72 | 73 | lm1summary <- summary(lm1) 74 | typeof(lm1summary) 75 | class(lm1summary) 76 | 77 | 78 | names(lm1summary) 79 | 80 | 81 | coef(lm1) 82 | 83 | 84 | coef(lm1summary) 85 | 86 | 87 | oldpar <- par(mfrow = c(2, 2)) 88 | plot(lm1) 89 | par(oldpar) 90 | 91 | 92 | predict(lm1, data.frame(cyl = c(6, 8), vs = c(1, 1))) 93 | 94 | 95 | plot(mtcars$mpg, fitted(lm1)) 96 | 97 | 98 | plot(density(residuals(lm1)), 99 | main = "Density of lm1 residuals") 100 | 101 | 102 | install.packages("rpart") 103 | 104 | 105 | library(rpart) 106 | tree_model <- rpart(mpg ~ cyl + vs, data = mtcars) 107 | 108 | 109 | typeof(tree_model) 110 | class(tree_model) 111 | 112 | 113 | print(tree_model) 114 | 115 | 116 | summary(tree_model) 117 | 118 | 119 | oldpar <- par(xpd = NA) 120 | plot(tree_model) 121 | text(tree_model, use.n = TRUE) 122 | par(oldpar) 123 | 124 | 125 | predict(tree_model, data.frame(cyl = c(6, 8), vs = c(1, 1))) 126 | 127 | 128 | coef(tree_model) 129 | 130 | 131 | generic_head <- function(x, n) 132 | UseMethod("generic_head") 133 | 134 | 135 | generic_head.default <- function(x, n) { 136 | x[1:n] 137 | } 138 | 139 | 140 | generic_head(num_vec, 3) 141 | 142 | 143 | generic_head(data_frame, 3) 144 | 145 | 146 | generic_head.data.frame <- function(x, n) { 147 | x[1:n,] 148 | } 149 | 150 | 151 | generic_head(data_frame, 3) 152 | 153 | 154 | product <- function(name, price, inventory) { 155 | obj <- list(name = name, 156 | price = price, 157 | inventory = inventory) 158 | class(obj) <- "product" 159 | obj 160 | } 161 | 162 | 163 | product <- function(name, price, inventory) { 164 | structure(list(name = name, 165 | price = price, 166 | inventory = inventory), 167 | class = "product") 168 | } 169 | 170 | 171 | laptop <- product("Laptop", 499, 300) 172 | 173 | 174 | typeof(laptop) 175 | class(laptop) 176 | 177 | 178 | laptop 179 | 180 | 181 | print.product <- function(x, ...) { 182 | cat("\n") 183 | cat("name:", x$name, "\n") 184 | cat("price:", x$price, "\n") 185 | cat("inventory:", x$inventory, "\n") 186 | invisible(x) 187 | } 188 | 189 | 190 | laptop 191 | 192 | 193 | laptop$name 194 | laptop$price 195 | laptop$inventory 196 | 197 | 198 | cellphone <- product("Phone", 249, 12000) 199 | products <- list(laptop, cellphone) 200 | products 201 | 202 | 203 | product("Basket", 150, -0.5) 204 | 205 | 206 | product <- function(name, price, inventory) { 207 | stopifnot(is.character(name), length(name) == 1, 208 | is.numeric(price), length(price) == 1, 209 | is.numeric(inventory), length(inventory) == 1, 210 | price > 0, inventory >= 0) 211 | structure(list(name = name, 212 | price = as.numeric(price), 213 | inventory = as.integer(inventory)), 214 | class = "product") 215 | } 216 | 217 | 218 | product("Basket", 150, -0.5) 219 | 220 | 221 | value <- function(x, ...) 222 | UseMethod("value") 223 | 224 | value.default <- function(x, ...) { 225 | stop("Value is undefined") 226 | } 227 | 228 | value.product <- function(x, ...) { 229 | x$price * x$inventory 230 | } 231 | 232 | 233 | value(laptop) 234 | value(cellphone) 235 | 236 | 237 | sapply(products, value) 238 | 239 | 240 | laptop$price <- laptop$price * 0.85 241 | 242 | 243 | laptop$value <- laptop$price * laptop$inventory 244 | 245 | 246 | laptop 247 | 248 | 249 | percent <- function(x) { 250 | stopifnot(is.numeric(x)) 251 | class(x) <- c("percent", "numeric") 252 | x 253 | } 254 | 255 | 256 | pct <- percent(c(0.1, 0.05, 0.25, 0.23)) 257 | pct 258 | 259 | 260 | as.character.percent <- function(x, ...) { 261 | paste0(as.numeric(x) * 100, "%") 262 | } 263 | 264 | 265 | as.character(pct) 266 | 267 | 268 | format.percent <- function(x, ...) { 269 | as.character(x, ...) 270 | } 271 | 272 | 273 | format(pct) 274 | 275 | 276 | print.percent <- function(x, ...) { 277 | print(format.percent(x), quote = FALSE) 278 | } 279 | 280 | 281 | pct 282 | 283 | 284 | pct + 0.2 285 | pct * 0.5 286 | 287 | 288 | sum(pct) 289 | mean(pct) 290 | max(pct) 291 | min(pct) 292 | 293 | 294 | sum.percent <- function(...) { 295 | percent(NextMethod("sum")) 296 | } 297 | mean.percent <- function(x, ...) { 298 | percent(NextMethod("mean")) 299 | } 300 | max.percent <- function(...) { 301 | percent(NextMethod("max")) 302 | } 303 | min.percent <- function(...) { 304 | percent(NextMethod("max")) 305 | } 306 | 307 | 308 | sum(pct) 309 | mean(pct) 310 | max(pct) 311 | min(pct) 312 | 313 | 314 | c(pct, 0.12) 315 | 316 | 317 | c.percent <- function(x, ...) { 318 | percent(NextMethod("c")) 319 | } 320 | 321 | 322 | c(pct, 0.12, -0.145) 323 | 324 | 325 | pct[1:3] 326 | pct[[2]] 327 | 328 | 329 | `[.percent` <- function(x, i) { 330 | percent(NextMethod("[")) 331 | } 332 | `[[.percent` <- function(x, i) { 333 | percent(NextMethod("[[")) 334 | } 335 | 336 | 337 | pct[1:3] 338 | pct[[2]] 339 | 340 | 341 | data.frame(id = 1:4, pct) 342 | 343 | 344 | Vehicle <- function(class, name, speed) { 345 | obj <- new.env(parent = emptyenv()) 346 | obj$name <- name 347 | obj$speed <- speed 348 | obj$position <- c(0, 0, 0) 349 | class(obj) <- c(class, "vehicle") 350 | obj 351 | } 352 | 353 | 354 | Car <- function(...) { 355 | Vehicle(class = "car", ...) 356 | } 357 | Bus <- function(...) { 358 | Vehicle(class = "bus", ...) 359 | } 360 | Airplane <- function(...) { 361 | Vehicle(class = "airplane", ...) 362 | } 363 | 364 | 365 | car <- Car("Model-A", 80) 366 | bus <- Bus("Medium-Bus", 45) 367 | airplane <- Airplane("Big-Plane", 800) 368 | 369 | 370 | print.vehicle <- function(x, ...) { 371 | cat(sprintf("\n", class(x)[[1]])) 372 | cat("name:", x$name, "\n") 373 | cat("speed:", x$speed, "km/h\n") 374 | cat("position:", paste(x$position, collapse = ", ")) 375 | } 376 | 377 | 378 | car 379 | bus 380 | airplane 381 | 382 | 383 | move <- function(vehicle, x, y, z) { 384 | UseMethod("move") 385 | } 386 | move.vehicle <- function(vehicle, movement) { 387 | if (length(movement) != 3) { 388 | stop("All three dimensions must be specified to move a vehicle") 389 | } 390 | vehicle$position <- vehicle$position + movement 391 | vehicle 392 | } 393 | 394 | 395 | move.bus <- move.car <- function(vehicle, movement) { 396 | if (length(movement) != 2) { 397 | stop("This vehicle only supports 2d movement") 398 | } 399 | movement <- c(movement, 0) 400 | NextMethod("move") 401 | } 402 | 403 | 404 | move.airplane <- function(vehicle, movement) { 405 | if (length(movement) == 2) { 406 | movement <- c(movement, 0) 407 | } 408 | NextMethod("move") 409 | } 410 | 411 | 412 | move(car, c(1, 2, 3)) 413 | 414 | 415 | move(car, c(1, 2)) 416 | 417 | 418 | move(airplane, c(1, 2)) 419 | 420 | 421 | move(airplane, c(20, 50, 80)) 422 | 423 | 424 | setClass("Product", 425 | representation(name = "character", 426 | price = "numeric", 427 | inventory = "integer")) 428 | 429 | 430 | getSlots("Product") 431 | 432 | 433 | laptop <- new("Product", name = "Laptop-A", price = 299, inventory = 100) 434 | 435 | 436 | laptop <- new("Product", name = "Laptop-A", price = 299, inventory = 100L) 437 | laptop 438 | 439 | 440 | typeof(laptop) 441 | class(laptop) 442 | 443 | 444 | isS4(laptop) 445 | 446 | 447 | laptop@price * laptop@inventory 448 | 449 | 450 | slot(laptop, "price") 451 | 452 | 453 | laptop@price <- 289 454 | 455 | 456 | laptop@inventory <- 200 457 | 458 | 459 | laptop@value <- laptop@price * laptop@inventory 460 | 461 | 462 | toy <- new("Product", name = "Toys", price = 10) 463 | toy 464 | 465 | 466 | setClass("Product", 467 | representation(name = "character", 468 | price = "numeric", 469 | inventory = "integer"), 470 | prototype(name = "Unnamed", price = NA_real_, inventory = 0L)) 471 | 472 | 473 | toy <- new("Product", name = "Toys", price = 5) 474 | toy 475 | 476 | 477 | bottle <- new("Product", name = "Bottle", price = 1.5, inventory = -2L) 478 | bottle 479 | 480 | 481 | validate_product <- function(object) { 482 | errors <- c( 483 | if (length(object@name) != 1) 484 | "Length of name should be 1" 485 | else if (is.na(object@name)) 486 | "name should not be missing value", 487 | 488 | if (length(object@price) != 1) 489 | "Length of price should be 1" 490 | else if (is.na(object@price)) 491 | "price should not be missing value" 492 | else if (object@price <= 0) 493 | "price must be positive", 494 | 495 | if (length(object@inventory) != 1) 496 | "Length of inventory should be 1" 497 | else if (is.na(object@inventory)) 498 | "inventory should not be missing value" 499 | else if (object@inventory < 0) 500 | "inventory must be non-negative") 501 | if (length(errors) == 0) TRUE else errors 502 | } 503 | 504 | 505 | validate_product(bottle) 506 | 507 | 508 | setClass("Product", 509 | representation(name = "character", 510 | price = "numeric", 511 | inventory = "integer"), 512 | prototype(name = "Unnamed", price = NA_real_, inventory = 0L), 513 | validity = validate_product) 514 | 515 | 516 | bottle <- new("Product", name = "Bottle") 517 | 518 | 519 | bottle <- new("Product", name = "Bottle", price = 3, inventory = -2L) 520 | 521 | 522 | bottle <- new("Product", name = "Bottle", 523 | price = 3, inventory = 100L, volume = 15) 524 | 525 | 526 | setClass("Container", 527 | representation(volume = "numeric"), 528 | contains = "Product") 529 | 530 | 531 | getSlots("Container") 532 | 533 | 534 | bottle <- new("Container", name = "Bottle", 535 | price = 3, inventory = 100L, volume = 15) 536 | 537 | 538 | bottle <- new("Container", name = "Bottle", 539 | price = 3, inventory = -10L, volume = 15) 540 | 541 | 542 | bottle <- new("Container", name = "Bottle", 543 | price = 3, inventory = 100L, volume = -2) 544 | 545 | 546 | validate_container <- function(object) { 547 | errors <- c( 548 | if (length(object@volume) != 1) 549 | "Length of volume must be 1", 550 | if (object@volume <= 0) 551 | "volume must be positive" 552 | ) 553 | if (length(errors) == 0) TRUE else errors 554 | } 555 | 556 | 557 | setClass("Container", 558 | representation(volume = "numeric"), 559 | contains = "Product", 560 | validity = validate_container) 561 | 562 | 563 | bottle <- new("Container", name = "Bottle", 564 | price = 3, inventory = 100L, volume = -2) 565 | bottle <- new("Container", name = "Bottle", 566 | price = 3, inventory = -5L, volume = 10) 567 | 568 | 569 | setClass("Shape") 570 | setClass("Polygon", 571 | representation(sides = "integer"), 572 | contains = "Shape") 573 | setClass("Triangle", 574 | representation(a = "numeric", b = "numeric", c = "numeric"), 575 | prototype(a = 1, b = 1, c = 1, sides = 3L), 576 | contains = "Polygon") 577 | setClass("Rectangle", 578 | representation(a = "numeric", b = "numeric"), 579 | prototype(a = 1, b = 1, sides = 4L), 580 | contains = "Polygon") 581 | setClass("Circle", 582 | representation(r = "numeric"), 583 | prototype(r = 1, sides = Inf), 584 | contains = "Shape") 585 | 586 | 587 | setGeneric("area", function(object) { 588 | standardGeneric("area") 589 | }, valueClass = "numeric") 590 | 591 | 592 | setMethod("area", signature("Triangle"), function(object) { 593 | a <- object@a 594 | b <- object@b 595 | c <- object@c 596 | s <- (a + b + c) / 2 597 | sqrt(s * (s - a) * (s - b) * (s - c)) 598 | }) 599 | 600 | 601 | setMethod("area", signature("Rectangle"), function(object) { 602 | object@a * object@b 603 | }) 604 | setMethod("area", signature("Circle"), function(object) { 605 | pi * object@r ^ 2 606 | }) 607 | 608 | 609 | triangle <- new("Triangle", a = 3, b = 4, c = 5) 610 | area(triangle) 611 | 612 | 613 | circle <- new("Circle", r = 3) 614 | area(circle) 615 | 616 | 617 | setClass("Object", representation(height = "numeric")) 618 | setClass("Cylinder", contains = "Object") 619 | setClass("Cone", contains = "Object") 620 | 621 | 622 | setGeneric("volume", 623 | function(shape, object) standardGeneric("volume")) 624 | 625 | 626 | setMethod("volume", signature("Rectangle", "Cylinder"), 627 | function(shape, object) { 628 | shape@a * shape@b * object@height 629 | }) 630 | setMethod("volume", signature("Rectangle", "Cone"), 631 | function(shape, object) { 632 | shape@a * shape@b * object@height / 3 633 | }) 634 | 635 | 636 | rectangle <- new("Rectangle", a = 2, b = 3) 637 | cylinder <- new("Cylinder", height = 3) 638 | volume(rectangle, cylinder) 639 | 640 | 641 | setMethod("volume", signature("Shape", "Cylinder"), 642 | function(shape, object) { 643 | area(shape) * object@height 644 | }) 645 | setMethod("volume", signature("Shape", "Cone"), 646 | function(shape, object) { 647 | area(shape) * object@height / 3 648 | }) 649 | 650 | 651 | circle <- new("Circle", r = 2) 652 | cone <- new("Cone", height = 3) 653 | volume(circle, cone) 654 | 655 | 656 | setMethod("volume", signature("Shape", "numeric"), 657 | function(shape, object) { 658 | area(shape) * object 659 | }) 660 | 661 | 662 | volume(rectangle, 3) 663 | 664 | 665 | setMethod("*", signature("Shape", "Object"), 666 | function(e1, e2) { 667 | volume(e1, e2) 668 | }) 669 | 670 | 671 | rectangle * cone 672 | 673 | 674 | lengthen <- function(object, factor) { 675 | object@height <- object@height * factor 676 | object 677 | } 678 | 679 | 680 | cylinder 681 | lengthen(cylinder, 2) 682 | cylinder 683 | 684 | 685 | Vehicle <- setRefClass("Vehicle", 686 | fields = list(position = "numeric", distance = "numeric")) 687 | 688 | 689 | car <- Vehicle$new(position = 0, distance = 0) 690 | 691 | 692 | car$position 693 | 694 | 695 | move <- function(vehicle, movement) { 696 | vehicle$position <- vehicle$position + movement 697 | vehicle$distance <- vehicle$distance + abs(movement) 698 | } 699 | 700 | 701 | move(car, 10) 702 | car 703 | 704 | 705 | Vehicle <- setRefClass("Vehicle", 706 | fields = list(position = "numeric", distance = "numeric"), 707 | methods = list(move = function(x) { 708 | stopifnot(is.numeric(x)) 709 | position <<- position + x 710 | distance <<- distance + abs(x) 711 | })) 712 | 713 | 714 | bus <- Vehicle(position = 0, distance = 0) 715 | bus$move(5) 716 | bus 717 | 718 | 719 | install.packages("R6") 720 | 721 | 722 | library(R6) 723 | Vehicle <- R6Class("Vehicle", 724 | public = list( 725 | name = NA, 726 | model = NA, 727 | initialize = function(name, model) { 728 | if (!missing(name)) self$name <- name 729 | if (!missing(model)) self$model <- model 730 | }, 731 | move = function(movement) { 732 | private$start() 733 | private$position <- private$position + movement 734 | private$stop() 735 | }, 736 | get_position = function() { 737 | private$position 738 | } 739 | ), 740 | private = list( 741 | position = 0, 742 | speed = 0, 743 | start = function() { 744 | cat(self$name, "is starting\n") 745 | private$speed <- 50 746 | }, 747 | stop = function() { 748 | cat(self$name, "is stopping\n") 749 | private$speed <- 0 750 | } 751 | )) 752 | 753 | 754 | car <- Vehicle$new(name = "Car", model = "A") 755 | car 756 | 757 | 758 | car$move(10) 759 | car$get_position() 760 | 761 | 762 | MeteredVehicle <- R6Class("MeteredVehicle", 763 | inherit = Vehicle, 764 | public = list( 765 | move = function(movement) { 766 | super$move(movement) 767 | private$distance <<- private$distance + abs(movement) 768 | }, 769 | get_distance = function() { 770 | private$distance 771 | } 772 | ), 773 | private = list( 774 | distance = 0 775 | )) 776 | 777 | 778 | bus <- MeteredVehicle$new(name = "Bus", model = "B") 779 | bus 780 | 781 | 782 | bus$move(10) 783 | bus$get_position() 784 | bus$get_distance() 785 | 786 | 787 | bus$move(-5) 788 | bus$get_position() 789 | bus$get_distance() 790 | -------------------------------------------------------------------------------- /Chapter 11/working-with-databases.R: -------------------------------------------------------------------------------- 1 | install.packages("RSQLite") 2 | 3 | 4 | if (file.exists("data/example.sqlite")) 5 | file.remove("data/example.sqlite") 6 | 7 | 8 | if (!dir.exists("data")) dir.create("data") 9 | 10 | 11 | library(RSQLite) 12 | con <- dbConnect(SQLite(), "data/example.sqlite") 13 | 14 | 15 | example1 <- data.frame( 16 | id = 1:5, 17 | type = c("A", "A", "B", "B", "C"), 18 | score = c(8, 9, 8, 10, 9), 19 | stringsAsFactors = FALSE) 20 | example1 21 | 22 | 23 | dbWriteTable(con, "example1", example1) 24 | 25 | 26 | dbDisconnect(con) 27 | 28 | 29 | if (file.exists("data/datasets.sqlite")) 30 | file.remove("data/datasets.sqlite") 31 | 32 | 33 | install.packages(c("ggplot2", "nycflights13")) 34 | 35 | 36 | data("diamonds", package = "ggplot2") 37 | data("flights", package = "nycflights13") 38 | 39 | 40 | con <- dbConnect(SQLite(), "data/datasets.sqlite") 41 | dbWriteTable(con, "diamonds", diamonds, row.names = FALSE) 42 | dbWriteTable(con, "flights", flights, row.names = FALSE) 43 | dbDisconnect(con) 44 | 45 | 46 | class(diamonds) 47 | class(flights) 48 | 49 | 50 | con <- dbConnect(SQLite(), "data/datasets.sqlite") 51 | dbWriteTable(con, "diamonds", as.data.frame(diamonds), row.names = FALSE) 52 | dbWriteTable(con, "flights", as.data.frame(flights), row.names = FALSE) 53 | dbDisconnect(con) 54 | 55 | 56 | if (file.exists("data/example2.sqlite")) 57 | file.remove("data/example2.sqlite") 58 | 59 | 60 | con <- dbConnect(SQLite(), "data/example2.sqlite") 61 | chunk_size <- 10 62 | id <- 0 63 | for (i in 1:6) { 64 | chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 65 | type = LETTERS[[i]], 66 | score = rbinom(chunk_size, 10, (10 - i) / 10), 67 | stringsAsFactors = FALSE) 68 | dbWriteTable(con, "products", chunk, 69 | append = i > 1, row.names = FALSE) 70 | } 71 | dbDisconnect(con) 72 | 73 | 74 | con <- dbConnect(SQLite(), "data/datasets.sqlite") 75 | 76 | 77 | dbExistsTable(con, "diamonds") 78 | dbExistsTable(con, "mtcars") 79 | 80 | 81 | dbListTables(con) 82 | 83 | 84 | dbListFields(con, "diamonds") 85 | 86 | 87 | db_diamonds <- dbReadTable(con, "diamonds") 88 | dbDisconnect(con) 89 | 90 | 91 | head(db_diamonds, 3) 92 | head(diamonds, 3) 93 | 94 | 95 | identical(diamonds, db_diamonds) 96 | 97 | 98 | str(db_diamonds) 99 | 100 | 101 | str(diamonds) 102 | 103 | 104 | con <- dbConnect(SQLite(), "data/datasets.sqlite") 105 | dbListTables(con) 106 | 107 | 108 | db_diamonds <- dbGetQuery(con, 109 | "select * from diamonds") 110 | head(db_diamonds, 3) 111 | 112 | 113 | db_diamonds <- dbGetQuery(con, 114 | "select carat, cut, color, clarity, depth, price 115 | from diamonds") 116 | head(db_diamonds, 3) 117 | 118 | 119 | dbGetQuery(con, "select distinct cut from diamonds") 120 | 121 | 122 | dbGetQuery(con, "select distinct clarity from diamonds")[[1]] 123 | 124 | 125 | db_diamonds <- dbGetQuery(con, 126 | "select carat, price, clarity as clarity_level from diamonds") 127 | head(db_diamonds, 3) 128 | 129 | 130 | db_diamonds <- dbGetQuery(con, 131 | "select carat, price, x * y * z as size from diamonds") 132 | head(db_diamonds, 3) 133 | 134 | 135 | db_diamonds <- dbGetQuery(con, 136 | "select carat, price, x * y * z as size, 137 | price / size as value_density 138 | from diamonds") 139 | 140 | 141 | db_diamonds <- dbGetQuery(con, 142 | "select *, price / size as value_density from 143 | (select carat, price, x * y * z as size from diamonds)") 144 | head(db_diamonds, 3) 145 | 146 | 147 | good_diamonds <- dbGetQuery(con, 148 | "select carat, cut, price from diamonds where cut = 'Good'") 149 | head(good_diamonds, 3) 150 | 151 | 152 | nrow(good_diamonds) / nrow(diamonds) 153 | 154 | 155 | good_e_diamonds <- dbGetQuery(con, 156 | "select carat, cut, color, price from diamonds 157 | where cut = 'Good' and color = 'E'") 158 | head(good_e_diamonds, 3) 159 | nrow(good_e_diamonds) / nrow(diamonds) 160 | 161 | 162 | color_ef_diamonds <- dbGetQuery(con, 163 | "select carat, cut, color, price from diamonds 164 | where color in ('E','F')") 165 | nrow(color_ef_diamonds) 166 | 167 | 168 | table(diamonds$color) 169 | 170 | 171 | some_price_diamonds <- dbGetQuery(con, 172 | "select carat, cut, color, price from diamonds 173 | where price between 5000 and 5500") 174 | nrow(some_price_diamonds) / nrow(diamonds) 175 | 176 | 177 | good_cut_diamonds <- dbGetQuery(con, 178 | "select carat, cut, color, price from diamonds 179 | where cut like '%Good'") 180 | nrow(good_cut_diamonds) / nrow(diamonds) 181 | 182 | 183 | cheapest_diamonds <- dbGetQuery(con, 184 | "select carat, price from diamonds 185 | order by price") 186 | 187 | 188 | head(cheapest_diamonds) 189 | 190 | 191 | most_expensive_diamonds <- dbGetQuery(con, 192 | "select carat, price from diamonds 193 | order by price desc") 194 | head(most_expensive_diamonds) 195 | 196 | 197 | cheapest_diamonds <- dbGetQuery(con, 198 | "select carat, price from diamonds 199 | order by price, carat desc") 200 | head(cheapest_diamonds) 201 | 202 | 203 | dense_diamonds <- dbGetQuery(con, 204 | "select carat, price, x * y * z as size from diamonds 205 | order by carat / size desc") 206 | head(dense_diamonds) 207 | 208 | 209 | head(dbGetQuery(con, 210 | "select carat, price from diamonds 211 | where cut = 'Ideal' and clarity = 'IF' and color = 'J' 212 | order by price")) 213 | 214 | 215 | dbGetQuery(con, 216 | "select carat, price from diamonds 217 | order by carat desc limit 3") 218 | 219 | 220 | dbGetQuery(con, 221 | "select color, count(*) as number from diamonds 222 | group by color") 223 | 224 | 225 | table(diamonds$color) 226 | 227 | 228 | dbGetQuery(con, 229 | "select clarity, avg(price) as avg_price 230 | from diamonds 231 | group by clarity 232 | order by avg_price desc") 233 | 234 | 235 | dbGetQuery(con, 236 | "select price, max(carat) as max_carat 237 | from diamonds 238 | group by price 239 | order by price 240 | limit 5") 241 | 242 | 243 | dbGetQuery(con, 244 | "select clarity, 245 | min(price) as min_price, 246 | max(price) as max_price, 247 | avg(price) as avg_price 248 | from diamonds 249 | group by clarity 250 | order by avg_price desc") 251 | 252 | 253 | dbGetQuery(con, 254 | "select clarity, 255 | sum(price * carat) / sum(carat) as wprice 256 | from diamonds 257 | group by clarity 258 | order by wprice desc") 259 | 260 | 261 | dbGetQuery(con, 262 | "select clarity, color, 263 | avg(price) as avg_price 264 | from diamonds 265 | group by clarity, color 266 | order by avg_price desc 267 | limit 5") 268 | 269 | 270 | diamond_selector <- data.frame( 271 | cut = c("Ideal", "Good", "Fair"), 272 | color = c("E", "I", "D"), 273 | clarity = c("VS1", "I1", "IF"), 274 | stringsAsFactors = FALSE 275 | ) 276 | diamond_selector 277 | 278 | 279 | dbWriteTable(con, "diamond_selector", diamond_selector, 280 | row.names = FALSE, overwrite = TRUE) 281 | 282 | 283 | subset_diamonds <- dbGetQuery(con, 284 | "select cut, color, clarity, carat, price 285 | from diamonds 286 | join diamond_selector using (cut, color, clarity)") 287 | head(subset_diamonds) 288 | 289 | 290 | nrow(subset_diamonds) / nrow(diamonds) 291 | 292 | 293 | dbDisconnect(con) 294 | 295 | 296 | con <- dbConnect(SQLite(), "data/datasets.sqlite") 297 | res <- dbSendQuery(con, 298 | "select carat, cut, color, price from diamonds 299 | where cut = 'Ideal' and color = 'E'") 300 | while (!dbHasCompleted(res)) { 301 | chunk <- dbFetch(res, 800) 302 | cat(nrow(chunk), "records fetched\n") 303 | # do something with chunk 304 | } 305 | dbClearResult(res) 306 | dbDisconnect(con) 307 | 308 | 309 | file.remove("data/products.sqlite") 310 | 311 | 312 | set.seed(123) 313 | con <- dbConnect(SQLite(), "data/products.sqlite") 314 | chunk_size <- 10 315 | for (i in 1:6) { 316 | cat("Processing chunk", i, "\n") 317 | if (runif(1) <= 0.2) stop("Data error") 318 | chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 319 | type = LETTERS[[i]], 320 | score = rbinom(chunk_size, 10, (10 - i) / 10), 321 | stringsAsFactors = FALSE) 322 | dbWriteTable(con, "products", chunk, 323 | append = i > 1, row.names = FALSE) 324 | } 325 | 326 | 327 | dbGetQuery(con, "select COUNT(*) from products") 328 | dbDisconnect(con) 329 | 330 | 331 | set.seed(123) 332 | file.remove("data/products.sqlite") 333 | con <- dbConnect(SQLite(), "data/products.sqlite") 334 | chunk_size <- 10 335 | dbBegin(con) 336 | res <- tryCatch({ 337 | for (i in 1:6) { 338 | cat("Processing chunk", i, "\n") 339 | if (runif(1) <= 0.2) stop("Data error") 340 | chunk <- data.frame(id = ((i - 1L) * chunk_size):(i * chunk_size - 1L), 341 | type = LETTERS[[i]], 342 | score = rbinom(chunk_size, 10, (10 - i) / 10), 343 | stringsAsFactors = FALSE) 344 | dbWriteTable(con, "products", chunk, 345 | append = i > 1, row.names = FALSE) 346 | } 347 | dbCommit(con) 348 | }, error = function(e) { 349 | warning("An error occurs: ", e, "\nRolling back", immediate. = TRUE) 350 | dbRollback(con) 351 | }) 352 | 353 | 354 | dbGetQuery(con, "select COUNT(*) from products") 355 | dbDisconnect(con) 356 | 357 | 358 | file.remove("data/bank.sqlite") 359 | 360 | 361 | create_bank <- function(dbfile) { 362 | if (file.exists(dbfile)) file.remove(dbfile) 363 | con <- dbConnect(SQLite(), dbfile) 364 | dbSendQuery(con, 365 | "create table accounts 366 | (name text primary key, balance real)") 367 | dbSendQuery(con, 368 | "create table transactions 369 | (time text, account_from text, account_to text, value real)") 370 | con 371 | } 372 | 373 | 374 | create_account <- function(con, name, balance) { 375 | dbSendQuery(con, 376 | sprintf("insert into accounts (name, balance) values ('%s',%.2f)", name, balance)) 377 | TRUE 378 | } 379 | 380 | 381 | transfer <- function(con, from, to, value) { 382 | get_account <- function(name) { 383 | account <- dbGetQuery(con, 384 | sprintf("select * from accounts where name = '%s'", name)) 385 | if (nrow(account) == 0) stop(sprintf("Account '%s' does not exist", name)) 386 | account 387 | } 388 | account_from <- get_account(from) 389 | account_to <- get_account(to) 390 | if (account_from$balance < value) { 391 | stop(sprintf("Insufficient money to transfer from '%s'", from)) 392 | } else { 393 | dbSendQuery(con, 394 | sprintf("update accounts set balance = %.2f where name = '%s'", 395 | account_from$balance - value, from)) 396 | dbSendQuery(con, 397 | sprintf("update accounts set balance = %.2f where name = '%s'", 398 | account_to$balance + value, to)) 399 | dbSendQuery(con, 400 | sprintf("insert into transactions (time, account_from, account_to, value) values 401 | ('%s', '%s', '%s', %.2f)", format(Sys.time(), "%Y-%m-%d %H:%M:%S"), 402 | from, to, value)) 403 | } 404 | TRUE 405 | } 406 | 407 | 408 | safe_transfer <- function(con, ...) { 409 | dbBegin(con) 410 | tryCatch({ 411 | transfer(con, ...) 412 | dbCommit(con) 413 | }, error = function(e) { 414 | message("An error occurs in the transaction. Rollback...") 415 | dbRollback(con) 416 | stop(e) 417 | }) 418 | } 419 | 420 | 421 | get_balance <- function(con, name) { 422 | res <- dbGetQuery(con, 423 | sprintf("select balance from accounts where name = '%s'", name)) 424 | res$balance 425 | } 426 | get_transactions <- function(con, from, to) { 427 | dbGetQuery(con, 428 | sprintf("select * from transactions 429 | where account_from = '%s' and account_to = '%s'", from, to)) 430 | } 431 | 432 | 433 | con <- create_bank("data/bank.sqlite") 434 | create_account(con, "David", 5000) 435 | create_account(con, "Jenny", 6500) 436 | get_balance(con, "David") 437 | get_balance(con, "Jenny") 438 | 439 | 440 | safe_transfer(con, "David", "Jenny", 1500) 441 | get_balance(con, "David") 442 | get_balance(con, "Jenny") 443 | 444 | 445 | safe_transfer(con, "David", "Jenny", 6500) 446 | get_balance(con, "David") 447 | get_balance(con, "Jenny") 448 | 449 | 450 | get_transactions(con, "David", "Jenny") 451 | 452 | 453 | dbDisconnect(con) 454 | 455 | 456 | chunk_rw <- function(input, output, table, chunk_size = 10000) { 457 | first_row <- read.csv(input, nrows = 1, header = TRUE) 458 | header <- colnames(first_row) 459 | n <- 0 460 | con <- dbConnect(SQLite(), output) 461 | on.exit(dbDisconnect(con)) 462 | while (TRUE) { 463 | df <- read.csv(input, 464 | skip = 1 + n * chunk_size, nrows = chunk_size, 465 | header = FALSE, col.names = header, 466 | stringsAsFactors = FALSE) 467 | if (nrow(df) == 0) break; 468 | dbWriteTable(con, table, df, row.names = FALSE, append = n > 0) 469 | n <- n + 1 470 | cat(sprintf("%d records written\n", nrow(df))) 471 | } 472 | } 473 | 474 | 475 | file.remove("data/diamonds.csv", "data/diamonds.sqlite") 476 | 477 | 478 | write.csv(diamonds, "data/diamonds.csv", quote = FALSE, row.names = FALSE) 479 | chunk_rw("data/diamonds.csv", "data/diamonds.sqlite", "diamonds") 480 | 481 | 482 | batch_rw <- function(dir, output, table, overwrite = TRUE) { 483 | files <- list.files(dir, "\\.csv$", full.names = TRUE) 484 | con <- dbConnect(SQLite(), output) 485 | on.exit(dbDisconnect(con)) 486 | exist <- dbExistsTable(con, table) 487 | if (exist) { 488 | if (overwrite) dbRemoveTable(con, table) 489 | else stop(sprintf("Table '%s' already exists", table)) 490 | } 491 | exist <- FALSE 492 | for (file in files) { 493 | cat(file, "... ") 494 | df <- read.csv(file, header = TRUE, stringsAsFactors = FALSE) 495 | dbWriteTable(con, table, df, row.names = FALSE, 496 | append = exist) 497 | exist <- TRUE 498 | cat("done\n") 499 | } 500 | } 501 | 502 | 503 | file.remove("data/groups.sqlite") 504 | 505 | 506 | batch_rw("data/groups", "data/groups.sqlite", "groups") 507 | 508 | 509 | con <- dbConnect(SQLite(), "data/groups.sqlite") 510 | dbReadTable(con, "groups") 511 | dbDisconnect(con) 512 | 513 | 514 | install.packages("mongolite") 515 | 516 | 517 | library(mongolite) 518 | m <- mongo("products", "test", "mongodb://localhost") 519 | 520 | 521 | m$count() 522 | 523 | 524 | m$insert(' 525 | { 526 | "code": "A0000001", 527 | "name": "Product-A", 528 | "type": "Type-I", 529 | "price": 29.5, 530 | "amount": 500, 531 | "comments": [ 532 | { 533 | "user": "david", 534 | "score": 8, 535 | "text": "This is a good product" 536 | }, 537 | { 538 | "user": "jenny", 539 | "score": 5, 540 | "text": "Just so so" 541 | } 542 | ] 543 | }') 544 | 545 | 546 | m$count() 547 | 548 | 549 | m$insert(list( 550 | code = "A0000002", 551 | name = "Product-B", 552 | type = "Type-II", 553 | price = 59.9, 554 | amount = 200L, 555 | comments = list( 556 | list(user = "tom", score = 6L, 557 | text = "Just fine"), 558 | list(user = "mike", score = 9L, 559 | text = "great product!") 560 | ) 561 | ), auto_unbox = TRUE) 562 | 563 | 564 | m$count() 565 | 566 | 567 | products <- m$find() 568 | str(products) 569 | 570 | 571 | iter <- m$iterate() 572 | products <- iter$batch(2) 573 | str(products) 574 | 575 | 576 | m$find('{ "code": "A0000001" }', 577 | '{ "_id": 0, "name": 1, "price": 1, "amount": 1 }') 578 | 579 | 580 | m$find('{ "price": { "$gte": 40 } }', 581 | '{ "_id": 0, "name": 1, "price": 1, "amount": 1 }') 582 | 583 | 584 | m$find('{ "comments.score": 9 }', 585 | '{ "_id": 0, "code": 1, "name": 1}') 586 | 587 | 588 | m$find('{ "comments.score": { "$lt": 6 }}', 589 | '{ "_id": 0, "code": 1, "name": 1}') 590 | 591 | 592 | m$drop() 593 | 594 | 595 | m <- mongo("students", "test", "mongodb://localhost") 596 | 597 | 598 | m$count() 599 | 600 | 601 | students <- data.frame( 602 | name = c("David", "Jenny", "Sara", "John"), 603 | age = c(25, 23, 26, 23), 604 | major = c("Statistics", "Physics", "Computer Science", "Statistics"), 605 | projects = c(2, 1, 3, 1), 606 | stringsAsFactors = FALSE 607 | ) 608 | students 609 | 610 | 611 | m$insert(students) 612 | 613 | 614 | m$count() 615 | 616 | 617 | m$find() 618 | 619 | 620 | m$find('{ "name": "Jenny" }') 621 | 622 | 623 | m$find('{ "projects": { "$gte": 2 }}') 624 | 625 | 626 | m$find('{ "projects": { "$gte": 2 }}', 627 | '{ "_id": 0, "name": 1, "major": 1 }') 628 | 629 | 630 | m$find('{ "projects": { "$gte": 2 }}', 631 | fields = '{ "_id": 0, "name": 1, "age": 1 }', 632 | sort = '{ "age": -1 }') 633 | 634 | 635 | m$find('{ "projects": { "$gte": 2 }}', 636 | fields = '{ "_id": 0, "name": 1, "age": 1 }', 637 | sort = '{ "age": -1 }', 638 | limit = 1) 639 | 640 | 641 | m$distinct("major") 642 | 643 | 644 | m$distinct("major", '{ "projects": { "$gte": 2 } }') 645 | 646 | 647 | m$update('{ "name": "Jenny" }', '{ "$set": { "age": 24 } }') 648 | m$find() 649 | 650 | 651 | m$index('{ "name": 1 }') 652 | 653 | 654 | m$find('{ "name": "Sara" }') 655 | 656 | 657 | m$find('{ "name": "Jane" }') 658 | 659 | 660 | m$drop() 661 | 662 | 663 | set.seed(123) 664 | m <- mongo("simulation", "test") 665 | sim_data <- expand.grid( 666 | type = c("A", "B", "C", "D", "E"), 667 | category = c("P-1", "P-2", "P-3"), 668 | group = 1:20000, 669 | stringsAsFactors = FALSE) 670 | head(sim_data) 671 | 672 | 673 | sim_data$score1 <- rnorm(nrow(sim_data), 10, 3) 674 | sim_data$test1 <- rbinom(nrow(sim_data), 100, 0.8) 675 | 676 | 677 | head(sim_data) 678 | 679 | 680 | m$insert(sim_data) 681 | 682 | 683 | system.time(rec <- m$find('{ "type": "C", "category": "P-3", "group": 87 }')) 684 | rec 685 | 686 | 687 | system.time({ 688 | recs <- m$find('{ "type": { "$in": ["B", "D"] }, 689 | "category": { "$in": ["P-1", "P-2"] }, 690 | "group": { "$gte": 25, "$lte": 75 } }') 691 | }) 692 | 693 | 694 | head(recs) 695 | 696 | 697 | system.time(recs2 <- m$find('{ "score1": { "$gte": 20 } }')) 698 | 699 | 700 | head(recs2) 701 | 702 | 703 | m$index('{ "type": 1, "category": 1, "group": 1 }') 704 | 705 | 706 | system.time({ 707 | rec <- m$find('{ "type": "C", "category": "P-3", "group": 87 }') 708 | }) 709 | 710 | 711 | system.time({ 712 | recs <- m$find('{ "type": { "$in": ["B", "D"] }, 713 | "category": { "$in": ["P-1", "P-2"] }, 714 | "group": { "$gte": 25, "$lte": 75 } }') 715 | }) 716 | 717 | 718 | system.time({ 719 | recs2 <- m$find('{ "score1": { "$gte": 20 } }') 720 | }) 721 | 722 | 723 | m$aggregate('[ 724 | { "$group": { 725 | "_id": "$type", 726 | "count": { "$sum": 1 }, 727 | "avg_score": { "$avg": "$score1" }, 728 | "min_test": { "$min": "$test1" }, 729 | "max_test": { "$max": "$test1" } 730 | } 731 | } 732 | ]') 733 | 734 | 735 | m$aggregate('[ 736 | { "$group": { 737 | "_id": { "type": "$type", "category": "$category" }, 738 | "count": { "$sum": 1 }, 739 | "avg_score": { "$avg": "$score1" }, 740 | "min_test": { "$min": "$test1" }, 741 | "max_test": { "$max": "$test1" } 742 | } 743 | } 744 | ]') 745 | 746 | 747 | m$aggregate('[ 748 | { "$group": { 749 | "_id": { "type": "$type", "category": "$category" }, 750 | "count": { "$sum": 1 }, 751 | "avg_score": { "$avg": "$score1" }, 752 | "min_test": { "$min": "$test1" }, 753 | "max_test": { "$max": "$test1" } 754 | } 755 | }, 756 | { 757 | "$sort": { "_id.type": 1, "avg_score": -1 } 758 | } 759 | ]') 760 | 761 | 762 | m$aggregate('[ 763 | { "$group": { 764 | "_id": { "type": "$type", "category": "$category" }, 765 | "count": { "$sum": 1 }, 766 | "avg_score": { "$avg": "$score1" }, 767 | "min_test": { "$min": "$test1" }, 768 | "max_test": { "$max": "$test1" } 769 | } 770 | }, 771 | { 772 | "$sort": { "avg_score": -1 } 773 | }, 774 | { 775 | "$limit": 3 776 | }, 777 | { 778 | "$project": { 779 | "_id.type": 1, 780 | "_id.category": 1, 781 | "avg_score": 1, 782 | "test_range": { "$subtract": ["$max_test", "$min_test"] } 783 | } 784 | } 785 | ]') 786 | 787 | 788 | bins <- m$mapreduce( 789 | map = 'function() { 790 | emit(Math.floor(this.score1 / 2.5) * 2.5, 1); 791 | }', 792 | reduce = 'function(id, counts) { 793 | return Array.sum(counts); 794 | }' 795 | ) 796 | 797 | 798 | bins 799 | 800 | 801 | with(bins, barplot(value / sum(value), names.arg = `_id`, 802 | main = "Histogram of scores", 803 | xlab = "score1", ylab = "Percentage")) 804 | 805 | 806 | m$drop() 807 | 808 | 809 | install.packages("rredis") 810 | 811 | 812 | library(rredis) 813 | redisConnect() 814 | 815 | 816 | library(rredis) 817 | redisConnect(nodelay = FALSE) 818 | redisCmd("flushall") 819 | 820 | 821 | redisSet("num1", 100) 822 | 823 | 824 | redisGet("num1") 825 | 826 | 827 | redisSet("vec1", 1:5) 828 | redisGet("vec1") 829 | 830 | 831 | redisSet("mtcars_head", head(mtcars, 3)) 832 | redisGet("mtcars_head") 833 | 834 | 835 | redisGet("something") 836 | 837 | 838 | redisExists("something") 839 | redisExists("num1") 840 | 841 | 842 | redisDelete("num1") 843 | redisExists("num1") 844 | 845 | 846 | redisHSet("fruits", "apple", 5) 847 | redisHSet("fruits", "pear", 2) 848 | redisHSet("fruits", "banana", 9) 849 | 850 | 851 | redisHGet("fruits", "banana") 852 | 853 | 854 | redisHGetAll("fruits") 855 | 856 | 857 | redisHKeys("fruits") 858 | 859 | 860 | redisHVals("fruits") 861 | 862 | 863 | redisHLen("fruits") 864 | 865 | 866 | redisHMGet("fruits", c("apple", "banana")) 867 | 868 | 869 | redisHMSet("fruits", list(apple = 4, pear = 1)) 870 | 871 | 872 | redisHGetAll("fruits") 873 | 874 | 875 | for (qi in 1:3) { 876 | redisRPush("queue", qi) 877 | } 878 | 879 | 880 | redisLLen("queue") 881 | 882 | 883 | redisLPop("queue") 884 | redisLPop("queue") 885 | redisLPop("queue") 886 | redisLPop("queue") 887 | 888 | 889 | redisClose() 890 | 891 | 892 | -------------------------------------------------------------------------------- /Chapter 12/data-manipulation.R: -------------------------------------------------------------------------------- 1 | library(readr) 2 | product_info <- read_csv("data/product-info.csv") 3 | product_info 4 | 5 | 6 | sapply(product_info, class) 7 | 8 | 9 | product_info[product_info$type == "toy", ] 10 | 11 | 12 | product_info[product_info$released == "no", ] 13 | 14 | 15 | product_info[, c("id", "name", "type")] 16 | 17 | 18 | product_info[c("id", "name", "class")] 19 | 20 | 21 | product_info[product_info$type == "toy", c("name", "class", "released")] 22 | 23 | 24 | subset(product_info, 25 | subset = type == "model" & released == "yes", 26 | select = name:class) 27 | 28 | 29 | with(product_info, name[released == "no"]) 30 | 31 | 32 | with(product_info, table(type[released == "yes"])) 33 | 34 | 35 | product_stats <- read_csv("data/product-stats.csv") 36 | product_stats 37 | 38 | 39 | top_3_id <- product_stats[order(product_stats$size, decreasing = TRUE), "id"][1:3] 40 | product_info[product_info$id %in% top_3_id, ] 41 | 42 | 43 | product_table <- merge(product_info, product_stats, by = "id") 44 | product_table 45 | 46 | 47 | product_table[order(product_table$size), ] 48 | 49 | 50 | product_table[order(product_table$size, decreasing = TRUE), "name"][1:3] 51 | 52 | 53 | product_table[order(product_table$weight, decreasing = TRUE), ][ 54 | product_table$type == "model",] 55 | 56 | 57 | transform(product_table, 58 | released = ifelse(released == "yes", TRUE, FALSE), 59 | density = weight / size) 60 | 61 | 62 | product_tests <- read_csv("data/product-tests.csv") 63 | product_tests 64 | 65 | 66 | na.omit(product_tests) 67 | 68 | 69 | complete.cases(product_tests) 70 | 71 | 72 | product_tests[complete.cases(product_tests), "id"] 73 | 74 | 75 | product_tests[!complete.cases(product_tests), "id"] 76 | 77 | 78 | product_full <- merge(product_table, product_tests, by = "id") 79 | product_full 80 | 81 | 82 | mean_quality1 <- tapply(product_full$quality, 83 | list(product_full$type), 84 | mean, na.rm = TRUE) 85 | mean_quality1 86 | 87 | 88 | str(mean_quality1) 89 | 90 | 91 | is.array(mean_quality1) 92 | 93 | 94 | mean_quality2 <- tapply(product_full$quality, 95 | list(product_full$type, product_full$class), 96 | mean, na.rm = TRUE) 97 | mean_quality2 98 | 99 | 100 | mean_quality2["model", "vehicle"] 101 | 102 | 103 | mean_quality3 <- with(product_full, 104 | tapply(quality, list(type, material, released), 105 | mean, na.rm = TRUE)) 106 | mean_quality3 107 | 108 | 109 | str(mean_quality3) 110 | 111 | 112 | mean_quality3["model", "Wood", "yes"] 113 | 114 | 115 | toy_tests <- read_csv("data/product-toy-tests.csv") 116 | toy_tests 117 | 118 | 119 | install.packages("reshape2") 120 | 121 | 122 | library(reshape2) 123 | toy_quality <- dcast(toy_tests, date ~ id, value.var = "quality") 124 | toy_quality 125 | 126 | 127 | install.packages("zoo") 128 | 129 | 130 | zoo::na.locf(c(1, 2, NA, NA, 3, 1, NA, 2, NA)) 131 | 132 | 133 | toy_quality$T01 <- zoo::na.locf(toy_quality$T01) 134 | toy_quality$T02 <- zoo::na.locf(toy_quality$T02) 135 | 136 | 137 | toy_quality[-1] <- lapply(toy_quality[-1], zoo::na.locf) 138 | toy_quality 139 | 140 | 141 | toy_tests$ym <- substr(toy_tests$date, 1, 6) 142 | toy_tests 143 | 144 | 145 | toy_quality <- dcast(toy_tests, ym ~ id, value.var = "quality") 146 | toy_quality 147 | 148 | 149 | toy_tests2 <- melt(toy_tests, id.vars = c("id", "ym"), 150 | measure.vars = c("quality", "durability"), 151 | variable.name = "measure") 152 | toy_tests2 153 | 154 | 155 | library(ggplot2) 156 | ggplot(toy_tests2, aes(x = ym, y = value)) + 157 | geom_point() + 158 | facet_grid(id ~ measure) 159 | 160 | 161 | ggplot(toy_tests2, aes(x = ym, y = value, color = id)) + 162 | geom_point() + 163 | facet_grid(. ~ measure) 164 | 165 | 166 | install.packages("sqldf") 167 | 168 | 169 | library(sqldf) 170 | 171 | 172 | product_info <- read_csv("data/product-info.csv") 173 | product_stats <- read_csv("data/product-stats.csv") 174 | product_tests <- read_csv("data/product-tests.csv") 175 | toy_tests <- read_csv("data/product-toy-tests.csv") 176 | 177 | 178 | sqldf("select * from product_info") 179 | 180 | 181 | sqldf("select id, name, class from product_info") 182 | 183 | 184 | sqldf("select id, name from product_info where released = 'yes'") 185 | 186 | 187 | sqldf("select id, material, size / weight as density from product_stats") 188 | 189 | 190 | sqldf("select * from product_stats order by size desc") 191 | 192 | 193 | sqldf("select * from product_info join product_stats using (id)") 194 | 195 | 196 | sqldf("select * from product_info where id in 197 | (select id from product_stats where material = 'Wood')") 198 | 199 | 200 | sqldf("select * from product_info join product_stats using (id) 201 | where material = 'Wood'") 202 | 203 | 204 | sqldf("select waterproof, avg(quality), avg(durability) from product_tests 205 | group by waterproof") 206 | 207 | 208 | sqldf("select id, avg(quality), avg(durability) from toy_tests 209 | group by id") 210 | 211 | 212 | sqldf("select * from product_info join 213 | (select id, avg(quality), avg(durability) from toy_tests 214 | group by id) using (id)") 215 | 216 | 217 | install.packages("plyr") 218 | 219 | 220 | plyr::ddply(product_stats, "material", 221 | function(x) { 222 | head(x[order(x$size, decreasing = TRUE),], 1L) 223 | }) 224 | 225 | 226 | plyr::ddply(toy_tests, "id", 227 | function(x) { 228 | head(x[order(x$sample, decreasing = TRUE), ], 2) 229 | }) 230 | 231 | 232 | install.packages("data.table") 233 | 234 | 235 | library(data.table) 236 | 237 | 238 | dt <- data.table(x = 1:3, y = rnorm(3), z = letters[1:3]) 239 | dt 240 | 241 | 242 | str(dt) 243 | 244 | 245 | product_info <- fread("data/product-info.csv") 246 | product_stats <- fread("data/product-stats.csv") 247 | product_tests <- fread("data/product-tests.csv") 248 | toy_tests <- fread("data/product-toy-tests.csv") 249 | 250 | 251 | product_info 252 | 253 | 254 | str(product_info) 255 | 256 | 257 | product_info[1] 258 | 259 | 260 | product_info[1:3] 261 | 262 | 263 | product_info[-1] 264 | 265 | 266 | product_info[.N] 267 | 268 | 269 | product_info[c(1, .N)] 270 | 271 | 272 | product_info[released == "yes"] 273 | 274 | 275 | product_info[released == "yes", id] 276 | 277 | 278 | product_info[released == "yes", "id"] 279 | 280 | 281 | product_info[released == "yes", "id", with = FALSE] 282 | 283 | 284 | product_info[released == "yes", c("id", "name"), with = FALSE] 285 | 286 | 287 | product_info[released == "yes", table(type, class)] 288 | 289 | 290 | product_info[released == "yes", list(id, name)] 291 | 292 | 293 | product_info[, list(id, name, released = released == "yes")] 294 | 295 | 296 | product_stats[, list(id, material, size, weight, 297 | density = size / weight)] 298 | 299 | 300 | product_info[, .(id, name, type, class)] 301 | 302 | 303 | product_info[released == "yes", .(id, name)] 304 | 305 | 306 | product_stats[order(size, decreasing = TRUE)] 307 | 308 | 309 | product_stats 310 | 311 | 312 | product_stats[, density := size / weight] 313 | 314 | 315 | product_stats 316 | 317 | 318 | product_info[, released := released == "yes"] 319 | product_info 320 | 321 | 322 | setkey(product_info, id) 323 | 324 | 325 | product_info 326 | 327 | 328 | key(product_info) 329 | 330 | 331 | product_info["M01"] 332 | 333 | 334 | product_stats["M01"] 335 | 336 | 337 | setkeyv(product_stats, "id") 338 | 339 | 340 | product_stats["M02"] 341 | 342 | 343 | product_info[product_stats] 344 | 345 | 346 | setkey(toy_tests, id, date) 347 | 348 | 349 | toy_tests[.("T01", 20160201)] 350 | 351 | 352 | toy_tests["T01"] 353 | 354 | 355 | toy_tests[.(20160201)] 356 | 357 | 358 | toy_tests[.(20160201, "T01")] 359 | 360 | 361 | product_info[, .N, by = released] 362 | 363 | 364 | product_info[, .N, by = .(type, class)] 365 | 366 | 367 | product_tests[, mean(quality, na.rm = TRUE), 368 | by = .(waterproof)] 369 | 370 | 371 | product_tests[, .(mean_quality = mean(quality, na.rm = TRUE)), 372 | by = .(waterproof)] 373 | 374 | 375 | product_info[product_tests][released == TRUE, 376 | .(mean_quality = mean(quality, na.rm = TRUE), 377 | mean_durability = mean(durability, na.rm = TRUE)), 378 | by = .(type, class)] 379 | 380 | 381 | type_class_tests <- product_info[product_tests][released == TRUE, 382 | .(mean_quality = mean(quality, na.rm = TRUE), 383 | mean_durability = mean(durability, na.rm = TRUE)), 384 | keyby = .(type, class)] 385 | type_class_tests 386 | 387 | 388 | key(type_class_tests) 389 | 390 | 391 | type_class_tests[.("model", "vehicle"), mean_quality] 392 | 393 | 394 | n <- 10000000 395 | test1 <- data.frame(id = 1:n, x = rnorm(n), y = rnorm(n)) 396 | 397 | 398 | system.time(row <- test1[test1$id == 876543, ]) 399 | row 400 | 401 | 402 | setDT(test1, key = "id") 403 | class(test1) 404 | 405 | 406 | system.time(row <- test1[.(8765432)]) 407 | row 408 | 409 | 410 | toy_tests[, ym := substr(date, 1, 6)] 411 | toy_quality <- dcast(toy_tests, ym ~ id, value.var = "quality") 412 | toy_quality 413 | 414 | 415 | toy_tests2 <- dcast(toy_tests, ym ~ id, value.var = c("quality", "durability")) 416 | toy_tests2 417 | 418 | 419 | key(toy_tests2) 420 | 421 | 422 | toy_tests2[.(201602)] 423 | 424 | 425 | sapply(toy_tests2, class) 426 | 427 | 428 | toy_tests2["201602"] 429 | 430 | 431 | class(20160101) 432 | class(substr(20160101, 1, 6)) 433 | 434 | 435 | product_stats 436 | setDF(product_stats) 437 | class(product_stats) 438 | 439 | 440 | setDT(product_stats, key = "id") 441 | class(product_stats) 442 | 443 | 444 | setnames(product_stats, "size", "volume") 445 | product_stats 446 | 447 | 448 | product_stats[, i := .I] 449 | product_stats 450 | 451 | 452 | setcolorder(product_stats, 453 | c("i", "id", "material", "weight", "volume", "density")) 454 | product_stats 455 | 456 | 457 | market_data <- data.table(date = as.Date("2015-05-01") + 0:299) 458 | head(market_data) 459 | 460 | 461 | set.seed(123) 462 | market_data[, `:=`( 463 | price = round(30 * cumprod(1 + rnorm(300, 0.001, 0.05)), 2), 464 | volume = rbinom(300, 5000, 0.8) 465 | )] 466 | 467 | 468 | head(market_data) 469 | 470 | 471 | plot(price ~ date, data = market_data, 472 | type = "l", 473 | main = "Market data") 474 | 475 | 476 | market_data[, range(date)] 477 | 478 | 479 | monthly <- market_data[, 480 | .(open = price[[1]], high = max(price), 481 | low = min(price), close = price[[.N]]), 482 | keyby = .(year = year(date), month = month(date))] 483 | head(monthly) 484 | 485 | 486 | oldpar <- par(mfrow = c(1, 2)) 487 | market_data[, { 488 | plot(price ~ date, type = "l", 489 | main = sprintf("Market data (%d)", year)) 490 | }, by = .(year = year(date))] 491 | par(oldpar) 492 | 493 | 494 | oldpar <- par(mfrow = c(1, 2)) 495 | market_data[year(date) == 2015, { 496 | plot(price ~ date, type = "l", 497 | main = sprintf("Market data (%d)", 2015)) 498 | }] 499 | market_data[year(date) == 2016, { 500 | plot(price ~ date, type = "l", 501 | main = sprintf("Market data (%d)", 2016)) 502 | }] 503 | par(oldpar) 504 | 505 | 506 | data("diamonds", package = "ggplot2") 507 | setDT(diamonds) 508 | head(diamonds) 509 | 510 | 511 | diamonds[, { 512 | m <- lm(log(price) ~ carat + depth) 513 | as.list(coef(m)) 514 | }, keyby = .(cut)] 515 | 516 | 517 | average <- function(column) { 518 | market_data[, .(average = mean(.SD[[column]])), 519 | by = .(year = year(date))] 520 | } 521 | 522 | 523 | average("price") 524 | 525 | 526 | average("volume") 527 | 528 | 529 | price_cols <- paste0("price", 1:3) 530 | market_data[, (price_cols) := lapply(1:3, 531 | function(i) round(price + rnorm(.N, 0, 5), 2))] 532 | head(market_data) 533 | 534 | 535 | cols <- colnames(market_data) 536 | price_cols <- cols[grep("^price", cols)] 537 | price_cols 538 | 539 | 540 | market_data[, (price_cols) := lapply(.SD, zoo::na.locf), 541 | .SDcols = price_cols] 542 | 543 | 544 | install.packages("dplyr") 545 | 546 | 547 | library(readr) 548 | product_info <- read_csv("data/product-info.csv") 549 | product_stats <- read_csv("data/product-stats.csv") 550 | product_tests <- read_csv("data/product-tests.csv") 551 | toy_tests <- read_csv("data/product-toy-tests.csv") 552 | 553 | 554 | library(dplyr) 555 | 556 | 557 | select(product_info, id, name, type, class) 558 | 559 | 560 | filter(product_info, released == "yes") 561 | 562 | 563 | filter(product_info, 564 | released == "yes", type == "model") 565 | 566 | 567 | mutate(product_stats, density = size / weight) 568 | 569 | 570 | arrange(product_stats, material, desc(size), desc(weight)) 571 | 572 | 573 | product_info_tests <- left_join(product_info, product_tests, by = "id") 574 | product_info_tests 575 | 576 | 577 | summarize(group_by(product_info_tests, type, class), 578 | mean_quality = mean(quality, na.rm = TRUE), 579 | mean_durability = mean(durability, na.rm = TRUE)) 580 | 581 | 582 | product_info %>% 583 | filter(released == "yes") %>% 584 | inner_join(product_tests, by = "id") %>% 585 | group_by(type, class) %>% 586 | summarize( 587 | mean_quality = mean(quality, na.rm = TRUE), 588 | mean_durability = mean(durability, na.rm = TRUE)) %>% 589 | arrange(desc(mean_quality)) 590 | 591 | 592 | d1 <- f1(d0, arg1) 593 | d2 <- f2(d1, arg2) 594 | d3 <- f3(d2, arg3) 595 | 596 | 597 | f3(f2(f1(d0, arg1), arg2), arg3) 598 | 599 | 600 | d0 %>% 601 | f1(arg1) %>% 602 | f2(arg2) %>% 603 | f3(arg3) 604 | 605 | 606 | data(diamonds, package = "ggplot2") 607 | plot(density(diamonds$price, from = 0), 608 | main = "Density plot of diamond prices") 609 | 610 | 611 | diamonds$price %>% 612 | density(from = 0) %>% 613 | plot(main = "Density plot of diamonds prices") 614 | 615 | 616 | models <- diamonds %>% 617 | group_by(cut) %>% 618 | do(lmod = lm(log(price) ~ carat, data = .)) 619 | models 620 | 621 | 622 | models$lmod[[1]] 623 | 624 | 625 | toy_tests %>% 626 | group_by(id) %>% 627 | arrange(desc(sample)) %>% 628 | do(head(., 3)) %>% 629 | summarize( 630 | quality = sum(quality * sample) / sum(sample), 631 | durability = sum(durability * sample) / sum(sample)) 632 | 633 | 634 | toy_tests %>% 635 | group_by(id) %>% 636 | arrange(desc(sample)) 637 | 638 | 639 | toy_tests %>% 640 | group_by(id) %>% 641 | arrange(desc(sample)) %>% 642 | do(head(., 3)) 643 | 644 | 645 | install.packages("rlist") 646 | 647 | 648 | library(rlist) 649 | 650 | 651 | products <- list.load("data/products.json") 652 | str(products[[1]]) 653 | 654 | 655 | str(list.map(products, id)) 656 | 657 | 658 | list.mapv(products, name) 659 | 660 | 661 | released_products <- list.filter(products, released) 662 | list.mapv(released_products, name) 663 | 664 | 665 | products %>% 666 | list.filter(released) %>% 667 | list.mapv(name) 668 | 669 | 670 | products %>% 671 | list.filter(released, tests$waterproof) %>% 672 | list.select(id, name, scores) %>% 673 | str() 674 | 675 | 676 | products %>% 677 | list.filter(mean(scores) >= 8) %>% 678 | list.select(name, scores, mean_score = mean(scores)) %>% 679 | str() 680 | 681 | 682 | products %>% 683 | list.select(name, mean_score = mean(scores)) %>% 684 | list.sort(-mean_score) %>% 685 | list.stack() 686 | 687 | 688 | products %>% 689 | list.select(name, type, released) %>% 690 | list.group(type) %>% 691 | str() 692 | 693 | 694 | products %>% 695 | list.table(type, class) 696 | 697 | 698 | products %>% 699 | list.filter(released) %>% 700 | list.table(type, waterproof = tests$waterproof) 701 | 702 | 703 | products %>% 704 | list.filter(length(scores) >= 5) %>% 705 | list.sort(-mean(scores)) %>% 706 | list.take(2) %>% 707 | list.select(name, 708 | mean_score = mean(scores), 709 | n_score = length(scores)) %>% 710 | list.stack() 711 | 712 | 713 | -------------------------------------------------------------------------------- /Chapter 13/high-performance-computing.txt: -------------------------------------------------------------------------------- 1 | x <- c(1, 2, 3, 4, 5) 2 | y <- numeric() 3 | sum_x <- 0 4 | for (xi in x) { 5 | sum_x <- sum_x + xi 6 | y <- c(y, sum_x) 7 | } 8 | y 9 | 10 | 11 | my_cumsum1 <- function(x) { 12 | y <- numeric() 13 | sum_x <- 0 14 | for (xi in x) { 15 | sum_x <- sum_x + xi 16 | y <- c(y, sum_x) 17 | } 18 | y 19 | } 20 | 21 | 22 | my_cumsum2 <- function(x) { 23 | y <- numeric(length(x)) 24 | if (length(y)) { 25 | y[[1]] <- x[[1]] 26 | for (i in 2:length(x)) { 27 | y[[i]] <- y[[i-1]] + x[[i]] 28 | } 29 | } 30 | y 31 | } 32 | 33 | 34 | x <- rnorm(100) 35 | all.equal(cumsum(x), my_cumsum1(x)) 36 | all.equal(cumsum(x), my_cumsum2(x)) 37 | 38 | 39 | x <- rnorm(100) 40 | system.time(my_cumsum1(x)) 41 | 42 | 43 | system.time(my_cumsum2(x)) 44 | 45 | 46 | system.time(cumsum(x)) 47 | 48 | 49 | x <- rnorm(1000) 50 | system.time(my_cumsum1(x)) 51 | system.time(my_cumsum2(x)) 52 | system.time(cumsum(x)) 53 | 54 | 55 | x <- rnorm(10000) 56 | system.time(my_cumsum1(x)) 57 | system.time(my_cumsum2(x)) 58 | system.time(cumsum(x)) 59 | 60 | 61 | x <- rnorm(100000) 62 | system.time(my_cumsum1(x)) 63 | system.time(my_cumsum2(x)) 64 | system.time(cumsum(x)) 65 | 66 | 67 | install.packages("microbenchmark") 68 | 69 | 70 | library(microbenchmark) 71 | x <- rnorm(100) 72 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x)) 73 | 74 | 75 | x <- rnorm(1000) 76 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x)) 77 | 78 | 79 | x <- rnorm(5000) 80 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x)) 81 | 82 | 83 | x <- rnorm(10000) 84 | microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x), times = 10) 85 | 86 | 87 | library(data.table) 88 | benchmark <- function(ns, times = 30) { 89 | results <- lapply(ns, function(n) { 90 | x <- rnorm(n) 91 | result <- microbenchmark(my_cumsum1(x), my_cumsum2(x), cumsum(x), 92 | times = times, unit = "ms") 93 | data <- setDT(summary(result)) 94 | data[, n := n] 95 | data 96 | }) 97 | rbindlist(results) 98 | } 99 | 100 | 101 | benchmarks <- benchmark(seq(100, 3000, 100)) 102 | 103 | 104 | library(ggplot2) 105 | ggplot(benchmarks, aes(x = n, color = expr)) + 106 | ggtitle("Microbenchmark on cumsum functions") + 107 | geom_point(aes(y = median)) + 108 | geom_errorbar(aes(ymin = lq, ymax = uq)) 109 | 110 | 111 | benchmarks2 <- benchmark(seq(2, 600, 10), times = 50) 112 | 113 | 114 | ggplot(benchmarks2, aes(x = n, color = expr)) + 115 | ggtitle("Microbenchmark on cumsum functions over small input") + 116 | geom_point(aes(y = median)) + 117 | geom_errorbar(aes(ymin = lq, ymax = uq)) 118 | 119 | 120 | benchmarks3 <- benchmark(seq(10, 800, 10), times = 50) 121 | ggplot(benchmarks3, aes(x = n, color = expr)) + 122 | ggtitle("Microbenchmark on cumsum functions with break even") + 123 | geom_point(aes(y = median)) + 124 | geom_errorbar(aes(ymin = lq, ymax = uq)) 125 | 126 | 127 | x <- rnorm(1000) 128 | tmp <- tempfile(fileext = ".out") 129 | Rprof(tmp) 130 | for (i in 1:1000) { 131 | my_cumsum1(x) 132 | } 133 | Rprof(NULL) 134 | summaryRprof(tmp) 135 | 136 | 137 | tmp <- tempfile(fileext = ".out") 138 | Rprof(tmp) 139 | for (i in 1:1000) { 140 | my_cumsum2(x) 141 | } 142 | Rprof(NULL) 143 | summaryRprof(tmp) 144 | 145 | 146 | my_cumsum1 <- function(x) { 147 | y <- numeric() 148 | sum_x <- 0 149 | for (xi in x) { 150 | sum_x <- sum_x + xi 151 | y <- c(y, sum_x) 152 | } 153 | y 154 | } 155 | 156 | x <- rnorm(1000) 157 | 158 | for (i in 1:1000) { 159 | my_cumsum1(x) 160 | } 161 | 162 | 163 | tmp <- tempfile(fileext = ".out") 164 | Rprof(tmp, line.profiling = TRUE) 165 | source("code/my_cumsum1.R", keep.source = TRUE) 166 | Rprof(NULL) 167 | summaryRprof(tmp, lines = "show") 168 | 169 | 170 | install.packages("profvis") 171 | 172 | 173 | library(profvis) 174 | profvis({ 175 | my_cumsum1 <- function(x) { 176 | y <- numeric() 177 | sum_x <- 0 178 | for (xi in x) { 179 | sum_x <- sum_x + xi 180 | y <- c(y, sum_x) 181 | } 182 | y 183 | } 184 | 185 | x <- rnorm(1000) 186 | 187 | for (i in 1:1000) { 188 | my_cumsum1(x) 189 | } 190 | }) 191 | 192 | 193 | 194 | 195 | profvis({ 196 | my_cumsum2 <- function(x) { 197 | y <- numeric(length(x)) 198 | y[[1]] <- x[[1]] 199 | for (i in 2:length(x)) { 200 | y[[i]] <- y[[i-1]] + x[[i]] 201 | } 202 | y 203 | } 204 | 205 | x <- rnorm(1000) 206 | 207 | for (i in 1:1000) { 208 | my_cumsum2(x) 209 | } 210 | }) 211 | 212 | 213 | 214 | 215 | n <- 10000 216 | microbenchmark(grow_by_index = { 217 | x <- list() 218 | for (i in 1:n) x[[i]] <- i 219 | }, preallocated = { 220 | x <- vector("list", n) 221 | for (i in 1:n) x[[i]] <- i 222 | }, times = 20) 223 | 224 | 225 | cumsum 226 | 227 | 228 | diff_for <- function(x) { 229 | n <- length(x) - 1 230 | res <- numeric(n) 231 | for (i in seq_len(n)) { 232 | res[[i]] <- x[[i + 1]] - x[[i]] 233 | } 234 | res 235 | } 236 | 237 | 238 | diff_for(c(2, 3, 1, 5)) 239 | 240 | 241 | x <- rnorm(1000) 242 | all.equal(diff_for(x), diff(x)) 243 | 244 | 245 | microbenchmark(diff_for(x), diff(x)) 246 | 247 | 248 | mat <- matrix(1:12, nrow = 3) 249 | mat 250 | 251 | 252 | my_transpose <- function(x) { 253 | stopifnot(is.matrix(x)) 254 | res <- matrix(vector(mode(x), length(x)), 255 | nrow = ncol(x), ncol = nrow(x), 256 | dimnames = dimnames(x)[c(2, 1)]) 257 | for (i in seq_len(ncol(x))) { 258 | for (j in seq_len(nrow(x))) { 259 | res[i, j] <- x[j, i] 260 | } 261 | } 262 | res 263 | } 264 | 265 | 266 | my_transpose(mat) 267 | 268 | 269 | all.equal(my_transpose(mat), t(mat)) 270 | 271 | 272 | microbenchmark(my_transpose(mat), t(mat)) 273 | 274 | 275 | mat <- matrix(rnorm(25000), nrow = 1000) 276 | all.equal(my_transpose(mat), t(mat)) 277 | microbenchmark(my_transpose(mat), t(mat)) 278 | 279 | 280 | microbenchmark(my_transpose(mat), t(mat), t.default(mat)) 281 | 282 | 283 | add <- function(x, y) { 284 | stopifnot(length(x) == length(y), 285 | is.numeric(x), is.numeric(y)) 286 | z <- numeric(length(x)) 287 | for (i in seq_along(x)) { 288 | z[[i]] <- x[[i]] + y[[i]] 289 | } 290 | z 291 | } 292 | 293 | 294 | x <- rnorm(10000) 295 | y <- rnorm(10000) 296 | all.equal(add(x, y), x + y) 297 | 298 | 299 | microbenchmark(add(x, y), x + y) 300 | 301 | 302 | algo1_for <- function(n) { 303 | res <- 0 304 | for (i in seq_len(n)) { 305 | res <- res + 1 / i ^ 2 306 | } 307 | res 308 | } 309 | 310 | 311 | algo1_vec <- function(n) { 312 | sum(1 / seq_len(n) ^ 2) 313 | } 314 | 315 | 316 | algo1_for(10) 317 | 318 | 319 | algo1_vec(10) 320 | 321 | 322 | microbenchmark(algo1_for(200), algo1_vec(200)) 323 | 324 | 325 | microbenchmark(algo1_for(1000), algo1_vec(1000)) 326 | 327 | 328 | library(compiler) 329 | diff_cmp <- cmpfun(diff_for) 330 | diff_cmp 331 | 332 | 333 | x <- rnorm(10000) 334 | microbenchmark(diff_for(x), diff_cmp(x), diff(x)) 335 | 336 | 337 | algo1_cmp <- cmpfun(algo1_for) 338 | algo1_cmp 339 | 340 | 341 | n <- 1000 342 | microbenchmark(algo1_for(n), algo1_cmp(n), algo1_vec(n)) 343 | 344 | 345 | algo1_vec_cmp <- cmpfun(algo1_vec) 346 | microbenchmark(algo1_vec(n), algo1_vec_cmp(n), times = 10000) 347 | 348 | 349 | set.seed(1) 350 | sim_data <- 100 * cumprod(1 + rnorm(500, 0, 0.006)) 351 | plot(sim_data, type = "s", ylim = c(85, 115), 352 | main = "A simulated random path") 353 | abline(h = 100, lty = 2, col = "blue") 354 | abline(h = 100 * (1 + 0.1 * c(1, -1)), lty = 3, col = "red") 355 | 356 | 357 | simulate <- function(i, p = 100, n = 10000, 358 | r = 0, sigma = 0.0005, margin = 0.1) { 359 | ps <- p * cumprod(1 + rnorm(n, r, sigma)) 360 | list(id = i, 361 | first = ps[[1]], 362 | high = max(ps), 363 | low = min(ps), 364 | last = ps[[n]], 365 | signal = any(ps > p * (1 + margin) | ps < p * (1 - margin))) 366 | } 367 | 368 | 369 | simulate(1) 370 | 371 | 372 | system.time(res <- lapply(1:10000, simulate)) 373 | 374 | 375 | library(data.table) 376 | res_table <- rbindlist(res) 377 | head(res_table) 378 | 379 | 380 | res_table[, sum(signal) / .N] 381 | 382 | 383 | library(parallel) 384 | cl <- makeCluster(detectCores()) 385 | 386 | 387 | system.time(res <- parLapply(cl, 1:10000, simulate)) 388 | 389 | 390 | stopCluster(cl) 391 | 392 | 393 | length(res) 394 | res_table <- rbindlist(res) 395 | res_table[, sum(signal) / .N] 396 | 397 | 398 | cl <- makeCluster(detectCores()) 399 | n <- 1 400 | parLapply(cl, 1:3, function(x) x + n) 401 | stopCluster(cl) 402 | 403 | 404 | n <- 100 405 | data <- data.frame(id = 1:n, x = rnorm(n), y = rnorm(n)) 406 | 407 | take_sample <- function(n) { 408 | data[sample(seq_len(nrow(data)), 409 | size = n, replace = FALSE), ] 410 | } 411 | 412 | 413 | cl <- makeCluster(detectCores()) 414 | 415 | 416 | clusterEvalQ(cl, Sys.getpid()) 417 | 418 | 419 | clusterEvalQ(cl, ls()) 420 | 421 | 422 | clusterExport(cl, c("data", "take_sample")) 423 | clusterEvalQ(cl, ls()) 424 | 425 | 426 | clusterEvalQ(cl, take_sample(2)) 427 | 428 | 429 | invisible(clusterCall(cl, function() { 430 | local_var <- 10 431 | global_var <<- 100 432 | })) 433 | clusterEvalQ(cl, ls()) 434 | 435 | 436 | clusterExport(cl, "simulate") 437 | invisible(clusterEvalQ(cl, { 438 | library(data.table) 439 | })) 440 | res <- parLapply(cl, 1:3, function(i) { 441 | res_table <- rbindlist(lapply(1:1000, simulate)) 442 | res_table[, id := NULL] 443 | summary(res_table) 444 | }) 445 | 446 | 447 | res 448 | 449 | 450 | stopCluster(cl) 451 | 452 | 453 | system.time(res <- mclapply(1:10000, simulate, 454 | mc.cores = detectCores())) 455 | 456 | 457 | mclapply(1:3, take_sample, mc.cores = detectCores()) 458 | 459 | 460 | job1 <- mcparallel(rnorm(10), "job1") 461 | 462 | 463 | mccollect(job1) 464 | 465 | 466 | jobs <- lapply(1:8, function(i) { 467 | mcparallel({ 468 | t <- rbinom(1, 5, 0.6) 469 | Sys.sleep(t) 470 | t 471 | }, paste0("job", i)) 472 | }) 473 | system.time(res <- mccollect(jobs)) 474 | 475 | 476 | install.packages("Rcpp") 477 | 478 | 479 | #include 480 | using namespace Rcpp; 481 | 482 | // [[Rcpp::export]] 483 | NumericVector timesTwo(NumericVector x) { 484 | return x * 2; 485 | } 486 | 487 | 488 | Rcpp::sourceCpp("code/rcpp-demo.cpp") 489 | 490 | 491 | timesTwo 492 | 493 | 494 | timesTwo(10) 495 | 496 | 497 | timesTwo(c(1, 2, 3)) 498 | 499 | 500 | #include 501 | using namespace Rcpp; 502 | 503 | // [[Rcpp::export]] 504 | double algo1_cpp(int n) { 505 | double res = 0; 506 | for (double i = 1; i < n; i++) { 507 | res += 1 / (i * i); 508 | } 509 | return res; 510 | } 511 | 512 | 513 | Rcpp::sourceCpp("code/rcpp-algo1.cpp") 514 | 515 | 516 | algo1_cpp(10) 517 | 518 | 519 | algo1_cpp(c(10, 15)) 520 | 521 | 522 | n <- 1000 523 | microbenchmark( 524 | algo1_for(n), 525 | algo1_cmp(n), 526 | algo1_vec(n), 527 | algo1_cpp(n)) 528 | 529 | 530 | #include 531 | using namespace Rcpp; 532 | 533 | // [[Rcpp::export]] 534 | NumericVector diff_cpp(NumericVector x) { 535 | NumericVector res(x.size() - 1); 536 | for (int i = 0; i < x.size() - 1; i++) { 537 | res[i] = x[i + 1] - x[i]; 538 | } 539 | return res; 540 | } 541 | 542 | 543 | Rcpp::sourceCpp("code/rcpp-diff.cpp") 544 | 545 | 546 | diff_cpp(c(1, 2, 3, 5)) 547 | 548 | 549 | x <- rnorm(1000) 550 | microbenchmark( 551 | diff_for(x), 552 | diff_cmp(x), 553 | diff(x), 554 | diff.default(x), 555 | diff_cpp(x)) 556 | 557 | 558 | // [[Rcpp::plugins(openmp)]] 559 | #include 560 | #include 561 | using namespace Rcpp; 562 | 563 | // [[Rcpp::export]] 564 | NumericVector diff_cpp_omp(NumericVector x) { 565 | omp_set_num_threads(3); 566 | NumericVector res(x.size() - 1); 567 | #pragma omp parallel for 568 | for (int i = 0; i < x.size() - 1; i++) { 569 | res[i] = x[i + 1] - x[i]; 570 | } 571 | return res; 572 | } 573 | 574 | 575 | Rcpp::sourceCpp("code/rcpp-diff-openmp.cpp") 576 | 577 | 578 | diff_cpp_omp(c(1, 2, 4, 8)) 579 | 580 | 581 | x <- rnorm(1000) 582 | microbenchmark( 583 | diff_for(x), 584 | diff_cmp(x), 585 | diff(x), 586 | diff.default(x), 587 | diff_cpp(x), 588 | diff_cpp_omp(x)) 589 | 590 | 591 | x <- rnorm(100000) 592 | microbenchmark( 593 | diff_for(x), 594 | diff_cmp(x), 595 | diff(x), 596 | diff.default(x), 597 | diff_cpp(x), 598 | diff_cpp_omp(x)) 599 | 600 | 601 | // [[Rcpp::plugins(cpp11)]] 602 | // [[Rcpp::depends(RcppParallel)]] 603 | #include 604 | #include 605 | 606 | using namespace Rcpp; 607 | using namespace RcppParallel; 608 | 609 | struct Transformer : public Worker { 610 | const RMatrix input; 611 | RMatrix output; 612 | Transformer(const NumericMatrix input, NumericMatrix output) 613 | : input(input), output(output) {} 614 | void operator()(std::size_t begin, std::size_t end) { 615 | std::transform(input.begin() + begin, input.begin() + end, 616 | output.begin() + begin, [](double x) { 617 | return 1 / (1 + x * x); 618 | }); 619 | } 620 | }; 621 | 622 | // [[Rcpp::export]] 623 | NumericMatrix par_transform (NumericMatrix x) { 624 | NumericMatrix output(x.nrow(), x.ncol()); 625 | Transformer transformer(x, output); 626 | parallelFor(0, x.length(), transformer); 627 | return output; 628 | } 629 | 630 | 631 | Rcpp::sourceCpp("code/rcpp-parallel.cpp") 632 | 633 | 634 | mat <- matrix(1:12, nrow = 3) 635 | mat 636 | 637 | 638 | par_transform(mat) 639 | 640 | 641 | all.equal(par_transform(mat), 1 / (1 + mat ^ 2)) 642 | 643 | 644 | mat <- matrix(rnorm(1000 * 2000), nrow = 1000) 645 | 646 | 647 | microbenchmark(1 / (1 + mat ^ 2), par_transform(mat)) 648 | 649 | 650 | -------------------------------------------------------------------------------- /Chapter 13/my_cumsum1.R: -------------------------------------------------------------------------------- 1 | my_cumsum1 <- function(x) { 2 | y <- numeric() 3 | sum_x <- 0 4 | for (xi in x) { 5 | sum_x <- sum_x + xi 6 | y <- c(y, sum_x) 7 | } 8 | y 9 | } 10 | 11 | x <- rnorm(1000) 12 | 13 | for (i in 1:1000) { 14 | my_cumsum1(x) 15 | } 16 | -------------------------------------------------------------------------------- /Chapter 13/rcpp-algo1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | double algo1_cpp(int n) { 6 | double res = 0; 7 | for (double i = 1; i < n; i++) { 8 | res += 1 / (i * i); 9 | } 10 | return res; 11 | } 12 | -------------------------------------------------------------------------------- /Chapter 13/rcpp-demo.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | NumericVector timesTwo(NumericVector x) { 6 | return x * 2; 7 | } 8 | -------------------------------------------------------------------------------- /Chapter 13/rcpp-diff-openmp.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::plugins(openmp)]] 2 | #include 3 | #include 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector diff_cpp_omp(NumericVector x) { 8 | omp_set_num_threads(3); 9 | NumericVector res(x.size() - 1); 10 | #pragma omp parallel for 11 | for (int i = 0; i < x.size() - 1; i++) { 12 | res[i] = x[i + 1] - x[i]; 13 | } 14 | return res; 15 | } 16 | -------------------------------------------------------------------------------- /Chapter 13/rcpp-diff.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | NumericVector diff_cpp(NumericVector x) { 6 | NumericVector res(x.size() - 1); 7 | for (int i = 0; i < x.size() - 1; i++) { 8 | res[i] = x[i + 1] - x[i]; 9 | } 10 | return res; 11 | } 12 | -------------------------------------------------------------------------------- /Chapter 13/rcpp-parallel.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::plugins(cpp11)]] 2 | // [[Rcpp::depends(RcppParallel)]] 3 | #include 4 | #include 5 | 6 | using namespace Rcpp; 7 | using namespace RcppParallel; 8 | 9 | struct Transformer : public Worker { 10 | const RMatrix input; 11 | RMatrix output; 12 | Transformer(const NumericMatrix input, NumericMatrix output) 13 | : input(input), output(output) {} 14 | void operator()(std::size_t begin, std::size_t end) { 15 | std::transform(input.begin() + begin, input.begin() + end, 16 | output.begin() + begin, [](double x) { 17 | return 1 / (1 + x * x); 18 | }); 19 | } 20 | }; 21 | 22 | // [[Rcpp::export]] 23 | NumericMatrix par_transform (NumericMatrix x) { 24 | NumericMatrix output(x.nrow(), x.ncol()); 25 | Transformer transformer(x, output); 26 | parallelFor(0, x.length(), transformer); 27 | return output; 28 | } 29 | -------------------------------------------------------------------------------- /Chapter 14/data/new-products.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | New Products 5 | 49 | 50 | 51 |

New Products

52 |

The following is a list of products

53 |
54 |
    55 |
  • 56 | Product-A 57 | $199.95 58 |
    59 |

    Description for Product-A

    60 |
      61 |
    • Quality Good
    • 62 |
    • Duration 5years
    • 63 |
    64 |
    65 |
  • 66 |
  • 67 | Product-B 68 | $129.95 69 |
    70 |

    Description for Product-B

    71 |
      72 |
    • Quality Medium
    • 73 |
    • Duration 2years
    • 74 |
    75 |
    76 |
  • 77 |
  • 78 | Product-C 79 | $99.95 80 |
    81 |

    Description for Product-C

    82 |
      83 |
    • Quality Good
    • 84 |
    • Duration 4years
    • 85 |
    86 |
    87 |
  • 88 |
89 |
90 |

All products are available for sale!

91 | 92 | 93 | -------------------------------------------------------------------------------- /Chapter 14/data/products.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Products 5 | 22 | 23 | 24 |

Products

25 |

The following lists some products

26 |
27 |
    28 |
  • 29 | Product-A 30 | $199.95 31 |
  • 32 |
  • 33 | Product-B 34 | $129.95 35 |
  • 36 |
  • 37 | Product-C 38 | $99.95 39 |
  • 40 |
41 |
42 | 43 | 44 | -------------------------------------------------------------------------------- /Chapter 14/data/simple-page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Simple page 5 | 6 | 7 |

Heading 1

8 |

This is a paragraph.

9 | 10 | 11 | -------------------------------------------------------------------------------- /Chapter 14/data/simple-products.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Products 5 | 6 | 7 |

Products

8 |

The following lists some products

9 |
10 |
    11 |
  • 12 | Product-A 13 | $199.95 14 |
  • 15 |
  • 16 | Product-B 17 | $129.95 18 |
  • 19 |
  • 20 | Product-C 21 | $99.95 22 |
  • 23 |
24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /Chapter 14/data/single-table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Single table 5 | 6 | 7 |

The following is a table

8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
NameAge
Jenny18
James19
26 | 27 | 28 | -------------------------------------------------------------------------------- /Chapter 14/web-scrapping.R: -------------------------------------------------------------------------------- 1 | install.packages("rvest") 2 | 3 | 4 | library(rvest) 5 | single_table_page <- read_html("data/single-table.html") 6 | single_table_page 7 | 8 | 9 | html_table(single_table_page) 10 | 11 | 12 | html_table(html_node(single_table_page, "table")) 13 | 14 | 15 | single_table_page %>% 16 | html_node("table") %>% 17 | html_table() 18 | 19 | 20 | products_page <- read_html("data/products.html") 21 | products_page %>% 22 | html_nodes(".product-list li .name") 23 | 24 | 25 | products_page %>% 26 | html_nodes(".product-list li .name") %>% 27 | html_text() 28 | 29 | 30 | products_page %>% 31 | html_nodes(".product-list li .price") %>% 32 | html_text() 33 | 34 | 35 | product_items <- products_page %>% 36 | html_nodes(".product-list li") 37 | products <- data.frame( 38 | name = product_items %>% 39 | html_nodes(".name") %>% 40 | html_text(), 41 | price = product_items %>% 42 | html_nodes(".price") %>% 43 | html_text() %>% 44 | gsub("$", "", ., fixed = TRUE) %>% 45 | as.numeric(), 46 | stringsAsFactors = FALSE 47 | ) 48 | products 49 | 50 | 51 | 52 | 53 | page <- read_html("data/new-products.html") 54 | 55 | 56 | page %>% html_nodes(xpath = "//p") 57 | 58 | 59 | page %>% html_nodes(xpath = "//li[@class]") 60 | 61 | 62 | page %>% html_nodes(xpath = "//div[@id='list']/ul/li") 63 | 64 | 65 | page %>% html_nodes(xpath = "//div[@id='list']//li/span[@class='name']") 66 | 67 | 68 | page %>% 69 | html_nodes(xpath = "//li[@class='selected']/span[@class='name']") 70 | 71 | 72 | page %>% html_nodes(xpath = "//div[p]") 73 | 74 | 75 | page %>% 76 | html_nodes(xpath = "//span[@class='info-value' and text()='Good']") 77 | 78 | 79 | page %>% 80 | html_nodes(xpath = "//li[div/ul/li[1]/span[@class='info-value' and text()='Good']]/span[@class='name']") 81 | 82 | 83 | page %>% 84 | html_nodes(xpath = "//li[div/ul/li[2]/span[@class='info-value' and text()>3]]/span[@class='name']") 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | page <- read_html("https://cran.rstudio.com/web/packages/available_packages_by_name.html") 94 | pkg_table <- page %>% 95 | html_node("table") %>% 96 | html_table(fill = TRUE) 97 | head(pkg_table, 5) 98 | 99 | 100 | pkg_table <- pkg_table[complete.cases(pkg_table), ] 101 | colnames(pkg_table) <- c("name", "title") 102 | head(pkg_table, 3) 103 | 104 | 105 | 106 | 107 | page <- read_html("https://finance.yahoo.com/quote/MSFT") 108 | page %>% 109 | html_node("div#quote-header-info > section > span") %>% 110 | html_text() %>% 111 | as.numeric() 112 | 113 | 114 | 115 | 116 | 117 | 118 | page %>% 119 | html_node("#key-statistics table") %>% 120 | html_table() 121 | 122 | 123 | get_price <- function(symbol) { 124 | page <- read_html(sprintf("https://finance.yahoo.com/quote/%s", symbol)) 125 | list(symbol = symbol, 126 | company = page %>% 127 | html_node("div#quote-header-info > div:nth-child(1) > h6") %>% 128 | html_text(), 129 | price = page %>% 130 | html_node("div#quote-header-info > section > span:nth-child(1)") %>% 131 | html_text() %>% 132 | as.numeric()) 133 | } 134 | 135 | 136 | get_price("AAPL") 137 | 138 | 139 | 140 | 141 | page <- read_html("https://stackoverflow.com/questions/tagged/r?sort=votes&pageSize=5") 142 | questions <- page %>% 143 | html_node("#questions") 144 | 145 | 146 | 147 | 148 | questions %>% 149 | html_nodes(".summary h3") %>% 150 | html_text() 151 | 152 | 153 | questions %>% 154 | html_nodes(".question-hyperlink") %>% 155 | html_text() 156 | 157 | 158 | 159 | 160 | questions %>% 161 | html_nodes(".question-summary .vote-count-post") %>% 162 | html_text() %>% 163 | as.integer() 164 | 165 | 166 | questions %>% 167 | html_nodes(".question-summary .status strong") %>% 168 | html_text() %>% 169 | as.integer() 170 | 171 | 172 | questions %>% 173 | html_nodes(".question-summary .tags") %>% 174 | lapply(function(node) { 175 | node %>% 176 | html_nodes(".post-tag") %>% 177 | html_text() 178 | }) %>% 179 | str 180 | 181 | 182 | 183 | 184 | questions %>% 185 | html_nodes(".question-hyperlink") %>% 186 | html_attr("href") %>% 187 | lapply(function(link) { 188 | paste0("https://stackoverflow.com", link) %>% 189 | read_html() %>% 190 | html_node("#qinfo") %>% 191 | html_table() %>% 192 | setNames(c("item", "value")) 193 | }) 194 | -------------------------------------------------------------------------------- /Chapter 15/boosting-productivity.R: -------------------------------------------------------------------------------- 1 | n <- 100 2 | x <- rnorm(n) 3 | y <- 2 * x + rnorm(n) 4 | m <- lm(y ~ x) 5 | coef(m) 6 | 7 | toys <- data.frame( 8 | id = 1:3, 9 | name = c("Car", "Plane", "Motocycle"), 10 | price = c(15, 25, 14), 11 | share = c(0.3, 0.1, 0.2), 12 | stringsAsFactors = FALSE 13 | ) 14 | 15 | 16 | toys 17 | 18 | 19 | knitr::kable(toys) 20 | 21 | 22 | xtable::xtable(lm(mpg ~ cyl + vs, data = mtcars)) 23 | 24 | 25 | 26 | 27 | library(formattable) 28 | formattable(toys, 29 | list(price = color_bar("lightpink"), share = percent)) 30 | 31 | 32 | 33 | 34 | library(DT) 35 | datatable(mtcars) 36 | 37 | 38 | 39 | 40 | set.seed(123) 41 | x <- rnorm(1000) 42 | y <- 2 * x + rnorm(1000) 43 | m <- lm(y ~ x) 44 | plot(x, y, main = "Linear regression", col = "darkgray") 45 | abline(coef(m)) 46 | 47 | 48 | library(DiagrammeR) 49 | grViz(" 50 | digraph rmarkdown { 51 | A -> B; 52 | B -> C; 53 | C -> A; 54 | }") 55 | 56 | 57 | 58 | 59 | library(ggvis) 60 | mtcars %>% 61 | ggvis(~mpg, ~disp, opacity := 0.6) %>% 62 | layer_points(size := input_slider(1, 100, value = 50, label = "size")) %>% 63 | layer_smooths(span = input_slider(0.5, 1, value = 1, label = "span")) 64 | 65 | 66 | 67 | 68 | library(dygraphs) 69 | library(xts) 70 | library(dplyr) 71 | library(reshape2) 72 | data(weather, package = "nycflights13") 73 | temp <- weather %>% 74 | group_by(origin, year, month, day) %>% 75 | summarize(temp = mean(temp)) %>% 76 | ungroup() %>% 77 | mutate(date = as.Date(sprintf("%d-%02d-%02d", year, month, day))) %>% 78 | select(origin, date, temp) %>% 79 | dcast(date ~ origin, value.var = "temp") 80 | 81 | temp_xts <- as.xts(temp[-1], order.by = temp[[1]]) 82 | head(temp_xts) 83 | 84 | 85 | dygraph(temp_xts, main = "Airport Temperature") %>% 86 | dyRangeSelector() %>% 87 | dyHighlight(highlightCircleSize = 3, 88 | highlightSeriesBackgroundAlpha = 0.3, 89 | hideOnMouseOut = FALSE) 90 | 91 | 92 | 93 | 94 | library(shiny) 95 | 96 | ui <- bootstrapPage( 97 | numericInput("n", label = "Sample size", value = 10, min = 10, max = 100), 98 | textOutput("mean") 99 | ) 100 | 101 | server <- function(input, output) { 102 | output$mean <- renderText(mean(rnorm(input$n))) 103 | } 104 | 105 | app <- shinyApp(ui, server) 106 | runApp(app) 107 | 108 | 109 | 110 | 111 | shiny_vars <- ls(getNamespace("shiny")) 112 | shiny_vars[grep("Input$", shiny_vars)] 113 | 114 | 115 | shiny_vars[grep("Output$", shiny_vars)] 116 | 117 | 118 | library(shiny) 119 | ui <- fluidPage( 120 | titlePanel("Random walk"), 121 | sidebarLayout( 122 | sidebarPanel( 123 | numericInput("seed", "Random seed", 123), 124 | sliderInput("paths", "Paths", 1, 100, 1), 125 | sliderInput("start", "Starting value", 1, 10, 1, 1), 126 | sliderInput("r", "Expected return", -0.1, 0.1, 0, 0.001), 127 | sliderInput("sigma", "Sigma", 0.001, 1, 0.01, 0.001), 128 | sliderInput("periods", "Periods", 10, 1000, 200, 10)), 129 | mainPanel( 130 | plotOutput("plot", width = "100%", height = "600px") 131 | )) 132 | ) 133 | 134 | 135 | shiny_vars[grep("^render", shiny_vars)] 136 | 137 | 138 | server <- function(input, output) { 139 | output$plot <- renderPlot({ 140 | set.seed(input$seed) 141 | mat <- sapply(seq_len(input$paths), function(i) { 142 | sde::GBM(input$start, input$r, input$sigma, 1, input$periods) 143 | }) 144 | matplot(mat, type = "l", lty = 1, 145 | main = "Geometric Brownian motions") 146 | }) 147 | } 148 | 149 | 150 | app <- shinyApp(ui, server) 151 | runApp(app) 152 | 153 | 154 | 155 | 156 | 157 | 158 | install_packages(c("shinydashboard", "cranlogs")) 159 | 160 | 161 | library(cranlogs) 162 | cran_top_downloads() 163 | cran_top_downloads("last-week") 164 | 165 | 166 | library(shiny) 167 | library(shinydashboard) 168 | library(formattable) 169 | library(cranlogs) 170 | 171 | ui <- dashboardPage( 172 | dashboardHeader(title = "CRAN Downloads"), 173 | dashboardSidebar(sidebarMenu( 174 | menuItem("Last week", tabName = "last_week", icon = icon("list")), 175 | menuItem("Last month", tabName = "last_month", icon = icon("list")) 176 | )), 177 | dashboardBody(tabItems( 178 | tabItem(tabName = "last_week", 179 | fluidRow(tabBox(title = "Total downloads", 180 | tabPanel("Total", formattableOutput("last_week_table"))), 181 | tabBox(title = "Top downloads", 182 | tabPanel("Top", formattableOutput("last_week_top_table"))))), 183 | tabItem(tabName = "last_month", 184 | fluidRow(tabBox(title = "Total downloads", 185 | tabPanel("Total", plotOutput("last_month_barplot"))), 186 | tabBox(title = "Top downloads", 187 | tabPanel("Top", formattableOutput("last_month_top_table"))))) 188 | )) 189 | ) 190 | 191 | 192 | server <- function(input, output) { 193 | output$last_week_table <- renderFormattable({ 194 | data <- cran_downloads(when = "last-week") 195 | formattable(data, list(count = color_bar("lightblue"))) 196 | }) 197 | 198 | output$last_week_top_table <- renderFormattable({ 199 | data <- cran_top_downloads("last-week") 200 | formattable(data, list(count = color_bar("lightblue"), 201 | package = formatter("span", style = "font-family: monospace;"))) 202 | }) 203 | 204 | output$last_month_barplot <- renderPlot({ 205 | data <- subset(cran_downloads(when = "last-month"), count > 0) 206 | with(data, barplot(count, names.arg = date), 207 | main = "Last month downloads") 208 | }) 209 | 210 | output$last_month_top_table <- renderFormattable({ 211 | data <- cran_top_downloads("last-month") 212 | formattable(data, list(count = color_bar("lightblue"), 213 | package = formatter("span", style = "font-family: monospace;"))) 214 | }) 215 | } 216 | 217 | 218 | runApp(shinyApp(ui, server)) 219 | -------------------------------------------------------------------------------- /Chapter 2/basic-objects.R: -------------------------------------------------------------------------------- 1 | 1.5 2 | 3 | 4 | x <- 1.5 5 | 6 | 7 | x 8 | 9 | 10 | numeric(10) 11 | 12 | 13 | c(1, 2, 3, 4, 5) 14 | 15 | 16 | c(1, 2, c(3, 4, 5)) 17 | 18 | 19 | 1:5 20 | 21 | 22 | seq(1, 10, 2) 23 | 24 | 25 | seq(3, length.out = 10) 26 | 27 | 28 | 1 + 1:5 29 | 30 | 31 | TRUE 32 | 33 | 34 | 1 > 2 35 | 36 | 37 | c(1, 2) > 2 38 | 39 | 40 | c(1, 2) > c(2, 1) 41 | 42 | 43 | c(2, 3) > c(1, 2, -1, 3) 44 | 45 | 46 | 1 %in% c(1, 2, 3) 47 | c(1, 4) %in% c(1, 2, 3) 48 | 49 | 50 | "hello, world!" 51 | 52 | 53 | 'hello, world!' 54 | 55 | 56 | c("Hello", "World") 57 | 58 | 59 | c("Hello", "World") == c('Hello', 'World') 60 | 61 | 62 | c("Hello", "World") == "Hello, World" 63 | 64 | 65 | cat("Is \"You\" a Chinese name?") 66 | 67 | 68 | cat('Is "You" a Chinese name?') 69 | 70 | 71 | v1 <- c(1, 2, 3, 4) 72 | 73 | 74 | v1[2] 75 | 76 | 77 | v1[2:4] 78 | 79 | 80 | v1[-3] 81 | 82 | 83 | a <- c(1, 3) 84 | v1[a] 85 | 86 | 87 | v1[c(1, 2, -3)] 88 | 89 | 90 | v1[3:6] 91 | 92 | 93 | v1[c(TRUE, FALSE, TRUE, FALSE)] 94 | 95 | 96 | v1[2] <- 0 97 | 98 | 99 | v1 100 | 101 | 102 | v1[2:4] <- c(0, 1, 3) 103 | 104 | 105 | v1 106 | 107 | 108 | v1[c(TRUE, FALSE, TRUE, FALSE)] <- c(3, 2) 109 | 110 | 111 | v1 112 | 113 | 114 | v1[v1 <= 2] 115 | 116 | 117 | v1[v1 ^ 2 - v1 + 1 >= 0] 118 | 119 | 120 | v1[v1 <= 2] <- 0 121 | 122 | 123 | v1 124 | 125 | 126 | v1[10] <- 8 127 | v1 128 | 129 | 130 | x <- c(a = 1, b = 2, c = 3) 131 | x 132 | 133 | 134 | x["a"] 135 | 136 | 137 | x[c("a", "c")] 138 | 139 | 140 | x[c("a", "a", "c")] 141 | 142 | 143 | names(x) 144 | 145 | 146 | names(x) <- c("x", "y", "z") 147 | x["z"] 148 | 149 | 150 | names(x) <- NULL 151 | x 152 | 153 | 154 | x <- c(a = 1, b = 2, c = 3) 155 | x["d"] 156 | 157 | 158 | names(x["d"]) 159 | 160 | 161 | x[c("a", "d")] 162 | 163 | 164 | x <- c(a = 1, b = 2, c = 3) 165 | x["a"] 166 | x[["a"]] 167 | 168 | 169 | x[[c(1, 2)]] 170 | 171 | 172 | x[[-1]] 173 | 174 | 175 | x[["d"]] 176 | 177 | 178 | class(c(1, 2, 3)) 179 | class(c(TRUE, TRUE, FALSE)) 180 | class(c("Hello", "World")) 181 | 182 | 183 | is.numeric(c(1, 2, 3)) 184 | is.numeric(c(TRUE, TRUE, FALSE)) 185 | is.numeric(c("Hello", "World")) 186 | 187 | 188 | strings <- c("1", "2", "3") 189 | class(strings) 190 | 191 | 192 | strings + 10 193 | 194 | 195 | numbers <- as.numeric(strings) 196 | numbers 197 | class(numbers) 198 | 199 | 200 | numbers + 10 201 | 202 | 203 | as.numeric(c("1", "2", "3", "a")) 204 | as.logical(c(-1, 0, 1, 2)) 205 | as.character(c(1, 2, 3)) 206 | as.character(c(TRUE, FALSE)) 207 | 208 | 209 | as.character(c(1, 2)) + c(2, 3) 210 | 211 | 212 | c(1, 2, 3, 4) + 2 213 | c(1, 2, 3) - c(2, 3, 4) 214 | c(1, 2, 3) * c(2, 3, 4) 215 | c(1, 2, 3) / c(2, 3, 4) 216 | c(1, 2, 3) ^ 2 217 | c(1, 2, 3) ^ c(2, 3, 4) 218 | c(1, 2, 3, 14) %% 2 219 | 220 | 221 | c(a = 1, b = 2, c = 3) + c(b = 2, c = 3, d = 4) 222 | c(a = 1, b = 2, 3) + c(b = 2, c = 3, d = 4) 223 | 224 | 225 | matrix(c(1, 2, 3, 2, 3, 4, 3, 4, 5), ncol = 3) 226 | 227 | 228 | matrix(c(1, 2, 3, 229 | 4, 5, 6, 230 | 7, 8, 9), nrow = 3, byrow = FALSE) 231 | 232 | matrix(c(1, 2, 3, 233 | 4, 5, 6, 234 | 7, 8, 9), nrow = 3, byrow = TRUE) 235 | 236 | 237 | diag(1, nrow = 5) 238 | 239 | 240 | matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3, byrow = TRUE, 241 | dimnames = list(c("r1", "r2", "r3"), c("c1", "c2", "c3"))) 242 | 243 | 244 | m1 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3) 245 | rownames(m1) <- c("r1", "r2", "r3") 246 | colnames(m1) <- c("c1", "c2", "c3") 247 | 248 | 249 | m1 250 | 251 | 252 | m1[1, 2] 253 | 254 | 255 | m1[1:2, 2:3] 256 | 257 | 258 | m1[1,] 259 | m1[,2] 260 | m1[1:2,] 261 | m1[, 2:3] 262 | 263 | 264 | m1[-1,] 265 | m1[,-2] 266 | 267 | 268 | m1[c("r1", "r3"), c("c1", "c3")] 269 | 270 | 271 | m1[1] 272 | m1[9] 273 | m1[3:7] 274 | 275 | 276 | m1 > 3 277 | 278 | 279 | m1[m1 > 3] 280 | 281 | 282 | m1 + m1 283 | m1 - 2*m1 284 | m1 * m1 285 | m1 / m1 286 | m1 ^ 2 287 | m1 %*% m1 288 | 289 | 290 | t(m1) 291 | 292 | 293 | a1 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), dim = c(1, 5, 2)) 294 | a1 295 | 296 | 297 | a1 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), dim = c(1, 5, 2), 298 | dimnames = list(c("r1"), c("c1", "c2", "c3", "c4", "c5"), c("k1", "k2"))) 299 | a1 300 | 301 | 302 | a0 <- array(c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10), dim = c(1, 5, 2)) 303 | dimnames(a0) <- list(c("r1"), c("c1", "c2", "c3", "c4", "c5"), c("k1", "k2")) 304 | a0 305 | 306 | 307 | a1[1,,] 308 | a1[, 2,] 309 | a1[,,1] 310 | a1[1, 1, 1] 311 | a1[1, 2:4, 1:2] 312 | a1[c("r1"), c("c1", "c3"), "k1"] 313 | 314 | 315 | l0 <- list(1, c(TRUE, FALSE), c("a", "b", "c")) 316 | l0 317 | 318 | 319 | l1 <- list(x = 1, y = c(TRUE, FALSE), z = c("a", "b", "c")) 320 | l1 321 | 322 | 323 | l1$x 324 | l1$y 325 | l1$z 326 | l1$m 327 | 328 | 329 | l1[[2]] 330 | 331 | 332 | l1[["y"]] 333 | 334 | 335 | member <- "z" # you can dynamically determine which member to extract 336 | l1[[member]] 337 | 338 | 339 | l1["x"] 340 | l1[c("x", "y")] 341 | l1[1] 342 | l1[c(1, 2)] 343 | l1[c(TRUE, FALSE, TRUE)] 344 | 345 | 346 | names(l1) <- c("A","B","C") 347 | l1 348 | 349 | 350 | names(l1) <- NULL 351 | l1 352 | 353 | 354 | l1 <- list(x = 1, y = c(TRUE, FALSE), z = c("a", "b", "c")) 355 | l1$x <- 0 356 | 357 | 358 | l1$m <- 4 359 | l1 360 | 361 | 362 | l1[c("y", "z")] <- list(y = "new value for y", z = c(1, 2)) 363 | l1 364 | 365 | 366 | l1$x <- NULL 367 | l1 368 | 369 | 370 | l1[c("z", "m")] <- NULL 371 | l1 372 | 373 | 374 | l2 <- list(a = c(1, 2, 3), b = c("x", "y", "z", "w")) 375 | is.list(l2) 376 | is.list(l2$a) 377 | 378 | 379 | l3 <- as.list(c(a = 1, b = 2, c = 3)) 380 | l3 381 | 382 | 383 | l4 <- list(a = 1, b = 2, c = 3) 384 | unlist(l4) 385 | 386 | 387 | l4 <- list(a = 1, b = 2, c = "hello") 388 | unlist(l4) 389 | 390 | 391 | persons <- data.frame(Name = c("Ken", "Ashley", "Jennifer"), 392 | Gender = c("Male", "Female", "Female"), 393 | Age = c(24, 25, 23), 394 | Major = c("Finance", "Statistics", "Computer Science")) 395 | persons 396 | 397 | 398 | l1 <- list(x = c(1, 2, 3), y = c("a", "b", "c")) 399 | data.frame(l1) 400 | as.data.frame(l1) 401 | 402 | 403 | m1 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), nrow = 3, byrow = FALSE) 404 | data.frame(m1) 405 | as.data.frame(m1) 406 | 407 | 408 | df1 <- data.frame(id = 1:5, x = c(0, 2, 1, -1, -3), y = c(0.5, 0.2, 0.1, 0.5, 0.9)) 409 | df1 410 | 411 | 412 | colnames(df1) <- c("id", "level", "score") 413 | rownames(df1) <- letters[1:5] 414 | df1 415 | 416 | 417 | df1$id 418 | df1[[1]] 419 | 420 | 421 | df1[1] 422 | df1[1:2] 423 | df1["level"] 424 | df1[c("id", "score")] 425 | df1[c(TRUE, FALSE, TRUE)] 426 | 427 | 428 | df1[, "level"] 429 | df1[, c("id", "level")] 430 | df1[, 1:2] 431 | 432 | 433 | df1[1:4,] 434 | df1[c("c", "e"),] 435 | 436 | 437 | df1[1:4, "id"] 438 | df1[1:3, c("id", "score")] 439 | 440 | 441 | df1[1:4,]["id"] 442 | 443 | 444 | df1[1:4, "id", drop = FALSE] 445 | 446 | 447 | df1$score >= 0.5 448 | df1[df1$score >= 0.5, c("id", "level")] 449 | 450 | 451 | rownames(df1) %in% c("a", "d", "e") 452 | df1[rownames(df1) %in% c("a", "d", "e"), c("id", "score")] 453 | 454 | 455 | df1$score <- c(0.6, 0.3, 0.2, 0.4, 0.8) 456 | df1 457 | 458 | 459 | df1["score"] <- c(0.8, 0.5, 0.2, 0.4, 0.8) 460 | df1 461 | df1[["score"]] <- c(0.4, 0.5, 0.2, 0.8, 0.4) 462 | df1 463 | df1[c("level", "score")] <- list(level = c(1, 2, 1, 0, 0), score = c(0.1, 0.2, 0.3, 0.4, 0.5)) 464 | df1 465 | 466 | 467 | df1[1:3, "level"] <- c(-1, 0, 1) 468 | df1 469 | df1[1:2, c("level", "score")] <- list(level = c(0, 0), score = c(0.9, 1.0)) 470 | df1 471 | 472 | 473 | str(persons) 474 | 475 | 476 | persons[1, "Name"] <- "John" 477 | persons 478 | 479 | 480 | persons <- data.frame(Name = c("Ken", "Ashley", "Jennifer"), 481 | Gender = factor(c("Male", "Female", "Female")), 482 | Age = c(24, 25, 23), 483 | Major = c("Finance", "Statistics", "Computer Science"), 484 | stringsAsFactors = FALSE) 485 | str(persons) 486 | 487 | 488 | summary(persons) 489 | 490 | 491 | rbind(persons, data.frame(Name = "John", Gender = "Male", Age = 25, Major = "Statistics")) 492 | 493 | 494 | cbind(persons, Registered = c(TRUE, TRUE, FALSE), Projects = c(3, 2, 3)) 495 | 496 | 497 | expand.grid(type = c("A", "B"), class = c("M", "L", "XL")) 498 | 499 | 500 | read.csv("data/persons.csv") 501 | 502 | 503 | write.csv(persons, "data/persons.csv", row.names = FALSE, quote = FALSE) 504 | 505 | 506 | add <- function(x, y) { 507 | x + y 508 | } 509 | 510 | 511 | add(2, 3) 512 | 513 | 514 | add(c(2, 3), 4) 515 | 516 | 517 | add(as.Date("2014-06-01"), 1) 518 | 519 | 520 | add(list(a = 1), list(a = 2)) 521 | 522 | 523 | calc <- function(x, y, type) { 524 | if (type == "add") { 525 | x + y 526 | } else if (type == "minus") { 527 | x - y 528 | } else if (type == "multiply") { 529 | x * y 530 | } else if (type == "divide") { 531 | x / y 532 | } else { 533 | stop("Unknown type of operation") 534 | } 535 | } 536 | 537 | 538 | calc(2, 3, "minus") 539 | 540 | 541 | calc(c(2, 5), c(3, 6), "divide") 542 | 543 | 544 | calc(as.Date("2014-06-01"), 3, "add") 545 | 546 | 547 | calc(1, 2, "what") 548 | 549 | 550 | calc(1, 2, c("add", "minue")) 551 | 552 | 553 | calc <- function(x, y, type) { 554 | if (length(type) > 1L) stop("Only a single type is accepted") 555 | if (type == "add") { 556 | x + y 557 | } else if (type == "minus") { 558 | x - y 559 | } else if (type == "multiply") { 560 | x*y 561 | } else if (type == "divide") { 562 | x / y 563 | } else { 564 | stop("Unknown type of operation") 565 | } 566 | } 567 | 568 | 569 | calc(1, 2, c("add", "minue")) 570 | 571 | 572 | increase <- function(x, y = 1) { 573 | x + y 574 | } 575 | 576 | 577 | increase(1) 578 | increase(c(1, 2, 3)) 579 | 580 | 581 | -------------------------------------------------------------------------------- /Chapter 3/managing-your-workspace.R: -------------------------------------------------------------------------------- 1 | 2 | getwd() 3 | 4 | 5 | "Hello\nWorld" 6 | 7 | 8 | cat("Hello\nWorld") 9 | 10 | 11 | cat("The string with '\\' is translated") 12 | 13 | 14 | filename <- "d:\data\test.csv" 15 | ## Error: '\d' is an unrecognized escape in character string starting ""d:\d" 16 | 17 | 18 | filename <- "d:\\data\\test.csv" 19 | 20 | 21 | absolute_filename <- "d:/data/test.csv" 22 | relative_filename <- "data/test.csv" 23 | 24 | 25 | objects() 26 | 27 | 28 | e <- new.env() 29 | objects(pos = e) 30 | 31 | 32 | x <- c(1, 2, 3) 33 | y <- c("a", "b", "c") 34 | z <- list(m = 1:5, n = c("x", "y", "z")) 35 | 36 | 37 | objects() 38 | 39 | 40 | with(e, { 41 | x <- c(1, 2, 3) 42 | y <- c("a", "b", "c") 43 | z <- list(m = 1:5, n = c("x", "y", "z")) 44 | objects() 45 | }) 46 | 47 | 48 | ls() 49 | 50 | 51 | with(e, ls()) 52 | 53 | 54 | x 55 | str(x) 56 | 57 | 58 | str(1:30) 59 | 60 | 61 | z 62 | 63 | 64 | str(z) 65 | 66 | 67 | nested_list <- list(m = 1:15, n = list("a", c(1, 2, 3)), 68 | p = list(x = 1:10, y = c("a", "b")), 69 | q = list(x = 0:9, y = c("c", "d"))) 70 | 71 | 72 | with(e, nested_list <- nested_list) 73 | 74 | 75 | nested_list 76 | 77 | 78 | str(nested_list) 79 | 80 | 81 | ls.str() 82 | 83 | 84 | ls.str(pos = e) 85 | 86 | 87 | ls.str(mode = "list") 88 | 89 | 90 | ls.str(e, mode = "list") 91 | 92 | 93 | ls.str(pattern = "^\\w$") 94 | 95 | 96 | ls.str(e, pattern = "^\\w$") 97 | 98 | 99 | ls.str(pattern = "^\\w$", mode = "list") 100 | 101 | 102 | ls.str(e, pattern = "^\\w$", mode = "list") 103 | 104 | 105 | ls() 106 | 107 | 108 | with(e, ls()) 109 | 110 | 111 | rm(x) 112 | ls() 113 | 114 | 115 | with(e, { 116 | rm(x) 117 | ls() 118 | }) 119 | 120 | 121 | rm(y, z) 122 | ls() 123 | 124 | 125 | with(e, { 126 | rm(y, z) 127 | ls() 128 | }) 129 | 130 | 131 | rm(x) 132 | 133 | 134 | with(e, rm(x)) 135 | 136 | 137 | p <- 1:10 138 | q <- seq(1, 20, 5) 139 | v <- c("p", "q") 140 | rm(list = v) 141 | 142 | 143 | with(e, { 144 | p <- 1:10 145 | q <- seq(1, 20, 5) 146 | v <- c("p", "q") 147 | rm(list = v) 148 | ls() 149 | }) 150 | 151 | 152 | rm(list = ls()) 153 | ls() 154 | 155 | 156 | with(e, { 157 | rm(list = ls()) 158 | ls() 159 | }) 160 | 161 | 162 | 163 | 164 | 123.12345678 165 | 166 | 167 | 0.10000002 168 | 0.10000002 - 0.1 169 | 170 | 171 | 1234567.12345678 172 | 173 | 174 | getOption("digits") 175 | 1e10 + 0.5 176 | options(digits = 15) 177 | 1e10 + 0.5 178 | 179 | 180 | options(digits = 7) 181 | 1e10 + 0.5 182 | 183 | 184 | getOption("warn") 185 | 186 | 187 | as.numeric("hello") 188 | 189 | 190 | options(warn = -1) 191 | as.numeric("hello") 192 | 193 | 194 | f <- function(x, y) { 195 | as.numeric(x) + as.numeric(y) 196 | } 197 | 198 | 199 | options(warn = 0) 200 | f("hello", "world") 201 | 202 | 203 | options(warn = 1) 204 | f("hello", "world") 205 | 206 | 207 | options(warn = 2) 208 | f("hello", "world") 209 | 210 | 211 | options(warn = 0) 212 | 213 | 214 | 215 | 216 | install.packages("ggplot2") 217 | 218 | 219 | 220 | 221 | install.packages(c("ggplot2", "shiny", "knitr", "dplyr", "data.table")) 222 | 223 | 224 | update.packages() 225 | 226 | 227 | install.packages("devtools") 228 | 229 | 230 | devtools::install_github("hadley/ggplot2") 231 | 232 | 233 | install.packages("ggplot2") 234 | 235 | 236 | library(moments) 237 | skewness(x) 238 | 239 | 240 | moments::skewness(x) 241 | 242 | 243 | sessionInfo() 244 | 245 | 246 | moments::skewness(c(1, 2, 3, 2, 1)) 247 | sessionInfo() 248 | 249 | 250 | library(moments) 251 | sessionInfo() 252 | skewness(c(1, 2, 3, 2, 1)) 253 | 254 | 255 | search() 256 | 257 | 258 | unloadNamespace("moments") 259 | 260 | 261 | loaded <- require(moments) 262 | loaded 263 | 264 | 265 | if (!require(moments)) { 266 | install.packages("moments") 267 | library(moments) 268 | } 269 | 270 | 271 | require(moments) 272 | 273 | 274 | require(testPkg) 275 | 276 | 277 | library(testPkg) 278 | 279 | 280 | library(dplyr) 281 | 282 | 283 | fun1 <- package1::some_function 284 | fun2 <- pacakge2::some_function 285 | 286 | 287 | unloadNamespace("moments") 288 | 289 | 290 | skewness(c(1, 2, 3, 2, 1)) 291 | 292 | 293 | moments::skewness(c(1, 2, 3, 2, 1)) 294 | 295 | 296 | pkgs <- installed.packages() 297 | colnames(pkgs) 298 | 299 | 300 | c("moments", "testPkg") %in% installed.packages()[, "Package"] 301 | 302 | 303 | installed.packages()["moments", "Version"] 304 | 305 | 306 | packageVersion("moments") 307 | 308 | 309 | packageVersion("moments") >= package_version("0.14") 310 | 311 | 312 | packageVersion("moments") >= "0.14" 313 | 314 | 315 | -------------------------------------------------------------------------------- /Chapter 4/basic-expressions.R: -------------------------------------------------------------------------------- 1 | x <- 1 2 | y <- c(1, 2, 3) 3 | z <- list(x, y) 4 | 5 | 6 | 2 -> x1 7 | 8 | 9 | x3 <- x2 <- x1 <- 0 10 | 11 | 12 | x3 <- x2 <- x1 <- rnorm(1) 13 | c(x1, x2, x3) 14 | 15 | 16 | x2 = c(1, 2, 3) 17 | 18 | 19 | f <- function(input, data = NULL) { 20 | cat("input:\n") 21 | print(input) 22 | cat("data:\n") 23 | print(data) 24 | } 25 | 26 | 27 | x <- c(1, 2, 3) 28 | y <- c("some", "text") 29 | f(input = x) 30 | 31 | 32 | x = c(1, 2, 3) 33 | y = c("some", "text") 34 | f(input = x) 35 | 36 | 37 | x <- c(1, 2, 3) 38 | y <- c("some", "text") 39 | f(input <- x) 40 | 41 | 42 | input 43 | 44 | 45 | f(input = x, data = y) 46 | 47 | 48 | f(input <- x, data <- y) 49 | 50 | 51 | f(data = y, input = x) 52 | 53 | 54 | f(data <- y, input <- x) 55 | 56 | 57 | data <- y 58 | input <- x 59 | f(y, x) 60 | 61 | 62 | students <- data.frame() 63 | us_population <- data.frame() 64 | sales.2015 <- data.frame() 65 | 66 | 67 | some data <- data.frame() 68 | ## Error: unexpected symbol in "some data" 69 | 70 | _data <- data.frame() 71 | ## Error: unexpected input in "_" 72 | 73 | Population(Millions) <- data.frame() 74 | ## Error in Population(Millions) <- data.frame() : 75 | ## object 'Millions' not found 76 | 77 | 78 | `some data` <- c(1, 2, 3) 79 | `_data` <- c(4, 5, 6) 80 | `Population(Millions)` <- c(city1 = 50, city2 = 60) 81 | 82 | 83 | `some data` 84 | `_data` 85 | `Population(Millions)` 86 | 87 | 88 | `Tom's secret function` <- function(a, d) { 89 | (a ^ 2 - d ^ 2) / (a ^ 2 + d ^ 2) 90 | } 91 | 92 | 93 | l1 <- list(`Group(A)` = rnorm(10), `Group(B)` = rnorm(10)) 94 | 95 | 96 | `Tom's secret function`(1,2) 97 | l1$`Group(A)` 98 | 99 | 100 | results <- data.frame(`Group(A)` = rnorm(10), `Group(B)` = rnorm(10)) 101 | results 102 | 103 | 104 | colnames(results) 105 | 106 | 107 | make.names(c("Population(before)", "Population(after)")) 108 | 109 | 110 | results <- data.frame( 111 | ID = c(0, 1, 2, 3, 4), 112 | Category = c("A", "A", "A", "B", "C"), 113 | `Population(before)` = c(10, 12, 13, 11, 13), 114 | `Population(after)` = c(12, 13, 16, 12, 12), 115 | stringsAsFactors = FALSE, 116 | check.names = FALSE) 117 | results 118 | colnames(results) 119 | 120 | 121 | results$`Population(before)` 122 | 123 | 124 | check_positive <- function(x) { 125 | if (x > 0) { 126 | return(1) 127 | } 128 | } 129 | 130 | 131 | check_positive(1) 132 | check_positive(0) 133 | 134 | 135 | check_sign <- function(x) { 136 | if (x > 0) { 137 | return(1) 138 | } else if (x < 0) { 139 | return(-1) 140 | } else { 141 | return(0) 142 | } 143 | } 144 | 145 | 146 | check_sign(15) 147 | check_sign(-3.5) 148 | check_sign(0) 149 | 150 | 151 | say_sign <- function(x) { 152 | if (x > 0) { 153 | cat("The number is greater than 0") 154 | } else if (x < 0) { 155 | cat("The number is less than 0") 156 | } else { 157 | cat("The number is 0") 158 | } 159 | } 160 | 161 | 162 | say_sign(0) 163 | say_sign(3) 164 | say_sign(-9) 165 | 166 | 167 | if (cond1) { 168 | # do something 169 | } 170 | 171 | 172 | if (cond1) { 173 | # do something 174 | } else { 175 | # do something else 176 | } 177 | 178 | 179 | if (cond1) { 180 | expr1 181 | } else if (cond2) { 182 | expr2 183 | } else if (cond3) { 184 | expr3 185 | } else { 186 | expr4 187 | } 188 | 189 | 190 | grade <- function(score) { 191 | if (score >= 90) { 192 | return("A") 193 | } else if (score >= 80) { 194 | return("B") 195 | } else if (score >= 70) { 196 | return("C") 197 | } else if (score >= 60) { 198 | return("D") 199 | } else { 200 | return("F") 201 | } 202 | } 203 | 204 | c(grade(65), grade(59), grade(87), grade(96)) 205 | 206 | 207 | grade2 <- function(score) { 208 | if (score >= 60) { 209 | return("D") 210 | } else if (score >= 70) { 211 | return("C") 212 | } else if (score >= 80) { 213 | return("B") 214 | } else if (score >= 90) { 215 | return("A") 216 | } else { 217 | return("F") 218 | } 219 | } 220 | 221 | c(grade2(65), grade2(59), grade2(87), grade2(96)) 222 | 223 | 224 | grade2 <- function(score) { 225 | if (score >= 60 && score < 70) { 226 | return("D") 227 | } else if (score >= 70 && score < 80) { 228 | return("C") 229 | } else if (score >= 80 && score < 90) { 230 | return("B") 231 | } else if (score >= 90) { 232 | return("A") 233 | } else { 234 | return("F") 235 | } 236 | } 237 | 238 | c(grade2(65), grade2(59), grade2(87), grade2(96)) 239 | 240 | 241 | check_positive <- function(x) { 242 | return(if (x > 0) { 243 | 1 244 | }) 245 | } 246 | 247 | 248 | check_positive <- function(x) { 249 | return(if (x > 0) 1) 250 | } 251 | 252 | 253 | check_positive <- function(x) { 254 | if (x > 0) 1 255 | } 256 | 257 | 258 | check_sign <- function(x) { 259 | if (x > 0) 1 else if (x < 0) -1 else 0 260 | } 261 | 262 | 263 | say_grade <- function(name, score) { 264 | grade <- if (score >= 90) "A" 265 | else if (score >= 80) "B" 266 | else if (score >= 70) "C" 267 | else if (score >= 60) "D" 268 | else "F" 269 | cat("The grade of", name, "is", grade) 270 | } 271 | 272 | say_grade("Betty", 86) 273 | 274 | 275 | say_grade <- function(name, score) { 276 | if (score >= 90) grade <- "A" 277 | cat("Congratulations!\n") 278 | else if (score >= 80) grade <- "B" 279 | else if (score >= 70) grade <- "C" 280 | else if (score >= 60) grade <- "D" 281 | else grade <- "F" 282 | cat("What a pity!\n") 283 | cat("The grade of", name, "is", grade) 284 | } 285 | 286 | # code with errors 287 | say_grade <- function(name, score) { 288 | if (score >= 90) grade <- "A" 289 | cat("Congratulations!\n") 290 | else if (score >= 80) grade <- "B" 291 | else if (score >= 70) grade <- "C" 292 | else if (score >= 60) grade <- "D" 293 | else grade <- "F" 294 | cat("What a pity!\n") 295 | cat("The grade of", name, "is", grade) 296 | } 297 | 298 | 299 | say_grade <- function(name, score) { 300 | if (score >= 90) { 301 | grade <- "A" 302 | cat("Congratulations!\n") 303 | } else if (score >= 80) { 304 | grade <- "B" 305 | } 306 | else if (score >= 70) { 307 | grade <- "C" 308 | } 309 | else if (score >= 60) { 310 | grade <- "D" 311 | } else { 312 | grade <- "F" 313 | cat("What a pity!\n") 314 | } 315 | 316 | cat("The grade of", name, "is", grade) 317 | } 318 | 319 | say_grade("James", 93) 320 | 321 | 322 | check_positive(c(1, -1, 0)) 323 | 324 | 325 | num <- c(1, 2, 3) 326 | if (num > 2) { 327 | cat("num > 2!") 328 | } 329 | 330 | 331 | any(c(TRUE, FALSE, FALSE)) 332 | any(c(FALSE, FALSE)) 333 | 334 | 335 | if (any(num > 2)) { 336 | cat("num > 2!") 337 | } 338 | 339 | 340 | if (all(num > 2)) { 341 | cat("num > 2!") 342 | } else { 343 | cat("Not all values are greater than 2!") 344 | } 345 | 346 | 347 | check <- function(x) { 348 | if (all(x > 0)) { 349 | cat("All input values are positive!") 350 | } else { 351 | cat("Some values are not positive!") 352 | } 353 | } 354 | 355 | 356 | check(c(1, 2, 3)) 357 | check(c(1, 2, NA, -1)) 358 | check(c(1, 2, NA)) 359 | 360 | 361 | ifelse(c(TRUE, FALSE, FALSE), c(1, 2, 3), c(4, 5, 6)) 362 | 363 | 364 | check_positive2 <- function(x) { 365 | ifelse(x, 1, 0) 366 | } 367 | 368 | 369 | ifelse(TRUE, c(1,2), c(2,3)) 370 | 371 | 372 | if (TRUE) c(1,2) else c(2,3) 373 | 374 | 375 | ifelse(c(TRUE, FALSE), c(1, 2), c("a", "b")) 376 | 377 | 378 | switch(1, "x", "y") 379 | switch(2, "x", "y") 380 | 381 | 382 | switch(3, "x", "y") 383 | 384 | 385 | switch("a", a = 1, b = 2) 386 | switch("b", a = 1, b = 2) 387 | 388 | 389 | switch("c", a = 1, b = 2) 390 | 391 | 392 | switch("c", a = 1, b = 2, 3) 393 | 394 | 395 | switch_test <- function(x) { 396 | switch(x, 397 | a = c(1, 2, 3), 398 | b = list(x = 0, y = 1), 399 | c = { 400 | cat("You choose c!\n") 401 | list(name = "c", value = "something") 402 | }) 403 | } 404 | 405 | switch_test("a") 406 | switch_test("b") 407 | switch_test("c") 408 | 409 | 410 | for (var in vector) { 411 | expr 412 | } 413 | 414 | 415 | var <- vector[[1]] 416 | expr 417 | var <- vector[[2]] 418 | expr 419 | var <- vector[[n]] 420 | expr 421 | 422 | 423 | for (i in 1:3) { 424 | cat("The value of i is", i, "\n") 425 | } 426 | 427 | 428 | for (word in c("hello","new", "world")) { 429 | cat("The current word is", word, "\n") 430 | } 431 | 432 | 433 | loop_list <- list( 434 | a = c(1, 2, 3), 435 | b = c("a", "b", "c", "d")) 436 | 437 | for (item in loop_list) { 438 | cat("item:\n length:", length(item), 439 | "\n class: ", class(item), "\n") 440 | } 441 | 442 | 443 | df <- data.frame( 444 | x = c(1, 2, 3), 445 | y = c("A", "B", "C"), 446 | stringsAsFactors = FALSE) 447 | 448 | for (col in df) { 449 | str(col) 450 | } 451 | 452 | 453 | for (i in 1:nrow(df)) { 454 | row <- df[i,] 455 | cat("row", i, "\n") 456 | str(row) 457 | cat("\n") 458 | } 459 | 460 | 461 | s <- 0 462 | for (i in 1:100) { 463 | s <- s + i 464 | } 465 | s 466 | 467 | 468 | set.seed(123) 469 | x <- numeric(1000) 470 | for (t in 1:(length(x) - 1)) { 471 | x[[t + 1]] <- x[[t]] + rnorm(1, 0, 0.1) 472 | } 473 | plot(x, type = "s", main = "Random walk", xlab = "t") 474 | 475 | 476 | sum100 <- sum(1:100) 477 | random_walk <- cumsum(rnorm(1000, 0, 0.1)) 478 | 479 | 480 | for (i in 1:5) { 481 | if (i == 3) break 482 | cat("message ", i, "\n") 483 | } 484 | 485 | 486 | m <- integer() 487 | for (i in 1000:1100) { 488 | if ((i ^ 2) %% 11 == (i ^ 3) %% 17) { 489 | m <- c(m, i) 490 | } 491 | } 492 | m 493 | 494 | 495 | for (i in 1000:1100) { 496 | if ((i ^ 2) %% 11 == (i ^ 3) %% 17) break 497 | } 498 | i 499 | 500 | 501 | for (i in 1:5) { 502 | if (i == 3) next 503 | cat("message ", i, "\n") 504 | } 505 | 506 | 507 | x <- c("a", "b", "c") 508 | combx <- character() 509 | for (c1 in x) { 510 | for (c2 in x) { 511 | combx <- c(combx, paste(c1, c2, sep = ",", collapse = "")) 512 | } 513 | } 514 | combx 515 | 516 | 517 | combx2 <- character() 518 | for (c1 in x) { 519 | for (c2 in x) { 520 | if (c1 == c2) next 521 | combx2 <- c(combx2, paste(c1, c2, sep = ",", collapse = "")) 522 | } 523 | } 524 | combx2 525 | 526 | 527 | if (c1 != c2) { 528 | combx2 <- c(combx2, paste(c1, c2, sep = ",", collapse = "")) 529 | } 530 | 531 | 532 | combn(c("a", "b", "c"), 2) 533 | 534 | 535 | expand.grid(n = c(1, 2, 3), x = c("a", "b")) 536 | 537 | 538 | x <- 0 539 | while (x <= 5) { 540 | cat(x, " ", sep = "") 541 | x <- x + 1 542 | } 543 | 544 | 545 | x <- 0 546 | while (TRUE) { 547 | x <- x + 1 548 | if (x == 4) break 549 | else if (x == 2) next 550 | else cat(x, '\n') 551 | } 552 | 553 | 554 | res <- dbSendQuery(con, "SELECT * FROM table1 WHERE type = 1") 555 | while (!dbHasCompleted(res)) { 556 | chunk <- dbFetch(res, 10000) 557 | process(chunk) 558 | } 559 | 560 | 561 | x <- 0 562 | repeat { 563 | x <- x + 1 564 | if (x == 4) break 565 | else if (x == 2) next 566 | else cat(x, '\n') 567 | } 568 | 569 | -------------------------------------------------------------------------------- /Chapter 5/working-with-basic-objects.R: -------------------------------------------------------------------------------- 1 | take_it <- function(x) { 2 | if (is.atomic(x)) { 3 | x[[1]] 4 | } else if (is.list(x)) { 5 | x$data[[x$index]] 6 | } else { 7 | stop("Not supported input type") 8 | } 9 | } 10 | 11 | 12 | take_it(c(1, 2, 3)) 13 | take_it(list(data = c("a", "b", "c"), index = 3)) 14 | 15 | 16 | take_it(mean) 17 | 18 | 19 | take_it(list(input = c("a", "b", "c"))) 20 | 21 | 22 | NULL[[1]] 23 | NULL[[NULL]] 24 | 25 | 26 | take_it(list(data = c("a", "b", "c"))) 27 | 28 | 29 | c("a", "b", "c")[[NULL]] 30 | 31 | 32 | take_it(list(index = 2)) 33 | 34 | 35 | take_it2 <- function(x) { 36 | if (is.atomic(x)) { 37 | x[[1]] 38 | } else if (is.list(x)) { 39 | if (!is.null(x$data) && is.atomic(x$data)) { 40 | if (is.numeric(x$index) && length(x) == 1) { 41 | x$data[[x$index]] 42 | } else { 43 | stop("Invalid index") 44 | } 45 | } else { 46 | stop("Invalid data") 47 | } 48 | } else { 49 | stop("Not supported input type") 50 | } 51 | } 52 | 53 | 54 | take_it2(list(data = c("a", "b", "c"))) 55 | take_it2(list(index = 2)) 56 | 57 | 58 | x <- c(1, 2, 3) 59 | class(x) 60 | typeof(x) 61 | str(x) 62 | 63 | 64 | x <- 1:3 65 | class(x) 66 | typeof(x) 67 | str(x) 68 | 69 | 70 | x <- c("a", "b", "c") 71 | class(x) 72 | typeof(x) 73 | str(x) 74 | 75 | 76 | x <- list(a = c(1, 2), b = c(TRUE, FALSE)) 77 | class(x) 78 | typeof(x) 79 | str(x) 80 | 81 | 82 | x <- data.frame(a = c(1, 2), b = c(TRUE, FALSE)) 83 | class(x) 84 | typeof(x) 85 | str(x) 86 | 87 | 88 | vec <- c(1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6) 89 | class(vec) 90 | typeof(vec) 91 | 92 | 93 | sample_matrix <- matrix(vec, ncol = 4) 94 | sample_matrix 95 | class(sample_matrix) 96 | typeof(sample_matrix) 97 | dim(sample_matrix) 98 | nrow(sample_matrix) 99 | ncol(sample_matrix) 100 | 101 | 102 | sample_array <- array(vec, dim = c(2, 3, 2)) 103 | sample_array 104 | class(sample_array) 105 | typeof(sample_array) 106 | dim(sample_array) 107 | nrow(sample_array) 108 | ncol(sample_array) 109 | 110 | 111 | sample_data_frame <- data.frame(a = c(1, 2, 3), b = c(2, 3, 4)) 112 | class(sample_data_frame) 113 | typeof(sample_data_frame) 114 | dim(sample_data_frame) 115 | nrow(sample_data_frame) 116 | ncol(sample_data_frame) 117 | 118 | 119 | sample_data <- vec 120 | dim(sample_data) <- c(3, 4) 121 | sample_data 122 | class(sample_data) 123 | typeof(sample_data) 124 | 125 | 126 | dim(sample_data) <- c(4, 3) 127 | sample_data 128 | 129 | 130 | dim(sample_data) <- c(3, 2, 2) 131 | sample_data 132 | class(sample_data) 133 | 134 | 135 | dim(sample_data) <- c(2, 3, 4) 136 | 137 | 138 | sample_data_frame 139 | 140 | 141 | for (i in 1:nrow(sample_data_frame)) { 142 | # sample text: 143 | # row #1, a: 1, b: 2 144 | cat("row #", i, ", ", 145 | "a: ", sample_data_frame[i, "a"], 146 | ", b: ", sample_data_frame[i, "b"], 147 | "\n", sep = "") 148 | } 149 | 150 | 151 | test_direction <- function(x, y, z) { 152 | if (x < y & y < z) 1 153 | else if (x > y & y > z) -1 154 | else 0 155 | } 156 | 157 | 158 | test_direction(1, 2, 3) 159 | 160 | 161 | test_direction(c(1, 2), c(2, 3), c(3, 4)) 162 | 163 | 164 | test_direction2 <- function(x, y, z) { 165 | if (x < y && y < z) 1 166 | else if (x > y && y > z) -1 167 | else 0 168 | } 169 | 170 | 171 | test_direction2(1, 2, 3) 172 | 173 | 174 | test_direction2(c(1, 2), c(2, 3), c(3, 4)) 175 | 176 | 177 | x <- c(-2, -3, 2, 3, 1, 0, 0, 1, 2) 178 | any(x > 1) 179 | all(x <= 1) 180 | 181 | 182 | test_all_direction <- function(x, y, z) { 183 | if (all(x < y & y < z)) 1 184 | else if (all(x > y & y > z)) -1 185 | else 0 186 | } 187 | 188 | 189 | test_all_direction(1, 2, 3) 190 | 191 | 192 | test_all_direction(c(1, 2), c(2, 3), c(3, 4)) 193 | 194 | 195 | test_all_direction(c(1, 2), c(2, 4), c(3, 4)) 196 | 197 | 198 | test_any_direction <- function(x, y, z) { 199 | if (any(x < y & y < z)) 1 200 | else if (any(x > y & y > z)) -1 201 | else 0 202 | } 203 | test_all_direction2 <- function(x, y, z) { 204 | if (all(x < y) && all(y < z)) 1 205 | else if (all(x > y) && all(y > z)) -1 206 | else 0 207 | } 208 | test_any_direction2 <- function(x, y, z) { 209 | if (any(x < y) && any(y < z)) 1 210 | else if (any(x > y) && any(y > z)) -1 211 | else 0 212 | } 213 | 214 | 215 | x 216 | abs(x) >= 1.5 217 | which(abs(x) >= 1.5) 218 | 219 | 220 | x[x >= 1.5] 221 | 222 | 223 | x[x >= 100] 224 | 225 | 226 | x <- c(-2, -3, NA, 2, 3, 1, NA, 0, 1, NA, 2) 227 | 228 | 229 | x + 2 230 | 231 | 232 | x > 2 233 | 234 | 235 | x 236 | any(x > 2) 237 | any(x < -2) 238 | any(x < -3) 239 | 240 | 241 | any(c(TRUE, FALSE, NA)) 242 | any(c(FALSE, FALSE, NA)) 243 | any(c(FALSE, FALSE)) 244 | 245 | 246 | any(x < -3, na.rm = TRUE) 247 | 248 | 249 | x 250 | all(x > -3) 251 | all(x >= -3) 252 | all(x < 4) 253 | 254 | 255 | all(c(TRUE, FALSE, NA)) 256 | all(c(TRUE, TRUE, NA)) 257 | all(c(TRUE, TRUE)) 258 | 259 | 260 | all(x >= -3, na.rm = TRUE) 261 | 262 | 263 | x 264 | x[x >= 0] 265 | 266 | 267 | which(x >= 0) 268 | 269 | 270 | x[which(x >= 0)] 271 | 272 | 273 | if (2) 3 274 | if (0) 0 else 1 275 | 276 | 277 | if ("a") 1 else 2 278 | 279 | 280 | sqrt(-1) 281 | 282 | 283 | 1 / 0 284 | 285 | 286 | log(0) 287 | 288 | 289 | is.finite(1 / 0) 290 | is.infinite(log(0)) 291 | 292 | 293 | 1 / 0 < 0 294 | 1 / 0 > 0 295 | log(0) < 0 296 | log(0) > 0 297 | 298 | 299 | is.pos.infinite <- function(x) { 300 | is.infinite(x) & x > 0 301 | } 302 | is.neg.infinite <- function(x) { 303 | is.infinite(x) & x < 0 304 | } 305 | is.pos.infinite(1/0) 306 | is.neg.infinite(log(0)) 307 | 308 | 309 | log(-1) 310 | 311 | 312 | pi 313 | 314 | 315 | sin(pi) 316 | 317 | 318 | max(c(1, 2, 3)) 319 | 320 | 321 | max(c(1, 2, 3), 322 | c(2, 1, 2), 323 | c(1, 3, 4)) 324 | min(c(1, 2, 3), 325 | c(2, 1, 2), 326 | c(1, 3, 4)) 327 | 328 | 329 | pmax(c(1, 2, 3), 330 | c(2, 1, 2), 331 | c(1, 3, 4)) 332 | 333 | 334 | x <- list(c(1, 2, 3), 335 | c(2, 1, 2), 336 | c(1, 3, 4)) 337 | c(max(x[[1]][[1]], x[[2]][[1]], x[[3]][[1]]), 338 | max(x[[1]][[2]], x[[2]][[2]], x[[3]][[2]]), 339 | max(x[[1]][[3]], x[[2]][[3]], x[[3]][[3]])) 340 | 341 | 342 | pmin(c(1, 2, 3), 343 | c(2, 1, 2), 344 | c(1, 3, 4)) 345 | 346 | 347 | spread <- function(x) { 348 | if (x < -5) -5 349 | else if (x > 5) 5 350 | else x 351 | } 352 | 353 | 354 | spread(1) 355 | spread(seq(-8, 8)) 356 | 357 | 358 | spread2 <- function(x) { 359 | pmin(5, pmax(-5, x)) 360 | } 361 | spread2(seq(-8, 8)) 362 | 363 | 364 | spread3 <- function(x) { 365 | ifelse(x < -5, -5, ifelse(x > 5, 5, x)) 366 | } 367 | spread3(seq(-8, 8)) 368 | 369 | 370 | curve(spread2, -8, 8, xlab = "x", ylab = "spread", main = "spread") 371 | 372 | 373 | polyroot(c(-2, 1, 1)) 374 | 375 | 376 | Re(polyroot(c(-2, 1, 1))) 377 | 378 | 379 | polyroot(c(1, 0, 1)) 380 | 381 | 382 | r <- polyroot(c(-1, -2, -1, 1)) 383 | r 384 | 385 | 386 | r ^ 3 - r ^ 2 - 2 * r - 1 387 | 388 | 389 | round(r ^ 3 - r ^ 2 - 2 * r - 1, 8) 390 | 391 | 392 | curve(x ^ 2 - exp(x), -2, 1, 393 | main = quote(x ^ 2 - e ^ x), 394 | xlab = "x", ylab = "y") 395 | abline(h = 0, col = "red", lty = 2) 396 | 397 | 398 | uniroot(function(x) x ^ 2 - exp(x), c(-2, 1)) 399 | 400 | 401 | curve(exp(x) - 3 * exp(-x ^ 2 + x) + 1, -2, 2, 402 | main = quote(e ^ x - 3 * e ^ (-x ^ 2 + x) + 1), 403 | xlab = "x", ylab = "y") 404 | abline(h = 0, col = "red", lty = 2) 405 | 406 | 407 | f <- function(x) exp(x) - 3 * exp(-x ^ 2 + x) + 1 408 | uniroot(f, c(-2, 2)) 409 | 410 | 411 | uniroot(f, c(-2, 0))$root 412 | uniroot(f, c(0, 2))$root 413 | 414 | 415 | curve(x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3, -5, 5, 416 | main = quote(x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3), 417 | xlab = "x", ylab = "y") 418 | abline(h = 0, col = "red", lty = 2) 419 | 420 | 421 | uniroot(function(x) x ^ 2 - 2 * x + 4 * cos(x ^ 2) - 3, c(0, 1))$root 422 | 423 | 424 | D(quote(x ^ 2), "x") 425 | 426 | 427 | D(quote(sin(x) * cos(x * y)), "x") 428 | 429 | 430 | z <- D(quote(sin(x) * cos(x * y)), "x") 431 | z 432 | eval(z, list(x = 1, y = 2)) 433 | 434 | 435 | result <- integrate(function(x) sin(x), 0, pi / 2) 436 | result 437 | 438 | 439 | str(result) 440 | 441 | 442 | sample(1:6, size = 5) 443 | 444 | 445 | sample(1:6, size = 5, replace = TRUE) 446 | 447 | 448 | sample(letters, size = 3) 449 | 450 | 451 | sample(list(a = 1, b = c(2, 3), c = c(3, 4, 5)), size = 2) 452 | 453 | 454 | grades <- sample(c("A", "B", "C"), size = 20, replace = TRUE, 455 | prob = c(0.25, 0.5, 0.25)) 456 | grades 457 | 458 | 459 | table(grades) 460 | 461 | 462 | runif(5) 463 | 464 | 465 | runif(5, min = -1, max = 1) 466 | 467 | 468 | withr::with_par(list(mfrow = c(1, 2)), { 469 | x <- runif(1000) 470 | plot(x, main = "runif(1000)") 471 | hist(x, main = "Histogram of runif(1000)") 472 | }) 473 | 474 | 475 | rnorm(5) 476 | 477 | 478 | rnorm(5, mean = 2, sd = 0.5) 479 | 480 | 481 | withr::with_par(list(mfrow = c(1, 2)), { 482 | x <- rnorm(1000) 483 | plot(x, main = "rnorm(1000)") 484 | hist(x, main = "Histogram of rnorm(1000)") 485 | }) 486 | 487 | 488 | x <- rnorm(50) 489 | 490 | 491 | mean(x) 492 | 493 | 494 | sum(x) / length(x) 495 | 496 | 497 | mean(x, trim = 0.05) 498 | 499 | 500 | median(x) 501 | 502 | 503 | sd(x) 504 | 505 | 506 | var(x) 507 | 508 | 509 | c(min = min(x), max = max(x)) 510 | 511 | 512 | range(x) 513 | 514 | 515 | quantile(x) 516 | 517 | 518 | quantile(x, probs = seq(0, 1, 0.1)) 519 | 520 | 521 | summary(x) 522 | 523 | 524 | df <- data.frame(score = round(rnorm(100, 80, 10)), 525 | grade = sample(letters[1:3], 100, replace = TRUE)) 526 | summary(df) 527 | 528 | 529 | y <- 2 * x + 0.5 * rnorm(length(x)) 530 | 531 | 532 | cov(x, y) 533 | 534 | 535 | cor(x, y) 536 | 537 | 538 | z <- runif(length(x)) 539 | m1 <- cbind(x, y, z) 540 | cov(m1) 541 | 542 | 543 | cor(m1) 544 | 545 | 546 | len <- c(3, 4, 5) 547 | # first, create a list in the environment. 548 | x <- list() 549 | # then use `for` to generate the random vector for each length 550 | for (i in 1:3) { 551 | x[[i]] <- rnorm(len[i]) 552 | } 553 | x 554 | 555 | 556 | lapply(len, rnorm) 557 | 558 | 559 | students <- list( 560 | a1 = list(name = "James", age = 25, 561 | gender = "M", interest = c("reading", "writing")), 562 | a2 = list(name = "Jenny", age = 23, 563 | gender = "F", interest = c("cooking")), 564 | a3 = list(name = "David", age = 24, 565 | gender = "M", interest = c("running", "basketball"))) 566 | 567 | 568 | sprintf("Hello, %s! Your number is %d.", "Tom", 3) 569 | 570 | 571 | lapply(students, function(s) { 572 | type <- switch(s$gender, "M" = "man", "F" = "woman") 573 | interest <- paste(s$interest, collapse = ", ") 574 | sprintf("%s, %d year-old %s, loves %s.", s$name, s$age, type, interest) 575 | }) 576 | 577 | 578 | sapply(1:10, function(i) i ^ 2) 579 | 580 | 581 | sapply(1:10, function(i) c(i, i ^ 2)) 582 | 583 | 584 | x <- list(c(1, 2), c(2, 3), c(1, 3)) 585 | 586 | 587 | sapply(x, function(x) x ^ 2) 588 | 589 | 590 | x1 <- list(c(1, 2), c(2, 3), c(1, 3, 3)) 591 | 592 | 593 | sapply(x1, function(x) x ^ 2) 594 | 595 | 596 | vapply(x1, function(x) x ^ 2, numeric(2)) 597 | 598 | 599 | vapply(x, function(x) x ^ 2, numeric(2)) 600 | 601 | 602 | mapply(function(a, b, c) a * b + b * c + a * c, 603 | a = c(1, 2, 3), b = c(5, 6, 7), c = c(-1, -2, -3)) 604 | 605 | 606 | df <- data.frame(x = c(1, 2, 3), y = c(3, 4, 5)) 607 | df 608 | mapply(function(xi, yi) c(xi, yi, xi + yi), df$x, df$y) 609 | 610 | 611 | Map(function(xi, yi) c(xi, yi, xi + yi), df$x, df$y) 612 | 613 | 614 | mat <- matrix(c(1, 2, 3, 4), nrow = 2) 615 | mat 616 | apply(mat, 1, sum) 617 | 618 | 619 | apply(mat, 2, sum) 620 | 621 | 622 | mat2 <- matrix(1:16, nrow = 4) 623 | mat2 624 | 625 | 626 | apply(mat2, 2, function(col) c(min = min(col), max = max(col))) 627 | 628 | 629 | apply(mat2, 1, function(col) c(min = min(col), max = max(col))) 630 | 631 | 632 | -------------------------------------------------------------------------------- /Chapter 6/working-with-strings.R: -------------------------------------------------------------------------------- 1 | "Hello" 2 | 3 | 4 | str1 <- "Hello" 5 | str1 6 | 7 | 8 | for (i in 1:3) { 9 | "Hello" 10 | } 11 | 12 | 13 | test1 <- function(x) { 14 | "Hello" 15 | x 16 | } 17 | test1("World") 18 | 19 | 20 | test2 <- function(x) { 21 | "Hello" 22 | } 23 | test2("World") 24 | 25 | 26 | print(str1) 27 | 28 | 29 | for (i in 1:3) { 30 | print(str1) 31 | } 32 | 33 | 34 | test3 <- function(x) { 35 | print("Hello") 36 | x 37 | } 38 | test3("World") 39 | 40 | 41 | cat("Hello") 42 | 43 | 44 | name <- "Ken" 45 | language <- "R" 46 | cat("Hello,", name, "- a user of", language) 47 | 48 | 49 | cat("Hello, ", name, ", a user of ", language, ".") 50 | 51 | 52 | cat("Hello, ", name, ", a user of ", language, ".", sep = "") 53 | 54 | 55 | message("Hello, ", name, ", a user of ", language, ".") 56 | 57 | 58 | for (i in 1:3) { 59 | cat(letters[[i]]) 60 | } 61 | 62 | 63 | for (i in 1:3) { 64 | message(letters[[i]]) 65 | } 66 | 67 | 68 | for (i in 1:3) { 69 | cat(letters[[i]], "\n", sep = "") 70 | } 71 | 72 | 73 | paste("Hello", "world") 74 | paste("Hello", "world", sep = "-") 75 | 76 | 77 | paste0("Hello", "world") 78 | 79 | 80 | value1 <- cat("Hello", "world") 81 | value1 82 | 83 | 84 | paste(c("A", "B"), c("C", "D")) 85 | 86 | 87 | paste(c("A", "B"), c("C", "D"),collapse = ", ") 88 | 89 | 90 | result <- paste(c("A", "B"), c("C", "D"), collapse = "\n") 91 | result 92 | 93 | 94 | cat(result) 95 | 96 | 97 | tolower("Hello") 98 | toupper("Hello") 99 | 100 | 101 | calc <- function(type, x, y) { 102 | type <- tolower(type) 103 | if (type == "add") { 104 | x + y 105 | } else if (type == "times") { 106 | x * y 107 | } else { 108 | stop("Not supported type of command") 109 | } 110 | } 111 | c(calc("add", 2, 3), calc("Add", 2, 3), calc("TIMES", 2, 3)) 112 | 113 | 114 | toupper(c("Hello", "world")) 115 | 116 | 117 | nchar("Hello") 118 | 119 | 120 | nchar(c("Hello", "R", "User")) 121 | 122 | 123 | store_student <- function(name, age) { 124 | stopifnot(length(name) == 1, nchar(name) >= 2, 125 | is.numeric(age), age > 0) 126 | # store the information in the database 127 | } 128 | 129 | 130 | store_student("James", 20) 131 | store_student("P", 23) 132 | 133 | 134 | store_student(" P", 23) 135 | 136 | 137 | store_student2 <- function(name, age) { 138 | stopifnot(length(name) == 1, nchar(trimws(name)) >= 2, 139 | is.numeric(age), age > 0) 140 | # store the information in the database 141 | } 142 | 143 | 144 | store_student2(" P", 23) 145 | 146 | 147 | trimws(c(" Hello", "World "), which = "left") 148 | 149 | 150 | dates <- c("Jan 3", "Feb 10", "Nov 15") 151 | 152 | 153 | substr(dates, 1, 3) 154 | 155 | 156 | substr(dates, 5, nchar(dates)) 157 | 158 | 159 | get_month_day <- function(x) { 160 | months <- vapply(substr(tolower(x), 1, 3), function(md) { 161 | switch(md, jan = 1, feb = 2, mar = 3, apr = 4, 162 | may = 5, jun = 6, jul = 7, aug = 8, 163 | sep = 9, oct = 10, nov = 11, dec = 12) 164 | }, numeric(1), USE.NAMES = FALSE) 165 | days <- as.numeric(substr(x, 5, nchar(x))) 166 | data.frame(month = months, day = days) 167 | } 168 | get_month_day(dates) 169 | 170 | 171 | substr(dates, 1, 3) <- c("Feb", "Dec", "Mar") 172 | dates 173 | 174 | 175 | strsplit("a,bb,ccc", split = ",") 176 | 177 | 178 | students <- strsplit(c("Tony, 26, Physics", "James, 25, Economics"), split = ", ") 179 | students 180 | 181 | 182 | students_matrix <- do.call(rbind, students) 183 | colnames(students_matrix) <- c("name", "age", "major") 184 | students_matrix 185 | 186 | 187 | students_df <- data.frame(students_matrix, stringsAsFactors = FALSE) 188 | students_df$age <- as.numeric(students_df$age) 189 | students_df 190 | 191 | 192 | strsplit(c("hello", "world"), split = "") 193 | 194 | 195 | cat(paste("#", 1:nrow(students_df), ", name: ", students_df$name, 196 | ", age: ", students_df$age, ", major: ", students_df$major, sep = ""), sep = "\n") 197 | 198 | 199 | cat(sprintf("#%d, name: %s, age: %d, major: %s", 200 | 1:nrow(students_df), students_df$name, students_df$age, students_df$major), sep = "\n") 201 | 202 | 203 | sprintf("The length of the line is approximately %.1fmm", 12.295) 204 | 205 | 206 | sprintf("The ratio is %d%%", 10) 207 | 208 | 209 | sprintf("%s, %d years old, majors in %s and loves %s.", 210 | "James", 25, "Physics", "Physics") 211 | 212 | 213 | # install.packages("pystr") 214 | library(pystr) 215 | pystr_format("{1}, {2} years old, majors in {3} and loves {3}.", 216 | "James", 25, "Physics", "Physics") 217 | 218 | 219 | pystr_format("{name}, {age} years old, majors in {major} and loves {major}.", 220 | name = "James", age = 25, major = "Physics") 221 | 222 | 223 | Sys.Date() 224 | 225 | 226 | Sys.time() 227 | 228 | 229 | current_date <- Sys.Date() 230 | as.numeric(current_date) 231 | 232 | 233 | current_time <- Sys.time() 234 | as.numeric(current_time) 235 | 236 | 237 | as.Date(1000, "1970-01-01") 238 | 239 | 240 | my_date <- as.Date("2016-02-10") 241 | my_date 242 | 243 | 244 | my_date + 3 245 | my_date + 80 246 | my_date - 65 247 | 248 | 249 | date1 <- as.Date("2014-09-28") 250 | date2 <- as.Date("2015-10-20") 251 | date2 - date1 252 | 253 | 254 | as.numeric(date2 - date1) 255 | 256 | 257 | my_time <- as.POSIXlt("2016-02-10 10:25:31") 258 | my_time 259 | 260 | 261 | my_time + 10 262 | my_time + 12345 263 | my_time - 1234567 264 | 265 | 266 | as.Date("2015.07.25") 267 | 268 | 269 | as.Date("2015.07.25", format = "%Y.%m.%d") 270 | 271 | 272 | as.POSIXlt("7/25/2015 09:30:25", format = "%m/%d/%Y %H:%M:%S") 273 | 274 | 275 | strptime("7/25/2015 09:30:25", "%m/%d/%Y %H:%M:%S") 276 | 277 | 278 | as.Date(c("2015-05-01", "2016-02-12")) 279 | 280 | 281 | as.Date("2015-01-01") + 0:2 282 | 283 | 284 | strptime("7/25/2015 09:30:25", "%m/%d/%Y %H:%M:%S") + 1:3 285 | 286 | 287 | as.Date("20150610", format = "%Y%m%d") 288 | 289 | 290 | strptime("20150610093215", "%Y%m%d%H%M%S") 291 | 292 | 293 | datetimes <- data.frame( 294 | date = c(20150601, 20150603), 295 | time = c(92325, 150621)) 296 | 297 | 298 | dt_text <- paste0(datetimes$date, datetimes$time) 299 | dt_text 300 | strptime(dt_text, "%Y%m%d%H%M%S") 301 | 302 | 303 | dt_text2 <- paste0(datetimes$date, sprintf("%06d", datetimes$time)) 304 | dt_text2 305 | strptime(dt_text2, "%Y%m%d%H%M%S") 306 | 307 | 308 | my_date 309 | 310 | 311 | date_text <- as.character(my_date) 312 | date_text 313 | 314 | 315 | date_text + 1 316 | 317 | 318 | as.character(my_date, format = "%Y.%m.%d") 319 | 320 | 321 | format(my_date, "%Y.%m.%d") 322 | 323 | 324 | my_time 325 | format(my_time, "date: %Y-%m-%d, time: %H:%M:%S") 326 | 327 | 328 | read.csv("data/messages.txt", header = FALSE) 329 | 330 | 331 | fruits <- readLines("data/fruits.txt") 332 | fruits 333 | matches <- grep("^\\w+:\\s\\d+$", fruits) 334 | matches 335 | 336 | 337 | fruits[matches] 338 | 339 | 340 | grep("\\d", c("abc", "a12", "123", "1")) 341 | grep("^\\d$", c("abc", "a12", "123", "1")) 342 | 343 | 344 | library(stringr) 345 | matches <- str_match(fruits, "^(\\w+):\\s(\\d+)$") 346 | matches 347 | 348 | 349 | # transform to data frame 350 | fruits_df <- data.frame(na.omit(matches[, -1]), stringsAsFactors = FALSE) 351 | # add a header 352 | colnames(fruits_df) <- c("fruit","quantity") 353 | # convert type of quantity from character to integer 354 | fruits_df$quantity <- as.integer(fruits_df$quantity) 355 | 356 | 357 | fruits_df 358 | 359 | 360 | telephone <- readLines("data/telephone.txt") 361 | telephone 362 | 363 | 364 | telephone[grep("^\\d{3}-\\d{5}$", telephone)] 365 | telephone[grep("^\\d{4}-\\d{4}$", telephone)] 366 | 367 | 368 | telephone[!grepl("^\\d{3}-\\d{5}$", telephone) & !grepl("^\\d{4}-\\d{4}$", telephone)] 369 | 370 | 371 | messages <- readLines("data/messages.txt") 372 | 373 | 374 | pattern <- "^(\\d+-\\d+-\\d+),(\\d+:\\d+:\\d+),(\\w+),(\\w+),\\s*(.+)$" 375 | matches <- str_match(messages, pattern) 376 | messages_df <- data.frame(matches[, -1]) 377 | colnames(messages_df) <- c("Date", "Time", "Sender", "Receiver", "Message") 378 | 379 | 380 | messages_df 381 | -------------------------------------------------------------------------------- /Chapter 7/working-with-data.R: -------------------------------------------------------------------------------- 1 | set.seed(100) 2 | 3 | readLines("data/persons.csv") 4 | 5 | 6 | readLines("data/persons.csv", n = 2) 7 | 8 | 9 | persons1 <- read.csv("data/persons.csv", stringsAsFactors = FALSE) 10 | str(persons1) 11 | 12 | 13 | persons2 <- read.csv("data/persons.csv", 14 | colClasses = c("character", "factor", "integer", "character"), 15 | col.names = c("name", "sex", "age", "major")) 16 | str(persons2) 17 | 18 | 19 | persons3 <- readr::read_csv("data/persons.csv") 20 | str(persons3) 21 | 22 | 23 | read.table("data/persons.txt", sep = " ") 24 | 25 | # install.packages("readr") 26 | readr::read_table("data/persons.txt") 27 | 28 | 29 | some_data <- data.frame( 30 | id = 1:4, 31 | grade = c("A", "A", "B", NA), 32 | width = c(1.51, 1.52, 1.46, NA), 33 | check_date = as.Date(c("2016-03-05", "2016-03-06", "2016-03-10", "2016-03-11"))) 34 | some_data 35 | write.csv(some_data, "data/some_data.csv") 36 | 37 | 38 | cat(readLines("data/some_data.csv"), sep = "\n") 39 | 40 | 41 | write.csv(some_data, "data/some_data.csv", 42 | quote = FALSE, na = "-", row.names = FALSE) 43 | 44 | 45 | cat(readLines("data/some_data.csv"), sep = "\n") 46 | 47 | 48 | readr::read_csv("data/some_data.csv", na = "-") 49 | 50 | 51 | file.remove("data/some_data.csv") 52 | 53 | 54 | 55 | # install.packages("readxl") 56 | readxl::read_excel("data/prices.xlsx") 57 | 58 | # install.packages("openxlsx") 59 | openxlsx::read.xlsx("data/prices.xlsx", detectDates = TRUE) 60 | 61 | 62 | openxlsx::write.xlsx(mtcars, "data/mtcars.xlsx") 63 | 64 | 65 | saveRDS(some_data, "data/some_data.rds") 66 | 67 | 68 | some_data2 <- readRDS("data/some_data.rds") 69 | 70 | 71 | identical(some_data, some_data2) 72 | 73 | 74 | rows <- 200000 75 | large_data <- data.frame(id = 1:rows, 76 | x = rnorm(rows), y = rnorm(rows)) 77 | system.time(write.csv(large_data, "data/large_data.csv")) 78 | system.time(saveRDS(large_data, "data/large_data.rds")) 79 | 80 | 81 | fileinfo <- file.info("data/large_data.csv", "data/large_data.rds") 82 | fileinfo[, "size", drop = FALSE] 83 | 84 | 85 | system.time(read.csv("data/large_data.csv")) 86 | system.time(readr::read_csv("data/large_data.csv")) 87 | 88 | 89 | system.time(readRDS("data/large_data.rds")) 90 | 91 | 92 | nums <- c(1.5, 2.5, NA, 3) 93 | list1 <- list(x = c(1, 2, 3), 94 | y = list(a = c("a", "b"), 95 | b = c(NA, 1, 2.5))) 96 | saveRDS(nums, "data/nums.rds") 97 | saveRDS(list1, "data/list1.rds") 98 | 99 | 100 | readRDS("data/nums.rds") 101 | readRDS("data/list1.rds") 102 | 103 | 104 | save(some_data, nums, list1, file = "data/bundle1.RData") 105 | 106 | 107 | rm(some_data, nums, list1) 108 | load("data/bundle1.RData") 109 | 110 | 111 | some_data 112 | nums 113 | list1 114 | 115 | 116 | file.remove("data/some_data.rds", 117 | "data/large_data.csv", 118 | "data/large_data.rds", 119 | "data/nums.rds", 120 | "data/list1.rds", 121 | "data/bundle1.RData") 122 | 123 | 124 | head(iris) 125 | str(iris) 126 | 127 | 128 | head(mtcars) 129 | str(mtcars) 130 | 131 | 132 | data("diamonds", package = "ggplot2") 133 | dim(diamonds) 134 | 135 | 136 | head(diamonds) 137 | 138 | 139 | install.package(c("nycflights13", "babynames")) 140 | 141 | 142 | plot(1:10) 143 | 144 | 145 | x <- rnorm(100) 146 | y <- 2 * x + rnorm(100) 147 | plot(x, y) 148 | 149 | 150 | plot(x, y, 151 | main = "Linearly correlated random numbers", 152 | xlab = "x", ylab = "2x + noise", 153 | xlim = c(-4, 4), ylim = c(-4, 4)) 154 | 155 | 156 | plot(x, y, 157 | xlim = c(-4, 4), ylim = c(-4, 4), 158 | xlab = "x", ylab = "2x + noise") 159 | title("Linearly correlated random numbers") 160 | 161 | 162 | plot(0:25, 0:25, pch = 0:25, 163 | xlim = c(-1, 26), ylim = c(-1, 26), 164 | main = "Point styles (pch)") 165 | text(0:25 + 1, 0:25, 0:25) 166 | 167 | 168 | x <- rnorm(100) 169 | y <- 2 * x + rnorm(100) 170 | plot(x, y, pch = 16, 171 | main = "Scatter plot with customized point style") 172 | 173 | 174 | plot(x, y, 175 | pch = ifelse(x * y > 1, 16, 1), 176 | main = "Scatter plot with conditional point styles") 177 | 178 | 179 | z <- sqrt(1 + x ^ 2) + rnorm(100) 180 | plot(x, y, pch = 1, 181 | xlim = range(x), ylim = range(y, z), 182 | xlab = "x", ylab = "value") 183 | points(x, z, pch = 17) 184 | title("Scatter plot with two series") 185 | 186 | 187 | plot(x, y, pch = 16, col = "blue", 188 | main = "Scatter plot with blue points") 189 | 190 | 191 | plot(x, y, pch = 16, 192 | col = ifelse(y >= mean(y), "red", "green"), 193 | main = "Scatter plot with conditional colors") 194 | 195 | 196 | plot(x, y, col = "blue", pch = 0, 197 | xlim = range(x), ylim = range(y, z), 198 | xlab = "x", ylab = "value") 199 | points(x, z, col = "red", pch = 1) 200 | title("Scatter plot with two series") 201 | 202 | 203 | t <- 1:50 204 | y <- 3 * sin(t * pi / 60) + rnorm(t) 205 | plot(t, y, type = "l", 206 | main = "Simple line plot") 207 | 208 | 209 | lty_values <- 1:6 210 | plot(lty_values, type = "n", axes = FALSE, ann = FALSE) 211 | abline(h = lty_values, lty = lty_values, lwd = 2) 212 | mtext(lty_values, side = 2, at = lty_values) 213 | title("Line types (lty)") 214 | 215 | 216 | plot(t, y, type = "l", lwd = 2) 217 | abline(h = mean(y), lty = 2, col = "blue") 218 | abline(h = range(y), lty = 3, col = "red") 219 | abline(v = t[c(which.min(y), which.max(y))], 220 | lty = 3, col = "darkgray") 221 | title("Line plot with auxiliary lines") 222 | 223 | 224 | p <- 40 225 | plot(t[t <= p], y[t <= p], type = "l", 226 | xlim = range(t), xlab = "t") 227 | lines(t[t >= p], y[t >= p], lty = 2) 228 | title("Simple line plot with two periods") 229 | 230 | 231 | plot(y, type = "l") 232 | points(y, pch = 16) 233 | title("Lines with points") 234 | 235 | 236 | plot(y, pch = 16) 237 | lines(y) 238 | title("Lines with points") 239 | 240 | 241 | x <- 1:30 242 | y <- 2 * x + 6 * rnorm(30) 243 | z <- 3 * sqrt(x) + 8 * rnorm(30) 244 | 245 | plot(x, y, type = "l", 246 | ylim = range(y, z), col = "black") 247 | points(y, pch = 15) 248 | lines(z, lty = 2, col = "blue") 249 | points(z, pch = 16, col = "blue") 250 | title("Plot of two series") 251 | legend("topleft", 252 | legend = c("y", "z"), 253 | col = c("black", "blue"), 254 | lty = c(1, 2), pch = c(15, 16), cex = 0.8, 255 | x.intersp = 0.5, y.intersp = 0.8) 256 | 257 | 258 | plot(x, y, type = "s", 259 | main = "A simple step plot") 260 | 261 | 262 | barplot(1:10, names.arg = LETTERS[1:10]) 263 | 264 | 265 | ints <- 1:10 266 | names(ints) <- LETTERS[1:10] 267 | barplot(ints) 268 | 269 | # install.packages("nycflights13") 270 | data("flights", package = "nycflights13") 271 | carriers <- table(flights$carrier) 272 | carriers 273 | 274 | 275 | sorted_carriers <- sort(carriers, decreasing = TRUE) 276 | sorted_carriers 277 | 278 | 279 | barplot(head(sorted_carriers, 8), 280 | ylim = c(0, max(sorted_carriers) * 1.1), 281 | xlab = "Carrier", ylab = "Flights", 282 | main = "Top 8 carriers with the most flights in record") 283 | 284 | 285 | grades <- c(A = 2, B = 10, C = 12, D = 8) 286 | pie(grades, main = "Grades", radius = 1) 287 | 288 | 289 | random_normal <- rnorm(10000) 290 | hist(random_normal) 291 | 292 | 293 | hist(random_normal, probability = TRUE, col = "lightgray") 294 | curve(dnorm, add = TRUE, lwd = 2, col = "blue") 295 | 296 | 297 | flight_speed <- flights$distance / flights$air_time 298 | hist(flight_speed, main = "Histogram of flight speed") 299 | 300 | 301 | plot(density(flight_speed, from = 2, na.rm = TRUE), 302 | main = "Empirical distribution of flight speed") 303 | abline(v = mean(flight_speed, na.rm = TRUE), 304 | col = "blue", lty = 2) 305 | 306 | 307 | hist(flight_speed, 308 | probability = TRUE, ylim = c(0, 0.5), 309 | main = "Histogram and empirical distribution of flight speed", 310 | border = "gray", col = "lightgray") 311 | lines(density(flight_speed, from = 2, na.rm = TRUE), 312 | col = "darkgray", lwd = 2) 313 | abline(v = mean(flight_speed, na.rm = TRUE), 314 | col = "blue", lty = 2) 315 | 316 | 317 | x <- rnorm(1000) 318 | boxplot(x) 319 | 320 | 321 | 322 | 323 | boxplot(distance / air_time ~ carrier, data = flights, 324 | main = "Box plot of flight speed by carrier") 325 | 326 | 327 | f <- function(x) 3 + 2 * x 328 | x <- rnorm(100) 329 | y <- f(x) + 0.5 * rnorm(100) 330 | 331 | 332 | model1 <- lm(y ~ x) 333 | model1 334 | 335 | 336 | coef(model1) 337 | 338 | 339 | summary(model1) 340 | 341 | 342 | plot(x, y, main = "A simple linear regression") 343 | abline(coef(model1), col = "blue") 344 | 345 | 346 | predict(model1, list(x = c(-1, 0.5)), se.fit = TRUE) 347 | 348 | 349 | data("flights", package = "nycflights13") 350 | plot(air_time ~ distance, data = flights, 351 | pch = ".", 352 | main = "flight speed plot") 353 | 354 | 355 | rows <- nrow(flights) 356 | rows_id <- 1:rows 357 | sample_id <- sample(rows_id, rows * 0.75, replace = FALSE) 358 | flights_train <- flights[sample_id,] 359 | flights_test <- flights[setdiff(rows_id, sample_id), ] 360 | 361 | 362 | model2 <- lm(air_time ~ distance, data = flights_train) 363 | predict2_train <- predict(model2, flights_train) 364 | error2_train <- flights_train$air_time - predict2_train 365 | 366 | 367 | evaluate_error <- function(x) { 368 | c(abs_err = mean(abs(x), na.rm = TRUE), 369 | std_dev = sd(x, na.rm = TRUE)) 370 | } 371 | 372 | 373 | evaluate_error(error2_train) 374 | 375 | 376 | predict2_test <- predict(model2, flights_test) 377 | error2_test <- flights_test$air_time - predict2_test 378 | evaluate_error(error2_test) 379 | 380 | 381 | model3 <- lm(air_time ~ carrier + distance + month + dep_time, 382 | data = flights_train) 383 | predict3_train <- predict(model3, flights_train) 384 | error3_train <- flights_train$air_time - predict3_train 385 | evaluate_error(error3_train) 386 | 387 | 388 | predict3_test <- predict(model3, flights_test) 389 | error3_test <- flights_test$air_time - predict3_test 390 | evaluate_error(error3_test) 391 | 392 | 393 | plot(density(error2_test, na.rm = TRUE), 394 | main = "Empirical distributions of out-of-sample errors") 395 | lines(density(error3_test, na.rm = TRUE), lty = 2) 396 | legend("topright", legend = c("model2", "model3"), 397 | lty = c(1, 2), cex = 0.8, 398 | x.intersp = 0.6, y.intersp = 0.6) 399 | 400 | # install.packages("party") 401 | airct <- party::ctree(Ozone ~ ., 402 | data = subset(airquality, !is.na(Ozone)), 403 | controls = party::ctree_control(maxsurrogate = 3)) 404 | plot(airct, main = "Regression tree of air quality") 405 | 406 | 407 | model4 <- party::ctree(air_time ~ distance + month + dep_time, 408 | data = subset(flights_train, !is.na(air_time))) 409 | predict4_train <- predict(model4, flights_train) 410 | error4_train <- flights_train$air_time - predict4_train[, 1] 411 | evaluate_error(error4_train) 412 | 413 | 414 | predict4_test <- predict(model4, flights_test) 415 | error4_test <- flights_test$air_time - predict4_test[, 1] 416 | evaluate_error(error4_test) 417 | 418 | 419 | plot(density(error3_test, na.rm = TRUE), 420 | ylim = range(0, 0.06), 421 | main = "Empirical distributions of out-of-sample errors") 422 | lines(density(error4_test, na.rm = TRUE), lty = 2) 423 | legend("topright", legend = c("model3", "model4"), 424 | lty = c(1, 2), cex = 0.8, 425 | x.intersp = 0.6, y.intersp = 0.6) 426 | -------------------------------------------------------------------------------- /Chapter 8/inside-r.R: -------------------------------------------------------------------------------- 1 | test0 <- function(x, y) { 2 | if (x > 0) x else y 3 | } 4 | 5 | 6 | test0(1) 7 | 8 | 9 | test0(-1) 10 | 11 | 12 | test0(1, stop("Stop now")) 13 | 14 | 15 | test0(-1, stop("Stop now")) 16 | 17 | 18 | system.time(rnorm(10000000)) 19 | 20 | 21 | system.time(1) 22 | 23 | 24 | system.time(test0(1, rnorm(10000000))) 25 | 26 | 27 | test1 <- function(x, y = stop("Stop now")) { 28 | if (x > 0) x else y 29 | } 30 | 31 | 32 | test1(1) 33 | 34 | 35 | test1(-1) 36 | 37 | 38 | test2 <- function(x, n = floor(length(x) / 2)) { 39 | x[1:n] 40 | } 41 | 42 | 43 | test2(1:10) 44 | 45 | 46 | test2(1:10, 3) 47 | 48 | 49 | test3 <- function(x, n = floor(length(m) / 2)) { 50 | x[1:n] 51 | } 52 | 53 | 54 | test3(1:10) 55 | 56 | 57 | m <- c(1, 2, 3) 58 | test3(1:10) 59 | 60 | 61 | test4 <- function(x, y = p) { 62 | p <- x + 1 63 | c(x, y) 64 | } 65 | 66 | 67 | test4(1) 68 | 69 | 70 | check_input <- function(x) { 71 | switch(x, 72 | y = message("yes"), 73 | n = message("no"), 74 | stop("Invalid input")) 75 | } 76 | 77 | 78 | check_input("y") 79 | 80 | 81 | check_input("n") 82 | 83 | 84 | check_input("what") 85 | 86 | 87 | x1 <- c(1, 2, 3) 88 | 89 | 90 | x2 <- x1 91 | 92 | 93 | x1[1] <- 0 94 | x1 95 | x2 96 | 97 | 98 | x1 <- c(1, 2, 3) 99 | x2 <- x1 100 | 101 | 102 | tracemem(x1) 103 | tracemem(x2) 104 | 105 | 106 | x1[1] <- 0 107 | 108 | 109 | untracemem(x1) 110 | untracemem(x2) 111 | 112 | 113 | modify_first <- function(x) { 114 | x[1] <- 0 115 | x 116 | } 117 | 118 | 119 | v1 <- c(1, 2, 3) 120 | modify_first(v1) 121 | v1 122 | 123 | 124 | v2 <- list(x = 1, y = 2) 125 | modify_first(v2) 126 | v2 127 | 128 | 129 | v1[1] <- 0 130 | v1 131 | 132 | 133 | v2[1] <- 0 134 | v2 135 | 136 | 137 | v3 <- 1:5 138 | v3 <- modify_first(v3) 139 | v3 140 | 141 | 142 | change_names <- function(x) { 143 | if (is.data.frame(x)) { 144 | rownames(x) <- NULL 145 | if (ncol(x) <= length(LETTERS)) { 146 | colnames(x) <- LETTERS[1:ncol(x)] 147 | } else { 148 | stop("Too many columns to rename") 149 | } 150 | } else { 151 | stop("x must be a data frame") 152 | } 153 | x 154 | } 155 | 156 | 157 | small_df <- data.frame( 158 | id = 1:3, 159 | width = runif(3, 5, 10), 160 | height = runif(3, 5, 10)) 161 | small_df 162 | 163 | 164 | change_names(small_df) 165 | 166 | 167 | small_df 168 | 169 | 170 | x <- 0 171 | modify_x <- function(value) { 172 | x <<- value 173 | } 174 | 175 | 176 | modify_x(3) 177 | x 178 | 179 | 180 | count <- 0 181 | lapply(1:3, function(x) { 182 | result <- 1:x 183 | count <<- count + length(result) 184 | result 185 | }) 186 | count 187 | 188 | 189 | nested_list <- list( 190 | a = c(1, 2, 3), 191 | b = list( 192 | x = c("a", "b", "c"), 193 | y = list( 194 | z = c(TRUE, FALSE), 195 | w = c(2, 3, 4)) 196 | ) 197 | ) 198 | str(nested_list) 199 | 200 | 201 | flat_list <- list() 202 | i <- 1 203 | 204 | 205 | res <- rapply(nested_list, function(x) { 206 | flat_list[[i]] <<- x 207 | i <<- i + 1 208 | }) 209 | 210 | 211 | res 212 | 213 | 214 | names(flat_list) <- names(res) 215 | str(flat_list) 216 | 217 | 218 | start_num <- 1 219 | end_num <- 10 220 | fun1 <- function(x) { 221 | c(start_num, x, end_num) 222 | } 223 | 224 | 225 | fun1(c(4, 5, 6)) 226 | 227 | 228 | rm(start_num, end_num) 229 | fun1(c(4, 5, 6)) 230 | 231 | 232 | rm(fun1, start_num, end_num) 233 | fun1 <- function(x) { 234 | c(start_num, x, end_num) 235 | } 236 | 237 | 238 | fun1(c(4, 5, 6)) 239 | 240 | 241 | start_num <- 1 242 | end_num <- 10 243 | fun1(c(4, 5, 6)) 244 | 245 | 246 | p <- 0 247 | fun2 <- function(x) { 248 | p <- 1 249 | x + p 250 | } 251 | 252 | 253 | fun2(1) 254 | 255 | 256 | f1 <- function(x) { 257 | x + p 258 | } 259 | g1 <- function(x) { 260 | p <- 1 261 | f1(x) 262 | } 263 | 264 | 265 | g1(0) 266 | 267 | 268 | p <- 1 269 | g1(0) 270 | 271 | 272 | m <- 1 273 | f2 <- function(x) { 274 | m <<- 2 275 | x 276 | } 277 | g2 <- function(x) { 278 | m <- 1 279 | f2(x) 280 | cat(sprintf("[g2] m: %d\n", m)) 281 | } 282 | 283 | 284 | g2(1) 285 | 286 | 287 | m 288 | 289 | 290 | f <- function(x) { 291 | p <- 1 292 | q <- 2 293 | cat(sprintf("1. [f1] p: %d, q: %d\n", p, q)) 294 | f2 <- function(x) { 295 | p <- 3 296 | cat(sprintf("2. [f2] p: %d, q: %d\n", p, q)) 297 | c(x = x, p = p, q = q) 298 | } 299 | cat(sprintf("3. [f1] p: %d, q: %d\n", p, q)) 300 | f2(x) 301 | } 302 | 303 | 304 | f(0) 305 | 306 | 307 | g <- function(x) { 308 | p <- 1 309 | q <- 2 310 | cat(sprintf("1. [f1] p: %d, q: %d\n", p, q)) 311 | g2 <- function(x) { 312 | p <<- 3 313 | p <- 2 314 | cat(sprintf("2. [f2] p: %d, q: %d\n", p, q)) 315 | c(x = x, p = p, q = q) 316 | } 317 | cat(sprintf("3. [f1] p: %d, q: %d\n", p, q)) 318 | result <- g2(x) 319 | cat(sprintf("4. [f1] p: %d, q: %d\n", p, q)) 320 | result 321 | } 322 | 323 | 324 | g(0) 325 | 326 | 327 | e1 <- new.env() 328 | 329 | 330 | e1 331 | 332 | 333 | e1$x <- 1 334 | e1[["x"]] 335 | 336 | 337 | e1[1:3] 338 | 339 | 340 | e1[[1]] 341 | 342 | 343 | exists("x", e1) 344 | 345 | 346 | get("x", e1) 347 | 348 | 349 | ls(e1) 350 | 351 | 352 | e1$y 353 | e1[["y"]] 354 | 355 | 356 | get("y", e1) 357 | 358 | 359 | exists("y", e1) 360 | 361 | 362 | e2 <- new.env(parent = e1) 363 | 364 | 365 | e2 366 | e1 367 | 368 | 369 | parent.env(e2) 370 | 371 | 372 | e2$y <- 2 373 | 374 | 375 | ls(e2) 376 | 377 | 378 | e2$y 379 | e2[["y"]] 380 | exists("y", e2) 381 | get("y", e2) 382 | 383 | 384 | e2$x 385 | e2[["x"]] 386 | 387 | 388 | exists("x", e2) 389 | get("x", e1) 390 | 391 | 392 | exists("x", e2, inherits = FALSE) 393 | 394 | 395 | get("x", e2, inherits = FALSE) 396 | 397 | 398 | ls(e1) 399 | e3 <- e1 400 | 401 | 402 | e3$y 403 | e1$y <- 2 404 | e3$y 405 | 406 | 407 | modify <- function(e) { 408 | e$z <- 10 409 | } 410 | 411 | 412 | list1 <- list(x = 1, y = 2) 413 | list1$z 414 | modify(list1) 415 | list1$z 416 | 417 | 418 | e1$z 419 | modify(e1) 420 | e1$z 421 | 422 | 423 | environment() 424 | 425 | 426 | global <- environment() 427 | global$some_obj <- 1 428 | 429 | 430 | some_obj 431 | 432 | 433 | globalenv() 434 | 435 | 436 | baseenv() 437 | 438 | 439 | parents <- function(env) { 440 | while (TRUE) { 441 | name <- environmentName(env) 442 | txt <- if (nzchar(name)) name else format(env) 443 | cat(txt, "\n") 444 | env <- parent.env(env) 445 | } 446 | } 447 | 448 | 449 | parents(globalenv()) 450 | 451 | 452 | search() 453 | 454 | 455 | median(c(1, 2, 1 + 3)) 456 | 457 | 458 | simple_fun <- function() { 459 | cat("Executing environment: ") 460 | print(environment()) 461 | cat("Enclosing environment: ") 462 | print(parent.env(environment())) 463 | } 464 | 465 | 466 | simple_fun() 467 | simple_fun() 468 | simple_fun() 469 | 470 | 471 | environment(simple_fun) 472 | 473 | 474 | f1 <- function() { 475 | cat("[f1] Executing in ") 476 | print(environment()) 477 | cat("[f1] Enclosed by ") 478 | print(parent.env(environment())) 479 | cat("[f1] Calling from ") 480 | print(parent.frame()) 481 | 482 | f2 <- function() { 483 | cat("[f2] Executing in ") 484 | print(environment()) 485 | cat("[f2] Enclosed by ") 486 | print(parent.env(environment())) 487 | cat("[f2] Calling from ") 488 | print(parent.frame()) 489 | } 490 | 491 | f3 <- function() { 492 | cat("[f3] Executing in ") 493 | print(environment()) 494 | cat("[f3] Enclosed by ") 495 | print(parent.env(environment())) 496 | cat("[f3] Calling from ") 497 | print(parent.frame()) 498 | f2() 499 | } 500 | 501 | f3() 502 | } 503 | 504 | 505 | f1() 506 | -------------------------------------------------------------------------------- /Chapter 9/meta-programming.R: -------------------------------------------------------------------------------- 1 | add <- function(x, y) { 2 | x + y 3 | } 4 | 5 | 6 | addn <- function(y) { 7 | function(x) { 8 | x + y 9 | } 10 | } 11 | 12 | 13 | add1 <- addn(1) 14 | add2 <- addn(2) 15 | 16 | 17 | add1(10) 18 | add2(10) 19 | 20 | 21 | add1 22 | 23 | 24 | environment(add1)$y 25 | 26 | 27 | environment(add2)$y 28 | 29 | 30 | color_line <- function(col) { 31 | function(...) { 32 | plot(..., type = "l", lty = 1, col = col) 33 | } 34 | } 35 | 36 | 37 | red_line <- color_line("red") 38 | red_line(rnorm(30), main = "Red line plot") 39 | 40 | 41 | plot(rnorm(30), type = "l", lty = 1, col = "red", 42 | main = "Red line plot") 43 | 44 | 45 | nloglik <- function(x) { 46 | n <- length(x) 47 | function(mean, sd) { 48 | log(2 * pi) * n / 2 + log(sd ^ 2) * n / 2 + sum((x - mean) ^ 2) / (2 * sd ^ 2) 49 | } 50 | } 51 | 52 | 53 | data <- rnorm(10000, 1, 2) 54 | 55 | 56 | fit <- stats4::mle(nloglik(data), 57 | start = list(mean = 0, sd = 1), method = "L-BFGS-B", 58 | lower = c(-5, 0.01), upper = c(5, 10)) 59 | 60 | 61 | fit@coef 62 | 63 | 64 | (fit@coef - c(1, 2)) / c(1, 2) 65 | 66 | 67 | hist(data, freq = FALSE, ylim = c(0, 0.25)) 68 | curve(dnorm(x, 1, 2), add = TRUE, col = rgb(1, 0, 0, 0.5), lwd = 6) 69 | curve(dnorm(x, fit@coef[["mean"]], fit@coef[["sd"]]), 70 | add = TRUE, col = "blue", lwd = 2) 71 | 72 | 73 | f1 <- function() { 74 | cat("[f1] executing in ") 75 | print(environment()) 76 | cat("[f1] enclosed by ") 77 | print(parent.env(environment())) 78 | cat("[f1] calling from ") 79 | print(parent.frame()) 80 | } 81 | f2 <- function() { 82 | cat("[f2] executing in ") 83 | print(environment()) 84 | cat("[f2] enclosed by ") 85 | print(parent.env(environment())) 86 | cat("[f2] calling from ") 87 | print(parent.frame()) 88 | p <- f1 89 | p() 90 | } 91 | f1() 92 | f2() 93 | 94 | 95 | f1 <- function(x, y) { 96 | if (x > y) { 97 | x + y 98 | } else { 99 | x - y 100 | } 101 | } 102 | 103 | 104 | f2 <- function(x, y) { 105 | op <- if (x > y) `+` else `-` 106 | op(x, y) 107 | } 108 | 109 | 110 | add <- function(x, y, z) { 111 | x + y + z 112 | } 113 | product <- function(x, y, z) { 114 | x * y * z 115 | } 116 | 117 | 118 | combine <- function(f, x, y, z) { 119 | f(x, y, z) 120 | } 121 | 122 | 123 | combine(add, 3, 4, 5) 124 | combine(product, 3, 4, 5) 125 | 126 | 127 | result <- list() 128 | for (i in seq_along(x)) { 129 | result[[i]] <- f(x[[i]]) 130 | } 131 | result 132 | 133 | 134 | lapply(x, f) 135 | 136 | 137 | lapply <- function(x, f, ...) { 138 | result <- list() 139 | for (i in seq_along(x)) { 140 | result[[i]] <- f(x[i], ...) 141 | } 142 | } 143 | 144 | 145 | lapply(1:3, `+`, 3) 146 | 147 | 148 | list(1 + 3, 2 + 3, 3 + 3) 149 | 150 | 151 | lapply(1:3, addn(3)) 152 | 153 | 154 | sapply(1:3, addn(3)) 155 | 156 | 157 | vapply(1:3, addn(3), numeric(1)) 158 | 159 | 160 | result <- list() 161 | for (i in seq_along(x)) { 162 | # heavy computing task 163 | result[[i]] <- f(x[[i]]) 164 | } 165 | result 166 | 167 | 168 | result <- lapply(x, f) 169 | 170 | 171 | result <- parallel::mclapply(x, f) 172 | 173 | 174 | iris[iris$Sepal.Length > quantile(iris$Sepal.Length, 0.8) & 175 | iris$Sepal.Width > quantile(iris$Sepal.Width, 0.8) & 176 | iris$Petal.Length > quantile(iris$Petal.Length, 0.8) & 177 | iris$Petal.Width > quantile(iris$Petal.Width, 0.8), ] 178 | 179 | 180 | subset(iris, 181 | Sepal.Length > quantile(Sepal.Length, 0.8) & 182 | Sepal.Width > quantile(Sepal.Width, 0.8) & 183 | Petal.Length > quantile(Petal.Length, 0.8) & 184 | Petal.Width > quantile(Petal.Width, 0.8)) 185 | 186 | 187 | iris[Sepal.Length > quantile(Sepal.Length, 0.8) & 188 | Sepal.Width > quantile(Sepal.Width, 0.8) & 189 | Petal.Length > quantile(Petal.Length, 0.8) & 190 | Petal.Width > quantile(Petal.Width, 0.8), ] 191 | 192 | 193 | subset(iris, 194 | Sepal.Length > quantile(Sepal.Length, 0.8) & 195 | Sepal.Width > quantile(Sepal.Width, 0.8) & 196 | Petal.Length > quantile(Petal.Length, 0.8) & 197 | Petal.Width > quantile(Petal.Width, 0.8), 198 | select = c(Sepal.Length, Petal.Length, Species)) 199 | 200 | 201 | rnorm(5) 202 | 203 | 204 | call1 <- quote(rnorm(5)) 205 | call1 206 | 207 | 208 | typeof(call1) 209 | class(call1) 210 | 211 | 212 | name1 <- quote(rnorm) 213 | name1 214 | typeof(name1) 215 | class(name1) 216 | 217 | 218 | quote(pvar) 219 | quote(xfun(a = 1:n)) 220 | 221 | 222 | as.list(call1) 223 | 224 | 225 | call1[[1]] 226 | typeof(call1[[1]]) 227 | class(call1[[1]]) 228 | 229 | 230 | call1[[2]] 231 | typeof(call1[[2]]) 232 | class(call1[[2]]) 233 | 234 | 235 | num1 <- 100 236 | num2 <- quote(100) 237 | 238 | 239 | num1 240 | num2 241 | 242 | 243 | identical(num1, num2) 244 | 245 | 246 | call2 <- quote(c("a", "b")) 247 | call2 248 | 249 | 250 | as.list(call2) 251 | 252 | 253 | str(as.list(call2)) 254 | 255 | 256 | call3 <- quote(1 + 1) 257 | call3 258 | 259 | 260 | is.call(call3) 261 | str(as.list(call3)) 262 | 263 | 264 | call4 <- quote(sqrt(1 + x ^ 2)) 265 | call4 266 | 267 | 268 | pryr::call_tree(call4) 269 | 270 | 271 | call1 272 | call1[[1]] <- quote(runif) 273 | call1 274 | 275 | 276 | call1[[3]] <- -1 277 | names(call1)[[3]] <- "min" 278 | call1 279 | 280 | 281 | fun1 <- function(x) { 282 | quote(x) 283 | } 284 | 285 | 286 | fun1(rnorm(5)) 287 | 288 | 289 | fun2 <- function(x) { 290 | substitute(x) 291 | } 292 | fun2(rnorm(5)) 293 | 294 | 295 | substitute(x + y + x ^ 2, list(x = 1)) 296 | 297 | 298 | substitute(f(x + f(y)), list(f = quote(sin))) 299 | 300 | 301 | call1 <- quote(rnorm(5, mean = 3)) 302 | call1 303 | 304 | 305 | call2 <- call("rnorm", 5, mean = 3) 306 | call2 307 | 308 | 309 | call3 <- as.call(list(quote(rnorm), 5, mean = 3)) 310 | call3 311 | 312 | 313 | identical(call1, call2) 314 | identical(call2, call3) 315 | 316 | 317 | sin(1) 318 | 319 | 320 | call1 <- quote(sin(1)) 321 | call1 322 | eval(call1) 323 | 324 | 325 | call2 <- quote(sin(x)) 326 | call2 327 | 328 | 329 | eval(call2) 330 | 331 | 332 | sin(x) 333 | 334 | 335 | eval(call2, list(x = 1)) 336 | 337 | 338 | e1 <- new.env() 339 | e1$x <- 1 340 | eval(call2, e1) 341 | 342 | 343 | call3 <- quote(x ^ 2 + y ^ 2) 344 | call3 345 | 346 | 347 | eval(call3) 348 | 349 | 350 | eval(call3, list(x = 2)) 351 | 352 | 353 | eval(call3, list(x = 2, y = 3)) 354 | 355 | 356 | e1 <- new.env() 357 | e1$x <- 2 358 | eval(call3, e1) 359 | 360 | 361 | e2 <- new.env(parent = e1) 362 | e2$y <- 3 363 | eval(call3, e2) 364 | 365 | 366 | pryr::call_tree(call3) 367 | 368 | 369 | e3 <- new.env() 370 | e3$y <- 3 371 | eval(call3, list(x = 2), e3) 372 | 373 | 374 | eval(quote(z <- x + y + 1), list(x = 1), e3) 375 | e3$z 376 | 377 | 378 | eval(quote(z <- y + 1), e3) 379 | e3$z 380 | 381 | 382 | eval(quote(1 + 1), list(`+` = `-`)) 383 | 384 | 385 | x <- 1:10 386 | x[3:(length(x) - 5)] 387 | 388 | 389 | qs <- function(x, range) { 390 | range <- substitute(range) 391 | selector <- eval(range, list(. = length(x))) 392 | x[selector] 393 | } 394 | 395 | 396 | qs(x, 3:(. - 5)) 397 | 398 | 399 | qs(x, . - 1) 400 | 401 | 402 | trim_margin <- function(x, n) { 403 | qs(x, (n + 1):(. - n - 1)) 404 | } 405 | 406 | 407 | trim_margin(x, 3) 408 | 409 | 410 | eval 411 | 412 | 413 | qs <- function(x, range) { 414 | range <- substitute(range) 415 | selector <- eval(range, list(. = length(x)), parent.frame()) 416 | x[selector] 417 | } 418 | 419 | 420 | trim_margin(x, 3) 421 | 422 | 423 | formula1 <- z ~ x ^ 2 + y ^ 2 424 | 425 | 426 | typeof(formula1) 427 | class(formula1) 428 | 429 | 430 | str(as.list(formula1)) 431 | 432 | 433 | is.call(formula1) 434 | length(formula1) 435 | 436 | 437 | formula1[[2]] 438 | formula1[[3]] 439 | 440 | 441 | environment(formula1) 442 | 443 | 444 | formula2 <- ~ x + y 445 | str(as.list(formula2)) 446 | 447 | 448 | length(formula2) 449 | formula2[[2]] 450 | 451 | 452 | qs2 <- function(x, range) { 453 | selector <- if (inherits(range, "formula")) { 454 | eval(range[[2]], list(. = length(x)), environment(range)) 455 | } else range 456 | x[selector] 457 | } 458 | 459 | 460 | qs2(1:10, ~ 3:(. - 2)) 461 | 462 | 463 | qs2(1:10, 3) 464 | 465 | 466 | trim_margin2 <- function(x, n) { 467 | qs2(x, ~ (n + 1):(. - n - 1)) 468 | } 469 | 470 | 471 | trim_margin2(x, 3) 472 | 473 | 474 | subset2 <- function(x, subset = TRUE, select = TRUE) { 475 | enclos <- parent.frame() 476 | subset <- substitute(subset) 477 | select <- substitute(select) 478 | row_selector <- eval(subset, x, enclos) 479 | col_envir <- as.list(seq_along(x)) 480 | names(col_envir) <- colnames(x) 481 | col_selector <- eval(select, col_envir, enclos) 482 | x[row_selector, col_selector] 483 | } 484 | 485 | 486 | subset2(mtcars, mpg >= quantile(mpg, 0.9), c(mpg, cyl, qsec)) 487 | 488 | 489 | subset2(mtcars, mpg >= quantile(mpg, 0.9), mpg:drat) 490 | 491 | 492 | -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Learning R Programming 5 | 6 | This is the code repository for the book Learning R Programming, Published by Packt.It contains all the supporting project files necessary to work through the book from start to finish. 7 | 8 | 9 | ## Code snippets 10 | All of the code is organized into folders. Each folder starts with a number followed by the application name. For example, Chapter02 11 | ~~~ 12 | class(c(TRUE, TRUE, FALSE)) 13 | class(c("Hello", "World")) 14 | 15 | 16 | is.numeric(c(1, 2, 3)) 17 | is.numeric(c(TRUE, TRUE, FALSE)) 18 | is.numeric(c("Hello", "World")) 19 | 20 | 21 | strings <- c("1", "2", "3") 22 | class(strings) 23 | ~~~ 24 | 25 | ## Related R products 26 | 27 | [Instant R Starter](https://www.packtpub.com/big-data-and-business-intelligence/instant-r-starter-instant?utm_source=github&utm_campaign=9781782163503&utm_medium=repository) 28 | 29 | [R Object-oriented Programming](https://www.packtpub.com/big-data-and-business-intelligence/r-object-oriented-programming?utm_source=github&utm_medium=repository&utm_campaign=9781783986682) 30 | 31 | [R for Data Science](https://www.packtpub.com/big-data-and-business-intelligence/r-data-science?utm_source=github&utm_medium=repository&utm_campaign=9781784390860) 32 | 33 | ### Suggestion and Feedback 34 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform) 35 | ### Download a free PDF 36 | 37 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
38 |

https://packt.link/free-ebook/9781785889776

--------------------------------------------------------------------------------