├── 2e ├── ch01.R ├── ch06.R ├── install-r-tensorflow.R ├── README.md ├── ch13.R ├── ch14.R ├── ch04.R ├── ch05.R ├── ch03.R └── ch08.R ├── chapter01_what-is-deep-learning.R ├── chapter01_what-is-deep-learning_tmp.R ├── chapter06_universal-workflow-of-ml.R ├── .gitignore ├── README.md ├── deep-learning-with-r-code.Rproj ├── chapter19_future_of_ai.R ├── chapter09_convnet-architecture-patterns.R ├── chapter12_object-detection.R ├── chapter18_best-practices-for-the-real-world.R ├── chapter10_interpreting-what-convnets-learn.R ├── chapter11_image-segmentation.R ├── chapter04_classification-and-regression.R ├── chapter05_fundamentals-of-ml.R ├── chapter13_timeseries-forecasting.R ├── chapter08_image-classification.R ├── chapter02_mathematical-building-blocks.R ├── chapter16_text-generation.R └── chapter17_image-generation.R /2e/ch01.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter01_what-is-deep-learning.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter01_what-is-deep-learning_tmp.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter06_universal-workflow-of-ml.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /2e/ch06.R: -------------------------------------------------------------------------------- 1 | ## ---- eval = FALSE-------------------------------------------------------- 2 | ## x <- scale(x) 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | cats_vs_dogs_small 6 | dogs-vs-cats 7 | pets_dataset 8 | *.keras -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Deep Learning with R (Code Only) 2 | 3 | The repository contains code from the book "Deep Learning with R," 2nd and 3rd editions. 4 | Code for the 2nd edition can be found in the "2e" directory. 
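For example, assuming the R packages and Python backend used by the book are already installed (see `2e/README.md` for setup notes), any chapter script can be run end to end by sourcing it from the repository root. The chapter file below is just an illustrative choice; every script in the listing above works the same way:

```r
# Illustrative usage: run the 3rd-edition classification/regression chapter.
source("chapter04_classification-and-regression.R", echo = TRUE)
```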
5 | -------------------------------------------------------------------------------- /deep-learning-with-r-code.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | -------------------------------------------------------------------------------- /2e/install-r-tensorflow.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | if(!requireNamespace("remotes")) install.packages("remotes") 4 | 5 | remotes::update_packages() 6 | remotes::install_cran(c("readr", "tibble", "zip", "fs", "listarrays", "keras")) 7 | 8 | envname <- "r-keras" 9 | 10 | if("--fresh" %in% commandArgs(TRUE)) { 11 | reticulate::miniconda_uninstall() 12 | unlink("~/.pyenv", recursive = TRUE) 13 | unlink(paste0("~/.virtualenvs/", envname), recursive = TRUE) 14 | } 15 | 16 | 17 | python <- reticulate::install_python("3.9:latest") 18 | reticulate::virtualenv_create(envname, python = python) 19 | 20 | keras::install_keras( 21 | envname = envname, 22 | extra_packages = c("keras-tuner", "ipython", "kaggle") 23 | ) 24 | -------------------------------------------------------------------------------- /2e/README.md: -------------------------------------------------------------------------------- 1 | ## Deep Learning with R, 2nd Edition (Code Only) 2 | 3 | The folder contains just the code from the book "Deep Learning with R, 2nd Edition". 4 | 5 | You can install all the dependencies by cloning this repo and sourcing the `"install-r-tensorflow.R"` script, 6 | either at the R console or the terminal: 7 | 8 | ```bash 9 | Rscript install-r-tensorflow.R 10 | ``` 11 | 12 | The script creates a "r-keras" virtual environment that will automatically be 13 | discovered by the `keras` R package. 14 | 15 | Note: the install script assumes that R and CUDA drivers are already installed. 16 | 17 | Modern reticulate no longer requires setting up a manual virtual environment. 
18 | Instead of running the `install-r-tensorflow.R` script, you can instead 19 | request that reticulate use an ephemeral python installation by running this 20 | at the start of the R session: 21 | 22 | ```r 23 | reticulate::py_require(c( 24 | 25 | "tensorflow", 26 | # If you are on Linux and want to use a GPU, you can instead install: 27 | # "tensorflow[and-cuda]", or to force cpu only, "tensorflow-cpu" 28 | 29 | # install legacy (v2) keras 30 | "tf-keras", 31 | 32 | # additional packages used in the book 33 | "keras-tuner", "ipython", "kaggle" 34 | )) 35 | ``` 36 | -------------------------------------------------------------------------------- /chapter19_future_of_ai.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | num_input_features <- 10 3 | num_classes <- 10 4 | num_values <- 10 5 | num_features <- 10 6 | height <- width <- 512 7 | channels <- 3 8 | num_timesteps <- 100 9 | sequence_length <- 100 10 | 11 | 12 | inputs <- keras_input(shape = c(num_input_features)) 13 | outputs <- inputs |> 14 | layer_dense(32, activation = "relu") |> 15 | layer_dense(32, activation = "relu") |> 16 | layer_dense(1, activation = "sigmoid") 17 | model <- keras_model(inputs, outputs) 18 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 19 | 20 | 21 | inputs <- keras_input(shape = c(num_input_features)) 22 | outputs <- inputs |> 23 | layer_dense(32, activation = "relu") |> 24 | layer_dense(32, activation = "relu") |> 25 | layer_dense(num_classes, activation = "softmax") 26 | model <- keras_model(inputs, outputs) 27 | model |> compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 28 | 29 | 30 | inputs <- keras_input(shape = c(num_input_features)) 31 | outputs <- inputs |> 32 | layer_dense(32, activation = "relu") |> 33 | layer_dense(32, activation = "relu") |> 34 | layer_dense(num_classes, activation = "sigmoid") 35 | model <- keras_model(inputs, outputs) 36 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 37 | 38 | 39 | inputs <- keras_input(shape = c(num_input_features)) 40 | outputs <- inputs |> 41 | layer_dense(32, activation = "relu") |> 42 | layer_dense(32, activation = "relu") |> 43 | layer_dense(num_values) 44 | model <- keras_model(inputs, outputs) 45 | model |> compile(optimizer = "rmsprop", loss = "mse") 46 | 47 | 48 | inputs <- keras_input(shape = c(height, width, channels)) 49 | outputs <- inputs |> 50 | layer_separable_conv_2d(32, 3, activation = "relu") |> 51 | layer_separable_conv_2d(64, 3, activation = "relu") |> 52 | layer_max_pooling_2d(2) |> 53 | layer_separable_conv_2d(64, 3, activation = "relu") |> 54 | layer_separable_conv_2d(128, 3, activation = "relu") |> 55 | layer_max_pooling_2d(2) |> 56 | layer_separable_conv_2d(64, 3, activation = "relu") |> 57 | layer_separable_conv_2d(128, 3, activation = "relu") |> 58 | layer_global_average_pooling_2d() |> 59 | layer_dense(32, activation = "relu") |> 60 | layer_dense(num_classes, activation = "softmax") 61 | model <- keras_model(inputs, outputs) 62 | model |> compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 63 | 64 | 65 | inputs <- keras_input(shape = c(num_timesteps, num_features)) 66 | outputs <- inputs |> 67 | layer_lstm(32) |> 68 | layer_dense(num_classes, activation = "sigmoid") 69 | model <- keras_model(inputs, outputs) 70 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 71 | 72 | 73 | inputs <- keras_input(shape = c(num_timesteps, num_features)) 74 | outputs <- inputs |> 75 | 
layer_lstm(32, return_sequences = TRUE) |> 76 | layer_lstm(32, return_sequences = TRUE) |> 77 | layer_lstm(32) |> 78 | layer_dense(num_classes, activation = "sigmoid") 79 | model <- keras_model(inputs, outputs) 80 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /chapter09_convnet-architecture-patterns.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | inputs <- keras_input(shape = c(32, 32, 3)) 5 | x <- inputs |> layer_conv_2d(32, 3, activation = "relu") 6 | residual <- x # <1> 7 | x <- x |> layer_conv_2d(64, 3, activation = "relu", padding = "same") # <2> 8 | residual <- residual |> layer_conv_2d(64, 1) # <3> 9 | x <- layer_add(c(x, residual)) # <4> 10 | 11 | 12 | inputs <- keras_input(shape = c(32, 32, 3)) 13 | x <- inputs |> layer_conv_2d(32, 3, activation = "relu") 14 | residual <- x # <1> 15 | x <- x |> 16 | layer_conv_2d(64, 3, activation = "relu", padding = "same") |> # <2> 17 | layer_max_pooling_2d(2, padding = "same") # <2> 18 | residual <- residual |> 19 | layer_conv_2d(64, 1, strides = 2) # <3> 20 | x <- layer_add(list(x, residual)) # <4> 21 | 22 | 23 | inputs <- keras_input(shape = c(32, 32, 3)) 24 | x <- inputs |> layer_rescaling(scale = 1/255) 25 | 26 | residual_block <- function(x, filters, pooling = FALSE) { # <1> 27 | residual <- x 28 | x <- x |> 29 | layer_conv_2d(filters, 3, activation = "relu", padding = "same") |> 30 | layer_conv_2d(filters, 3, activation = "relu", padding = "same") 31 | 32 | if (pooling) { 33 | x <- x |> layer_max_pooling_2d(pool_size = 2, padding = "same") 34 | residual <- residual |> layer_conv_2d(filters, 1, strides = 2) # <2> 35 | } else if (filters != op_shape(residual)[[4]]) { 36 | residual <- residual |> layer_conv_2d(filters, 1) # <3> 37 | } 38 | 39 | layer_add(list(x, residual)) 40 | } 41 | 42 | outputs <- x |> 43 | residual_block(filters = 32, pooling = TRUE) |> # <4> 44 | residual_block(filters = 64, pooling = TRUE) |> # <5> 45 | residual_block(filters = 128, pooling = FALSE) |> # <6> 46 | layer_global_average_pooling_2d() |> 47 | layer_dense(units = 1, activation = "sigmoid") 48 | 49 | model <- keras_model(inputs = inputs, outputs = outputs) 50 | 51 | 52 | model 53 | 54 | 55 | image_size <- c(180, 180) 56 | batch_size <- 32 57 | data_dir <- fs::path("dogs_vs_cats_small") 58 | 59 | train_dataset <- 60 | image_dataset_from_directory(data_dir / "train", 61 | image_size = image_size, 62 | batch_size = batch_size) 63 | validation_dataset <- 64 | image_dataset_from_directory(data_dir / "validation", 65 | image_size = image_size, 66 | batch_size = batch_size) 67 | test_dataset <- 68 | image_dataset_from_directory(data_dir / "test", 69 | image_size = image_size, 70 | batch_size = batch_size) 71 | 72 | 73 | data_augmentation_layers <- list( 74 | layer_random_flip(, "horizontal"), 75 | layer_random_rotation(, 0.1), 76 | layer_random_zoom(, 0.2) 77 | ) 78 | 79 | data_augmentation <- function(images, targets) { 80 | for (layer in data_augmentation_layers) { 81 | images <- layer(images) 82 | } 83 | list(images, targets) 84 | } 85 | 86 | augmented_train_dataset <- train_dataset |> 87 | tfdatasets::dataset_map(data_augmentation, num_parallel_calls = 8) |> 88 | tfdatasets::dataset_prefetch(4) 89 | 90 | 91 | inputs <- keras_input(shape = c(180, 180, 3)) 92 | 93 | x <- inputs |> 94 | layer_rescaling(scale = 1 / 255) |> # <1> 95 | layer_conv_2d(filters = 32, kernel_size = 
5, use_bias = FALSE) # <2> 96 | 97 | for (size in c(32, 64, 128, 256, 512)) { # <3> 98 | residual <- x 99 | 100 | x <- x |> 101 | layer_batch_normalization() |> 102 | layer_activation("relu") |> 103 | layer_separable_conv_2d(size, 3, padding = "same", use_bias = FALSE) |> 104 | 105 | layer_batch_normalization() |> 106 | layer_activation("relu") |> 107 | layer_separable_conv_2d(size, 3, padding = "same", use_bias = FALSE) |> 108 | 109 | layer_max_pooling_2d(pool_size = 3, strides = 2, padding = "same") 110 | 111 | residual <- residual |> 112 | layer_conv_2d(size, 1, strides = 2, padding = "same", use_bias = FALSE) 113 | 114 | x <- layer_add(x, residual) 115 | } 116 | 117 | outputs <- x |> 118 | layer_global_average_pooling_2d() |> 119 | layer_dropout(0.25) |> # <4> 120 | layer_dense(1, activation = "sigmoid") 121 | 122 | model <- keras_model(inputs, outputs) 123 | 124 | 125 | compile(model, 126 | loss = "binary_crossentropy", 127 | optimizer = "adam", 128 | metrics = "accuracy") 129 | history <- fit(model, augmented_train_dataset, 130 | epochs = 100, 131 | validation_data = validation_dataset) 132 | 133 | 134 | plot(history) 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /chapter12_object-detection.R: -------------------------------------------------------------------------------- 1 | library(dplyr, warn.conflicts = FALSE) 2 | library(stringr) 3 | library(xml2) 4 | library(reticulate) 5 | library(tensorflow, exclude = c("shape", "set_random_seed")) 6 | library(tfdatasets, exclude = "shape") 7 | library(keras3) 8 | py_require("matplotlib") 9 | py_require("opencv-python") 10 | py_require("keras-hub==0.18.1") 11 | 12 | use_backend("tensorflow") 13 | 14 | 15 | VOC_url <- \(data) sprintf( 16 | "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOC%s_06-Nov-2007.tar", data 17 | ) 18 | VOC_train_val <- get_file(origin = VOC_url("trainval")) 19 | VOC_test <- get_file(origin = VOC_url("test")) 20 | 21 | 22 | BASE_DIR <- fs::path("VOCdevkit", "VOC2007") 23 | IMAGE_DIR <- BASE_DIR / "JPEGImages" 24 | ANNOTATION_DIR <- BASE_DIR / "Annotations" 25 | IMAGESET_DIR <- BASE_DIR / "ImageSets" / "Main" 26 | 27 | CLASSES <- c( 28 | "aeroplane", 29 | "bicycle", 30 | "bird", 31 | "boat", 32 | "bottle", 33 | "bus", 34 | "car", 35 | "cat", 36 | "chair", 37 | "cow", 38 | "diningtable", 39 | "dog", 40 | "horse", 41 | "motorbike", 42 | "person", 43 | "pottedplant", 44 | "sheep", 45 | "sofa", 46 | "train", 47 | "tvmonitor" 48 | ) 49 | 50 | 51 | list.files(IMAGESET_DIR) 52 | 53 | 54 | read_example <- function(image_id) { 55 | image_path <- tf$strings$join(list(IMAGE_DIR, "/", image_id, ".jpg")) 56 | csv_path <- tf$strings$join(list(ANNOTATION_DIR, "/", image_id, ".csv")) 57 | 58 | image <- image_path |> 59 | tf$io$read_file() |> 60 | tf$io$decode_jpeg(channels = 3L) |> 61 | tf$ensure_shape(shape(NA, NA, 3)) 62 | 63 | csv_data <- csv_path |> 64 | tf$io$read_file() |> 65 | tf$strings$regex_replace("\n$", "") |> #<1> drop last newline 66 | tf$strings$split("\n") |> 67 | tf$io$decode_csv(list( 68 | tf$constant(list(), dtype = tf$int32), # class_idx 69 | tf$constant(list(), dtype = tf$float32), # ymin 70 | tf$constant(list(), dtype = tf$float32), # xmin 71 | tf$constant(list(), dtype = tf$float32), # ymax 72 | tf$constant(list(), dtype = tf$float32) # xmax 73 | )) 74 | 75 | .[class_idx, ymin, xmin, ymax, xmax] <- csv_data 76 | 77 | bbox <- tf$stack(list(ymin, xmin, ymax, xmax), axis = -1L) |> 78 | tf$ensure_shape(shape(NA, 4)) 79 | 80 | label <- class_idx |> 
tf$ensure_shape(shape(NA)) 81 | 82 | list(image = image, 83 | objects = list(bbox = bbox, label = label)) 84 | } 85 | 86 | 87 | split <- "test" 88 | split_file <- fs::path(IMAGESET_DIR, split, ext = "txt") 89 | image_id <- readLines(split_file, 1) |> tf$constant() 90 | 91 | read_example(image_id) |> str() 92 | 93 | 94 | get_dataset <- function(split, shuffle_files = TRUE, shuffle_buffer_size = 1000) { 95 | 96 | split_file <- fs::path(IMAGESET_DIR, split, ext = "txt") 97 | ds <- text_line_dataset(split_file, num_parallel_reads = 12) 98 | 99 | if (shuffle_files) 100 | ds <- ds |> dataset_shuffle(shuffle_buffer_size) 101 | 102 | ds <- ds |> 103 | dataset_map(read_example, num_parallel_calls = 12) |> 104 | dataset_prefetch() 105 | 106 | ds 107 | } 108 | 109 | train_ds <- get_dataset("trainval", shuffle_files = TRUE) 110 | eval_ds <- get_dataset("test", shuffle_files = TRUE) 111 | 112 | train_ds |> 113 | as_iterator() |> iter_next() |> str() 114 | 115 | 116 | example <- train_ds |> dataset_batch(1) |> as_iterator() |> iter_next() 117 | 118 | par(mar = c(0,0,0,0)) 119 | 120 | keras$visualization$plot_bounding_box_gallery( 121 | example$image, 122 | bounding_box_format = "rel_yxyx", 123 | y_true = list( 124 | boxes = example$objects$bbox, #|> op_convert_to_numpy(), 125 | labels = example$objects$label #|> op_convert_to_numpy() 126 | ), 127 | scale = 8, 128 | class_mapping = CLASSES 129 | ) 130 | 131 | 132 | BBOX_FORMAT <- "yxyx" 133 | 134 | parse_record <- function(record) { 135 | image <- record$image 136 | .[h, w, depth] <- tf$shape(image) |> tf$unstack(3L) 137 | rel_boxes <- record$objects$bbox 138 | abs_boxes <- keras$utils$bounding_boxes$convert_format( 139 | rel_boxes, 140 | source = "rel_yxyx", 141 | target = BBOX_FORMAT, 142 | height = h, 143 | width = w 144 | ) 145 | labels <- record$objects$label 146 | 147 | list(images = image, 148 | bounding_boxes = list(boxes = abs_boxes, labels = labels)) 149 | } 150 | 151 | record <- train_ds |> as_iterator() |> iter_next() 152 | 153 | 154 | envir::import_from(keras$visualization, plot_bounding_box_gallery) 155 | IMAGE_SIZE <- shape(640, 640) 156 | BATCH_SIZE <- 4 157 | 158 | resizing <- layer_resizing( 159 | height = IMAGE_SIZE[[1]], 160 | width = IMAGE_SIZE[[2]], 161 | interpolation = "bilinear", 162 | pad_to_aspect_ratio = TRUE, 163 | bounding_box_format = BBOX_FORMAT, 164 | ) 165 | 166 | max_box_layer <- layer_max_num_bounding_boxes( 167 | max_number = 100, 168 | bounding_box_format = BBOX_FORMAT 169 | ) 170 | 171 | data_augmentation_layers <- list( 172 | layer_random_flip(mode = "horizontal", bounding_box_format = BBOX_FORMAT) 173 | ) 174 | 175 | 176 | prepare_dataset <- function(ds, batch_size = 4) { 177 | 178 | ds <- ds |> 179 | dataset_map(parse_record, num_parallel_calls = 8) |> 180 | dataset_map(resizing, num_parallel_calls = 8) 181 | 182 | for (layer in data_augmentation_layers) 183 | ds <- ds |> dataset_map(layer, num_parallel_calls = NULL) 184 | 185 | ds <- ds |> 186 | dataset_map(max_box_layer, num_parallel_calls = 8) |> 187 | dataset_batch(batch_size, drop_remainder = TRUE) |> 188 | dataset_prefetch() 189 | 190 | ds 191 | 192 | } 193 | 194 | train_ds_prepared <- prepare_dataset(train_ds, batch_size=BATCH_SIZE) 195 | eval_ds_prepared <- prepare_dataset(eval_ds, batch_size=BATCH_SIZE) 196 | 197 | first_images_prepared <- train_ds_prepared |> as_iterator() |> iter_next() 198 | 199 | plot_bounding_box_gallery( 200 | first_images_prepared$images, 201 | bounding_box_format = BBOX_FORMAT, 202 | y_true = first_images_prepared$bounding_boxes, 
203 | scale = 4, 204 | class_mapping = py_dict(seq_along(CLASSES), CLASSES) 205 | ) 206 | 207 | 208 | library(keras3) 209 | 210 | keras_hub <- reticulate::import("keras_hub") 211 | model <- keras_hub$models$ImageObjectDetector$ 212 | from_preset("retinanet_resnet50_fpn_coco") 213 | 214 | 215 | model_with_random_head <- keras_hub$models$ImageObjectDetector$from_preset( 216 | "retinanet_resnet50_fpn_coco", 217 | num_classes = length(CLASSES) 218 | ) 219 | 220 | 221 | split_labels <- function(x) { 222 | list(x$images, 223 | list( 224 | boxes = x$bounding_boxes$boxes, 225 | classes = x$bounding_boxes$labels 226 | )) 227 | } 228 | 229 | train_ds_prepared <- train_ds_prepared |> dataset_map(split_labels) 230 | eval_ds_prepared <- eval_ds_prepared |> dataset_map(split_labels) 231 | 232 | callbacks <- list( 233 | callback_model_checkpoint( 234 | "pascal_voc_detection.keras", 235 | save_best_only = TRUE, 236 | monitor = "val_loss" 237 | ) 238 | ) 239 | 240 | 241 | model <- load_model("pascal_voc_detection.keras") 242 | 243 | .[images, gt_boxes] <- iter_next(as_iterator(eval_ds_prepared)) 244 | predictions <- model |> predict(images) 245 | 246 | 247 | plot_bounding_box_gallery( 248 | images, 249 | bounding_box_format = BBOX_FORMAT, 250 | y_true = list(boxes = gt_boxes$boxes, labels = gt_boxes$classes), 251 | y_pred = list(boxes = predictions$boxes, labels = predictions$classes), 252 | scale = 8, 253 | class_mapping = py_dict(seq_along(CLASSES), CLASSES) 254 | ) 255 | 256 | -------------------------------------------------------------------------------- /chapter18_best-practices-for-the-real-world.R: -------------------------------------------------------------------------------- 1 | Sys.unsetenv("CUDA_VISIBLE_DEVICES") 2 | Sys.setenv("XLA_FLAGS" = "--xla_force_host_platform_device_count=8") 3 | library(reticulate) 4 | library(keras3) 5 | use_backend("jax") 6 | py_require(c("keras-tuner", "scikit-learn")) 7 | 8 | 9 | library(keras3) 10 | library(reticulate) 11 | use_backend("jax") 12 | py_require(c("keras-tuner", "scikit-learn")) 13 | 14 | 15 | build_model <- function(hp, num_classes = 10) { 16 | units <- hp$Int( # <1> 17 | name = "units", # <1> 18 | min_value = 16L, # <1> 19 | max_value = 64L, # <1> 20 | step = 16L) # <1> 21 | model <- keras_model_sequential() |> 22 | layer_dense(units, activation = "relu") |> 23 | layer_dense(num_classes, activation = "softmax") 24 | 25 | optimizer <- hp$Choice(name = "optimizer", # <2> 26 | values = c("rmsprop", "adam")) # <2> 27 | model |> compile(optimizer = optimizer, 28 | loss = "sparse_categorical_crossentropy", 29 | metrics = "accuracy") 30 | model # <3> 31 | } 32 | 33 | 34 | kt <- import("keras_tuner") 35 | 36 | SimpleMLP(kt$HyperModel) %py_class% { 37 | `__init__` <- function(self, num_classes) { # <2> 38 | self$num_classes <- num_classes 39 | } 40 | 41 | build <- function(self, hp) { 42 | build_model(hp, self$num_classes) 43 | } 44 | } 45 | 46 | hypermodel <- SimpleMLP(num_classes=10) 47 | 48 | 49 | tuner <- kt$BayesianOptimization( 50 | build_model, # <1> 51 | objective = "val_accuracy", # <2> 52 | max_trials = 100L, # <3> 53 | executions_per_trial = 2L, # <4> 54 | directory = "mnist_kt_test", # <5> 55 | overwrite = TRUE # <6> 56 | ) 57 | 58 | 59 | tuner$search_space_summary() 60 | 61 | 62 | .[.[x_train_full, y_train_full], .[x_test, y_test]] <- dataset_mnist() # <1> 63 | x_train_full <- x_train_full |> array_reshape(c(-1, 28 * 28)) # <1> 64 | x_train_full <- x_train_full / 255 # <1> 65 | x_test <- x_test |> array_reshape(c(-1, 28 * 28)) # <1> 66 | x_test 
<- x_test / 255 # <1> 67 | 68 | num_val_samples <- 10000 # <2> 69 | val_i <- seq_len(num_val_samples) 70 | x_val <- x_train_full[val_i, ] # <2> 71 | x_train <- x_train_full[-val_i, ] # <2> 72 | y_val <- y_train_full[val_i] |> as.matrix() # <2> 73 | y_train <- y_train_full[-val_i] |> as.matrix() # <2> 74 | 75 | callbacks <- list( # <3> 76 | callback_early_stopping(monitor = "val_loss", patience = 5) # <3> 77 | ) 78 | 79 | 80 | top_n <- 4L 81 | best_hps <- tuner$get_best_hyperparameters(top_n) # <1> 82 | 83 | 84 | get_best_epoch <- function(hp) { 85 | model <- build_model(hp) 86 | callbacks <- list( 87 | callback_early_stopping( 88 | monitor = "val_loss", 89 | mode = "min", 90 | patience = 10 # <1> 91 | ) 92 | ) 93 | 94 | history <- model |> fit( 95 | x_train, y_train, 96 | validation_data = list(x_val, y_val), 97 | epochs = 100, 98 | batch_size = 128, 99 | callbacks = callbacks 100 | ) 101 | 102 | best_epoch <- history |> 103 | tibble::as_tibble() |> 104 | dplyr::filter(metric == "loss" & value == min(value)) |> 105 | _$epoch 106 | glue::glue("Best epoch: {best_epoch}") 107 | best_epoch 108 | } 109 | 110 | 111 | get_best_trained_model <- function(hp) { 112 | best_epoch <- get_best_epoch(hp) 113 | model <- build_model(hp) 114 | model |> fit( 115 | x_train_full, y_train_full, 116 | batch_size=128L, epochs=as.integer(best_epoch * 1.2)) 117 | model 118 | } 119 | 120 | best_models <- py_eval("[]", convert = FALSE) 121 | for (hp in best_hps) { 122 | model <- get_best_trained_model(hp) 123 | model |> evaluate(x_test, y_test) |> print() 124 | best_models$append(model) 125 | } 126 | 127 | 128 | best_models <- tuner$get_best_models(top_n) 129 | 130 | 131 | model <- keras_model_sequential(input_shape = c(16000)) |> 132 | layer_dense(64000, activation = "relu") |> 133 | layer_dense(8000, activation = "sigmoid") 134 | 135 | 136 | keras$distribution$list_devices() 137 | 138 | 139 | devices <- paste0("gpu:", 0:7) 140 | device_mesh <- keras$distribution$DeviceMesh( 141 | shape = shape(2, 4), 142 | axis_names = c("data", "model"), 143 | devices = devices 144 | ) 145 | 146 | 147 | for (v in model$variables) 148 | cat(v$path, "\n") 149 | 150 | 151 | layout_map <- keras$distribution$LayoutMap(device_mesh) 152 | layout_map["sequential/dense/kernel"] <- tuple(NULL, "model") 153 | layout_map["sequential/dense/bias"] <- tuple("model") 154 | layout_map["sequential/dense_1/kernel"] <- tuple(NULL, "model") 155 | layout_map["sequential/dense_1/bias"] <- tuple("model") 156 | 157 | 158 | model_parallel <- keras$distribution$ModelParallel( 159 | layout_map=layout_map, 160 | batch_dim_name="data" # <1> 161 | ) 162 | keras$distribution$set_distribution(model_parallel) 163 | 164 | 165 | model$layers[[1]]$kernel$value$sharding 166 | 167 | 168 | jax <- reticulate::import("jax") 169 | value <- model$layers[[1]]$kernel$value 170 | jax$debug$visualize_sharding(value$shape, value$sharding) 171 | 172 | 173 | keras$distribution$set_distribution( 174 | keras$distribution$DataParallel(devices = c("gpu:0", "gpu:1")) 175 | ) 176 | 177 | 178 | config_set_dtype_policy("float16") 179 | 180 | 181 | config_set_dtype_policy("mixed_float16") 182 | 183 | 184 | optimizer <- optimizer_adam(learning_rate = 1e-3, loss_scale_factor = 10) 185 | 186 | 187 | optimizer <- 188 | optimizer_adam(learning_rate = 1e-3) |> 189 | optimizer_loss_scale() 190 | 191 | 192 | x <- op_array(rbind(c(0.1, 0.9), c(1.2, -0.8))) 193 | kernel <- op_array(rbind(c(-0.1, -2.2), c(1.1, 0.7))) 194 | 195 | 196 | abs_max_quantize <- function(value) { 197 | abs_max <- 
op_max(op_abs(value), keepdims = TRUE) # <1> 198 | scale <- op_divide(127, abs_max + 1e-7) # <2> 199 | scaled_value <- value * scale # <3> 200 | scaled_value <- op_clip(op_round(scaled_value), -127, 127) # <4> 201 | scaled_value <- op_cast(scaled_value, dtype = "int8") # <5> 202 | list(scaled_value, scale) 203 | } 204 | 205 | .[int_x, x_scale] <- abs_max_quantize(x) 206 | .[int_kernel, kernel_scale] <- abs_max_quantize(kernel) 207 | 208 | 209 | int_y <- op_matmul(int_x, int_kernel) 210 | y <- op_cast(int_y, dtype = "float32") / (x_scale * kernel_scale) 211 | 212 | 213 | y 214 | op_matmul(x, kernel) 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /2e/ch13.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include=FALSE------------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tensorflow::as_tensor(1) 5 | 6 | 7 | ## ---- eval = FALSE-------------------------------------------------------- 8 | ## reticulate::py_install("keras-tuner", pip = TRUE) 9 | 10 | 11 | ## ------------------------------------------------------------------------- 12 | 13 | build_model <- function(hp, num_classes = 10) { 14 | 15 | units <- hp$Int(name = "units", 16 | min_value = 16L, max_value = 64L, step = 16L) 17 | 18 | model <- keras_model_sequential() %>% 19 | layer_dense(units, activation = "relu") %>% 20 | layer_dense(num_classes, activation = "softmax") 21 | 22 | optimizer <- hp$Choice(name = "optimizer", 23 | values = c("rmsprop", "adam")) 24 | 25 | model %>% compile(optimizer = optimizer, 26 | loss = "sparse_categorical_crossentropy", 27 | metrics = c("accuracy")) 28 | model 29 | } 30 | 31 | 32 | ## ------------------------------------------------------------------------- 33 | kt <- reticulate::import("kerastuner") 34 | 35 | SimpleMLP(kt$HyperModel) %py_class% { 36 | 37 | `__init__` <- function(num_classes) { 38 | self$num_classes <- num_classes 39 | } 40 | 41 | build <- function(hp) { 42 | build_model(hp, self$num_classes) 43 | } 44 | 45 | } 46 | 47 | hypermodel <- SimpleMLP(num_classes = 10) 48 | 49 | 50 | ## import kerastuner as kt 51 | ## 52 | ## class SimpleMLP(kt.HyperModel): 53 | ## def __init__(self, num_classes): 54 | ## self.num_classes = num_classes 55 | ## 56 | ## def build(self, hp): 57 | ## build_model(hp, self.num_classes) 58 | ## 59 | ## hypermodel = SimpleMLP(num_classes = 10) 60 | 61 | 62 | ## ------------------------------------------------------------------------- 63 | tuner <- kt$BayesianOptimization( 64 | build_model, 65 | objective = "val_accuracy", 66 | max_trials = 100L, 67 | executions_per_trial = 2L, 68 | directory = "mnist_kt_test", 69 | overwrite = TRUE 70 | ) 71 | 72 | 73 | ## ------------------------------------------------------------------------- 74 | tuner$search_space_summary() 75 | 76 | 77 | ## ---- eval = FALSE-------------------------------------------------------- 78 | ## objective <- kt$Objective( 79 | ## name = "val_accuracy", 80 | ## direction = "max" 81 | ## ) 82 | ## 83 | ## tuner <- kt$BayesianOptimization( 84 | ## build_model, 85 | ## objective = objective, 86 | ## ... 
87 | ## ) 88 | 89 | 90 | ## ------------------------------------------------------------------------- 91 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 92 | x_train %<>% { array_reshape(., c(-1, 28 * 28)) / 255 } 93 | x_test %<>% { array_reshape(., c(-1, 28 * 28)) / 255 } 94 | x_train_full <- x_train 95 | y_train_full <- y_train 96 | num_val_samples <- 10000 97 | c(x_train, x_val) %<-% list(x_train[seq(num_val_samples), ], 98 | x_train[-seq(num_val_samples), ]) 99 | c(y_train, y_val) %<-% list(y_train[seq(num_val_samples)], 100 | y_train[-seq(num_val_samples)]) 101 | 102 | callbacks <- c( 103 | callback_early_stopping(monitor="val_loss", patience=5) 104 | ) 105 | 106 | tuner$search( 107 | x_train, y_train, 108 | batch_size = 128L, 109 | epochs = 100L, 110 | validation_data = list(x_val, y_val), 111 | callbacks = callbacks, 112 | verbose = 2L 113 | ) 114 | 115 | 116 | ## ------------------------------------------------------------------------- 117 | top_n <- 4L 118 | best_hps <- tuner$get_best_hyperparameters(top_n) 119 | str(best_hps) 120 | 121 | 122 | ## ------------------------------------------------------------------------- 123 | get_best_epoch <- function(hp) { 124 | model <- build_model(hp) 125 | 126 | callbacks <- c( 127 | callback_early_stopping(monitor = "val_loss", mode = "min", 128 | patience = 10)) 129 | 130 | history <- model %>% fit( 131 | x_train, y_train, 132 | validation_data = list(x_val, y_val), 133 | epochs = 100, 134 | batch_size = 128, 135 | callbacks = callbacks 136 | ) 137 | 138 | best_epoch <- which.min(history$metrics$val_loss) 139 | print(glue::glue("Best epoch: {best_epoch}")) 140 | invisible(best_epoch) 141 | } 142 | 143 | 144 | ## ------------------------------------------------------------------------- 145 | get_best_trained_model <- function(hp) { 146 | best_epoch <- get_best_epoch(hp) 147 | model <- build_model(hp) 148 | model %>% fit( 149 | x_train_full, 150 | y_train_full, 151 | batch_size = 128, 152 | epochs = round(best_epoch * 1.2) 153 | ) 154 | model 155 | } 156 | 157 | best_models <- best_hps %>% 158 | lapply(get_best_trained_model) 159 | 160 | 161 | ## ------------------------------------------------------------------------- 162 | best_models <- tuner$get_best_models(top_n) 163 | 164 | 165 | ## ---- eval = FALSE-------------------------------------------------------- 166 | ## preds_a <- model_a %>% predict(x_val) 167 | ## preds_b <- model_b %>% predict(x_val) 168 | ## preds_c <- model_c %>% predict(x_val) 169 | ## preds_d <- model_d %>% predict(x_val) 170 | ## final_preds <- 0.25 * (preds_a + preds_b + preds_c + preds_d) 171 | 172 | 173 | ## ---- eval = FALSE-------------------------------------------------------- 174 | ## preds_a <- model_a %>% predict(x_val) 175 | ## preds_b <- model_b %>% predict(x_val) 176 | ## preds_c <- model_c %>% predict(x_val) 177 | ## preds_d <- model_d %>% predict(x_val) 178 | ## final_preds <- 179 | ## 0.5 * preds_a + 0.25 * preds_b + 0.1 * preds_c + 0.15 * preds_d 180 | 181 | 182 | ## ---- eval = FALSE-------------------------------------------------------- 183 | ## * (2 ^ ( - 127)) * 1. 
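## Added note (not in the original script): a float32 value is stored as a
## sign bit, an 8-bit exponent biased by 127, and a 23-bit mantissa, so a
## normalized value decodes as (-1)^sign * (2 ^ (exponent - 127)) * 1.mantissa,
## which is the kind of factor shown in the fragment above.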
184 | 185 | 186 | ## ------------------------------------------------------------------------- 187 | r_array <- base::array(0, dim = c(2, 2)) 188 | tf_tensor <- tensorflow::as_tensor(r_array) 189 | tf_tensor$dtype 190 | 191 | 192 | ## ------------------------------------------------------------------------- 193 | r_array <- base::array(0, dim = c(2, 2)) 194 | tf_tensor <- tensorflow::as_tensor(r_array, dtype = "float32") 195 | tf_tensor$dtype 196 | 197 | 198 | ## ------------------------------------------------------------------------- 199 | keras::keras$mixed_precision$set_global_policy("mixed_float16") 200 | 201 | 202 | ## ---- eval = FALSE-------------------------------------------------------- 203 | ## library(tensorflow) 204 | ## strategy <- tf$distribute$MirroredStrategy() 205 | ## cat("Number of devices:", strategy$num_replicas_in_sync, "\n") 206 | ## with(strategy$scope(), { 207 | ## model <- get_compiled_model() 208 | ## }) 209 | ## model %>% fit( 210 | ## train_dataset, 211 | ## epochs = 100, 212 | ## validation_data = val_dataset, 213 | ## callbacks = callbacks 214 | ## ) 215 | 216 | 217 | ## ------------------------------------------------------------------------- 218 | build_model <- function(input_size) { 219 | resnet <- application_resnet50(weights = NULL, 220 | include_top = FALSE, 221 | pooling = "max") 222 | 223 | inputs <- layer_input(c(input_size, 3)) 224 | 225 | outputs <- inputs %>% 226 | resnet_preprocess_input() %>% 227 | resnet() %>% 228 | layer_dense(10, activation = "softmax") 229 | 230 | model <- keras_model(inputs, outputs) 231 | 232 | model %>% compile( 233 | optimizer = "rmsprop", 234 | loss = "sparse_categorical_crossentropy", 235 | metrics = "accuracy" 236 | ) 237 | 238 | model 239 | } 240 | 241 | strategy <- tf$distribute$MirroredStrategy() 242 | cat("Number of replicas:", strategy$num_replicas_in_sync, "\n") 243 | 244 | with(strategy$scope(), { 245 | model <- build_model(input_size = c(32, 32)) 246 | }) 247 | 248 | 249 | ## ---- eval = FALSE-------------------------------------------------------- 250 | ## c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_cifar10() 251 | ## model %>% fit(x_train, y_train, batch_size = 1024) 252 | 253 | 254 | ## ---- eval = FALSE-------------------------------------------------------- 255 | ## tpu <- tf$distribute$cluster_resolver$TPUClusterResolver$connect() 256 | ## cat("Device:", tpu$master(), "\n") 257 | ## strategy <- tf$distribute$TPUStrategy(tpu) 258 | ## with(strategy$scope(), { ... 
}) 259 | -------------------------------------------------------------------------------- /chapter10_interpreting-what-convnets-learn.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | library(keras3) 5 | model <- load_model("convnet_from_scratch_with_augmentation.keras") 6 | 7 | 8 | img_path <- get_file( # <1> 9 | fname="cat.jpg", # <1> 10 | origin="https://img-datasets.s3.amazonaws.com/cat.jpg" # <1> 11 | ) # <1> 12 | 13 | get_img_array <- function(img_path, target_size) { 14 | image <- img_path |> 15 | image_load(target_size = target_size) |> # <2> 16 | image_to_array() # <3> 17 | dim(image) <- c(1, dim(image)) # <4> 18 | image 19 | } 20 | 21 | img <- get_img_array(img_path, target_size = c(180, 180)) 22 | str(img) 23 | 24 | 25 | display_image <- function(x, ..., max = 255, margin = 0) { 26 | par(mar = rep(margin, 4)) 27 | 28 | x |> as.array() |> drop() |> 29 | as.raster(max = max) |> 30 | plot(..., interpolate = FALSE) 31 | } 32 | 33 | img |> display_image() 34 | 35 | 36 | is_conv_layer <- function(x) inherits(x, keras$layers$Conv2D) 37 | is_pooling_layer <- function(x) inherits(x, keras$layers$MaxPool2D) 38 | 39 | layer_outputs <- list() 40 | for (layer in model$layers) # <1> 41 | if (is_conv_layer(layer) || is_pooling_layer(layer)) # <1> 42 | layer_outputs[[layer$name]] <- layer$output # <1> 43 | 44 | activation_model <- # <2> 45 | keras_model(inputs = model$input, 46 | outputs = layer_outputs) 47 | 48 | 49 | activations <- predict(activation_model, img) # <1> 50 | str(activations) 51 | 52 | 53 | first_layer_activation <- activations[[ names(layer_outputs)[1] ]] 54 | dim(first_layer_activation) 55 | 56 | 57 | plot_activations <- function(x, ...) { 58 | withr::local_par(list(mar = c(0,0,0,0))) 59 | 60 | x <- drop(as.array(x)) 61 | if (sum(x) == 0) 62 | return(plot(as.raster("gray"))) 63 | 64 | rotate <- function(x) t(apply(x, 2, rev)) 65 | graphics::image( 66 | rotate(x), asp = 1, axes = FALSE, useRaster = TRUE, 67 | col = viridis::viridis(256), ... 
68 | ) 69 | } 70 | 71 | plot_activations(first_layer_activation[, , , 1]) 72 | plot_activations(first_layer_activation[, , , 2]) 73 | plot_activations(first_layer_activation[, , , 3]) 74 | plot_activations(first_layer_activation[, , , 4]) 75 | plot_activations(first_layer_activation[, , , 5]) 76 | plot_activations(first_layer_activation[, , , 6]) 77 | plot_activations(first_layer_activation[, , , 7]) 78 | plot_activations(first_layer_activation[, , , 8]) 79 | 80 | 81 | for (layer_name in names(activations)) { # <1> 82 | layer_activation <- activations[[layer_name]] # <1> 83 | 84 | n_features <- dim(layer_activation)[[4]] # <2> 85 | 86 | par(mfrow = n2mfrow(n_features, asp = 1.75), # <3> 87 | mar = rep(.1, 4), oma = c(0, 0, 1.5, 0)) # <3> 88 | 89 | for (j in 1:n_features) # <4> 90 | plot_activations(layer_activation[, , , j]) # <4> 91 | title(main = layer_name, outer = TRUE) # <5> 92 | } 93 | 94 | 95 | model <- application_xception( 96 | weights = "imagenet", 97 | include_top = FALSE # <1> 98 | ) 99 | 100 | 101 | unlist(lapply(model$layers, \(layer) { 102 | if (inherits(layer, keras$layers$Conv2D) || 103 | inherits(layer, keras$layers$SeparableConv2D)) 104 | layer$name 105 | })) 106 | 107 | 108 | layer_name <- "block3_sepconv1" # <1> 109 | layer <- model |> get_layer(name = layer_name) # <2> 110 | feature_extractor <- 111 | keras_model(inputs = model$input, # <3> 112 | outputs = layer$output) 113 | 114 | 115 | activation <- img |> 116 | application_preprocess_inputs(model = model) |> 117 | feature_extractor() 118 | 119 | 120 | compute_loss <- function(image, filter_index) { # <1> 121 | activation <- feature_extractor(image) 122 | filter_activation <- activation@r[, 3:-3, 3:-3, filter_index] # <2> 123 | op_mean(filter_activation) # <3> 124 | } 125 | 126 | 127 | model <- application_xception(weights = "imagenet", include_top = FALSE) 128 | 129 | layer_name <- "block3_sepconv1" 130 | layer <- model |> get_layer(name = layer_name) 131 | feature_extractor <- keras_model(inputs = model$input, outputs = layer$output) 132 | 133 | compute_loss <- function(image, filter_index) { 134 | activation <- feature_extractor(image) 135 | filter_activation <- activation@r[, 3:-3, 3:-3, filter_index] 136 | op_mean(filter_activation) 137 | } 138 | 139 | 140 | jax <- reticulate::import("jax") 141 | 142 | grad_fn <- jax$grad(compute_loss) 143 | 144 | gradient_ascent_step <- jax$jit(function(image, filter_index, learning_rate) { 145 | grads <- grad_fn(image, filter_index) 146 | grads <- op_normalize(grads) 147 | image + (learning_rate * grads) 148 | }) 149 | 150 | 151 | img_height <- img_width <- 200 152 | 153 | generate_filter_pattern <- function(filter_index) { 154 | iterations <- 30 # <1> 155 | learning_rate <- 10 # <2> 156 | image <- random_uniform( # <3> 157 | minval = 0.4, maxval = 0.6, # <3> 158 | shape = shape(1, img_width, img_height, 3) # <3> 159 | ) 160 | 161 | for (i in seq(iterations)) # <4> 162 | image <- gradient_ascent_step(image, filter_index, learning_rate) # <4> 163 | 164 | image 165 | } 166 | 167 | 168 | deprocess_image <- function(image, crop = TRUE) { 169 | image <- op_squeeze(image, axis = 1) # <1> 170 | image <- image - op_mean(image) # <2> 171 | image <- image / op_std(image) # <2> 172 | image <- (image * 64) + 128 # <2> 173 | image <- op_clip(image, 0, 255) # <2> 174 | if (crop) { 175 | image <- image@r[26:-26, 26:-26, ] # <3> 176 | } 177 | op_cast(image, "uint8") 178 | } 179 | 180 | 181 | generate_filter_pattern(filter_index = 2L) |> 182 | deprocess_image() |> 183 | display_image() 184 | 
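# The block below repeats the gradient-ascent visualization for the first 64
# filters of the target layer and tiles the resulting patterns in an 8 x 8 grid.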
185 | 186 | par(mfrow = c(8, 8)) 187 | for (i in seq_len(64)) { 188 | generate_filter_pattern(filter_index = i) |> 189 | deprocess_image() |> 190 | display_image(margin = .1) 191 | } 192 | 193 | 194 | model <- application_xception(weights = "imagenet") # <1> 195 | preprocess_input <- application_preprocess_inputs(model) 196 | 197 | 198 | img_path <- get_file( 199 | fname = "elephant.jpg", # <1> 200 | origin = "https://img-datasets.s3.amazonaws.com/elephant.jpg" # <1> 201 | ) 202 | 203 | get_image_tensor <- function(img_path, target_size = NULL) { 204 | img_path |> 205 | image_load(target_size = target_size) |> # <2> 206 | image_to_array(dtype = "float32") |> # <3> 207 | op_expand_dims(1) |> # <4> 208 | preprocess_input() # <5> 209 | } 210 | 211 | image_tensor <- get_image_tensor(img_path, target_size = c(299, 299)) 212 | 213 | 214 | preds <- predict(model, image_tensor) 215 | application_decode_predictions(model, preds, top = 3) 216 | 217 | 218 | which.max(preds[1, ]) 219 | 220 | 221 | last_conv_layer_name <- "block14_sepconv2_act" 222 | classifier_layer_names <- c("avg_pool", "predictions") 223 | last_conv_layer <- model |> get_layer(last_conv_layer_name) 224 | last_conv_layer_model <- keras_model(model$inputs, 225 | last_conv_layer$output) 226 | 227 | 228 | classifier_input <- keras_input(batch_shape = last_conv_layer$output$shape) 229 | 230 | x <- classifier_input 231 | for (layer_name in classifier_layer_names) 232 | x <- get_layer(model, layer_name)(x) 233 | 234 | classifier_model <- keras_model(classifier_input, x) 235 | 236 | 237 | torch <- reticulate::import("jax") 238 | 239 | loss_fn <- function(last_conv_layer_output) { # <1> 240 | preds <- classifier_model(last_conv_layer_output) 241 | top_pred_index <- op_argmax(preds@r[1]) 242 | top_class_channel <- preds[, top_pred_index] 243 | top_class_channel@r[1] # <2> 244 | } 245 | grad_fn <- jax$grad(loss_fn) # <3> 246 | 247 | get_top_class_gradients <- function(image_tensor) { 248 | last_conv_layer_output <- last_conv_layer_model(image_tensor) 249 | grads <- grad_fn(last_conv_layer_output) # <4> 250 | list(grads, last_conv_layer_output) 251 | } 252 | 253 | 254 | .[grads, last_conv_layer_output] <- get_top_class_gradients(image_tensor) 255 | 256 | pooled_grads <- op_mean(grads, axis = c(1, 2, 3), keepdims = TRUE) # <1> 257 | output <- last_conv_layer_output * pooled_grads # <2> 258 | heatmap <- op_mean(output@r[1], axis = -1) # <3> 259 | 260 | 261 | plot_activations(heatmap) 262 | 263 | 264 | palette <- hcl.colors(256, palette = "Spectral", alpha = .4, rev = TRUE) 265 | heatmap <- as.array(heatmap) 266 | heatmap[] <- palette[cut(heatmap, 256)] 267 | heatmap <- as.raster(heatmap) 268 | 269 | img <- image_load(img_path) |> image_to_array() 270 | display_image(img) 271 | rasterImage(heatmap, 0, 0, ncol(img), nrow(img), interpolate = FALSE) 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /chapter11_image-segmentation.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("XLA_PYTHON_CLIENT_PREALLOCATE"="false") 2 | Sys.setenv("XLA_PYTHON_CLIENT_MEM_FRACTION"="1") #.xx fraction 3 | 4 | library(reticulate) 5 | library(fs) 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(tensorflow, exclude = c("shape", "set_random_seed")) 8 | library(tfdatasets, exclude = "shape") 9 | library(keras3) 10 | 11 | py_require("keras-hub==0.18.1") 12 | py_require("keras-hub") 13 | display_image <- function(x, ..., max = 255, margin = 0) { 14 | par(mar = 
rep(margin, 4)) 15 | 16 | x |> as.array() |> drop() |> 17 | as.raster(max = max) |> 18 | plot(..., interpolate = FALSE) 19 | } 20 | 21 | 22 | library(fs) 23 | data_dir <- path("pets_dataset") 24 | dir_create(data_dir) 25 | 26 | 27 | library(dplyr, warn.conflicts = FALSE) 28 | input_dir <- data_dir / "images" 29 | target_dir <- data_dir / "annotations/trimaps/" 30 | 31 | all_image_paths <- tibble( 32 | input = sort(dir_ls(input_dir, glob = "*.jpg")), 33 | target = sort(dir_ls(target_dir, glob = "*.png", all = FALSE)) # <1> 34 | ) 35 | 36 | 37 | glimpse(all_image_paths) 38 | 39 | 40 | display_image <- function(x, ..., max = 255, margin = 0) { 41 | par(mar = rep(margin, 4)) 42 | 43 | x |> as.array() |> drop() |> 44 | as.raster(max = max) |> 45 | plot(..., interpolate = FALSE) 46 | } 47 | 48 | 49 | all_image_paths$input[10] |> # <1> 50 | image_load() |> image_to_array() |> 51 | display_image() 52 | 53 | 54 | display_target <- function(target, ..., offset = TRUE) { 55 | if (offset) 56 | target <- target - 1L 57 | display_image(target, max = 2L, ...) 58 | } 59 | 60 | all_image_paths$target[10] |> 61 | image_load(color_mode = "grayscale") |> image_to_array() |> # <1> 62 | display_target() 63 | 64 | 65 | library(tfdatasets, exclude = "shape") 66 | 67 | img_size <- shape(200, 200) 68 | 69 | tf_image_load <- function(path, target_size = NULL, ...) { 70 | img <- path |> 71 | tf$io$read_file() |> 72 | tf$io$decode_image(..., expand_animations = FALSE) 73 | 74 | if (!is.null(target_size)) 75 | img <- img |> tf$image$resize(target_size) 76 | 77 | img 78 | } 79 | 80 | make_dataset <- function(image_paths) { 81 | stopifnot(is.data.frame(image_paths), 82 | names(image_paths) == c("input", "target")) 83 | 84 | image_paths |> 85 | tensor_slices_dataset() |> 86 | dataset_map(function(example_paths) { 87 | .[input_path = input, target_path = target] <- example_paths 88 | 89 | input_image <- input_path |> 90 | tf_image_load(channels = 3L, target_size = img_size) # <1> 91 | 92 | target <- target_path |> 93 | tf_image_load(channels = 1L, target_size = img_size) # <2> 94 | 95 | target <- tf$cast(target, "uint8") - 1L # <3> 96 | 97 | list(input_image, target) 98 | }) |> 99 | dataset_cache() |> # <4> 100 | dataset_shuffle(buffer_size = nrow(image_paths)) |> # <5> 101 | dataset_batch(32) 102 | } 103 | 104 | num_val_samples <- 1000 # <6> 105 | 106 | image_paths <- all_image_paths |> 107 | dplyr::mutate(use = ifelse(sample.int(n()) > num_val_samples, 108 | "train", "val")) |> # <7> 109 | tidyr::nest(.by = use) |> 110 | tibble::deframe() 111 | 112 | image_paths 113 | 114 | .[train_ds = train, val_ds = val] <- image_paths |> lapply(make_dataset) # <8> 115 | 116 | 117 | batch <- train_ds |> as_iterator() |> iter_next() 118 | str(batch) 119 | 120 | .[images, targets] <- batch 121 | par(mfrow = c(4, 8)) 122 | for (i in 1:16) { 123 | images@r[i] |> display_image() 124 | targets@r[i] |> display_target(offset = FALSE) # <1> 125 | } 126 | 127 | 128 | get_model <- function(img_size, num_classes) { 129 | 130 | conv <- function(..., padding = "same", activation = "relu") # <1> 131 | layer_conv_2d(..., padding = padding, activation = activation) 132 | 133 | conv_transpose <- function(..., padding = "same", activation = "relu") # <1> 134 | layer_conv_2d_transpose(..., padding = padding, activation = activation) 135 | 136 | input <- keras_input(shape = c(img_size, 3)) 137 | output <- input |> 138 | layer_rescaling(scale = 1/255) |> # <2> 139 | conv(64, 3, strides = 2) |> 140 | conv(64, 3) |> 141 | conv(128, 3, strides = 2) |> 142 | 
conv(128, 3) |> 143 | conv(256, 3, strides = 2) |> 144 | conv(256, 3) |> 145 | conv_transpose(256, 3) |> 146 | conv_transpose(256, 3, strides = 2) |> 147 | conv_transpose(128, 3) |> 148 | conv_transpose(128, 3, strides = 2) |> 149 | conv_transpose(64, 3) |> 150 | conv_transpose(64, 3, strides = 2) |> 151 | conv(num_classes, 3, activation = "softmax") # <3> 152 | 153 | keras_model(input, output) 154 | } 155 | 156 | model <- get_model(img_size = img_size, num_classes = 3) 157 | model 158 | 159 | 160 | foreground_iou <- metric_iou( 161 | num_classes = 3, # <1> 162 | target_class_ids = c(0), # <2> 163 | name = "foreground_iou", 164 | sparse_y_true = TRUE, # <3> 165 | sparse_y_pred = FALSE, # <4> 166 | ) 167 | 168 | 169 | model |> compile( 170 | optimizer = "adam", 171 | loss = "sparse_categorical_crossentropy", 172 | metrics = foreground_iou 173 | ) 174 | 175 | callbacks <- list( 176 | callback_model_checkpoint("oxford_segmentation.keras", 177 | save_best_only = TRUE) 178 | ) 179 | 180 | 181 | history <- readRDS("ch11-history.rds") 182 | 183 | 184 | plot(history, metrics = "loss") 185 | 186 | 187 | model <- load_model("oxford_segmentation.keras") 188 | 189 | 190 | for (i in 1:10) { 191 | 192 | test_image <- image_paths$val$input[i] |> 193 | tf_image_load(channels = 3L, target_size = img_size) 194 | 195 | test_mask <- image_paths$val$target[i] |> 196 | tf_image_load(channels = 1L, target_size = img_size) |> 197 | tf$subtract(1) 198 | 199 | predicted_mask_probs <- model(test_image@r[newaxis]) 200 | predicted_mask <- op_argmax(predicted_mask_probs, axis = -1) - 1 201 | 202 | par(mfrow = c(1, 3)) 203 | display_image(test_image); title("image") 204 | display_target(predicted_mask, offset = FALSE) 205 | display_target(test_mask, offset = FALSE) 206 | } 207 | 208 | 209 | library(reticulate) 210 | py_require("keras-hub==0.18.1") # <1> 211 | 212 | keras_hub <- import("keras_hub") # <2> 213 | 214 | 215 | model <- keras_hub$models$ImageSegmenter$from_preset("sam_huge_sa1b") 216 | 217 | 218 | count_params(model) 219 | 220 | 221 | path <- get_file( 222 | origin = "https://s3.amazonaws.com/keras.io/img/book/fruits.jpg" # <1> 223 | ) 224 | pil_image <- image_load(path) # <2> 225 | image_array <- image_to_array(pil_image, dtype = "float32") # <3> 226 | display_image(image_array) # <4> 227 | 228 | 229 | image_size <- c(1024, 1024) 230 | 231 | resize_and_pad <- function(x) { 232 | op_image_resize(x, image_size, pad_to_aspect_ratio = TRUE) 233 | } 234 | 235 | image <- resize_and_pad(image_array) 236 | op_shape(image) 237 | 238 | 239 | display_points <- function(coords, color = "white") { 240 | stopifnot(is.matrix(coords), ncol(coords) == 2) 241 | coords[, 2] <- image_size[1] - coords[, 2] # <1> 242 | points(coords, col = color, pch = 8, cex = 2, lwd = 2) 243 | } 244 | 245 | display_mask <- function(mask, index = 1, color = "dodgerblue", alpha = 0.6) { 246 | .[r, g, b] <- col2rgb(color) 247 | color <- rgb(r, g, b, alpha * 255, maxColorValue = 255) 248 | 249 | mask <- mask |> as.array() |> drop() |> _[index, , ] 250 | mask[] <- ifelse(mask > 1, color, rgb(0, 0, 0, 0)) 251 | 252 | .[h, w] <- image_size 253 | rasterImage(mask, 0, 0, h, w, interpolate = FALSE) 254 | } 255 | 256 | display_box <- function(box, ..., color = "red", lwd = 2) { 257 | stopifnot(is.matrix(box), dim(box) == c(2, 2)) 258 | box[, 2] <- image_size[1] - box[, 2] # <1> 259 | rect(xleft = box[1, 1], ytop = box[1, 2], 260 | xright = box[2, 1], ybottom = box[2, 2], 261 | ..., border = color, lwd = 2) 262 | } 263 | 264 | 265 | input_point <- 
rbind(c(580, 480)) # <1> 266 | input_label <- 1 # <2> 267 | 268 | display_image(image) 269 | display_points(input_point) 270 | 271 | 272 | str(model$input) # <1> 273 | 274 | 275 | np <- import("numpy", convert = FALSE) # <1> 276 | 277 | image |> 278 | np_array("float32") |> 279 | np$expand_dims(0L) |> # <2> 280 | str() 281 | 282 | 283 | outputs <- model |> predict(list( 284 | images = image |> np_array("float32") |> np$expand_dims(0L), 285 | points = input_point |> np_array("float32") |> np$expand_dims(0L), 286 | labels = input_label |> np_array("float32") |> np$expand_dims(0L) 287 | )) 288 | 289 | str(outputs) 290 | 291 | 292 | display_image(image) 293 | display_mask(outputs$masks) 294 | display_points(input_point) 295 | 296 | 297 | input_label <- 1 298 | input_point <- rbind(c(300, 550)) 299 | 300 | outputs <- model |> predict(list( 301 | images = image |> np_array("float32") |> np$expand_dims(0L), 302 | points = input_point |> np_array("float32") |> np$expand_dims(0L), 303 | labels = input_label |> np_array("float32") |> np$expand_dims(0L) 304 | )) 305 | 306 | 307 | display_image(image) 308 | display_mask(outputs$masks) 309 | display_points(input_point) 310 | 311 | 312 | par(mfrow = c(1, 3)) 313 | for (i in 2:4) { 314 | display_image(image) 315 | display_mask(outputs$masks, index = i) 316 | display_points(input_point) 317 | } 318 | 319 | 320 | input_box <- rbind(c(520, 180), # <1> 321 | c(770, 420)) # <2> 322 | 323 | display_image(image) 324 | display_box(input_box) 325 | 326 | 327 | outputs <- model |> predict(list( 328 | images = image |> np_array("float32") |> np$expand_dims(0L), 329 | boxes = input_box |> np_array("float32") |> np$expand_dims(c(0L, 1L)) # <1> 330 | )) 331 | 332 | display_image(image) 333 | display_box(input_box) 334 | display_mask(outputs$masks) 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /chapter04_classification-and-regression.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | .[.[train_data, train_labels], .[test_data, test_labels]] <- 5 | dataset_imdb(num_words = 10000) 6 | 7 | 8 | str(train_data) 9 | 10 | 11 | str(train_labels) 12 | 13 | max(sapply(train_data, max)) 14 | 15 | 16 | word_index <- dataset_imdb_word_index() # <1> 17 | 18 | reverse_word_index <- names(word_index) # <2> 19 | names(reverse_word_index) <- as.character(word_index) # <2> 20 | 21 | decoded_words <- train_data[[1]] |> 22 | sapply(function(i) { 23 | if (i > 3) reverse_word_index[[as.character(i - 3)]] # <3> 24 | else "?" 
25 | }) 26 | decoded_review <- paste0(decoded_words, collapse = " ") 27 | 28 | 29 | decoded_review |> substr(1, 200) |> strwrap(70) |> cat(sep = "\n") 30 | 31 | 32 | multi_hot_encode <- function(sequences, num_classes) { 33 | results <- matrix(0, nrow = length(sequences), ncol = num_classes) # <1> 34 | for (i in seq_along(sequences)) { 35 | results[i, sequences[[i]]] <- 1 # <2> 36 | } 37 | results 38 | } 39 | x_train <- multi_hot_encode(train_data, num_classes = 10000) # <3> 40 | x_test <- multi_hot_encode(test_data, num_classes = 10000) # <4> 41 | 42 | 43 | x_train[1, ] |> str() 44 | 45 | 46 | str(x_train) 47 | 48 | y_train <- as.numeric(train_labels) 49 | y_test <- as.numeric(test_labels) 50 | 51 | 52 | model <- keras_model_sequential() |> 53 | layer_dense(16, activation = "relu") |> 54 | layer_dense(16, activation = "relu") |> 55 | layer_dense(1, activation = "sigmoid") 56 | 57 | 58 | sigmoid <- function(x) 1 / (1 + exp(-1 * x)) 59 | withr::with_par(list(pty = "s"), { 60 | plot(sigmoid, -4, 4, 61 | main = "sigmoid", 62 | ylim = c(-1, 2), 63 | ylab = ~ sigmoid(x), xlab = ~ x, 64 | panel.first = grid()) 65 | }) 66 | 67 | 68 | relu <- function(x) pmax(0, x) 69 | withr::with_par(list(pty = "s"), { 70 | plot(relu, -4, 4, 71 | main = "relu", 72 | ylim = c(-1, 2), 73 | ylab = ~ relu(x), xlab = ~ x, 74 | panel.first = grid()) 75 | }) 76 | 77 | 78 | model |> compile( 79 | optimizer = "adam", 80 | loss = "binary_crossentropy", 81 | metrics = c("accuracy") 82 | ) 83 | 84 | 85 | val_indices <- 1:1000 86 | 87 | x_val <- x_train[val_indices, ] 88 | partial_x_train <- x_train[-val_indices, ] 89 | 90 | y_val <- y_train[val_indices] 91 | partial_y_train <- y_train[-val_indices] 92 | 93 | 94 | history <- model |> fit( 95 | partial_x_train, partial_y_train, 96 | epochs = 20, 97 | batch_size = 512, 98 | validation_data = list(x_val, y_val) 99 | ) 100 | 101 | 102 | history <- model |> fit( 103 | x_train, y_train, 104 | epochs = 20, 105 | batch_size = 512, 106 | validation_split = 0.2 107 | ) 108 | 109 | 110 | str(history$metrics) 111 | 112 | 113 | library(ggplot2) 114 | plot(history) + ggtitle("[IMDB] Training history") 115 | 116 | 117 | model <- keras_model_sequential() |> 118 | layer_dense(16, activation = "relu") |> 119 | layer_dense(16, activation = "relu") |> 120 | layer_dense(1, activation = "sigmoid") 121 | 122 | model |> compile( 123 | optimizer = "adam", 124 | loss = "binary_crossentropy", 125 | metrics = "accuracy" 126 | ) 127 | 128 | model |> fit(x_train, y_train, epochs = 4, batch_size = 512) 129 | 130 | results <- model |> evaluate(x_test, y_test) 131 | 132 | 133 | results 134 | 135 | 136 | preds <- model |> predict(x_test) 137 | str(preds) 138 | 139 | 140 | .[.[train_data, train_labels], .[test_data, test_labels]] <- 141 | dataset_reuters(num_words = 10000) 142 | 143 | 144 | str(train_data) 145 | 146 | 147 | str(test_data) 148 | 149 | 150 | train_data[[11]] |> str() 151 | 152 | 153 | word_index <- dataset_reuters_word_index() 154 | reverse_word_index <- setNames(object = names(word_index), 155 | nm = unlist(word_index) - 3L) # <1> 156 | decoded_newswire <- train_data[[1]] |> 157 | sapply(function(i) { 158 | if (i > 3) reverse_word_index[[as.character(i)]] 159 | else "?" 
160 | }) |> 161 | paste0(collapse = " ") 162 | decoded_newswire 163 | 164 | 165 | train_labels[[11]] 166 | 167 | 168 | x_train <- multi_hot_encode(train_data, num_classes = 10000) # <1> 169 | x_test <- multi_hot_encode(test_data, num_classes = 10000) # <2> 170 | 171 | 172 | one_hot_encode <- function(labels, num_classes = 46) { 173 | results <- matrix(0, nrow = length(labels), ncol = num_classes) 174 | for (i in seq_along(labels)) { 175 | label_position <- labels[[i]] + 1 # <1> 176 | results[i, label_position] <- 1 177 | } 178 | results 179 | } 180 | 181 | y_train <- one_hot_encode(train_labels) # <2> 182 | y_test <- one_hot_encode(test_labels) # <3> 183 | 184 | 185 | y_train <- to_categorical(train_labels) 186 | y_test <- to_categorical(test_labels) 187 | 188 | 189 | model <- keras_model_sequential() |> 190 | layer_dense(64, activation = "relu") |> 191 | layer_dense(64, activation = "relu") |> 192 | layer_dense(46, activation = "softmax") 193 | 194 | 195 | model |> compile( 196 | optimizer = "adam", 197 | loss = "categorical_crossentropy", 198 | metrics = c("accuracy", metric_top_k_categorical_accuracy(k = 3)) 199 | ) 200 | 201 | 202 | val_indices <- 1:1000 203 | 204 | x_val <- x_train[val_indices,] 205 | partial_x_train <- x_train[-val_indices,] 206 | 207 | y_val <- y_train[val_indices,] 208 | partial_y_train <- y_train[-val_indices,] 209 | 210 | 211 | history <- model |> fit( 212 | partial_x_train, partial_y_train, 213 | epochs = 20, 214 | batch_size = 512, 215 | validation_data = list(x_val, y_val) 216 | ) 217 | 218 | 219 | plot(history) + ggtitle("Training and validation metrics") 220 | 221 | 222 | model <- keras_model_sequential() |> 223 | layer_dense(64, activation = "relu") |> 224 | layer_dense(64, activation = "relu") |> 225 | layer_dense(46, activation = "softmax") 226 | 227 | model |> compile( 228 | optimizer = "adam", 229 | loss = "categorical_crossentropy", 230 | metrics = "accuracy" 231 | ) 232 | 233 | model |> fit(x_train, y_train, epochs = 9, batch_size = 512) 234 | 235 | results <- model |> evaluate(x_test, y_test) 236 | 237 | 238 | results 239 | 240 | 241 | mean(test_labels == sample(test_labels)) 242 | 243 | 244 | predictions <- model |> predict(x_test) 245 | 246 | 247 | str(predictions) 248 | 249 | 250 | sum(predictions[1, ]) 251 | 252 | 253 | which.max(predictions[1,]) 254 | 255 | 256 | y_train <- train_labels 257 | y_test <- test_labels 258 | 259 | 260 | model |> compile( 261 | optimizer = "adam", 262 | loss = "sparse_categorical_crossentropy", 263 | metrics = "accuracy" 264 | ) 265 | 266 | 267 | model <- keras_model_sequential() |> 268 | layer_dense(64, activation = "relu") |> 269 | layer_dense(4, activation = "relu") |> 270 | layer_dense(46, activation = "softmax") 271 | 272 | model |> compile( 273 | optimizer = "adam", 274 | loss = "categorical_crossentropy", 275 | metrics = "accuracy" 276 | ) 277 | model |> fit( 278 | partial_x_train, partial_y_train, 279 | epochs = 20, 280 | batch_size = 128, 281 | validation_data = list(x_val, y_val) 282 | ) 283 | 284 | 285 | .[.[train_data, train_targets], .[test_data, test_targets]] <- 286 | dataset_california_housing(version = "small") # <1> 287 | 288 | 289 | str(train_data) 290 | 291 | 292 | str(test_data) 293 | 294 | 295 | train_mean <- apply(train_data, 2, mean) 296 | train_sd <- apply(train_data, 2, sd) 297 | x_train <- scale(train_data, center = train_mean, scale = train_sd) 298 | x_test <- scale(test_data, center = train_mean, scale = train_sd) 299 | 300 | 301 | y_train <- train_targets / 100000 302 | y_test <- 
test_targets / 100000 303 | 304 | 305 | get_model <- function() { # <1> 306 | model <- keras_model_sequential() |> 307 | layer_dense(64, activation = "relu") |> 308 | layer_dense(64, activation = "relu") |> 309 | layer_dense(1) 310 | model |> compile( 311 | optimizer = "adam", 312 | loss = "mean_squared_error", 313 | metrics = "mean_absolute_error" 314 | ) 315 | model 316 | } 317 | 318 | 319 | k <- 4 320 | fold_id <- sample(rep(1:k, length.out = nrow(train_data))) 321 | num_epochs <- 50 322 | all_scores <- numeric(k) 323 | 324 | for (i in 1:k) { 325 | cat(sprintf("Processing fold #%i\n", i)) 326 | 327 | fold_val_indices <- which(fold_id == i) 328 | fold_x_val <- x_train[fold_val_indices, ] # <1> 329 | fold_y_val <- y_train[fold_val_indices] # <1> 330 | fold_x_train <- x_train[-fold_val_indices, ] # <2> 331 | fold_y_train <- y_train[-fold_val_indices] # <2> 332 | 333 | model <- get_model() # <3> 334 | model |> fit( # <4> 335 | fold_x_train, fold_y_train, 336 | epochs = num_epochs, batch_size = 16, verbose = 0 337 | ) 338 | results <- model |> evaluate(fold_x_val, fold_y_val, verbose = 0) # <5> 339 | all_scores[i] <- results$mean_absolute_error 340 | } 341 | 342 | 343 | round(all_scores, 3) 344 | 345 | mean(all_scores) 346 | 347 | 348 | k <- 4 349 | num_epochs <- 200 350 | all_mae_histories <- list() 351 | 352 | for (i in 1:k) { 353 | cat(sprintf("Processing fold #%i\n", i)) 354 | 355 | fold_val_indices <- which(fold_id == i) # <1> 356 | fold_x_val <- x_train[fold_val_indices, ] # <1> 357 | fold_y_val <- y_train[fold_val_indices] # <1> 358 | fold_x_train <- x_train[-fold_val_indices, ] # <2> 359 | fold_y_train <- y_train[-fold_val_indices] # <2> 360 | 361 | model <- get_model() # <3> 362 | history <- model |> fit( # <4> 363 | fold_x_train, fold_y_train, 364 | validation_data = list(fold_x_val, fold_y_val), 365 | epochs = num_epochs, batch_size = 16, verbose = 0 366 | ) 367 | mae_history <- history$metrics$val_mean_absolute_error 368 | all_mae_histories[[i]] <- mae_history 369 | } 370 | 371 | all_mae_histories <- do.call(cbind, all_mae_histories) 372 | 373 | 374 | average_mae_history <- rowMeans(all_mae_histories) 375 | 376 | 377 | plot(average_mae_history, ylab = "Validation MAE", xlab = "Epoch", type = 'l') 378 | 379 | 380 | truncated_mae_history <- average_mae_history[-(1:10)] 381 | plot(average_mae_history, xlab = "epoch", type = 'l', 382 | ylim = range(truncated_mae_history)) 383 | 384 | 385 | model <- get_model() # <1> 386 | model |> fit(x_train, y_train, # <2> 387 | epochs = 130, batch_size = 16, verbose = 0) 388 | .[test_mean_squared_error, test_mean_absolute_error] <- 389 | model |> evaluate(x_test, y_test) 390 | 391 | 392 | test_mean_absolute_error 393 | 394 | 395 | predictions <- model |> predict(x_test) 396 | predictions[1, ] 397 | 398 | -------------------------------------------------------------------------------- /2e/ch14.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include=FALSE------------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tensorflow::as_tensor(1) 5 | 6 | 7 | ## ---- include = FALSE----------------------------------------------------- 8 | ## Define objects so all the visible code is runnable 9 | num_classes <- 10 10 | num_values <- 4 11 | num_inputs_features <- 20 12 | num_timesteps <- 20 13 | height <- width <- 200 14 | channels <- 3 15 | num_features <- 20 16 | sequence_length <- 200 17 | embed_dim <- 256 18 | dense_dim <- 32 19 | num_heads <- 4 20 | vocab_size <-
15000 21 | 22 | layer_transformer_encoder <- new_layer_class( 23 | classname = "TransformerEncoder", 24 | initialize = function(embed_dim, dense_dim, num_heads, ...) { 25 | super$initialize(...) 26 | self$embed_dim <- embed_dim 27 | self$dense_dim <- dense_dim 28 | self$num_heads <- num_heads 29 | self$attention <- 30 | layer_multi_head_attention(num_heads = num_heads, 31 | key_dim = embed_dim) 32 | 33 | self$dense_proj <- keras_model_sequential() %>% 34 | layer_dense(dense_dim, activation = "relu") %>% 35 | layer_dense(embed_dim) 36 | 37 | self$layernorm_1 <- layer_layer_normalization() 38 | self$layernorm_2 <- layer_layer_normalization() 39 | }, 40 | 41 | call = function(inputs, mask = NULL) { 42 | if (!is.null(mask)) 43 | mask <- mask[, tf$newaxis, ] 44 | 45 | inputs %>% 46 | { self$attention(., ., attention_mask = mask) + . } %>% 47 | self$layernorm_1() %>% 48 | { self$dense_proj(.) + . } %>% 49 | self$layernorm_2() 50 | }, 51 | 52 | get_config = function() { 53 | config <- super$get_config() 54 | for(name in c("embed_dim", "num_heads", "dense_dim")) 55 | config[[name]] <- self[[name]] 56 | config 57 | } 58 | ) 59 | 60 | layer_transformer_decoder <- new_layer_class( 61 | classname = "TransformerDecoder", 62 | 63 | initialize = function(embed_dim, dense_dim, num_heads, ...) { 64 | super$initialize(...) 65 | self$embed_dim <- embed_dim 66 | self$dense_dim <- dense_dim 67 | self$num_heads <- num_heads 68 | self$attention_1 <- layer_multi_head_attention(num_heads = num_heads, 69 | key_dim = embed_dim) 70 | self$attention_2 <- layer_multi_head_attention(num_heads = num_heads, 71 | key_dim = embed_dim) 72 | self$dense_proj <- keras_model_sequential() %>% 73 | layer_dense(dense_dim, activation = "relu") %>% 74 | layer_dense(embed_dim) 75 | 76 | self$layernorm_1 <- layer_layer_normalization() 77 | self$layernorm_2 <- layer_layer_normalization() 78 | self$layernorm_3 <- layer_layer_normalization() 79 | self$supports_masking <- TRUE 80 | }, 81 | 82 | get_config = function() { 83 | config <- super$get_config() 84 | for (name in c("embed_dim", "num_heads", "dense_dim")) 85 | config[[name]] <- self[[name]] 86 | config 87 | }, 88 | 89 | get_causal_attention_mask = function(inputs) { 90 | c(batch_size, sequence_length, encoding_length) %<-% 91 | tf$unstack(tf$shape(inputs)) 92 | 93 | x <- tf$range(sequence_length) 94 | i <- x[, tf$newaxis] 95 | j <- x[tf$newaxis, ] 96 | mask <- tf$cast(i >= j, "int32") 97 | 98 | tf$tile(mask[tf$newaxis, , ], 99 | tf$stack(c(batch_size, 1L, 1L))) 100 | }, 101 | 102 | call = function(inputs, encoder_outputs, mask = NULL) { 103 | 104 | causal_mask <- self$get_causal_attention_mask(inputs) 105 | 106 | if (is.null(mask)) 107 | mask <- causal_mask 108 | else 109 | mask %<>% { tf$minimum(tf$cast(.[, tf$newaxis, ], "int32"), 110 | causal_mask) } 111 | 112 | inputs %>% 113 | { self$attention_1(query = ., value = ., key = ., 114 | attention_mask = causal_mask) + . } %>% 115 | self$layernorm_1() %>% 116 | 117 | { self$attention_2(query = ., 118 | value = encoder_outputs, 119 | key = encoder_outputs, 120 | attention_mask = mask) + . } %>% 121 | self$layernorm_2() %>% 122 | 123 | { self$dense_proj(.) + . } %>% 124 | self$layernorm_3() 125 | 126 | } 127 | ) 128 | 129 | layer_positional_embedding <- new_layer_class( 130 | classname = "PositionalEmbedding", 131 | 132 | initialize = function(sequence_length, input_dim, output_dim, ...) { 133 | super$initialize(...) 
134 | self$token_embeddings <- 135 | layer_embedding(input_dim = input_dim, 136 | output_dim = output_dim) 137 | self$position_embeddings <- 138 | layer_embedding(input_dim = sequence_length, 139 | output_dim = output_dim) 140 | self$sequence_length <- sequence_length 141 | self$input_dim <- input_dim 142 | self$output_dim <- output_dim 143 | }, 144 | 145 | call = function(inputs) { 146 | length <- tf$shape(inputs)[-1] 147 | positions <- tf$range(start = 0L, limit = length, delta = 1L) 148 | embedded_tokens <- self$token_embeddings(inputs) 149 | embedded_positions <- self$position_embeddings(positions) 150 | embedded_tokens + embedded_positions 151 | }, 152 | 153 | compute_mask = function(inputs, mask = NULL) { 154 | inputs != 0 155 | }, 156 | 157 | get_config = function() { 158 | config <- super$get_config() 159 | for(name in c("output_dim", "sequence_length", "input_dim")) 160 | config[[name]] <- self[[name]] 161 | config 162 | } 163 | ) 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | inputs <- layer_input(shape = c(num_inputs_features)) 168 | outputs <- inputs %>% 169 | layer_dense(32, activation = "relu") %>% 170 | layer_dense(32, activation = "relu") %>% 171 | layer_dense(1, activation = "sigmoid") 172 | model <- keras_model(inputs, outputs) 173 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 174 | 175 | 176 | ## ------------------------------------------------------------------------- 177 | inputs <- layer_input(shape = c(num_inputs_features)) 178 | outputs <- inputs %>% 179 | layer_dense(32, activation = "relu") %>% 180 | layer_dense(32, activation = "relu") %>% 181 | layer_dense(num_classes, activation = "softmax") 182 | model <- keras_model(inputs, outputs) 183 | model %>% compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 184 | 185 | 186 | ## ------------------------------------------------------------------------- 187 | inputs <- layer_input(shape = c(num_inputs_features)) 188 | outputs <- inputs %>% 189 | layer_dense(32, activation = "relu") %>% 190 | layer_dense(32, activation = "relu") %>% 191 | layer_dense(num_classes, activation = "sigmoid") 192 | model <- keras_model(inputs, outputs) 193 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 194 | 195 | 196 | ## ------------------------------------------------------------------------- 197 | inputs <- layer_input(shape = c(num_inputs_features)) 198 | outputs <- inputs %>% 199 | layer_dense(32, activation = "relu") %>% 200 | layer_dense(32, activation = "relu") %>% 201 | layer_dense(num_values) 202 | model <- keras_model(inputs, outputs) 203 | model %>% compile(optimizer = "rmsprop", loss = "mse") 204 | 205 | 206 | ## ------------------------------------------------------------------------- 207 | inputs <- layer_input(shape = c(height, width, channels)) 208 | outputs <- inputs %>% 209 | layer_separable_conv_2d(32, 3, activation = "relu") %>% 210 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 211 | layer_max_pooling_2d(2) %>% 212 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 213 | layer_separable_conv_2d(128, 3, activation = "relu") %>% 214 | layer_max_pooling_2d(2) %>% 215 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 216 | layer_separable_conv_2d(128, 3, activation = "relu") %>% 217 | layer_global_average_pooling_2d() %>% 218 | layer_dense(32, activation = "relu") %>% 219 | layer_dense(num_classes, activation = "softmax") 220 | model <- keras_model(inputs, outputs) 221 | model %>%
compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 222 | 223 | 224 | ## ------------------------------------------------------------------------- 225 | inputs <- layer_input(shape = c(num_timesteps, num_features)) 226 | outputs <- inputs %>% 227 | layer_lstm(32) %>% 228 | layer_dense(num_classes, activation = "sigmoid") 229 | model <- keras_model(inputs, outputs) 230 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 231 | 232 | 233 | ## ------------------------------------------------------------------------- 234 | inputs <- layer_input(shape = c(num_timesteps, num_features)) 235 | outputs <- inputs %>% 236 | layer_lstm(32, return_sequences = TRUE) %>% 237 | layer_lstm(32, return_sequences = TRUE) %>% 238 | layer_lstm(32) %>% 239 | layer_dense(num_classes, activation = "sigmoid") 240 | model <- keras_model(inputs, outputs) 241 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 242 | 243 | 244 | ## ------------------------------------------------------------------------- 245 | encoder_inputs <- layer_input(shape = c(sequence_length), dtype = "int64") 246 | encoder_outputs <- encoder_inputs %>% 247 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 248 | layer_transformer_encoder(embed_dim, dense_dim, num_heads) 249 | 250 | decoder <- layer_transformer_decoder(NULL, embed_dim, dense_dim, num_heads) 251 | decoder_inputs <- layer_input(shape = c(NA), dtype = "int64") 252 | decoder_outputs <- decoder_inputs %>% 253 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 254 | decoder(., encoder_outputs) %>% 255 | layer_dense(vocab_size, activation = "softmax") 256 | 257 | transformer <- keras_model(list(encoder_inputs, decoder_inputs), 258 | decoder_outputs) 259 | transformer %>% 260 | compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | inputs <- layer_input(shape=c(sequence_length), dtype="int64") 265 | outputs <- inputs %>% 266 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 267 | layer_transformer_encoder(embed_dim, dense_dim, num_heads) %>% 268 | layer_global_max_pooling_1d() %>% 269 | layer_dense(1, activation = "sigmoid") 270 | model <- keras_model(inputs, outputs) 271 | model %>% compile(optimizer="rmsprop", loss="binary_crossentropy") 272 | 273 | 274 | 275 | ## ---- eval = FALSE-------------------------------------------------------- 276 | ## # Instance 277 | ## l <- as.list(obj) 278 | ## #---- 279 | ## l_sum <- 0 280 | ## l_entries <- 0 281 | ## for (e in l) { 282 | ## if (!is.null(e)) { 283 | ## l_sum <- l_sum + e 284 | ## l_entries <- l_entries + 1 285 | ## } 286 | ## } 287 | ## avg <- l_sum / l_entries 288 | ## #----- 289 | ## print('avg:', avg) 290 | ## 291 | ## 292 | ## # Instance 293 | ## my_list <- get_data() 294 | ## ---- 295 | ## total <- 0 296 | ## num_elems <- 0 297 | ## for (n in my_list) { 298 | ## if (!is.null(n)) { 299 | ## total <- total + n 300 | ## num_elems <- num_elems + 1 301 | ## } 302 | ## } 303 | ## mean <- total / num_elems 304 | ## ---- 305 | ## update_mean(mean) 306 | ## 307 | ## 308 | ## # Shared abstraction 309 | ## compute_mean <- function(x) { 310 | ## num_elems <- total <- 0 311 | ## for (e in x) 312 | ## if (!is.null(e)) { 313 | ## total <- total + e 314 | ## num_elems <- num_elems + 1 315 | ## } 316 | ## total / num_elems 317 | ## } 318 | --------------------------------------------------------------------------------
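## Added sanity-check sketch (not book code): it assumes only the tensorflow R package
## with eager execution, as set up at the top of 2e/ch14.R, and mirrors the expressions
## used inside TransformerDecoder$get_causal_attention_mask() above. The causal mask is
## a lower-triangular 0/1 matrix, so each position attends only to itself and to
## earlier positions.
library(tensorflow)
x <- tf$range(4L)                                                   # positions 0..3
causal_mask <- tf$cast(x[, tf$newaxis] >= x[tf$newaxis, ], "int32") # shape (4, 4)
as.array(causal_mask)                                               # lower-triangular matrix of 1s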
/chapter05_fundamentals-of-ml.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | library(keras3) 5 | 6 | .[.[train_images, train_labels], .] <- dataset_mnist() 7 | train_images <- array_reshape(train_images / 255, c(60000, 28 * 28)) 8 | 9 | runif_array <- \(dim) array(runif(prod(dim)), dim) 10 | 11 | noise_channels <- runif_array(dim(train_images)) 12 | train_images_with_noise_channels <- cbind(train_images, noise_channels) 13 | 14 | zeros_channels <- array(0, dim(train_images)) 15 | train_images_with_zeros_channels <- cbind(train_images, zeros_channels) 16 | 17 | 18 | get_model <- function() { 19 | model <- keras_model_sequential() |> 20 | layer_dense(512, activation = "relu") |> 21 | layer_dense(10, activation = "softmax") 22 | 23 | model |> compile( 24 | optimizer = "adam", 25 | loss = "sparse_categorical_crossentropy", 26 | metrics = c("accuracy") 27 | ) 28 | 29 | model 30 | } 31 | 32 | model <- get_model() 33 | history_noise <- model |> fit( 34 | train_images_with_noise_channels, train_labels, 35 | epochs = 10, 36 | batch_size = 128, 37 | validation_split = 0.2 38 | ) 39 | 40 | model <- get_model() 41 | history_zeros <- model |> fit( 42 | train_images_with_zeros_channels, train_labels, 43 | epochs = 10, 44 | batch_size = 128, 45 | validation_split = 0.2 46 | ) 47 | 48 | 49 | plot(NULL, 50 | main = "Effect of Noise Channels on Validation Accuracy", 51 | xlab = "Epochs", xlim = c(1, history_noise$params$epochs), 52 | ylab = "Validation Accuracy", ylim = c(0.9, 1), las = 1) 53 | lines(history_zeros$metrics$val_accuracy, lty = 1, type = "o") 54 | lines(history_noise$metrics$val_accuracy, lty = 2, type = "o") 55 | legend("bottomright", lty = 1:2, 56 | legend = c("Validation accuracy with zeros channels", 57 | "Validation accuracy with noise channels")) 58 | 59 | 60 | .[.[train_images, train_labels], .] <- dataset_mnist() 61 | train_images <- array_reshape(train_images / 255, 62 | c(60000, 28 * 28)) 63 | 64 | random_train_labels <- sample(train_labels) # <1> 65 | 66 | model <- keras_model_sequential() |> 67 | layer_dense(512, activation = "relu") |> 68 | layer_dense(10, activation = "softmax") 69 | 70 | model |> compile(optimizer = "rmsprop", 71 | loss = "sparse_categorical_crossentropy", 72 | metrics = "accuracy") 73 | 74 | history <- model |> fit( 75 | train_images, random_train_labels, 76 | epochs = 100, batch_size = 128, 77 | validation_split = 0.2 78 | ) 79 | 80 | 81 | .[.[train_images, train_labels], .] 
<- dataset_mnist() 82 | train_images <- array_reshape(train_images / 255, 83 | c(60000, 28 * 28)) 84 | 85 | model <- keras_model_sequential() |> 86 | layer_dense(512, activation = "relu") |> 87 | layer_dense(10, activation = "softmax") 88 | 89 | model |> compile( 90 | optimizer = optimizer_rmsprop(1), 91 | loss = "sparse_categorical_crossentropy", 92 | metrics = "accuracy" 93 | ) 94 | 95 | history <- model |> fit( 96 | train_images, train_labels, 97 | epochs = 10, batch_size = 128, 98 | validation_split = 0.2 99 | ) 100 | 101 | 102 | model <- keras_model_sequential() |> 103 | layer_dense(512, activation = "relu") |> 104 | layer_dense(10, activation = "softmax") 105 | 106 | model |> compile( 107 | optimizer = optimizer_rmsprop(1e-2), 108 | loss = "sparse_categorical_crossentropy", 109 | metrics = "accuracy" 110 | ) 111 | 112 | history <- model |> fit( 113 | train_images, train_labels, 114 | epochs = 10, batch_size = 128, 115 | validation_split = 0.2 116 | ) 117 | 118 | 119 | model <- keras_model_sequential() |> 120 | layer_dense(10, activation = "softmax") 121 | 122 | model |> compile( 123 | optimizer = "rmsprop", 124 | loss = "sparse_categorical_crossentropy", 125 | metrics = "accuracy" 126 | ) 127 | 128 | history_small_model <- model |> fit( 129 | train_images, train_labels, 130 | epochs = 20, 131 | batch_size = 128, 132 | validation_split = 0.2 133 | ) 134 | 135 | 136 | plot(history_small_model$metrics$val_loss, type = 'o', 137 | main = "Effect of Insufficient Model Capacity on Validation Loss", 138 | xlab = "Epochs", ylab = "Validation Loss") 139 | 140 | 141 | model <- keras_model_sequential() |> 142 | layer_dense(128, activation="relu") |> 143 | layer_dense(128, activation="relu") |> 144 | layer_dense(10, activation="softmax") 145 | 146 | model |> compile( 147 | optimizer="rmsprop", 148 | loss="sparse_categorical_crossentropy", 149 | metrics="accuracy" 150 | ) 151 | 152 | history_large_model <- model |> fit( 153 | train_images, train_labels, 154 | epochs = 20, 155 | batch_size = 128, 156 | validation_split = 0.2 157 | ) 158 | 159 | 160 | plot(history_large_model$metrics$val_loss, type = 'o', 161 | main = "Validation Loss for a Model with Appropriate Capacity", 162 | xlab = "Epochs", ylab = "Validation Loss") 163 | 164 | 165 | model <- keras_model_sequential() |> 166 | layer_dense(2048, activation = "relu") |> 167 | layer_dense(2048, activation = "relu") |> 168 | layer_dense(2048, activation = "relu") |> 169 | layer_dense(10, activation = "softmax") 170 | 171 | model |> compile( 172 | optimizer = "rmsprop", 173 | loss = "sparse_categorical_crossentropy", 174 | metrics = "accuracy" 175 | ) 176 | 177 | history_very_large_model <- model |> fit( 178 | train_images, train_labels, 179 | epochs = 20, 180 | batch_size = 32, # <1> 181 | validation_split = 0.2 182 | ) 183 | 184 | 185 | plot(history_very_large_model$metrics$val_loss, type = 'o', 186 | main = "Validation Loss for a Model with Too Much Capacity", 187 | xlab = "Epochs", ylab = "Validation Loss") 188 | 189 | 190 | .[.[train_data, train_labels], .] 
<- dataset_imdb(num_words = 10000) 191 | 192 | vectorize_sequences <- function(sequences, dimension = 10000) { 193 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 194 | for (i in seq_along(sequences)) 195 | results[i, sequences[[i]]] <- 1 196 | results 197 | } 198 | 199 | train_data <- vectorize_sequences(train_data) 200 | 201 | model <- keras_model_sequential() |> 202 | layer_dense(16, activation="relu") |> 203 | layer_dense(16, activation="relu") |> 204 | layer_dense(1, activation="sigmoid") 205 | 206 | model |> compile( 207 | optimizer = "rmsprop", 208 | loss = "binary_crossentropy", 209 | metrics = "accuracy" 210 | ) 211 | 212 | history_original <- model |> fit( 213 | train_data, train_labels, 214 | epochs = 20, batch_size = 512, validation_split = 0.4 215 | ) 216 | 217 | 218 | model <- keras_model_sequential() |> 219 | layer_dense(4, activation = "relu") |> 220 | layer_dense(4, activation = "relu") |> 221 | layer_dense(1, activation = "sigmoid") 222 | 223 | model |> compile( 224 | optimizer = "rmsprop", 225 | loss = "binary_crossentropy", 226 | metrics = "accuracy" 227 | ) 228 | 229 | history_smaller_model <- model |> fit( 230 | train_data, train_labels, 231 | epochs = 20, batch_size = 512, validation_split = 0.4 232 | ) 233 | 234 | 235 | plot( 236 | NULL, 237 | main = "Original Model vs. Smaller Model on IMDB Review Classification", 238 | xlab = "Epochs", 239 | xlim = c(1, history_original$params$epochs), 240 | ylab = "Validation Loss", 241 | ylim = extendrange(c(history_original$metrics$val_loss, 242 | history_smaller_model$metrics$val_loss)), 243 | panel.first = abline(v = 1:history_original$params$epochs, 244 | lty = "dotted", col = "lightgrey") 245 | ) 246 | 247 | lines(history_original $metrics$val_loss, lty = 2) 248 | lines(history_smaller_model$metrics$val_loss, lty = 1) 249 | legend("topleft", lty = 2:1, 250 | legend = c("Validation loss of original model", 251 | "Validation loss of smaller model")) 252 | 253 | 254 | model <- keras_model_sequential() |> 255 | layer_dense(512, activation = "relu") |> 256 | layer_dense(512, activation = "relu") |> 257 | layer_dense(1, activation = "sigmoid") 258 | 259 | model |> compile( 260 | optimizer = "rmsprop", 261 | loss = "binary_crossentropy", 262 | metrics = "accuracy" 263 | ) 264 | 265 | history_larger_model <- model |> fit( 266 | train_data, train_labels, 267 | epochs = 20, batch_size = 512, validation_split = 0.4 268 | ) 269 | 270 | 271 | plot( 272 | NULL, 273 | main = "Original Model vs. 
Much Larger Model on IMDB Review Classification", 274 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 275 | ylab = "Validation Loss", 276 | ylim = range(c(history_original$metrics$val_loss, 277 | history_larger_model$metrics$val_loss)), 278 | panel.first = abline(v = 1:history_original$params$epochs, 279 | lty = "dotted", col = "lightgrey") 280 | ) 281 | lines(history_original $metrics$val_loss, lty = 2) 282 | lines(history_larger_model$metrics$val_loss, lty = 1) 283 | legend("topleft", lty = 2:1, 284 | legend = c("Validation loss of original model", 285 | "Validation loss of larger model")) 286 | 287 | 288 | model <- keras_model_sequential() |> 289 | layer_dense(16, activation = "relu", 290 | kernel_regularizer = regularizer_l2(0.002)) |> 291 | layer_dense(16, activation = "relu", 292 | kernel_regularizer = regularizer_l2(0.002)) |> 293 | layer_dense(1, activation = "sigmoid") 294 | 295 | model |> compile( 296 | optimizer = "rmsprop", 297 | loss = "binary_crossentropy", 298 | metrics = "accuracy" 299 | ) 300 | 301 | history_l2_reg <- model |> fit( 302 | train_data, train_labels, 303 | epochs = 20, batch_size = 512, validation_split = 0.4 304 | ) 305 | 306 | 307 | plot(NULL, 308 | main = "Effect of L2 Weight Regularization on Validation Loss", 309 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 310 | ylab = "Validation Loss", 311 | ylim = range(c(history_original$metrics$val_loss, 312 | history_l2_reg $metrics$val_loss)), 313 | panel.first = abline(v = 1:history_original$params$epochs, 314 | lty = "dotted", col = "lightgrey")) 315 | lines(history_original$metrics$val_loss, lty = 2) 316 | lines(history_l2_reg $metrics$val_loss, lty = 1) 317 | legend("topleft", lty = 2:1, 318 | legend = c("Validation loss of original model", 319 | "Validation loss of L2-regularized model")) 320 | 321 | 322 | regularizer_l1(0.001) # <1> 323 | regularizer_l1_l2(l1 = 0.001, l2 = 0.001) # <2> 324 | 325 | 326 | model <- keras_model_sequential() |> 327 | layer_dense(16, activation = "relu") |> 328 | layer_dropout(0.5) |> 329 | layer_dense(16, activation = "relu") |> 330 | layer_dropout(0.5) |> 331 | layer_dense(1, activation = "sigmoid") 332 | 333 | model |> compile( 334 | optimizer = "rmsprop", 335 | loss = "binary_crossentropy", 336 | metrics = "accuracy" 337 | ) 338 | 339 | history_dropout <- model |> fit( 340 | train_data, train_labels, 341 | epochs = 20, batch_size = 512, 342 | validation_split = 0.4 343 | ) 344 | 345 | 346 | plot(NULL, 347 | main = "Effect of Dropout on Validation Loss", 348 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 349 | ylab = "Validation Loss", 350 | ylim = range(c(history_original$metrics$val_loss, 351 | history_dropout $metrics$val_loss)), 352 | panel.first = abline(v = 1:history_original$params$epochs, 353 | lty = "dotted", col = "lightgrey")) 354 | lines(history_original$metrics$val_loss, lty = 2) 355 | lines(history_dropout $metrics$val_loss, lty = 1) 356 | legend("topleft", lty = 2:1, 357 | legend = c("Validation loss of original model", 358 | "Validation loss of dropout-regularized model")) 359 | 360 | -------------------------------------------------------------------------------- /chapter13_timeseries-forecasting.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | library(tensorflow, exclude = c("shape", "set_random_seed")) 5 | library(tfdatasets, exclude = "shape") 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(keras3) 8 | use_backend("jax") 9 | 10 | 11 | 
"https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip" |> 12 | get_file(origin = _) |> 13 | zip::unzip("jena_climate_2009_2016.csv") 14 | 15 | 16 | writeLines(readLines("jena_climate_2009_2016.csv", 3)) 17 | 18 | 19 | withr::with_package("readr", { 20 | full_df <- read_csv( 21 | "jena_climate_2009_2016.csv", 22 | locale = locale(tz = "Etc/GMT+1"), 23 | col_types = cols( 24 | `Date Time` = col_datetime("%d.%m.%Y %H:%M:%S"), 25 | .default = col_double() 26 | ) 27 | ) 28 | }) 29 | 30 | 31 | tibble::glimpse(full_df) 32 | 33 | 34 | plot(`T (degC)` ~ `Date Time`, data = full_df, pch = 20, cex = .3) 35 | 36 | 37 | plot(`T (degC)` ~ `Date Time`, data = full_df[1:1440, ]) 38 | 39 | 40 | num_train_samples <- round(nrow(full_df) * .5) 41 | num_val_samples <- round(nrow(full_df) * 0.25) 42 | num_test_samples <- nrow(full_df) - num_train_samples - num_val_samples 43 | 44 | train_df <- full_df[seq(num_train_samples), ] 45 | 46 | val_df <- full_df[seq(from = nrow(train_df) + 1, 47 | length.out = num_val_samples), ] 48 | 49 | test_df <- full_df[seq(to = nrow(full_df), 50 | length.out = num_test_samples), ] 51 | 52 | cat("num_train_samples:", nrow(train_df), "\n") 53 | cat("num_val_samples:", nrow(val_df), "\n") 54 | cat("num_test_samples:", nrow(test_df), "\n") 55 | 56 | 57 | input_data_colnames <- names(full_df) |> setdiff(c("Date Time")) 58 | normalization_values <- train_df[input_data_colnames] |> 59 | lapply(\(col) list(mean = mean(col), sd = sd(col))) 60 | 61 | str(normalization_values) 62 | 63 | normalize_input_data <- function(df) { 64 | purrr::map2(df, normalization_values[names(df)], \(col, nv) { 65 | (col - nv$mean) / nv$sd 66 | }) |> as_tibble() 67 | } 68 | 69 | 70 | int_sequence <- seq(10) 71 | dummy_dataset <- timeseries_dataset_from_array( 72 | data = head(int_sequence, -3), 73 | targets = tail(int_sequence, -3), 74 | sequence_length = 3, 75 | batch_size = 2 76 | ) 77 | 78 | dummy_dataset_iterator <- as_array_iterator(dummy_dataset) 79 | 80 | repeat { 81 | batch <- iter_next(dummy_dataset_iterator) 82 | if (is.null(batch)) 83 | break 84 | .[inputs, targets] <- batch 85 | for (r in 1:nrow(inputs)) 86 | cat(sprintf("input: [ %s ] target: %s\n", 87 | paste(inputs[r, ], collapse = " "), targets[r])) 88 | cat(strrep("-", 27), "\n") 89 | } 90 | 91 | 92 | sampling_rate <- 6 93 | sequence_length <- 120 94 | delay <- sampling_rate * (sequence_length + 24 - 1) 95 | batch_size <- 256 96 | 97 | df_to_inputs_and_targets <- function(df) { 98 | inputs <- df[input_data_colnames] |> 99 | normalize_input_data() |> 100 | as.matrix() 101 | 102 | targets <- as.array(df$`T (degC)`) 103 | 104 | list( 105 | head(inputs, -delay), 106 | tail(targets, -delay) 107 | ) 108 | } 109 | 110 | make_dataset <- function(df) { 111 | .[inputs, targets] <- df_to_inputs_and_targets(df) 112 | 113 | timeseries_dataset_from_array( 114 | inputs, targets, 115 | sampling_rate = sampling_rate, 116 | sequence_length = sequence_length, 117 | shuffle = TRUE, 118 | batch_size = batch_size 119 | ) 120 | } 121 | 122 | train_dataset <- make_dataset(train_df) 123 | val_dataset <- make_dataset(val_df) 124 | test_dataset <- make_dataset(test_df) 125 | 126 | 127 | .[samples, targets] <- iter_next(as_iterator(train_dataset)) 128 | cat("samples shape: ", format(samples$shape), "\n", 129 | "targets shape: ", format(targets$shape), "\n", sep = "") 130 | 131 | 132 | evaluate_naive_method <- function(dataset) { 133 | 134 | .[temp_sd = sd, temp_mean = mean] <- normalization_values$`T (degC)` 135 | unnormalize_temperature <- function(x) { 
136 | (x * temp_sd) + temp_mean 137 | } 138 | 139 | temp_col_idx <- match("T (degC)", input_data_colnames) 140 | 141 | reduction <- dataset |> 142 | dataset_unbatch() |> 143 | dataset_map(function(samples, target) { 144 | last_temp_in_input <- samples@r[-1, temp_col_idx] # <1> 145 | pred <- unnormalize_temperature(last_temp_in_input) # <2> 146 | abs(pred - target) 147 | }) |> 148 | dataset_reduce( 149 | initial_state = list(total_samples_seen = 0L, 150 | total_abs_error = 0), 151 | reduce_func = function(state, element) { 152 | `add<-` <- `+` 153 | add(state$total_samples_seen) <- 1L 154 | add(state$total_abs_error) <- element 155 | state 156 | } 157 | ) |> 158 | lapply(as.numeric) # <3> 159 | 160 | mae <- with(reduction, total_abs_error / total_samples_seen) # <4> 161 | mae 162 | } 163 | 164 | sprintf("Validation MAE: %.2f", evaluate_naive_method(val_dataset)) 165 | sprintf("Test MAE: %.2f", evaluate_naive_method(test_dataset)) 166 | 167 | 168 | ncol_input_data <- length(input_data_colnames) 169 | 170 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 171 | outputs <- inputs |> 172 | layer_flatten() |> 173 | layer_dense(16, activation="relu") |> 174 | layer_dense(1) 175 | 176 | model <- keras_model(inputs, outputs) 177 | 178 | callbacks = list( 179 | callback_model_checkpoint("jena_dense.keras", save_best_only = TRUE) # <1> 180 | ) 181 | 182 | model |> compile( 183 | optimizer = "rmsprop", 184 | loss = "mse", 185 | metrics = "mae" 186 | ) 187 | 188 | history <- model |> fit( 189 | train_dataset, 190 | epochs = 10, 191 | validation_data = val_dataset, 192 | callbacks = callbacks 193 | ) 194 | 195 | model <- load_model("jena_dense.keras") # <2> 196 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 197 | 198 | 199 | plot(history, metrics = "mae") 200 | 201 | 202 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 203 | outputs <- inputs |> 204 | layer_conv_1d(8, 24, activation = "relu") |> 205 | layer_max_pooling_1d(2) |> 206 | layer_conv_1d(8, 12, activation = "relu") |> 207 | layer_max_pooling_1d(2) |> 208 | layer_conv_1d(8, 6, activation = "relu") |> 209 | layer_global_average_pooling_1d() |> 210 | layer_dense(1) 211 | model <- keras_model(inputs, outputs) 212 | 213 | callbacks <- list( 214 | callback_model_checkpoint("jena_conv.keras", save_best_only = TRUE) 215 | ) 216 | 217 | model |> compile( 218 | optimizer = "rmsprop", 219 | loss = "mse", 220 | metrics = "mae" 221 | ) 222 | 223 | 224 | history <- model |> fit( 225 | train_dataset, 226 | epochs = 10, 227 | validation_data = val_dataset, 228 | callbacks = callbacks 229 | ) 230 | 231 | 232 | model <- load_model("jena_conv.keras") 233 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 234 | 235 | 236 | plot(history, metrics = "mae") 237 | 238 | 239 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 240 | outputs <- inputs |> 241 | layer_lstm(16) |> 242 | layer_dense(1) 243 | model <- keras_model(inputs, outputs) 244 | 245 | callbacks <- list( 246 | callback_model_checkpoint("jena_lstm.keras", save_best_only = TRUE) 247 | ) 248 | 249 | compile(model, optimizer = "rmsprop", loss = "mse", metrics = "mae") 250 | 251 | 252 | history <- model |> fit( 253 | train_dataset, 254 | epochs = 10, 255 | validation_data = val_dataset, 256 | callbacks = callbacks 257 | ) 258 | 259 | 260 | local({ 261 | p <- plot(history, metrics = "mae") 262 | p$data %<>% .[.$epoch > 1, ] 263 | print(p) 264 | }) 265 | 266 | 267 | model <- load_model("jena_lstm.keras") 268 |
sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 269 | 270 | 271 | runif_array <- function(dim) array(runif(prod(dim)), dim) 272 | 273 | timesteps <- 100 # <1> 274 | input_features <- 32 # <2> 275 | output_features <- 64 # <3> 276 | 277 | inputs <- runif_array(c(timesteps, input_features)) # <4> 278 | state_t <- array(0, dim = output_features) # <5> 279 | W <- runif_array(c(output_features, input_features)) # <6> 280 | U <- runif_array(c(output_features, output_features)) # <6> 281 | b <- runif_array(c(output_features, 1)) # <6> 282 | outputs <- array(0, dim = c(timesteps, output_features)) 283 | 284 | for(ts in 1:timesteps) { 285 | input_t <- inputs[ts, ] # <7> 286 | output_t <- tanh( (W %*% input_t) + (U %*% state_t) + b ) # <8> 287 | outputs[ts, ] <- state_t <- output_t # <9> 288 | } 289 | 290 | final_output_sequence <- outputs # <10> 291 | 292 | 293 | num_features <- 14 294 | inputs <- keras_input(shape = c(NA, num_features)) 295 | outputs <- inputs |> layer_simple_rnn(16) 296 | 297 | 298 | num_features <- 14 299 | steps <- 120 300 | inputs <- keras_input(shape = c(steps, num_features)) 301 | outputs <- inputs |> layer_simple_rnn(16, return_sequences = FALSE) # <1> 302 | op_shape(outputs) 303 | 304 | 305 | num_features <- 14 306 | steps <- 120 307 | inputs <- keras_input(shape = c(steps, num_features)) 308 | outputs <- inputs |> layer_simple_rnn(16, return_sequences = TRUE) # <1> 309 | op_shape(outputs) 310 | 311 | 312 | inputs <- keras_input(shape = c(steps, num_features)) 313 | outputs <- inputs |> 314 | layer_simple_rnn(16, return_sequences = TRUE) |> 315 | layer_simple_rnn(16, return_sequences = TRUE) |> 316 | layer_simple_rnn(16) 317 | 318 | 319 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 320 | outputs <- inputs |> 321 | layer_lstm(32, recurrent_dropout = 0.25) |> 322 | layer_dropout(0.5) |> # <1> 323 | layer_dense(1) 324 | model <- keras_model(inputs, outputs) 325 | 326 | callbacks = list( 327 | callback_model_checkpoint("jena_lstm_dropout.keras", save_best_only = TRUE) 328 | ) 329 | 330 | compile(model, optimizer = "rmsprop", loss = "mse", metrics = "mae") 331 | 332 | 333 | history <- model |> fit( 334 | train_dataset, 335 | epochs = 50, 336 | validation_data = val_dataset, 337 | callbacks = callbacks 338 | ) 339 | 340 | 341 | local({ 342 | p <- plot(history, metrics = "mae") 343 | p$data %<>% .[.$epoch > 1, ] 344 | print(p) 345 | }) 346 | 347 | 348 | inputs <- keras_input(shape = c(sequence_length, num_features)) # <1> 349 | x <- inputs |> layer_lstm(32, recurrent_dropout = 0.2, unroll = TRUE) # <2> 350 | 351 | 352 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 353 | outputs <- inputs |> 354 | layer_gru(32, recurrent_dropout = 0.5, return_sequences = TRUE) |> 355 | layer_gru(32, recurrent_dropout = 0.5) |> 356 | layer_dropout(0.5) |> 357 | layer_dense(1) 358 | model <- keras_model(inputs, outputs) 359 | 360 | callbacks <- list( 361 | callback_model_checkpoint( 362 | "jena_stacked_gru_dropout.keras", save_best_only = TRUE 363 | ) 364 | ) 365 | 366 | model |> compile(optimizer = "rmsprop", loss = "mse", metrics = "mae") 367 | 368 | 369 | history <- model |> fit( 370 | train_dataset, 371 | epochs = 50, 372 | validation_data = val_dataset, 373 | callbacks = callbacks 374 | ) 375 | 376 | 377 | plot(history) 378 | 379 | 380 | model <- load_model("jena_stacked_gru_dropout.keras") 381 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 382 | 383 | 384 | inputs <- keras_input(shape = c(sequence_length, 
ncol_input_data)) 385 | outputs <- inputs |> 386 | layer_bidirectional(layer_lstm(units = 16)) |> 387 | layer_dense(1) 388 | model <- keras_model(inputs, outputs) 389 | 390 | model |> compile(optimizer = "rmsprop", loss = "mse", metrics = "mae") 391 | 392 | history <- model |> fit( 393 | train_dataset, 394 | epochs = 10, 395 | validation_data = val_dataset 396 | ) 397 | 398 | plot(history) 399 | 400 | 401 | 402 | -------------------------------------------------------------------------------- /chapter08_image-classification.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | inputs <- keras_input(shape = c(28, 28, 1)) 5 | outputs <- inputs |> 6 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 7 | layer_max_pooling_2d(pool_size = 2) |> 8 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 9 | layer_max_pooling_2d(pool_size = 2) |> 10 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 11 | layer_global_average_pooling_2d() |> 12 | layer_dense(10, activation = "softmax") 13 | model <- keras_model(inputs = inputs, outputs = outputs) 14 | 15 | 16 | model 17 | 18 | 19 | .[.[train_images, train_labels], 20 | .[test_images, test_labels]] <- dataset_mnist() 21 | train_images <- array_reshape(train_images, c(60000, 28, 28, 1)) / 255 22 | test_images <- array_reshape(test_images, c(10000, 28, 28, 1)) / 255 23 | 24 | model |> compile( 25 | optimizer = "rmsprop", 26 | loss = "sparse_categorical_crossentropy", 27 | metrics = c("accuracy") 28 | ) 29 | model |> fit(train_images, train_labels, epochs = 5, batch_size = 64) 30 | 31 | 32 | result <- evaluate(model, test_images, test_labels) 33 | cat("Test accuracy:", result$accuracy, "\n") 34 | 35 | 36 | inputs <- keras_input(shape = c(28, 28, 1)) 37 | outputs <- inputs |> 38 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 39 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 40 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 41 | layer_global_average_pooling_2d() |> 42 | layer_dense(10, activation = "softmax") 43 | 44 | model_no_max_pool <- keras_model(inputs = inputs, outputs = outputs) 45 | 46 | 47 | model_no_max_pool 48 | 49 | 50 | unlink("dogs-vs-cats", recursive = TRUE) 51 | unlink("dogs_vs_cats_small", recursive = TRUE) 52 | 53 | 54 | zip::unzip('dogs-vs-cats.zip', exdir = "dogs-vs-cats", files = "train.zip") 55 | 56 | 57 | zip::unzip("dogs-vs-cats/train.zip", exdir = "dogs-vs-cats") 58 | 59 | 60 | library(fs) 61 | original_dir <- path("dogs-vs-cats/train") # <1> 62 | new_base_dir <- path("dogs_vs_cats_small") # <2> 63 | 64 | make_subset <- function(subset_name, start_index, end_index) { # <3> 65 | for (category in c("dog", "cat")) { 66 | file_name <- glue::glue("{category}.{ start_index:end_index }.jpg") 67 | dir_create(new_base_dir / subset_name / category) 68 | file_copy(original_dir / file_name, 69 | new_base_dir / subset_name / category / file_name) 70 | } 71 | } 72 | 73 | make_subset("train", start_index = 1, end_index = 1000) # <4> 74 | make_subset("validation", start_index = 1001, end_index = 1500) # <5> 75 | make_subset("test", start_index = 1501, end_index = 2500) # <6> 76 | 77 | 78 | inputs <- keras_input(shape = c(180, 180, 3)) # <1> 79 | outputs <- inputs |> 80 | layer_rescaling(1 / 255) |> # <2> 81 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |> 82 | layer_max_pooling_2d(pool_size = 2) |> 83 | layer_conv_2d(filters = 64, 
kernel_size = 3, activation = "relu") |> 84 | layer_max_pooling_2d(pool_size = 2) |> 85 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 86 | layer_max_pooling_2d(pool_size = 2) |> 87 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 88 | layer_max_pooling_2d(pool_size = 2) |> 89 | layer_conv_2d(filters = 512, kernel_size = 3, activation = "relu") |> 90 | layer_global_average_pooling_2d() |> # <3> 91 | layer_dense(1, activation = "sigmoid") 92 | model <- keras_model(inputs, outputs) 93 | 94 | 95 | model 96 | 97 | 98 | model |> compile(loss = "binary_crossentropy", 99 | optimizer = "adam", 100 | metrics = "accuracy") 101 | 102 | 103 | image_size <- shape(180, 180) 104 | batch_size <- 32 105 | 106 | train_dataset <- 107 | image_dataset_from_directory(new_base_dir / "train", 108 | image_size = image_size, 109 | batch_size = batch_size) 110 | validation_dataset <- 111 | image_dataset_from_directory(new_base_dir / "validation", 112 | image_size = image_size, 113 | batch_size = batch_size) 114 | test_dataset <- 115 | image_dataset_from_directory(new_base_dir / "test", 116 | image_size = image_size, 117 | batch_size = batch_size) 118 | 119 | 120 | reticulate::import("tensorflow")$constant(1L) 121 | 122 | 123 | library(tfdatasets, exclude = c("shape")) 124 | 125 | example_array <- array(seq(100*6), c(100, 6)) 126 | head(example_array) 127 | dataset <- tensor_slices_dataset(example_array) # <1> 128 | 129 | 130 | dataset_iterator <- as_iterator(dataset) 131 | for (i in 1:3) { 132 | element <- iter_next(dataset_iterator) 133 | print(element) 134 | } 135 | 136 | 137 | dataset_iterator <- as_iterator(dataset) 138 | for (i in 1:3) { 139 | element <- iter_next(dataset_iterator) 140 | print(element) 141 | } 142 | 143 | 144 | batched_dataset <- dataset |> dataset_batch(3) 145 | batched_dataset_iterator <- as_iterator(batched_dataset) 146 | for (i in 1:3) { 147 | element <- iter_next(batched_dataset_iterator) 148 | print(element) 149 | } 150 | 151 | 152 | reshaped_dataset <- dataset |> 153 | dataset_map(\(element) tf$reshape(element, shape(2, 3))) 154 | 155 | reshaped_dataset_iterator <- as_iterator(reshaped_dataset) 156 | for (i in 1:3) { 157 | element <- iter_next(reshaped_dataset_iterator) 158 | print(element) 159 | } 160 | 161 | 162 | .[data_batch, labels_batch] <- train_dataset |> as_iterator() |> iter_next() 163 | op_shape(data_batch) 164 | op_shape(labels_batch) 165 | 166 | 167 | callbacks <- list( 168 | callback_model_checkpoint( 169 | filepath = "convnet_from_scratch.keras", 170 | save_best_only = TRUE, 171 | monitor = "val_loss" 172 | ) 173 | ) 174 | 175 | history <- model |> 176 | fit( 177 | train_dataset, 178 | epochs = 50, 179 | validation_data = validation_dataset, 180 | callbacks = callbacks 181 | ) 182 | 183 | 184 | plot(history) 185 | 186 | 187 | test_model <- load_model("convnet_from_scratch.keras") 188 | result <- evaluate(test_model, test_dataset) 189 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 190 | 191 | 192 | data_augmentation_layers <- list( # <1> 193 | layer_random_flip(, "horizontal"), #< 194 | layer_random_rotation(, 0.1), 195 | layer_random_zoom(, 0.2) 196 | ) 197 | 198 | data_augmentation <- function(images, targets) { # <2> 199 | for (layer in data_augmentation_layers) 200 | images <- layer(images) 201 | list(images, targets) 202 | } 203 | 204 | augmented_train_dataset <- train_dataset |> 205 | dataset_map(data_augmentation, num_parallel_calls = 8) |> # <3> 206 | dataset_prefetch() # <4> 207 | 208 | 209 | batch <- 
train_dataset |> as_iterator() |> iter_next() 210 | .[images, labels] <- batch 211 | 212 | par(mfrow = c(3, 3), mar = rep(.5, 4)) 213 | 214 | image <- images[1, , , ] 215 | plot(as.raster(image, max = 255)) # <1> 216 | 217 | for (i in 2:9) { 218 | .[augmented_images, ..] <- data_augmentation(images, NULL) # <2> 219 | augmented_image <- augmented_images@r[1] |> as.array() # <3> 220 | plot(as.raster(augmented_image, max = 255)) # <3> 221 | } 222 | 223 | 224 | inputs <- keras_input(shape = c(180, 180, 3)) 225 | outputs <- inputs |> 226 | layer_rescaling(1 / 255) |> 227 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |> 228 | layer_max_pooling_2d(pool_size = 2) |> 229 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 230 | layer_max_pooling_2d(pool_size = 2) |> 231 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 232 | layer_max_pooling_2d(pool_size = 2) |> 233 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 234 | layer_max_pooling_2d(pool_size = 2) |> 235 | layer_conv_2d(filters = 512, kernel_size = 3, activation = "relu") |> 236 | layer_global_average_pooling_2d() |> 237 | layer_dropout(0.25) |> 238 | layer_dense(1, activation = "sigmoid") 239 | 240 | model <- keras_model(inputs, outputs) 241 | 242 | model |> compile( 243 | loss = "binary_crossentropy", 244 | optimizer = "adam", 245 | metrics = "accuracy" 246 | ) 247 | 248 | 249 | callbacks <- list( 250 | callback_model_checkpoint( 251 | filepath = "convnet_from_scratch_with_augmentation.keras", 252 | save_best_only = TRUE, 253 | monitor = "val_loss" 254 | ) 255 | ) 256 | 257 | history <- model |> fit( 258 | augmented_train_dataset, 259 | epochs = 100, # <1> 260 | validation_data = validation_dataset, 261 | callbacks = callbacks 262 | ) 263 | 264 | 265 | plot(history) 266 | 267 | 268 | test_model <- load_model("convnet_from_scratch_with_augmentation.keras") 269 | result <- evaluate(test_model, test_dataset) 270 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 271 | 272 | 273 | conv_base <- application_xception( 274 | weights = "imagenet", 275 | include_top = FALSE, 276 | input_shape = c(180, 180, 3) 277 | ) 278 | 279 | 280 | preprocess_inputs <- application_preprocess_inputs(conv_base) # <1> 281 | get_features_and_labels <- function(dataset) { 282 | dataset |> 283 | as_array_iterator() |> 284 | iterate(function(batch) { 285 | .[images, labels] <- batch 286 | preprocessed_images <- preprocess_inputs(images) 287 | features <- conv_base |> predict(preprocessed_images, verbose = 0) 288 | tibble::tibble(features, labels) 289 | }) |> 290 | dplyr::bind_rows() 291 | } 292 | 293 | .[train_features, train_labels] <- get_features_and_labels(train_dataset) 294 | .[val_features, val_labels] <- get_features_and_labels(validation_dataset) 295 | .[test_features, test_labels] <- get_features_and_labels(test_dataset) 296 | 297 | 298 | dim(train_features) 299 | 300 | 301 | inputs <- keras_input(shape = c(6, 6, 2048)) 302 | outputs <- inputs |> 303 | layer_global_average_pooling_2d() |> # <1> 304 | layer_dense(256, activation = "relu") |> 305 | layer_dropout(0.25) |> 306 | layer_dense(1, activation = "sigmoid") 307 | 308 | model <- keras_model(inputs, outputs) 309 | 310 | model |> compile( 311 | loss = "binary_crossentropy", 312 | optimizer = "adam", 313 | metrics = "accuracy" 314 | ) 315 | 316 | callbacks <- list( 317 | callback_model_checkpoint( 318 | filepath = "feature_extraction.keras", 319 | save_best_only = TRUE, 320 | monitor = "val_loss" 321 | ) 322 | ) 323 
| 324 | history <- model |> fit( 325 | train_features, train_labels, 326 | epochs = 2, 327 | validation_data = list(val_features, val_labels), 328 | callbacks = callbacks 329 | ) 330 | 331 | 332 | plot(history) 333 | 334 | 335 | test_model <- load_model("feature_extraction.keras") 336 | result <- evaluate(test_model, test_features, test_labels) 337 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 338 | 339 | 340 | conv_base <- application_xception( 341 | weights = "imagenet", 342 | include_top = FALSE, 343 | input_shape = c(180, 180, 3) 344 | ) 345 | freeze_weights(conv_base) 346 | 347 | 348 | unfreeze_weights(conv_base) # <1> 349 | length(conv_base$trainable_weights) 350 | 351 | freeze_weights(conv_base) # <2> 352 | length(conv_base$trainable_weights) 353 | 354 | 355 | inputs <- keras_input(shape=c(180, 180, 3)) 356 | outputs <- inputs |> 357 | preprocess_inputs() |> 358 | conv_base() |> 359 | layer_global_average_pooling_2d() |> 360 | layer_dense(256) |> 361 | layer_dropout(0.25) |> 362 | layer_dense(1, activation = "sigmoid") 363 | model <- keras_model(inputs, outputs) 364 | model |> compile( 365 | loss = "binary_crossentropy", 366 | optimizer = "adam", 367 | metrics = "accuracy" 368 | ) 369 | 370 | 371 | callbacks <- list( 372 | callback_model_checkpoint( 373 | filepath = "feature_extraction_with_data_augmentation.keras", 374 | save_best_only = TRUE, 375 | monitor = "val_loss" 376 | ) 377 | ) 378 | 379 | history <- model |> fit( 380 | augmented_train_dataset, 381 | epochs = 30, 382 | validation_data = validation_dataset, 383 | callbacks = callbacks 384 | ) 385 | 386 | 387 | plot(history) 388 | 389 | 390 | test_model <- load_model( 391 | "feature_extraction_with_data_augmentation.keras") 392 | result <- evaluate(test_model, test_dataset) 393 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 394 | 395 | 396 | unfreeze_weights(conv_base, from = -4) 397 | conv_base 398 | 399 | 400 | model |> compile( 401 | loss = "binary_crossentropy", 402 | optimizer = optimizer_adam(learning_rate = 1e-5), 403 | metrics = "accuracy" 404 | ) 405 | 406 | callbacks <- list( 407 | callback_model_checkpoint( 408 | filepath = "fine_tuning.keras", 409 | save_best_only = TRUE, 410 | monitor = "val_loss" 411 | ) 412 | ) 413 | 414 | history <- model |> fit( 415 | augmented_train_dataset, 416 | epochs = 30, 417 | validation_data = validation_dataset, 418 | callbacks = callbacks 419 | ) 420 | 421 | 422 | model <- load_model("fine_tuning.keras") 423 | result <- evaluate(model, test_dataset) 424 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 425 | 426 | -------------------------------------------------------------------------------- /2e/ch04.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | tensorflow::as_tensor(1) 3 | 4 | 5 | ## ------------------------------------------------------------------------- 6 | library(keras3) 7 | 8 | imdb <- dataset_imdb(num_words = 10000) 9 | c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb 10 | 11 | 12 | ## ------------------------------------------------------------------------- 13 | str(train_data) 14 | str(train_labels) 15 | 16 | 17 | ## ------------------------------------------------------------------------- 18 | max(sapply(train_data, max)) 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | word_index <- dataset_imdb_word_index() 23 | 24 | reverse_word_index <- 
names(word_index) 25 | names(reverse_word_index) <- as.character(word_index) 26 | 27 | decoded_words <- train_data[[1]] %>% 28 | sapply(function(i) { 29 | if (i > 3) reverse_word_index[[as.character(i - 3)]] 30 | else "?" 31 | }) 32 | decoded_review <- paste0(decoded_words, collapse = " ") 33 | decoded_review 34 | 35 | 36 | ## ------------------------------------------------------------------------- 37 | vectorize_sequences <- function(sequences, dimension = 10000) { 38 | results <- array(0, dim = c(length(sequences), dimension)) 39 | for (i in seq_along(sequences)) { 40 | sequence <- sequences[[i]] 41 | for (j in sequence) 42 | results[i, j] <- 1 43 | } 44 | results 45 | } 46 | 47 | x_train <- vectorize_sequences(train_data) 48 | x_test <- vectorize_sequences(test_data) 49 | 50 | 51 | ## ------------------------------------------------------------------------- 52 | str(x_train) 53 | 54 | 55 | ## ------------------------------------------------------------------------- 56 | y_train <- as.numeric(train_labels) 57 | y_test <- as.numeric(test_labels) 58 | 59 | 60 | ## ------------------------------------------------------------------------- 61 | model <- keras_model_sequential() %>% 62 | layer_dense(16, activation = "relu") %>% 63 | layer_dense(16, activation = "relu") %>% 64 | layer_dense(1, activation = "sigmoid") 65 | 66 | 67 | ## ---- eval = FALSE-------------------------------------------------------- 68 | ## output <- relu(dot(input, W) + b) 69 | 70 | 71 | ## ---- eval = FALSE-------------------------------------------------------- 72 | ## output <- dot(input, W) + b 73 | 74 | 75 | ## ------------------------------------------------------------------------- 76 | model %>% compile(optimizer = "rmsprop", 77 | loss = "binary_crossentropy", 78 | metrics = "accuracy") 79 | 80 | 81 | ## ------------------------------------------------------------------------- 82 | x_val <- x_train[seq(10000), ] 83 | partial_x_train <- x_train[-seq(10000), ] 84 | y_val <- y_train[seq(10000)] 85 | partial_y_train <- y_train[-seq(10000)] 86 | 87 | 88 | ## ------------------------------------------------------------------------- 89 | history <- model %>% fit( 90 | partial_x_train, 91 | partial_y_train, 92 | epochs = 20, 93 | batch_size = 512, 94 | validation_data = list(x_val, y_val) 95 | ) 96 | 97 | 98 | ## ------------------------------------------------------------------------- 99 | str(history$metrics) 100 | 101 | 102 | ## ---- message=FALSE------------------------------------------------------- 103 | plot(history) 104 | 105 | 106 | ## ------------------------------------------------------------------------- 107 | history_df <- as.data.frame(history) 108 | str(history_df) 109 | 110 | 111 | ## ------------------------------------------------------------------------- 112 | model <- keras_model_sequential() %>% 113 | layer_dense(16, activation = "relu") %>% 114 | layer_dense(16, activation = "relu") %>% 115 | layer_dense(1, activation = "sigmoid") 116 | 117 | model %>% compile(optimizer = "rmsprop", 118 | loss = "binary_crossentropy", 119 | metrics = "accuracy") 120 | 121 | model %>% fit(x_train, y_train, epochs = 4, batch_size = 512) 122 | results <- model %>% evaluate(x_test, y_test) 123 | 124 | 125 | ## ------------------------------------------------------------------------- 126 | results 127 | 128 | 129 | ## ---- include = FALSE----------------------------------------------------- 130 | model %>% predict(x_test) 131 | 132 | 133 | ## ------------------------------------------------------------------------- 
134 | reuters <- dataset_reuters(num_words = 10000) 135 | c(c(train_data, train_labels), c(test_data, test_labels)) %<-% reuters 136 | 137 | 138 | ## ------------------------------------------------------------------------- 139 | length(train_data) 140 | length(test_data) 141 | 142 | 143 | ## ------------------------------------------------------------------------- 144 | str(train_data) 145 | 146 | 147 | ## ------------------------------------------------------------------------- 148 | word_index <- dataset_reuters_word_index() 149 | 150 | reverse_word_index <- names(word_index) 151 | names(reverse_word_index) <- as.character(word_index) 152 | 153 | decoded_words <- train_data[[1]] %>% 154 | sapply(function(i) { 155 | if (i > 3) reverse_word_index[[as.character(i - 3)]] 156 | else "?" 157 | }) 158 | decoded_review <- paste0(decoded_words, collapse = " ") 159 | decoded_review 160 | 161 | 162 | ## ------------------------------------------------------------------------- 163 | str(train_labels) 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | vectorize_sequences <- function(sequences, dimension = 10000) { 168 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 169 | for (i in seq_along(sequences)) 170 | results[i, sequences[[i]]] <- 1 171 | results 172 | } 173 | 174 | x_train <- vectorize_sequences(train_data) 175 | x_test <- vectorize_sequences(test_data) 176 | 177 | 178 | ## ------------------------------------------------------------------------- 179 | to_one_hot <- function(labels, dimension = 46) { 180 | results <- matrix(0, nrow = length(labels), ncol = dimension) 181 | labels <- labels + 1 182 | for(i in seq_along(labels)) { 183 | j <- labels[[i]] 184 | results[i, j] <- 1 185 | } 186 | results 187 | } 188 | y_train <- to_one_hot(train_labels) 189 | y_test <- to_one_hot(test_labels) 190 | 191 | 192 | ## ------------------------------------------------------------------------- 193 | y_train <- to_categorical(train_labels) 194 | y_test <- to_categorical(test_labels) 195 | 196 | 197 | ## ------------------------------------------------------------------------- 198 | model <- keras_model_sequential() %>% 199 | layer_dense(64, activation = "relu") %>% 200 | layer_dense(64, activation = "relu") %>% 201 | layer_dense(46, activation = "softmax") 202 | 203 | 204 | ## ------------------------------------------------------------------------- 205 | model %>% compile(optimizer = "rmsprop", 206 | loss = "categorical_crossentropy", 207 | metrics = "accuracy") 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | val_indices <- 1:1000 212 | 213 | x_val <- x_train[val_indices, ] 214 | partial_x_train <- x_train[-val_indices, ] 215 | 216 | y_val <- y_train[val_indices, ] 217 | partial_y_train <- y_train[-val_indices, ] 218 | 219 | 220 | ## ------------------------------------------------------------------------- 221 | history <- model %>% fit( 222 | partial_x_train, 223 | partial_y_train, 224 | epochs = 20, 225 | batch_size = 512, 226 | validation_data = list(x_val, y_val) 227 | ) 228 | 229 | 230 | ## ------------------------------------------------------------------------- 231 | plot(history) 232 | 233 | 234 | ## ------------------------------------------------------------------------- 235 | model <- keras_model_sequential() %>% 236 | layer_dense(64, activation = "relu") %>% 237 | layer_dense(64, activation = "relu") %>% 238 | layer_dense(46, activation = "softmax") 239 | 240 | model %>% 
compile(optimizer = "rmsprop", 241 | loss = "categorical_crossentropy", 242 | metrics = "accuracy") 243 | 244 | model %>% fit(x_train, y_train, epochs = 9, batch_size = 512) 245 | 246 | results <- model %>% evaluate(x_test, y_test) 247 | 248 | 249 | ## ------------------------------------------------------------------------- 250 | results 251 | 252 | 253 | ## ------------------------------------------------------------------------- 254 | mean(test_labels == sample(test_labels)) 255 | 256 | 257 | ## ------------------------------------------------------------------------- 258 | predictions <- model %>% predict(x_test) 259 | 260 | 261 | ## ------------------------------------------------------------------------- 262 | str(predictions) 263 | 264 | 265 | ## ------------------------------------------------------------------------- 266 | sum(predictions[1, ]) 267 | 268 | 269 | ## ------------------------------------------------------------------------- 270 | which.max(predictions[1, ]) 271 | 272 | 273 | ## ------------------------------------------------------------------------- 274 | y_train <- train_labels 275 | y_test <- test_labels 276 | 277 | 278 | ## ------------------------------------------------------------------------- 279 | model %>% compile( 280 | optimizer = "rmsprop", 281 | loss = "sparse_categorical_crossentropy", 282 | metrics = "accuracy") 283 | 284 | 285 | ## ------------------------------------------------------------------------- 286 | model <- keras_model_sequential() %>% 287 | layer_dense(64, activation = "relu") %>% 288 | layer_dense(4, activation = "relu") %>% 289 | layer_dense(46, activation = "softmax") 290 | 291 | model %>% compile(optimizer = "rmsprop", 292 | loss = "categorical_crossentropy", 293 | metrics = "accuracy") 294 | 295 | model %>% fit( 296 | partial_x_train, 297 | partial_y_train, 298 | epochs = 20, 299 | batch_size = 128, 300 | validation_data = list(x_val, y_val) 301 | ) 302 | 303 | 304 | ## ------------------------------------------------------------------------- 305 | boston <- dataset_boston_housing() 306 | c(c(train_data, train_targets), c(test_data, test_targets)) %<-% boston 307 | 308 | 309 | ## ------------------------------------------------------------------------- 310 | str(train_data) 311 | str(test_data) 312 | 313 | 314 | ## ------------------------------------------------------------------------- 315 | str(train_targets) 316 | 317 | 318 | ## ------------------------------------------------------------------------- 319 | mean <- apply(train_data, 2, mean) 320 | sd <- apply(train_data, 2, sd) 321 | train_data <- scale(train_data, center = mean, scale = sd) 322 | test_data <- scale(test_data, center = mean, scale = sd) 323 | 324 | 325 | ## ------------------------------------------------------------------------- 326 | build_model <- function() { 327 | 328 | model <- keras_model_sequential() %>% 329 | layer_dense(64, activation = "relu") %>% 330 | layer_dense(64, activation = "relu") %>% 331 | layer_dense(1) 332 | 333 | model %>% compile(optimizer = "rmsprop", 334 | loss = "mse", 335 | metrics = "mae") 336 | model 337 | } 338 | 339 | 340 | ## ------------------------------------------------------------------------- 341 | k <- 4 342 | fold_id <- sample(rep(1:k, length.out = nrow(train_data))) 343 | num_epochs <- 100 344 | all_scores <- numeric() 345 | 346 | for (i in 1:k) { 347 | cat("Processing fold #", i, "\n") 348 | 349 | val_indices <- which(fold_id == i) 350 | 351 | val_data <- train_data[val_indices, ] 352 | val_targets <- 
train_targets[val_indices] 353 | 354 | partial_train_data <- train_data[-val_indices, ] 355 | partial_train_targets <- train_targets[-val_indices] 356 | 357 | model <- build_model() 358 | 359 | model %>% fit( 360 | partial_train_data, 361 | partial_train_targets, 362 | epochs = num_epochs, 363 | batch_size = 16, 364 | verbose = 0 365 | ) 366 | 367 | results <- model %>% evaluate(val_data, val_targets, verbose = 0) 368 | all_scores[[i]] <- results[['mae']] 369 | } 370 | 371 | 372 | ## ------------------------------------------------------------------------- 373 | all_scores 374 | mean(all_scores) 375 | 376 | 377 | ## ------------------------------------------------------------------------- 378 | num_epochs <- 500 379 | all_mae_histories <- list() 380 | for (i in 1:k) { 381 | cat("Processing fold #", i, "\n") 382 | 383 | val_indices <- which(fold_id == i) 384 | val_data <- train_data[val_indices, ] 385 | val_targets <- train_targets[val_indices] 386 | 387 | partial_train_data <- train_data[-val_indices, ] 388 | partial_train_targets <- train_targets[-val_indices] 389 | 390 | model <- build_model() 391 | history <- model %>% fit( 392 | partial_train_data, partial_train_targets, 393 | validation_data = list(val_data, val_targets), 394 | epochs = num_epochs, batch_size = 16, verbose = 0 395 | ) 396 | mae_history <- history$metrics$val_mae 397 | all_mae_histories[[i]] <- mae_history 398 | } 399 | 400 | all_mae_histories <- do.call(cbind, all_mae_histories) 401 | 402 | 403 | ## ------------------------------------------------------------------------- 404 | average_mae_history <- rowMeans(all_mae_histories) 405 | 406 | 407 | ## ------------------------------------------------------------------------- 408 | plot(average_mae_history, xlab = "epoch", type = 'l') 409 | 410 | 411 | ## ------------------------------------------------------------------------- 412 | truncated_mae_history <- average_mae_history[-(1:10)] 413 | plot(average_mae_history, xlab = "epoch", type = 'l', 414 | ylim = range(truncated_mae_history)) 415 | 416 | 417 | ## ------------------------------------------------------------------------- 418 | model <- build_model() 419 | model %>% fit(train_data, train_targets, 420 | epochs = 130, batch_size = 16, verbose = 0) 421 | result <- model %>% evaluate(test_data, test_targets) 422 | 423 | 424 | ## ------------------------------------------------------------------------- 425 | result$mae 426 | 427 | 428 | ## ------------------------------------------------------------------------- 429 | predictions <- model %>% predict(test_data) 430 | predictions[1, ] 431 | -------------------------------------------------------------------------------- /chapter02_mathematical-building-blocks.R: -------------------------------------------------------------------------------- 1 | keras3::use_backend("tensorflow") 2 | 3 | 4 | library(keras3) 5 | .[.[train_images, train_labels], .[test_images, test_labels]] <- dataset_mnist() 6 | 7 | 8 | str(train_images) 9 | str(train_labels) 10 | 11 | 12 | str(test_images) 13 | str(test_labels) 14 | 15 | 16 | model <- keras_model_sequential(layers = list( 17 | layer_dense(units = 512, activation = "relu"), 18 | layer_dense(units = 10, activation = "softmax") 19 | )) 20 | 21 | 22 | model |> compile( 23 | optimizer = "adam", 24 | loss = "sparse_categorical_crossentropy", 25 | metrics = "accuracy" 26 | ) 27 | 28 | 29 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) 30 | train_images <- train_images / 255 31 | test_images <- array_reshape(test_images, 
c(10000, 28 * 28)) 32 | test_images <- test_images / 255 33 | 34 | test_labels <- matrix(test_labels, ncol = 1) 35 | train_labels <- matrix(train_labels, ncol = 1) 36 | 37 | 38 | fit(model, train_images, train_labels, epochs = 5, batch_size = 128) 39 | 40 | 41 | test_digits <- test_images[1:10, ] 42 | predictions <- predict(model, test_digits) 43 | str(predictions) 44 | predictions[1, ] 45 | 46 | 47 | which.max(predictions[1,]) 48 | predictions[1, 8] 49 | 50 | 51 | test_labels[1] 52 | 53 | 54 | metrics <- evaluate(model, test_images, test_labels) 55 | metrics$accuracy 56 | 57 | 58 | x <- as.array(c(12, 3, 6, 14, 7)) 59 | str(x) 60 | dim(x) 61 | 62 | 63 | x <- np_array(x) 64 | x 65 | x$shape 66 | x$ndim 67 | 68 | 69 | x <- array(seq(3 * 5), dim = c(3, 5)) 70 | x 71 | dim(x) 72 | 73 | np_array(x) 74 | 75 | 76 | x <- array(seq(2 * 3 * 4), dim = c(2, 3, 4)) 77 | str(x) 78 | dim(x) 79 | length(dim(x)) 80 | 81 | 82 | x 83 | np_array(x) 84 | 85 | 86 | .[.[train_images, train_labels], .[test_images, test_labels]] <- 87 | dataset_mnist() # <1> 88 | 89 | 90 | length(dim(train_images)) 91 | 92 | 93 | dim(train_images) 94 | 95 | 96 | typeof(train_images) 97 | 98 | 99 | knitr::knit_hooks$set(no_mar = function(before, ...) { 100 | if (before) par(mar = c(0, 0, 0, 0)) 101 | }) 102 | 103 | 104 | digit <- train_images[5, , ] 105 | plot(as.raster(abs(255 - digit), max = 255)) 106 | 107 | 108 | train_labels[5] 109 | 110 | 111 | x <- np_array(1:10) 112 | x@r[1] 113 | x@py[1] 114 | 115 | 116 | x@r[1:3] 117 | x@py[1:3] 118 | 119 | 120 | x@r[NA:5] 121 | x@r[6:NA] 122 | 123 | 124 | images <- np_array(train_images, "float32") 125 | my_slice <- images@r[10:99] # <1> 126 | my_slice <- images@r[10:99, , ] # <2> 127 | my_slice <- images@r[10:99, NA:NA, NA:NA] # <3> 128 | my_slice <- images@r[10:99, 1:28, 1:28] # <4> 129 | my_slice$shape 130 | 131 | 132 | my_slice <- images@r[, 15:NA, 15:NA] 133 | my_slice$shape 134 | 135 | 136 | images$shape # <1> 137 | images@r[newaxis] |> _$shape # <2> 138 | images@r[, newaxis] |> _$shape # <3> 139 | images@r[.., newaxis] |> _$shape # <4> 140 | images@r[.., newaxis, ] |> _$shape # <5> 141 | 142 | 143 | x <- np_array(1:10) 144 | x@r[-1] 145 | x@r[-3:-1] 146 | x@r[-3:NA] 147 | 148 | 149 | my_slice <- images@r[, 8:-8, 8:-8] 150 | my_slice$shape 151 | 152 | 153 | batch <- train_images[1:128, , ] # <1> 154 | 155 | 156 | batch <- train_images[129:256, , ] 157 | 158 | 159 | n <- 3 160 | ids <- seq(to = 128 * n, length.out = 128) 161 | batch <- train_images[ids, , ] 162 | 163 | 164 | naive_relu <- function(x) { 165 | stopifnot(is.array(x), length(dim(x)) == 2) # <1> 166 | for (i in 1:nrow(x)) 167 | for (j in 1:ncol(x)) 168 | x[i, j] <- max(x[i, j], 0) # <2> 169 | x 170 | } 171 | 172 | 173 | naive_add <- function(x, y) { 174 | stopifnot(is.array(x), is.array(y), 175 | length(dim(x)) == 2, dim(x) == dim(y)) # <1> 176 | for (i in 1:nrow(x)) 177 | for (j in 1:ncol(x)) 178 | x[i, j] <- x[i, j] + y[i, j] 179 | x 180 | } 181 | 182 | 183 | runif_array <- function(dim) { 184 | array(runif(prod(dim)), dim) 185 | } 186 | 187 | x <- runif_array(c(20, 100)) 188 | y <- runif_array(c(20, 100)) 189 | 190 | system.time({ 191 | for (i in seq_len(1000)) { 192 | z <- x + y 193 | z <- pmax(z, 0) 194 | } 195 | })[["elapsed"]] 196 | 197 | 198 | x <- runif_array(c(20, 100)) 199 | y <- runif_array(c(20, 100)) 200 | 201 | system.time({ 202 | for (i in seq_len(1000)) { 203 | z <- naive_add(x, y) 204 | z <- naive_relu(z) 205 | } 206 | })[["elapsed"]] 207 | 208 | 209 | X <- runif_array(c(32, 10)) # <1> 210 | y <- 
runif_array(c(10)) # <2> 211 | 212 | 213 | dim(y) <- c(1, 10) 214 | str(y) # <1> 215 | 216 | 217 | Y <- y[rep(1, 32), ] 218 | dim(Y) # <1> 219 | 220 | 221 | naive_add_matrix_and_vector <- function(x, y) { 222 | stopifnot(length(dim(x)) == 2, # <1> 223 | length(dim(y)) == 1, # <2> 224 | ncol(x) == dim(y)) 225 | for (i in seq_len(dim(x)[1])) 226 | for (j in seq_len(dim(x)[2])) 227 | x[i, j] <- x[i, j] + y[j] 228 | x 229 | } 230 | 231 | 232 | x <- np_array(runif_array(c(64, 3, 32, 10))) # <1> 233 | y <- np_array(runif_array(c(32, 10))) # <2> 234 | z <- x + y # <3> 235 | 236 | 237 | x <- np_array(runif_array(32)) 238 | y <- np_array(runif_array(32)) 239 | 240 | z <- x %*% y # <1> 241 | 242 | np <- reticulate::import("numpy", convert = FALSE) 243 | z <- np$matmul(x, y) # <2> 244 | 245 | 246 | naive_vector_dot <- function(x, y) { 247 | stopifnot(length(dim(x)) == 1, # <1> 248 | length(dim(y)) == 1, # <1> 249 | dim(x) == dim(y)) # # <1> 250 | z <- 0 251 | for (i in seq_along(x)) 252 | z <- z + x[i] * y[i] 253 | z 254 | } 255 | 256 | 257 | naive_matrix_vector_dot <- function(x, y) { 258 | stopifnot(length(dim(x)) == 2, # <1> 259 | length(dim(y)) == 1, # <2> 260 | nrow(x) == dim(y)) # <3> 261 | z <- array(0, dim = nrow(x)) # <4> 262 | for (i in 1:nrow(x)) 263 | for (j in 1:ncol(x)) 264 | z[i] <- z[i] + x[i, j] * y[j] 265 | z 266 | } 267 | 268 | 269 | naive_matrix_vector_dot <- function(x, y) { 270 | z <- array(0, dim = c(nrow(x))) 271 | for (i in 1:nrow(x)) 272 | z[i] <- naive_vector_dot(x[i, ], y) 273 | z 274 | } 275 | 276 | 277 | naive_matrix_dot <- function(x, y) { 278 | stopifnot(length(dim(x)) == 2, # <1> 279 | length(dim(y)) == 2, # <1> 280 | ncol(x) == nrow(y)) # <2> 281 | z <- array(0, dim = c(nrow(x), ncol(y))) # <3> 282 | for (i in 1:nrow(x)) # <4> 283 | for (j in 1:ncol(y)) { # <5> 284 | row_x <- x[i, ] 285 | column_y <- y[, j] 286 | z[i, j] <- naive_vector_dot(row_x, column_y) 287 | } 288 | z 289 | } 290 | 291 | 292 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) 293 | 294 | 295 | x <- array(1:6) 296 | x 297 | array_reshape(x, dim = c(3, 2)) 298 | array_reshape(x, dim = c(2, 3)) 299 | 300 | 301 | x <- array(1:6, dim = c(3, 2)) 302 | x 303 | t(x) 304 | 305 | 306 | .[.[train_images, train_labels], .[test_images, test_labels]] <- 307 | dataset_mnist() 308 | 309 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) / 255 310 | test_images <- array_reshape(test_images, c(10000, 28 * 28)) / 255 311 | 312 | test_labels <- matrix(test_labels, ncol = 1) 313 | train_labels <- matrix(train_labels, ncol = 1) 314 | 315 | 316 | model <- keras_model_sequential() |> 317 | layer_dense(units = 512, activation = "relu") |> 318 | layer_dense(units = 10, activation = "softmax") 319 | 320 | 321 | compile(model, 322 | optimizer = "rmsprop", 323 | loss = "sparse_categorical_crossentropy", 324 | metrics = c("accuracy")) 325 | 326 | 327 | fit(model, train_images, train_labels, epochs = 5, batch_size = 128) 328 | 329 | 330 | layer_naive_dense <- function(input_size, output_size, activation = NULL) { 331 | self <- new.env(parent = emptyenv()) # <1> 332 | attr(self, "class") <- "NaiveDense" 333 | 334 | self$activation <- activation 335 | 336 | self$W <- keras_variable(shape = shape(input_size, output_size), # <2> 337 | initializer = "uniform", dtype = "float32") # <2> 338 | 339 | self$b <- keras_variable(shape = shape(output_size), # <3> 340 | initializer = "zeros", dtype = "float32") # <3> 341 | 342 | self$weights <- list(self$W, self$b) # <4> 343 | 344 | self$call <- function(inputs) { 
# <5> 345 | x <- (inputs %*% self$W) + self$b # <5> 346 | if (is.function(self$activation)) # <5> 347 | x <- self$activation(x) # <5> 348 | x # <5> 349 | } # <5> 350 | 351 | self 352 | } 353 | 354 | 355 | naive_sequential_model <- function(layers) { 356 | self <- new.env(parent = emptyenv()) 357 | attr(self, "class") <- "NaiveSequential" 358 | 359 | self$layers <- layers 360 | 361 | self$weights <- lapply(layers, \(layer) layer$weights) |> unlist() 362 | 363 | self$call <- function(inputs) { 364 | x <- inputs 365 | for (layer in self$layers) 366 | x <- layer$call(x) 367 | x 368 | } 369 | 370 | self 371 | } 372 | 373 | 374 | model <- naive_sequential_model(list( 375 | layer_naive_dense(input_size = 28 * 28, output_size = 512, 376 | activation = op_relu), 377 | layer_naive_dense(input_size = 512, output_size = 10, 378 | activation = op_softmax) 379 | )) 380 | stopifnot(length(model$weights) == 4) 381 | 382 | 383 | new_batch_generator <- function(images, labels, batch_size = 128) { 384 | 385 | stopifnot(nrow(images) == nrow(labels)) 386 | index <- 1 387 | 388 | function() { # <1> 389 | start <- index 390 | if(start > nrow(images)) 391 | return(NULL) # <2> 392 | 393 | end <- start + batch_size - 1 394 | if(end > nrow(images)) 395 | end <- nrow(images) # <3> 396 | 397 | index <<- end + 1 398 | list(images = images[start:end, , drop = FALSE], 399 | labels = labels[start:end, , drop = FALSE]) 400 | } 401 | } 402 | 403 | 404 | learning_rate <- 1e-3 405 | 406 | update_weights <- function(gradients, weights) { 407 | mapply(function(w, g) { 408 | w$assign(w - g * learning_rate) # <1> 409 | }, weights, gradients) 410 | } 411 | 412 | 413 | optimizer <- optimizer_sgd(learning_rate = 1e-3) 414 | 415 | update_weights <- function(gradients, weights) { 416 | optimizer$apply(gradients, weights) 417 | } 418 | 419 | 420 | library(tensorflow) 421 | 422 | x <- tf$zeros(shape = shape()) # <1> 423 | with(tf$GradientTape() %as% tape, { # <2> 424 | y <- 2 * x + 3 # <3> 425 | }) 426 | grad_of_y_wrt_x <- tape$gradient(y, x) # <4> 427 | 428 | 429 | one_training_step <- function(model, images_batch, labels_batch) { 430 | with(tf$GradientTape() %as% tape, { 431 | predictions <- model$call(images_batch) 432 | per_sample_losses <- 433 | op_sparse_categorical_crossentropy(labels_batch, predictions) 434 | average_loss <- op_mean(per_sample_losses) 435 | }) 436 | gradients <- tape$gradient(average_loss, model$weights) 437 | update_weights(gradients, model$weights) 438 | average_loss 439 | } 440 | 441 | 442 | fit <- function(model, images, labels, epochs, batch_size = 128) { 443 | for (epoch_counter in seq_len(epochs)) { 444 | cat("Epoch ", epoch_counter, "\n") 445 | batch_generator <- new_batch_generator(images, labels, batch_size) 446 | batch_counter <- 0 447 | repeat { 448 | batch <- batch_generator() 449 | if (is.null(batch)) 450 | break 451 | batch_counter <- batch_counter + 1 452 | loss <- one_training_step(model, batch$images, batch$labels) 453 | if (batch_counter %% 100 == 0) 454 | cat(sprintf("loss at batch %s: %.2f\n", batch_counter, loss)) 455 | } 456 | } 457 | } 458 | 459 | 460 | mnist <- dataset_mnist() 461 | train_images <- array_reshape(mnist$train$x, c(60000, 28 * 28)) / 255 462 | test_images <- array_reshape(mnist$test$x, c(10000, 28 * 28)) / 255 463 | train_labels <- matrix(mnist$train$y) 464 | test_labels <- matrix(mnist$test$y) 465 | 466 | fit(model, train_images, train_labels, epochs = 10, batch_size = 128) 467 | 468 | 469 | predictions <- model$call(test_images) 470 | predictions <- as.array(predictions) 
# convert Tensorflow Tensor to R array 471 | predicted_labels <- max.col(predictions) - 1 472 | matches <- predicted_labels == test_labels 473 | cat(sprintf("accuracy: %.2f\n", mean(matches))) 474 | 475 | -------------------------------------------------------------------------------- /chapter16_text-generation.R: -------------------------------------------------------------------------------- 1 | library(fs) 2 | library(stringr) 3 | library(keras3) 4 | reticulate::py_require("keras-hub==0.18.1") 5 | reticulate::py_require("tensorflow-text") 6 | config_set_dtype_policy("float16") 7 | 8 | 9 | zipfile <- get_file( 10 | origin = "https://huggingface.co/datasets/mattdangerw/mini-c4/resolve/main/mini-c4.zip" 11 | ) 12 | 13 | 14 | unzip(zipfile, list = TRUE) # <1> 15 | 16 | 17 | extract_dir <- fs::path("./mini-c4") 18 | 19 | 20 | fs::dir_info(extract_dir) 21 | fs::path(extract_dir, "shard0.txt") |> 22 | readLines(n = 1) |> 23 | str_replace_all(r"(\\n)", "\n") |> 24 | str_split_1("\n") |> 25 | str_wrap(width = 76) |> 26 | cat(sep = "\n--\n") 27 | 28 | 29 | keras_hub <- reticulate::import("keras_hub") 30 | tokenizer <- keras_hub$tokenizers$SentencePieceTokenizer("mini_gpt.model") 31 | 32 | 33 | tokenized <- tokenizer$tokenize("The quick brown fox.") 34 | tokenized 35 | tokenizer$detokenize(tokenized) 36 | 37 | 38 | library(tfdatasets, exclude = "shape") 39 | library(tensorflow, exclude = c("shape", "set_random_seed")) 40 | 41 | batch_size <- 128L 42 | sequence_length <- 256L 43 | suffix <- 44 | tokenizer$token_to_id("<|endoftext|>") |> 45 | tf$constant(shape = shape(1)) 46 | 47 | files <- dir_ls(extract_dir) 48 | ds <- 49 | text_line_dataset(files, num_parallel_reads = 12) |> 50 | dataset_map(\(x) tf$strings$regex_replace(x, r"(\\n)", "\n"), # <1> 51 | num_parallel_calls = 12) |> 52 | dataset_map(tokenizer, num_parallel_calls = 12) |> # <2> 53 | dataset_map(\(x) tf$concat(c(x, suffix), -1L), num_parallel_calls = 12) |> # <3> 54 | dataset_rebatch(sequence_length + 1, drop_remainder = TRUE) |> # <4> 55 | dataset_map(\(x) list(x@r[NA:-2], x@r[2:NA]), num_parallel_calls = 12) |> # <5> 56 | dataset_batch(batch_size, num_parallel_calls = 12) |> # <6> 57 | dataset_cache() 58 | 59 | 60 | num_batches <- 38581 61 | num_batches 62 | 63 | 64 | num_val_batches <- 500 65 | num_train_batches <- num_batches - num_val_batches 66 | val_ds <- ds |> dataset_take(num_val_batches) 67 | train_ds <- ds |> dataset_skip(num_val_batches) |> dataset_repeat() 68 | 69 | 70 | layer_transformer_decoder <- new_layer_class( 71 | "TransformerDecoder", 72 | initialize = function(hidden_dim, intermediate_dim, num_heads) { 73 | super$initialize() 74 | key_dim <- hidden_dim %/% num_heads 75 | self$self_attention <- layer_multi_head_attention( 76 | num_heads = num_heads, 77 | key_dim = key_dim, 78 | dropout = 0.1 79 | ) # <1> 80 | self$self_attention_layernorm <- layer_layer_normalization() # <1> 81 | self$feed_forward_1 <- layer_dense(units = intermediate_dim, 82 | activation = "relu") # <2> 83 | self$feed_forward_2 <- layer_dense(units = hidden_dim) # <2> 84 | self$feed_forward_layernorm <- layer_layer_normalization() # <2> 85 | self$dropout <- layer_dropout(rate = 0.1) # <2> 86 | }, 87 | call = function(inputs) { 88 | residual <- x <- inputs # <3> 89 | x <- self$self_attention(query = x, key = x, value = x, 90 | use_causal_mask = TRUE) # <3> 91 | x <- x |> self$dropout() # <3> 92 | x <- x + residual # <3> 93 | x <- x |> self$self_attention_layernorm() # <3> 94 | 95 | residual <- x # <4> 96 | x <- x |> 97 | self$feed_forward_1() |> # 
<4> 98 | self$feed_forward_2() |> # <4> 99 | self$dropout() 100 | x <- x + residual # <4> 101 | x <- x |> self$feed_forward_layernorm() 102 | 103 | x 104 | } 105 | ) 106 | 107 | 108 | layer_positional_embedding <- new_layer_class( 109 | "PositionalEmbedding", 110 | initialize = function(sequence_length, input_dim, output_dim) { 111 | super$initialize() 112 | self$token_embeddings <- layer_embedding( 113 | input_dim = input_dim, output_dim = output_dim 114 | ) 115 | self$position_embeddings <- layer_embedding( 116 | input_dim = sequence_length, output_dim = output_dim 117 | ) 118 | }, 119 | call = function(inputs, reverse = FALSE) { 120 | if (reverse) { 121 | token_embeddings <- self$token_embeddings$embeddings 122 | return(op_matmul(inputs, op_transpose(token_embeddings))) 123 | } 124 | .[.., sequence_length] <- op_shape(inputs) 125 | positions <- 126 | op_arange(0, sequence_length - 1, dtype = "int32") |> 127 | op_expand_dims(1) 128 | embedded_tokens <- self$token_embeddings(inputs) 129 | embedded_positions <- self$position_embeddings(positions) 130 | embedded_tokens + embedded_positions 131 | } 132 | ) 133 | 134 | 135 | vocab_size <- tokenizer$vocabulary_size() 136 | hidden_dim <- 128 137 | intermediate_dim <- 512 138 | num_heads <- 4 139 | num_layers <- 4 140 | 141 | inputs <- keras_input(shape = c(NA), dtype = "int32", name = "inputs") 142 | embedding <- 143 | layer_positional_embedding(, sequence_length, vocab_size, hidden_dim) 144 | 145 | x <- inputs |> 146 | embedding() |> 147 | layer_layer_normalization() 148 | 149 | for (i in seq_len(num_layers)) { 150 | x <- x |> 151 | layer_transformer_decoder(hidden_dim, intermediate_dim, num_heads) 152 | } 153 | 154 | outputs <- x |> embedding(reverse = TRUE) 155 | mini_gpt <- keras_model(inputs, outputs) 156 | 157 | 158 | hidden_dim <- 512 159 | intermediate_dim <- 2056 160 | num_heads <- 8 161 | num_layers <- 8 162 | 163 | 164 | warmup_schedule <- new_learning_rate_schedule_class( 165 | classname = "WarmupSchedule", 166 | 167 | initialize = function() { 168 | self$rate <- 1e-4 # <1> 169 | self$warmup_steps <- 1000 170 | }, 171 | 172 | call = function(step) { 173 | step <- step |> op_cast(dtype = "float32") 174 | scale <- op_minimum(step / self$warmup_steps, 1) 175 | self$rate * scale 176 | } 177 | ) 178 | 179 | 180 | schedule <- warmup_schedule() 181 | x <- seq(0, 5000, by = 100) 182 | y <- sapply(x, \(step) as.array(schedule(step))) 183 | plot(x, y, type = "l", 184 | main = "Warmup Schedule", 185 | xlab = "Train Step", ylab = "Learning Rate", 186 | bty = "n", panel.first = grid()) 187 | 188 | 189 | load_model_weights(mini_gpt, "mini_gpt.weights.h5") 190 | 191 | 192 | prompt <- "A piece of advice" 193 | generate <- function(prompt, max_length = 64) { 194 | tokens <- as.array(tokenizer(prompt)) 195 | prompt_length <- length(tokens) 196 | for (i in seq(from = prompt_length + 1, to = max_length)) { 197 | prediction <- mini_gpt(matrix(tokens, nrow = 1)) 198 | prediction <- prediction@r[1, -1] 199 | next_token <- op_argmax(prediction, zero_indexed = TRUE) 200 | tokens[i] <- as.array(next_token) 201 | } 202 | tokenizer$detokenize(tokens) 203 | } 204 | generate("A piece of advice") 205 | 206 | 207 | compiled_generate <- function(prompt, max_length = 64) { 208 | tokens <- as.array(tokenizer(prompt)) 209 | prompt_length <- length(tokens) 210 | tokens[seq(prompt_length + 1, max_length)] <- 0 # <1> 211 | dim(tokens) <- c(1, max_length) 212 | for (i in seq(prompt_length, max_length - 1)) { 213 | prediction <- predict(mini_gpt, tokens, verbose = 0) 214 | 
prediction <- prediction[, i, ] 215 | next_token <- which.max(prediction) - 1L 216 | tokens[, i + 1] <- next_token 217 | } 218 | tokenizer$detokenize(tokens) 219 | } 220 | 221 | 222 | system.time(compiled_generate(prompt, 64))[["elapsed"]] 223 | 224 | 225 | compiled_generate <- function(prompt, sample_fn, max_length = 64) { 226 | tokens <- as.array(tokenizer(prompt)) 227 | prompt_length <- length(tokens) 228 | tokens[seq(prompt_length + 1, max_length)] <- 0 229 | dim(tokens) <- c(1, max_length) 230 | for (i in seq(prompt_length, max_length - 1)) { 231 | prediction <- predict(mini_gpt, tokens, verbose = 0) 232 | prediction <- prediction[, i, ] 233 | next_token <- sample_fn(prediction) - 1L 234 | tokens[, i + 1] <- as.array(next_token) 235 | } 236 | tokenizer$detokenize(tokens) 237 | } 238 | 239 | 240 | greedy_search <- function(preds) { 241 | op_argmax(preds) 242 | } 243 | 244 | compiled_generate(prompt, greedy_search) 245 | 246 | 247 | random_sample <- function(preds, temperature = 1) { 248 | preds <- preds / temperature 249 | preds <- op_reshape(preds, c(1, -1)) 250 | random_categorical(preds, num_samples = 1) |> 251 | op_squeeze() 252 | } 253 | 254 | 255 | compiled_generate(prompt, random_sample) 256 | 257 | 258 | compiled_generate(prompt, \(x) random_sample(x, temperature = 2)) 259 | compiled_generate(prompt, \(x) random_sample(x, temperature = 0.8)) 260 | compiled_generate(prompt, \(x) random_sample(x, temperature = 0.2)) 261 | 262 | 263 | prompt <- "A piece of advice" 264 | top_k <- function(preds, k = 5, temperature = 1) { 265 | preds <- preds / temperature 266 | .[top_preds, top_indices] <- op_top_k(preds, k = k, sorted = FALSE) 267 | choice <- random_sample(top_preds) 268 | op_take(top_indices, choice) 269 | } 270 | 271 | 272 | compiled_generate(prompt, \(x) top_k(x, k = 5, temperature = 0.5)) 273 | 274 | 275 | rm(list = ls()); gc(TRUE); reticulate::import("gc")$collect() 276 | rm(list = ls()); gc(TRUE); reticulate::import("gc")$collect() 277 | 278 | 279 | library(keras3) 280 | library(reticulate) 281 | py_require("keras_hub") 282 | 283 | config_set_dtype_policy("float16") 284 | keras_hub <- import("keras_hub") 285 | kaggle_credentials <- jsonlite::read_json("~/.kaggle/kaggle.json") 286 | withr::with_envvar(c( 287 | KAGGLE_USERNAME = kaggle_credentials$username, 288 | KAGGLE_KEY = kaggle_credentials$key), { 289 | gemma_lm <- keras_hub$models$GemmaCausalLM$from_preset("gemma2_2b_en") 290 | } 291 | ) 292 | 293 | 294 | gemma_lm 295 | 296 | 297 | gemma_lm$compile(sampler = "greedy") 298 | gemma_lm$generate("A piece of advice", max_length = 64L) 299 | gemma_lm$generate("How can I make brownies?", max_length = 64L) 300 | 301 | 302 | gemma_lm$generate( 303 | paste0( 304 | "The following brownie recipe is easy to make in just a few steps.", 305 | "\n\nYou can start by" 306 | ), 307 | max_length = 64L 308 | ) 309 | 310 | 311 | gemma_lm$generate( 312 | "Tell me about the 61st president of the United States.", 313 | max_length = 64L 314 | ) 315 | 316 | 317 | TEMPLATE = glue::trim(r"---( 318 | [instruction] 319 | {instruction}[end] 320 | [reponse] 321 | {response}[end] 322 | )---") 323 | 324 | dataset_path <- get_file(origin = paste0( 325 | "https://huggingface.co/datasets/databricks/", 326 | "databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl" 327 | )) 328 | 329 | data <- readr::read_lines(dataset_path) |> 330 | lapply(jsonlite::parse_json) |> 331 | dplyr::bind_rows() 332 | 333 | data 334 | 335 | data <- data |> 336 | dplyr::filter(context != "") |> 337 | glue::glue_data(TEMPLATE) 
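# (Added sketch, not from the book.) glue_data() fills the {instruction} and
# {response} slots of TEMPLATE for every row of the data; applied to a single
# made-up record it yields one formatted training string:
glue::glue_data(
  list(instruction = "Name a primary color.", response = "Red."),
  TEMPLATE
)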
338 | 339 | 340 | writeLines(data[[1]]) 341 | 342 | 343 | library(tfdatasets) 344 | ds <- tensor_slices_dataset(data) |> 345 | dataset_shuffle(2000) |> 346 | dataset_batch(1) 347 | val_ds <- ds |> dataset_take(100) 348 | train_ds <- ds |> dataset_skip(100) 349 | 350 | 351 | preprocessor <- gemma_lm$preprocessor 352 | preprocessor$sequence_length <- 512L 353 | batch <- iter_next(as_iterator(train_ds)) 354 | str(batch) 355 | .[x, y, sample_weight] <- preprocessor(batch) 356 | str(x) 357 | str(y) 358 | str(sample_weight) 359 | 360 | 361 | x$token_ids |> as.array() |> _[1, 1:5] 362 | y |> as.array() |> _[1, 1:5] 363 | 364 | 365 | layer_linear <- new_layer_class( 366 | classname = "Linear", 367 | initialize = function(input_dim, output_dim) { 368 | super$initialize() 369 | self$kernel <- self$add_weight(shape = shape(input_dim, output_dim)) 370 | }, 371 | call = function(inputs) { 372 | op_matmul(inputs, self$kernel) 373 | } 374 | ) 375 | 376 | 377 | layer_lora_linear <- new_layer_class( 378 | classname = "LoraLinear", 379 | 380 | initialize = function(input_dim, output_dim, rank) { 381 | super$initialize() 382 | self$kernel <- self$add_weight(shape(input_dim, output_dim), 383 | trainable = FALSE) 384 | self$alpha <- self$add_weight(shape(input_dim, rank)) 385 | self$beta <- self$add_weight(shape(rank, output_dim)) 386 | }, 387 | 388 | call = function(inputs) { 389 | frozen <- inputs |> op_matmul(self$kernel) 390 | update <- inputs |> op_matmul(self$alpha) |> op_matmul(self$beta) 391 | frozen + update 392 | } 393 | ) 394 | 395 | 396 | rank <- 2L 397 | gemma_lm$backbone$trainable <- FALSE # <1> 398 | 399 | for (i in seq_len(gemma_lm$backbone$num_layers) - 1) { # <3> 400 | layer <- get_layer(gemma_lm$backbone, sprintf("decoder_block_%d", i)) # <2> 401 | 402 | layer$attention$key_dense$trainable <- TRUE 403 | layer$attention$key_dense$enable_lora(rank = rank) 404 | 405 | layer$attention$query_dense$trainable <- TRUE 406 | layer$attention$query_dense$enable_lora(rank = rank) 407 | } 408 | 409 | 410 | gemma_lm 411 | 412 | 413 | gemma_lm |> compile( 414 | loss = loss_sparse_categorical_crossentropy(from_logits = TRUE), 415 | optimizer = optimizer_adam(5e-5), 416 | weighted_metrics = metric_sparse_categorical_accuracy() 417 | ) 418 | 419 | 420 | rm(list = ls()); gc(); reticulate::import("gc")$collect() 421 | 422 | 423 | image_url <- paste0("https://github.com/mattdangerw/keras-nlp-scripts/", 424 | "blob/main/learned-python.png?raw=true") 425 | image_path <- get_file(origin = image_url) 426 | 427 | image <- image_path |> image_load() |> image_to_array() 428 | par(mar = c(0, 0, 0, 0)) 429 | plot(as.raster(image, max = 255L)) 430 | 431 | 432 | library(keras3) 433 | config_set_dtype_policy("float16") 434 | keras_hub <- import("keras_hub") 435 | pali_gemma_lm <- keras_hub$models$PaliGemmaCausalLM$from_preset( 436 | "pali_gemma_3b_mix_448" 437 | ) 438 | 439 | 440 | pali_gemma_lm 441 | 442 | 443 | pali_gemma_lm$generate(list( 444 | images = image, 445 | prompts = "cap en\n" 446 | )) 447 | pali_gemma_lm$generate(list( 448 | images = image, 449 | prompts = "answer en where is the snake doing?\n" 450 | )) 451 | pali_gemma_lm$generate(list( 452 | images = image, 453 | prompts = "detect glasses\n" 454 | )) 455 | 456 | 457 | library(stringr) 458 | 459 | response <- " glasses" 460 | box <- as.numeric(unlist(str_extract_all(response, "\\d+"))) 461 | 462 | .[height, ..] 
<- dim(image) 463 | box <- box * height / 1024 464 | 465 | .[ytop, xleft, ybottom, xright] <- box 466 | ytop <- height - ytop 467 | ybottom <- height - ybottom 468 | 469 | par(mar = c(0,0,0,0)) 470 | plot(as.raster(image, max = 255)) 471 | rect(xleft, ybottom, xright, ytop, 472 | border = "red", lwd = 4) 473 | 474 | -------------------------------------------------------------------------------- /2e/ch05.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | tensorflow::as_tensor(1) 3 | library(keras3) 4 | 5 | 6 | 7 | ## ------------------------------------------------------------------------- 8 | library(keras3) 9 | 10 | mnist <- dataset_mnist() 11 | train_labels <- mnist$train$y 12 | train_images <- array_reshape(mnist$train$x / 255, 13 | c(60000, 28 * 28)) 14 | 15 | random_array <- function(dim) array(runif(prod(dim)), dim) 16 | 17 | noise_channels <- random_array(dim(train_images)) 18 | train_images_with_noise_channels <- cbind(train_images, noise_channels) 19 | 20 | zeros_channels <- array(0, dim(train_images)) 21 | train_images_with_zeros_channels <- cbind(train_images, zeros_channels) 22 | 23 | 24 | ## ------------------------------------------------------------------------- 25 | get_model <- function() { 26 | model <- keras_model_sequential() %>% 27 | layer_dense(512, activation = "relu") %>% 28 | layer_dense(10, activation = "softmax") 29 | 30 | model %>% compile( 31 | optimizer = "rmsprop", 32 | loss = "sparse_categorical_crossentropy", 33 | metrics = "accuracy") 34 | 35 | model 36 | } 37 | 38 | 39 | ## ------------------------------------------------------------------------- 40 | model <- get_model() 41 | history_noise <- model %>% fit( 42 | train_images_with_noise_channels, train_labels, 43 | epochs = 10, 44 | batch_size = 128, 45 | validation_split = 0.2) 46 | 47 | model <- get_model() 48 | history_zeros <- model %>% fit( 49 | train_images_with_zeros_channels, train_labels, 50 | epochs = 10, 51 | batch_size = 128, 52 | validation_split = 0.2) 53 | 54 | 55 | ## ------------------------------------------------------------------------- 56 | plot(NULL, 57 | main = "Effect of Noise Channels on Validation Accuracy", 58 | xlab = "Epochs", xlim = c(1, history_noise$params$epochs), 59 | ylab = "Validation Accuracy", ylim = c(0.9, 1), las = 1) 60 | lines(history_zeros$metrics$val_accuracy, lty = 1, type = "o") 61 | lines(history_noise$metrics$val_accuracy, lty = 2, type = "o") 62 | legend("bottomright", lty = 1:2, 63 | legend = c("Validation accuracy with zeros channels", 64 | "Validation accuracy with noise channels")) 65 | 66 | 67 | ## ------------------------------------------------------------------------- 68 | c(c(train_images, train_labels), .) 
%<-% dataset_mnist() 69 | 70 | train_images <- array_reshape(train_images / 255, 71 | c(60000, 28 * 28)) 72 | 73 | random_train_labels <- sample(train_labels) 74 | 75 | model <- keras_model_sequential() %>% 76 | layer_dense(512, activation = "relu") %>% 77 | layer_dense(10, activation = "softmax") 78 | 79 | model %>% compile(optimizer = "rmsprop", 80 | loss = "sparse_categorical_crossentropy", 81 | metrics = "accuracy") 82 | 83 | 84 | ## ------------------------------------------------------------------------- 85 | history <- model %>% fit(train_images, random_train_labels, 86 | epochs = 100, 87 | batch_size = 128, 88 | validation_split = 0.2) 89 | 90 | ## ------------------------------------------------------------------------- 91 | plot(history) 92 | 93 | 94 | ## ---- eval = FALSE-------------------------------------------------------- 95 | ## num_validation_samples <- 10000 96 | ## val_indices <- sample.int(num_validation_samples, nrow(data)) 97 | ## validation_data <- data[val_indices, ] 98 | ## training_data <- data[-val_indices, ] 99 | ## model <- get_model() 100 | ## fit(model, training_data, ...) 101 | ## validation_score <- evaluate(model, validation_data, ...) 102 | ## 103 | ## ... 104 | ## 105 | ## model <- get_model() 106 | ## fit(model, data, ...) 107 | ## test_score <- evaluate(model, test_data, ...) 108 | 109 | 110 | ## ---- eval = FALSE-------------------------------------------------------- 111 | ## k <- 3 112 | ## fold_id <- sample(rep(1:k, length.out = nrow(data))) 113 | ## validation_scores <- numeric() 114 | ## 115 | ## for (fold in seq_len(k)) { 116 | ## validation_idx <- which(fold_id == fold) 117 | ## 118 | ## validation_data <- data[validation_idx, ] 119 | ## training_data <- data[-validation_idx, ] 120 | ## model <- get_model() 121 | ## fit(model, training_data, ...) 122 | ## validation_score <- evaluate(model, validation_data, ...) 123 | ## validation_scores[[fold]] <- validation_score 124 | ## } 125 | ## 126 | ## validation_score <- mean(validation_scores) 127 | ## model <- get_model() 128 | ## fit(model, data, ...) 129 | ## test_score <- evaluate(model, test_data, ...) 130 | 131 | 132 | ## ------------------------------------------------------------------------- 133 | c(c(train_images, train_labels), .) 
%<-% dataset_mnist() 134 | train_images <- array_reshape(train_images / 255, 135 | c(60000, 28 * 28)) 136 | 137 | model <- keras_model_sequential() %>% 138 | layer_dense(units = 512, activation = "relu") %>% 139 | layer_dense(units = 10, activation = "softmax") 140 | 141 | model %>% compile(optimizer = optimizer_rmsprop(1), 142 | loss = "sparse_categorical_crossentropy", 143 | metrics = "accuracy") 144 | 145 | 146 | ## ------------------------------------------------------------------------- 147 | history <- model %>% fit(train_images, train_labels, 148 | epochs = 10, batch_size = 128, 149 | validation_split = 0.2) 150 | 151 | 152 | ## ------------------------------------------------------------------------- 153 | plot(history) 154 | 155 | 156 | ## ------------------------------------------------------------------------- 157 | model <- keras_model_sequential() %>% 158 | layer_dense(units = 512, activation = "relu") %>% 159 | layer_dense(units = 10, activation = "softmax") 160 | 161 | model %>% compile(optimizer = optimizer_rmsprop(1e-2), 162 | loss = "sparse_categorical_crossentropy", 163 | metrics = "accuracy") 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | model %>% 168 | fit(train_images, train_labels, 169 | epochs = 10, batch_size = 128, 170 | validation_split = 0.2) -> 171 | history 172 | 173 | 174 | ## ------------------------------------------------------------------------- 175 | plot(history) 176 | 177 | 178 | ## ------------------------------------------------------------------------- 179 | model <- keras_model_sequential() %>% 180 | layer_dense(10, activation = "softmax") 181 | 182 | model %>% compile(optimizer = "rmsprop", 183 | loss = "sparse_categorical_crossentropy", 184 | metrics = "accuracy") 185 | 186 | history_small_model <- model %>% 187 | fit(train_images, train_labels, 188 | epochs = 20, 189 | batch_size = 128, 190 | validation_split = 0.2) 191 | 192 | 193 | ## ------------------------------------------------------------------------- 194 | plot(history_small_model$metrics$val_loss, type = 'o', 195 | main = "Effect of Insufficient Model Capacity on Validation Loss", 196 | xlab = "Epochs", ylab = "Validation Loss") 197 | 198 | 199 | ## ------------------------------------------------------------------------- 200 | model <- keras_model_sequential() %>% 201 | layer_dense(96, activation="relu") %>% 202 | layer_dense(96, activation="relu") %>% 203 | layer_dense(10, activation="softmax") 204 | 205 | model %>% compile(optimizer="rmsprop", 206 | loss="sparse_categorical_crossentropy", 207 | metrics="accuracy") 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | history_large_model <- model %>% 212 | fit(train_images, train_labels, 213 | epochs = 20, 214 | batch_size = 128, 215 | validation_split = 0.2) 216 | 217 | 218 | ## ------------------------------------------------------------------------- 219 | plot(history_large_model$metrics$val_loss, type = 'o', 220 | main = "Validation Loss for a Model with Appropriate Capacity", 221 | xlab = "Epochs", ylab = "Validation Loss") 222 | 223 | 224 | ## ------------------------------------------------------------------------- 225 | c(c(train_data, train_labels), .) 
%<-% dataset_imdb(num_words = 10000) 226 | 227 | vectorize_sequences <- function(sequences, dimension=10000) { 228 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 229 | for(i in seq_along(sequences)) 230 | results[i, sequences[[i]]] <- 1 231 | results 232 | } 233 | 234 | train_data <- vectorize_sequences(train_data) 235 | 236 | model <- keras_model_sequential() %>% 237 | layer_dense(16, activation="relu") %>% 238 | layer_dense(16, activation="relu") %>% 239 | layer_dense(1, activation="sigmoid") 240 | 241 | model %>% compile(optimizer="rmsprop", 242 | loss="binary_crossentropy", 243 | metrics="accuracy") 244 | 245 | 246 | ## ------------------------------------------------------------------------- 247 | history_original <- model %>% 248 | fit(train_data, train_labels, 249 | epochs = 20, batch_size = 512, validation_split = 0.4) 250 | 251 | 252 | ## ------------------------------------------------------------------------- 253 | model <- keras_model_sequential() %>% 254 | layer_dense(4, activation = "relu") %>% 255 | layer_dense(4, activation = "relu") %>% 256 | layer_dense(1, activation = "sigmoid") 257 | 258 | model %>% compile(optimizer = "rmsprop", 259 | loss = "binary_crossentropy", 260 | metrics = "accuracy") 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | history_smaller_model <- model %>% 265 | fit(train_data, train_labels, 266 | epochs = 20, batch_size = 512, validation_split = 0.4) 267 | 268 | 269 | ## ------------------------------------------------------------------------- 270 | plot( 271 | NULL, 272 | main = "Original Model vs. Smaller Model on IMDB Review Classification", 273 | xlab = "Epochs", 274 | xlim = c(1, history_original$params$epochs), 275 | ylab = "Validation Loss", 276 | ylim = extendrange(history_original$metrics$val_loss), 277 | panel.first = abline(v = 1:history_original$params$epochs, 278 | lty = "dotted", col = "lightgrey") 279 | ) 280 | 281 | lines(history_original $metrics$val_loss, lty = 2) 282 | lines(history_smaller_model$metrics$val_loss, lty = 1) 283 | legend("topleft", lty = 2:1, 284 | legend = c("Validation loss of original model", 285 | "Validation loss of smaller model")) 286 | 287 | 288 | ## ------------------------------------------------------------------------- 289 | model <- keras_model_sequential() %>% 290 | layer_dense(512, activation="relu") %>% 291 | layer_dense(512, activation="relu") %>% 292 | layer_dense(1, activation="sigmoid") 293 | 294 | model %>% compile(optimizer="rmsprop", 295 | loss="binary_crossentropy", 296 | metrics="accuracy") 297 | 298 | 299 | ## ------------------------------------------------------------------------- 300 | history_larger_model <- model %>% 301 | fit(train_data, train_labels, 302 | epochs = 20, batch_size = 512, validation_split = 0.4) 303 | 304 | 305 | ## ------------------------------------------------------------------------- 306 | plot( 307 | NULL, 308 | main = 309 | "Original Model vs. 
Much Larger Model on IMDB Review Classification", 310 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 311 | ylab = "Validation Loss", 312 | ylim = range(c(history_original$metrics$val_loss, 313 | history_larger_model$metrics$val_loss)), 314 | panel.first = abline(v = 1:history_original$params$epochs, 315 | lty = "dotted", col = "lightgrey") 316 | ) 317 | lines(history_original $metrics$val_loss, lty = 2) 318 | lines(history_larger_model$metrics$val_loss, lty = 1) 319 | legend("topleft", lty = 2:1, 320 | legend = c("Validation loss of original model", 321 | "Validation loss of larger model")) 322 | 323 | 324 | ## ------------------------------------------------------------------------- 325 | model <- keras_model_sequential() %>% 326 | layer_dense(16, activation = "relu", 327 | kernel_regularizer = regularizer_l2(0.002)) %>% 328 | layer_dense(16, activation = "relu", 329 | kernel_regularizer = regularizer_l2(0.002)) %>% 330 | layer_dense(1, activation = "sigmoid") 331 | 332 | model %>% compile(optimizer="rmsprop", 333 | loss="binary_crossentropy", 334 | metrics="accuracy") 335 | 336 | 337 | ## ------------------------------------------------------------------------- 338 | history_l2_reg <- model %>% fit( 339 | train_data, train_labels, 340 | epochs = 20, batch_size = 512, validation_split = 0.4) 341 | 342 | ## ------------------------------------------------------------------------- 343 | plot(history_l2_reg) 344 | 345 | 346 | ## ------------------------------------------------------------------------- 347 | plot(NULL, 348 | main = "Effect of L2 Weight Regularization on Validation Loss", 349 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 350 | ylab = "Validation Loss", 351 | ylim = range(c(history_original$metrics$val_loss, 352 | history_l2_reg $metrics$val_loss)), 353 | panel.first = abline(v = 1:history_original$params$epochs, 354 | lty = "dotted", col = "lightgrey")) 355 | lines(history_original$metrics$val_loss, lty = 2) 356 | lines(history_l2_reg $metrics$val_loss, lty = 1) 357 | legend("topleft", lty = 2:1, 358 | legend = c("Validation loss of original model", 359 | "Validation loss of L2-regularized model")) 360 | 361 | 362 | ## ------------------------------------------------------------------------- 363 | regularizer_l1(0.001) 364 | regularizer_l1_l2(l1 = 0.001, l2 = 0.001) 365 | 366 | 367 | ## ---- eval = FALSE-------------------------------------------------------- 368 | ## zero_out <- random_array(dim(layer_output)) < .5 369 | ## layer_output[zero_out] <- 0 370 | 371 | 372 | ## ---- eval = FALSE-------------------------------------------------------- 373 | ## layer_output <- layer_output * .5 374 | 375 | 376 | ## ---- eval = FALSE-------------------------------------------------------- 377 | ## layer_output[random_array(dim(layer_output)) < dropout_rate] <- 0 378 | ## layer_output <- layer_output / .5 379 | 380 | 381 | ## ------------------------------------------------------------------------- 382 | model <- keras_model_sequential() %>% 383 | layer_dense(16, activation = "relu") %>% 384 | layer_dropout(0.5) %>% 385 | layer_dense(16, activation = "relu") %>% 386 | layer_dropout(0.5) %>% 387 | layer_dense(1, activation = "sigmoid") 388 | 389 | model %>% compile(optimizer = "rmsprop", 390 | loss = "binary_crossentropy", 391 | metrics = "accuracy") 392 | 393 | 394 | ## ------------------------------------------------------------------------- 395 | history_dropout <- model %>% fit( 396 | train_data, train_labels, 397 | epochs = 20, batch_size = 512, 
398 | validation_split = 0.4 399 | ) 400 | 401 | 402 | ## ------------------------------------------------------------------------- 403 | plot(history_dropout) 404 | 405 | 406 | ## ------------------------------------------------------------------------- 407 | plot(NULL, 408 | main = "Effect of Dropout on Validation Loss", 409 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 410 | ylab = "Validation Loss", 411 | ylim = range(c(history_original$metrics$val_loss, 412 | history_dropout $metrics$val_loss)), 413 | panel.first = abline(v = 1:history_original$params$epochs, 414 | lty = "dotted", col = "lightgrey")) 415 | lines(history_original$metrics$val_loss, lty = 2) 416 | lines(history_dropout $metrics$val_loss, lty = 1) 417 | legend("topleft", lty = 1:2, 418 | legend = c("Validation loss of dropout-regularized model", 419 | "Validation loss of original model")) 420 | -------------------------------------------------------------------------------- /2e/ch03.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tf_function(function(x) x+1)(as_tensor(1)) 5 | 6 | 7 | ## ---- eval = FALSE-------------------------------------------------------- 8 | ## install.packages("keras") 9 | ## 10 | ## library(reticulate) 11 | ## virtualenv_create("r-reticulate", python = install_python()) 12 | ## 13 | ## library(keras3) 14 | ## install_keras(envname = "r-reticulate") 15 | 16 | 17 | ## ------------------------------------------------------------------------- 18 | tensorflow::tf_config() 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | r_array <- array(1:6, c(2, 3)) 23 | tf_tensor <- as_tensor(r_array) 24 | tf_tensor 25 | 26 | 27 | ## ------------------------------------------------------------------------- 28 | dim(tf_tensor) 29 | tf_tensor + tf_tensor 30 | 31 | 32 | ## ------------------------------------------------------------------------- 33 | methods(class = "tensorflow.tensor") 34 | 35 | 36 | ## ------------------------------------------------------------------------- 37 | tf_tensor$ndim 38 | 39 | 40 | ## ------------------------------------------------------------------------- 41 | as_tensor(1)$ndim 42 | as_tensor(1:2)$ndim 43 | 44 | 45 | ## ------------------------------------------------------------------------- 46 | tf_tensor$shape 47 | 48 | 49 | ## ------------------------------------------------------------------------- 50 | methods(class = class(shape())[1]) 51 | 52 | 53 | ## ------------------------------------------------------------------------- 54 | shape(2, 3) 55 | 56 | 57 | ## ------------------------------------------------------------------------- 58 | tf_tensor$dtype 59 | 60 | 61 | ## ------------------------------------------------------------------------- 62 | r_array <- array(1) 63 | typeof(r_array) 64 | as_tensor(r_array)$dtype 65 | 66 | 67 | ## ------------------------------------------------------------------------- 68 | as_tensor(r_array, dtype = "float32") 69 | 70 | 71 | ## ------------------------------------------------------------------------- 72 | as_tensor(0, shape = c(2, 3)) 73 | 74 | 75 | ## ------------------------------------------------------------------------- 76 | as_tensor(1:6, shape = c(2, 3)) 77 | 78 | 79 | ## ------------------------------------------------------------------------- 80 | array(1:6, dim = c(2, 3)) 81 | 82 | 83 | ## 
------------------------------------------------------------------------- 84 | array_reshape(1:6, c(2, 3), order = "C") 85 | array_reshape(1:6, c(2, 3), order = "F") 86 | 87 | 88 | ## ------------------------------------------------------------------------- 89 | array_reshape(1:6, c(-1, 3)) 90 | as_tensor(1:6, shape = c(NA, 3)) 91 | 92 | 93 | ## ------------------------------------------------------------------------- 94 | train_images <- as_tensor(dataset_mnist()$train$x) 95 | my_slice <- train_images[, 15:NA, 15:NA] 96 | 97 | 98 | ## ------------------------------------------------------------------------- 99 | my_slice <- train_images[, 8:-8, 8:-8] 100 | 101 | 102 | ## ------------------------------------------------------------------------- 103 | my_slice <- train_images[1:100, all_dims()] 104 | 105 | 106 | ## ------------------------------------------------------------------------- 107 | my_slice <- train_images[1:100, , ] 108 | 109 | 110 | ## ------------------------------------------------------------------------- 111 | x <- as_tensor(1, shape = c(64, 3, 32, 10)) 112 | y <- as_tensor(2, shape = c(32, 10)) 113 | z <- x + y 114 | 115 | 116 | ## ------------------------------------------------------------------------- 117 | z <- x + y[tf$newaxis, tf$newaxis, , ] 118 | 119 | 120 | ## ------------------------------------------------------------------------- 121 | library(tensorflow) 122 | tf$ones(shape(1, 3)) 123 | tf$zeros(shape(1, 3)) 124 | tf$random$normal(shape(1, 3), mean = 0, stddev = 1) 125 | tf$random$uniform(shape(1, 3)) 126 | 127 | 128 | ## ---- error = TRUE-------------------------------------------------------- 129 | tf$ones(c(2, 1)) 130 | 131 | 132 | ## ------------------------------------------------------------------------- 133 | tf$ones(c(2L, 1L)) 134 | 135 | 136 | ## ------------------------------------------------------------------------- 137 | m <- as_tensor(1:12, shape = c(3, 4)) 138 | tf$reduce_mean(m, axis = 0L, keepdims = TRUE) 139 | 140 | 141 | ## ------------------------------------------------------------------------- 142 | mean(m, axis = 1, keepdims = TRUE) 143 | 144 | 145 | ## ------------------------------------------------------------------------- 146 | x <- array(1, dim = c(2, 2)) 147 | x[1, 1] <- 0 148 | 149 | 150 | ## ---- error = TRUE-------------------------------------------------------- 151 | x <- as_tensor(1, shape = c(2, 2)) 152 | x[1, 1] <- 0 153 | 154 | 155 | ## ------------------------------------------------------------------------- 156 | v <- tf$Variable(initial_value = tf$random$normal(shape(3, 1))) 157 | v 158 | 159 | 160 | ## ------------------------------------------------------------------------- 161 | v$assign(tf$ones(shape(3, 1))) 162 | 163 | 164 | ## ------------------------------------------------------------------------- 165 | v[1, 1]$assign(3) 166 | 167 | 168 | ## ------------------------------------------------------------------------- 169 | v$assign_add(tf$ones(shape(3, 1))) 170 | 171 | 172 | ## ------------------------------------------------------------------------- 173 | a <- tf$ones(c(2L, 2L)) 174 | b <- tf$square(a) 175 | c <- tf$sqrt(a) 176 | d <- b + c 177 | e <- tf$matmul(a, b) 178 | e <- e * d 179 | 180 | 181 | ## ------------------------------------------------------------------------- 182 | input_var <- tf$Variable(initial_value = 3) 183 | with(tf$GradientTape() %as% tape, { 184 | result <- tf$square(input_var) 185 | }) 186 | gradient <- tape$gradient(result, input_var) 187 | 188 | 189 | ## 
------------------------------------------------------------------------- 190 | input_const <- as_tensor(3) 191 | with(tf$GradientTape() %as% tape, { 192 | tape$watch(input_const) 193 | result = tf$square(input_const) 194 | }) 195 | gradient <- tape$gradient(result, input_const) 196 | 197 | 198 | ## ------------------------------------------------------------------------- 199 | time <- tf$Variable(0) 200 | with(tf$GradientTape() %as% outer_tape, { 201 | with(tf$GradientTape() %as% inner_tape, { 202 | position <- 4.9 * time ^ 2 203 | }) 204 | speed <- inner_tape$gradient(position, time) 205 | }) 206 | acceleration <- outer_tape$gradient(speed, time) 207 | acceleration 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | num_samples_per_class <- 1000 212 | Sigma <- rbind(c(1, 0.5), 213 | c(0.5, 1)) 214 | negative_samples <- MASS::mvrnorm(n = num_samples_per_class, 215 | mu = c(0, 3), 216 | Sigma = Sigma) 217 | positive_samples <- MASS::mvrnorm(n = num_samples_per_class, 218 | mu = c(3, 0), 219 | Sigma = Sigma) 220 | 221 | 222 | ## ------------------------------------------------------------------------- 223 | inputs <- rbind(negative_samples, positive_samples) 224 | 225 | 226 | ## ------------------------------------------------------------------------- 227 | targets <- rbind(array(0, dim = c(num_samples_per_class, 1)), 228 | array(1, dim = c(num_samples_per_class, 1))) 229 | 230 | 231 | ## ------------------------------------------------------------------------- 232 | plot(x = inputs[, 1], y = inputs[, 2], 233 | col = ifelse(targets[,1] == 0, "purple", "green")) 234 | 235 | 236 | ## ------------------------------------------------------------------------- 237 | input_dim <- 2 238 | output_dim <- 1 239 | W <- tf$Variable(initial_value = 240 | tf$random$uniform(shape(input_dim, output_dim))) 241 | b <- tf$Variable(initial_value = tf$zeros(shape(output_dim))) 242 | 243 | 244 | ## ------------------------------------------------------------------------- 245 | model <- function(inputs) 246 | tf$matmul(inputs, W) + b 247 | 248 | 249 | ## ------------------------------------------------------------------------- 250 | square_loss <- function(targets, predictions) { 251 | per_sample_losses <- (targets - predictions)^2 252 | mean(per_sample_losses) 253 | } 254 | 255 | 256 | ## ------------------------------------------------------------------------- 257 | square_loss <- function(targets, predictions) { 258 | per_sample_losses <- tf$square(targets - predictions) 259 | tf$reduce_mean(per_sample_losses) 260 | } 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | learning_rate <- 0.1 265 | 266 | training_step <- function(inputs, targets) { 267 | with(tf$GradientTape() %as% tape, { 268 | predictions <- model(inputs) 269 | loss <- square_loss(predictions, targets) 270 | }) 271 | grad_loss_wrt <- tape$gradient(loss, list(W = W, b = b)) 272 | W$assign_sub(grad_loss_wrt$W * learning_rate) 273 | b$assign_sub(grad_loss_wrt$b * learning_rate) 274 | loss 275 | } 276 | 277 | 278 | ## ------------------------------------------------------------------------- 279 | inputs <- as_tensor(inputs, dtype = "float32") 280 | for (step in seq(40)) { 281 | loss <- training_step(inputs, targets) 282 | cat(sprintf("Loss at step %s: %.4f\n", step, loss)) 283 | } 284 | 285 | 286 | ## ------------------------------------------------------------------------- 287 | predictions <- model(inputs) 288 | 289 | inputs <- as.array(inputs) 290 | 
predictions <- as.array(predictions) 291 | plot(inputs[, 1], inputs[, 2], 292 | col = ifelse(predictions[, 1] <= 0.5, "purple", "green")) 293 | 294 | 295 | ## ------------------------------------------------------------------------- 296 | plot(x = inputs[, 1], y = inputs[, 2], 297 | col = ifelse(predictions[, 1] <= 0.5, "purple", "green")) 298 | 299 | slope <- -W[1, ] / W[2, ] 300 | intercept <- (0.5 - b) / W[2, ] 301 | abline(as.array(intercept), as.array(slope), col = "red") 302 | 303 | 304 | ## ------------------------------------------------------------------------- 305 | layer_simple_dense <- new_layer_class( 306 | classname = "SimpleDense", 307 | 308 | initialize = function(units, activation = NULL) { 309 | super$initialize() 310 | self$units <- as.integer(units) 311 | self$activation <- activation 312 | }, 313 | 314 | build = function(input_shape) { 315 | input_dim <- input_shape[length(input_shape)] 316 | self$W <- self$add_weight(shape = c(input_dim, self$units), 317 | initializer = "random_normal") 318 | self$b <- self$add_weight(shape = c(self$units), 319 | initializer = "zeros") 320 | }, 321 | 322 | call = function(inputs) { 323 | y <- tf$matmul(inputs, self$W) + self$b 324 | if (!is.null(self$activation)) 325 | y <- self$activation(y) 326 | y 327 | } 328 | ) 329 | 330 | 331 | ## ------------------------------------------------------------------------- 332 | my_dense <- layer_simple_dense(units = 32, activation = tf$nn$relu) 333 | input_tensor <- as_tensor(1, shape = c(2, 784)) 334 | output_tensor <- my_dense(input_tensor) 335 | output_tensor$shape 336 | 337 | 338 | ## ------------------------------------------------------------------------- 339 | layer <- layer_dense(units = 32, activation = "relu") 340 | 341 | 342 | ## ------------------------------------------------------------------------- 343 | model <- keras_model_sequential(list( 344 | layer_dense(units = 32, activation="relu"), 345 | layer_dense(units = 32) 346 | )) 347 | 348 | 349 | ## ---- eval = FALSE-------------------------------------------------------- 350 | ## model <- model_naive_sequential(list( 351 | ## layer_naive_dense(input_size = 784, output_size = 32, 352 | ## activation = "relu"), 353 | ## layer_naive_dense(input_size = 32, output_size = 64, 354 | ## activation = "relu"), 355 | ## layer_naive_dense(input_size = 64, output_size = 32, 356 | ## activation = "relu"), 357 | ## layer_naive_dense(input_size = 32, output_size = 10, 358 | ## activation = "softmax") 359 | ## )) 360 | 361 | 362 | ## ------------------------------------------------------------------------- 363 | layer <- function(inputs) { 364 | if(!self$built) { 365 | self$build(inputs$shape) 366 | self$built <- TRUE 367 | } 368 | self$call(inputs) 369 | } 370 | 371 | 372 | ## ------------------------------------------------------------------------- 373 | model <- keras_model_sequential(list( 374 | layer_simple_dense(units = 32, activation = "relu"), 375 | layer_simple_dense(units = 64, activation = "relu"), 376 | layer_simple_dense(units = 32, activation = "relu"), 377 | layer_simple_dense(units = 10, activation = "softmax") 378 | )) 379 | 380 | 381 | ## ------------------------------------------------------------------------- 382 | model <- keras_model_sequential() 383 | layer_simple_dense(model, 32, activation = "relu") 384 | layer_simple_dense(model, 64, activation = "relu") 385 | layer_simple_dense(model, 32, activation = "relu") 386 | layer_simple_dense(model, 10, activation = "softmax") 387 | 388 | 389 | ## 
------------------------------------------------------------------------- 390 | length(model$layers) 391 | 392 | 393 | ## ------------------------------------------------------------------------- 394 | model <- keras_model_sequential() %>% 395 | layer_simple_dense(32, activation = "relu") %>% 396 | layer_simple_dense(64, activation = "relu") %>% 397 | layer_simple_dense(32, activation = "relu") %>% 398 | layer_simple_dense(10, activation = "softmax") 399 | 400 | 401 | ## ------------------------------------------------------------------------- 402 | model <- keras_model_sequential() %>% layer_dense(1) 403 | model %>% compile(optimizer = "rmsprop", 404 | loss = "mean_squared_error", 405 | metrics = "accuracy") 406 | 407 | 408 | ## ------------------------------------------------------------------------- 409 | model %>% compile( 410 | optimizer = optimizer_rmsprop(), 411 | loss = loss_mean_squared_error(), 412 | metrics = metric_binary_accuracy() 413 | ) 414 | 415 | 416 | ## ---- eval = FALSE-------------------------------------------------------- 417 | ## model %>% compile( 418 | ## optimizer = optimizer_rmsprop(learning_rate = 1e-4), 419 | ## loss = my_custom_loss, 420 | ## metrics = c(my_custom_metric_1, my_custom_metric_2) 421 | ## ) 422 | 423 | 424 | ## ------------------------------------------------------------------------- 425 | ls(pattern = "^optimizer_", "package:keras") 426 | 427 | 428 | ## ------------------------------------------------------------------------- 429 | ls(pattern = "^loss_", "package:keras") 430 | 431 | 432 | ## ------------------------------------------------------------------------- 433 | ls(pattern = "^metric_", "package:keras") 434 | 435 | 436 | ## ------------------------------------------------------------------------- 437 | history <- model %>% 438 | fit(inputs, targets, 439 | epochs = 5, batch_size = 128) 440 | 441 | 442 | ## ------------------------------------------------------------------------- 443 | str(history$metrics) 444 | 445 | 446 | ## ------------------------------------------------------------------------- 447 | model <- keras_model_sequential() %>% 448 | layer_dense(1) 449 | 450 | model %>% compile(optimizer_rmsprop(learning_rate = 0.1), 451 | loss = loss_mean_squared_error(), 452 | metrics = metric_binary_accuracy()) 453 | 454 | n_cases <- dim(inputs)[1] 455 | num_validation_samples <- round(0.3 * n_cases) 456 | val_indices <- sample.int(n_cases, num_validation_samples) 457 | 458 | val_inputs <- inputs[val_indices, ] 459 | val_targets <- targets[val_indices, , drop = FALSE] 460 | training_inputs <- inputs[-val_indices, ] 461 | training_targets <- targets[-val_indices, , drop = FALSE] 462 | 463 | model %>% fit( 464 | training_inputs, 465 | training_targets, 466 | epochs = 5, 467 | batch_size = 16, 468 | validation_data = list(val_inputs, val_targets) 469 | ) 470 | 471 | 472 | ## ------------------------------------------------------------------------- 473 | loss_and_metrics <- evaluate(model, val_inputs, val_targets, 474 | batch_size = 128) 475 | 476 | 477 | ## ---- eval = FALSE-------------------------------------------------------- 478 | ## predictions <- model(new_inputs) 479 | 480 | 481 | ## ---- eval = FALSE-------------------------------------------------------- 482 | ## predictions <- model %>% 483 | ## predict(new_inputs, batch_size=128) 484 | 485 | 486 | ## ------------------------------------------------------------------------- 487 | predictions <- model %>% 488 | predict(val_inputs, batch_size=128) 489 | head(predictions, 10) 490 | 
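## A minimal sketch, not from the book, of what the `my_custom_loss` and
## `my_custom_metric_1` placeholders passed to compile() above could look
## like: a custom loss can be a plain R function of (y_true, y_pred) that
## returns per-sample losses, and a custom metric can be wrapped with
## custom_metric(). Names and the choice of MSE/MAE here are illustrative.
my_custom_loss <- function(y_true, y_pred) {
  tf$reduce_mean(tf$square(y_true - y_pred), axis = -1L)  # per-sample MSE
}

my_custom_metric_1 <- custom_metric("mean_absolute_error", function(y_true, y_pred) {
  tf$reduce_mean(tf$abs(y_true - y_pred))
})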
-------------------------------------------------------------------------------- /2e/ch08.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | library(keras3) 3 | tensorflow::tf_function(function(x) x + 1)(1) 4 | 5 | 6 | ## ------------------------------------------------------------------------- 7 | inputs <- layer_input(shape = c(28, 28, 1)) 8 | 9 | outputs <- inputs %>% 10 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 11 | layer_max_pooling_2d(pool_size = 2) %>% 12 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 13 | layer_max_pooling_2d(pool_size = 2) %>% 14 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 15 | layer_flatten() %>% 16 | layer_dense(10, activation = "softmax") 17 | 18 | model <- keras_model(inputs, outputs) 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | model 23 | 24 | 25 | ## ------------------------------------------------------------------------- 26 | c(c(train_images, train_labels), c(test_images, test_labels)) %<-% 27 | dataset_mnist() 28 | train_images <- array_reshape(train_images, c(60000, 28, 28, 1)) / 255 29 | test_images <- array_reshape(test_images, c(10000, 28, 28, 1)) / 255 30 | 31 | model %>% compile(optimizer = "rmsprop", 32 | loss = "sparse_categorical_crossentropy", 33 | metrics = c("accuracy")) 34 | model %>% fit(train_images, train_labels, epochs = 5, batch_size = 64) 35 | 36 | 37 | ## ------------------------------------------------------------------------- 38 | result <- evaluate(model, test_images, test_labels) 39 | cat("Test accuracy:", result$accuracy, "\n") 40 | 41 | 42 | ## ------------------------------------------------------------------------- 43 | inputs <- layer_input(shape = c(28, 28, 1)) 44 | outputs <- inputs %>% 45 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 46 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 47 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 48 | layer_flatten() %>% 49 | layer_dense(10, activation = "softmax") 50 | model_no_max_pool <- keras_model(inputs = inputs, outputs = outputs) 51 | 52 | 53 | ## ------------------------------------------------------------------------- 54 | model_no_max_pool 55 | 56 | 57 | ## ---- eval = FALSE-------------------------------------------------------- 58 | ## library(fs) 59 | ## dir_create("~/.kaggle") 60 | ## file_move("~/Downloads/kaggle.json", "~/.kaggle/") 61 | ## file_chmod("~/.kaggle/kaggle.json", "0600") 62 | 63 | 64 | ## ---- eval = FALSE-------------------------------------------------------- 65 | ## reticulate::py_install("kaggle", pip = TRUE) 66 | 67 | 68 | ## ---- eval = FALSE-------------------------------------------------------- 69 | ## system('kaggle competitions download -c dogs-vs-cats') 70 | 71 | 72 | ## ---- include = FALSE----------------------------------------------------- 73 | # just to make the following chunks reproducible 74 | unlink("dogs-vs-cats", recursive = TRUE) 75 | # note bene, we use the {zip} package instead of base::unzip 76 | # because the latter raises an error on this file. 
77 | 78 | 79 | ## ------------------------------------------------------------------------- 80 | zip::unzip('dogs-vs-cats.zip', exdir = "dogs-vs-cats", files = "train.zip") 81 | zip::unzip("dogs-vs-cats/train.zip", exdir = "dogs-vs-cats") 82 | 83 | 84 | ## ---- include = FALSE----------------------------------------------------- 85 | unlink("cats_vs_dogs_small", recursive = TRUE) 86 | library(fs) 87 | 88 | 89 | ## ------------------------------------------------------------------------- 90 | library(fs) 91 | original_dir <- path("dogs-vs-cats/train") 92 | new_base_dir <- path("cats_vs_dogs_small") 93 | 94 | make_subset <- function(subset_name, start_index, end_index) { 95 | for (category in c("dog", "cat")) { 96 | file_name <- glue::glue("{category}.{ start_index:end_index }.jpg") 97 | dir_create(new_base_dir / subset_name / category) 98 | file_copy(original_dir / file_name, 99 | new_base_dir / subset_name / category / file_name) 100 | } 101 | } 102 | 103 | make_subset("train", start_index = 1, end_index = 1000) 104 | make_subset("validation", start_index = 1001, end_index = 1500) 105 | make_subset("test", start_index = 1501, end_index = 2500) 106 | 107 | 108 | ## ------------------------------------------------------------------------- 109 | inputs <- layer_input(shape = c(180, 180, 3)) 110 | outputs <- inputs %>% 111 | layer_rescaling(1 / 255) %>% 112 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 113 | layer_max_pooling_2d(pool_size = 2) %>% 114 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 115 | layer_max_pooling_2d(pool_size = 2) %>% 116 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 117 | layer_max_pooling_2d(pool_size = 2) %>% 118 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 119 | layer_max_pooling_2d(pool_size = 2) %>% 120 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 121 | layer_flatten() %>% 122 | layer_dense(1, activation = "sigmoid") 123 | model <- keras_model(inputs, outputs) 124 | 125 | 126 | ## ------------------------------------------------------------------------- 127 | model 128 | 129 | 130 | ## ------------------------------------------------------------------------- 131 | model %>% compile(loss = "binary_crossentropy", 132 | optimizer = "rmsprop", 133 | metrics = "accuracy") 134 | 135 | 136 | ## ------------------------------------------------------------------------- 137 | train_dataset <- 138 | image_dataset_from_directory(new_base_dir / "train", 139 | image_size = c(180, 180), 140 | batch_size = 32) 141 | validation_dataset <- 142 | image_dataset_from_directory(new_base_dir / "validation", 143 | image_size = c(180, 180), 144 | batch_size = 32) 145 | test_dataset <- 146 | image_dataset_from_directory(new_base_dir / "test", 147 | image_size = c(180, 180), 148 | batch_size = 32) 149 | 150 | 151 | ## ------------------------------------------------------------------------- 152 | library(tfdatasets, exclude = "shape") 153 | example_array <- array(seq(100*6), c(100, 6)) 154 | head(example_array) 155 | dataset <- tensor_slices_dataset(example_array) 156 | 157 | 158 | ## ------------------------------------------------------------------------- 159 | dataset_iterator <- as_iterator(dataset) 160 | for(i in 1:3) { 161 | element <- iter_next(dataset_iterator) 162 | print(element) 163 | } 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | dataset_array_iterator <- as_array_iterator(dataset) 168 | 
for(i in 1:3) { 169 | element <- iter_next(dataset_array_iterator) 170 | str(element) 171 | } 172 | 173 | 174 | ## ------------------------------------------------------------------------- 175 | batched_dataset <- dataset %>% 176 | dataset_batch(3) 177 | batched_dataset_iterator <- as_iterator(batched_dataset) 178 | for(i in 1:3) { 179 | element <- iter_next(batched_dataset_iterator) 180 | print(element) 181 | } 182 | 183 | 184 | ## ------------------------------------------------------------------------- 185 | reshaped_dataset <- dataset %>% 186 | dataset_map(function(element) tf$reshape(element, shape(2, 3))) 187 | 188 | reshaped_dataset_iterator <- as_iterator(reshaped_dataset) 189 | for(i in 1:3) { 190 | element <- iter_next(reshaped_dataset_iterator) 191 | print(element) 192 | } 193 | 194 | 195 | ## ------------------------------------------------------------------------- 196 | c(data_batch, labels_batch) %<-% iter_next(as_iterator(train_dataset)) 197 | data_batch$shape 198 | labels_batch$shape 199 | 200 | 201 | ## ------------------------------------------------------------------------- 202 | callbacks <- list( 203 | callback_model_checkpoint( 204 | filepath = "convnet_from_scratch.keras", 205 | save_best_only = TRUE, 206 | monitor = "val_loss" 207 | ) 208 | ) 209 | 210 | history <- model %>% 211 | fit( 212 | train_dataset, 213 | epochs = 30, 214 | validation_data = validation_dataset, 215 | callbacks = callbacks 216 | ) 217 | 218 | 219 | ## ------------------------------------------------------------------------- 220 | plot(history) 221 | 222 | 223 | ## ------------------------------------------------------------------------- 224 | test_model <- load_model("convnet_from_scratch.keras") 225 | result <- evaluate(test_model, test_dataset) 226 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 227 | 228 | 229 | ## ------------------------------------------------------------------------- 230 | data_augmentation <- keras_model_sequential() %>% 231 | layer_random_flip("horizontal") %>% 232 | layer_random_rotation(0.1) %>% 233 | layer_random_zoom(0.2) 234 | 235 | 236 | ## ------------------------------------------------------------------------- 237 | 238 | 239 | 240 | ## ------------------------------------------------------------------------- 241 | library(tfdatasets, exclude = "shape") 242 | batch <- train_dataset %>% 243 | as_iterator() %>% 244 | iter_next() 245 | 246 | c(images, labels) %<-% batch 247 | 248 | par(mfrow = c(3, 3), mar = rep(.5, 4)) 249 | 250 | image <- images[1, , , ] 251 | plot(as.raster(as.array(image), max = 255)) 252 | 253 | # plot augmented images 254 | for (i in 2:9) { 255 | augmented_images <- data_augmentation(images) 256 | augmented_image <- augmented_images[1, , , ] 257 | plot(as.raster(as.array(augmented_image), max = 255)) 258 | } 259 | 260 | 261 | ## ------------------------------------------------------------------------- 262 | inputs <- layer_input(shape = c(180, 180, 3)) 263 | outputs <- inputs %>% 264 | data_augmentation() %>% 265 | layer_rescaling(1 / 255) %>% 266 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 267 | layer_max_pooling_2d(pool_size = 2) %>% 268 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 269 | layer_max_pooling_2d(pool_size = 2) %>% 270 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 271 | layer_max_pooling_2d(pool_size = 2) %>% 272 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 273 | layer_max_pooling_2d(pool_size = 2) %>% 274 | 
layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 275 | layer_flatten() %>% 276 | layer_dropout(0.5) %>% 277 | layer_dense(1, activation = "sigmoid") 278 | 279 | model <- keras_model(inputs, outputs) 280 | 281 | model %>% compile(loss = "binary_crossentropy", 282 | optimizer = "rmsprop", 283 | metrics = "accuracy") 284 | 285 | 286 | ## ------------------------------------------------------------------------- 287 | callbacks <- list( 288 | callback_model_checkpoint( 289 | filepath = "convnet_from_scratch_with_augmentation.keras", 290 | save_best_only = TRUE, 291 | monitor = "val_loss" 292 | ) 293 | ) 294 | 295 | history <- model %>% fit( 296 | train_dataset, 297 | epochs = 100, 298 | validation_data = validation_dataset, 299 | callbacks = callbacks 300 | ) 301 | 302 | 303 | ## ------------------------------------------------------------------------- 304 | plot(history) 305 | 306 | 307 | ## ------------------------------------------------------------------------- 308 | test_model <- load_model("convnet_from_scratch_with_augmentation.keras") 309 | result <- evaluate(test_model, test_dataset) 310 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 311 | 312 | 313 | ## ------------------------------------------------------------------------- 314 | conv_base <- application_vgg16( 315 | weights = "imagenet", 316 | include_top = FALSE, 317 | input_shape = c(180, 180, 3) 318 | ) 319 | 320 | 321 | ## ------------------------------------------------------------------------- 322 | conv_base 323 | 324 | 325 | ## ------------------------------------------------------------------------- 326 | get_features_and_labels <- function(dataset) { 327 | n_batches <- length(dataset) 328 | all_features <- vector("list", n_batches) 329 | all_labels <- vector("list", n_batches) 330 | iterator <- as_array_iterator(dataset) 331 | for (i in 1:n_batches) { 332 | c(images, labels) %<-% iter_next(iterator) 333 | preprocessed_images <- imagenet_preprocess_input(images) 334 | features <- conv_base %>% predict(preprocessed_images) 335 | 336 | all_labels[[i]] <- labels 337 | all_features[[i]] <- features 338 | } 339 | 340 | all_features <- listarrays::bind_on_rows(all_features) 341 | all_labels <- listarrays::bind_on_rows(all_labels) 342 | 343 | list(all_features, all_labels) 344 | } 345 | 346 | c(train_features, train_labels) %<-% get_features_and_labels(train_dataset) 347 | c(val_features, val_labels) %<-% get_features_and_labels(validation_dataset) 348 | c(test_features, test_labels) %<-% get_features_and_labels(test_dataset) 349 | 350 | 351 | ## ------------------------------------------------------------------------- 352 | dim(train_features) 353 | 354 | 355 | ## ------------------------------------------------------------------------- 356 | inputs <- layer_input(shape = c(5, 5, 512)) 357 | outputs <- inputs %>% 358 | layer_flatten() %>% 359 | layer_dense(256) %>% 360 | layer_dropout(.5) %>% 361 | layer_dense(1, activation = "sigmoid") 362 | 363 | model <- keras_model(inputs, outputs) 364 | 365 | model %>% compile(loss = "binary_crossentropy", 366 | optimizer = "rmsprop", 367 | metrics = "accuracy") 368 | 369 | callbacks <- list( 370 | callback_model_checkpoint( 371 | filepath = "feature_extraction.keras", 372 | save_best_only = TRUE, 373 | monitor = "val_loss" 374 | ) 375 | ) 376 | 377 | history <- model %>% fit( 378 | train_features, train_labels, 379 | epochs = 20, 380 | validation_data = list(val_features, val_labels), 381 | callbacks = callbacks 382 | ) 383 | 384 | 385 | ## 
------------------------------------------------------------------------- 386 | plot(history) 387 | 388 | 389 | ## ------------------------------------------------------------------------- 390 | conv_base <- application_vgg16( 391 | weights = "imagenet", 392 | include_top = FALSE) 393 | freeze_weights(conv_base) 394 | 395 | 396 | ## ------------------------------------------------------------------------- 397 | unfreeze_weights(conv_base) 398 | cat("This is the number of trainable weights", 399 | "before freezing the conv base:", 400 | length(conv_base$trainable_weights), "\n") 401 | 402 | 403 | ## ------------------------------------------------------------------------- 404 | freeze_weights(conv_base) 405 | cat("This is the number of trainable weights", 406 | "after freezing the conv base:", 407 | length(conv_base$trainable_weights), "\n") 408 | 409 | 410 | ## ------------------------------------------------------------------------- 411 | data_augmentation <- keras_model_sequential() %>% 412 | layer_random_flip("horizontal") %>% 413 | layer_random_rotation(0.1) %>% 414 | layer_random_zoom(0.2) 415 | 416 | inputs <- layer_input(shape = c(180, 180, 3)) 417 | outputs <- inputs %>% 418 | data_augmentation() %>% 419 | imagenet_preprocess_input() %>% 420 | conv_base() %>% 421 | layer_flatten() %>% 422 | layer_dense(256) %>% 423 | layer_dropout(0.5) %>% 424 | layer_dense(1, activation = "sigmoid") 425 | model <- keras_model(inputs, outputs) 426 | model %>% compile(loss = "binary_crossentropy", 427 | optimizer = "rmsprop", 428 | metrics = "accuracy") 429 | 430 | 431 | ## ------------------------------------------------------------------------- 432 | callbacks <- list( 433 | callback_model_checkpoint( 434 | filepath = "feature_extraction_with_data_augmentation.keras", 435 | save_best_only = TRUE, 436 | monitor = "val_loss" 437 | ) 438 | ) 439 | 440 | history <- model %>% fit( 441 | train_dataset, 442 | epochs = 50, 443 | validation_data = validation_dataset, 444 | callbacks = callbacks 445 | ) 446 | 447 | 448 | ## ------------------------------------------------------------------------- 449 | test_model <- load_model( 450 | "feature_extraction_with_data_augmentation.keras") 451 | result <- evaluate(test_model, test_dataset) 452 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 453 | 454 | 455 | ## ------------------------------------------------------------------------- 456 | conv_base 457 | 458 | 459 | ## ------------------------------------------------------------------------- 460 | unfreeze_weights(conv_base, from = -4) 461 | conv_base 462 | 463 | 464 | ## ------------------------------------------------------------------------- 465 | model %>% compile( 466 | loss = "binary_crossentropy", 467 | optimizer = optimizer_rmsprop(learning_rate = 1e-5), 468 | metrics = "accuracy" 469 | ) 470 | 471 | callbacks <- list( 472 | callback_model_checkpoint( 473 | filepath = "fine_tuning.keras", 474 | save_best_only = TRUE, 475 | monitor = "val_loss" 476 | ) 477 | ) 478 | 479 | history <- model %>% fit( 480 | train_dataset, 481 | epochs = 30, 482 | validation_data = validation_dataset, 483 | callbacks = callbacks 484 | ) 485 | 486 | 487 | ## ------------------------------------------------------------------------- 488 | model <- load_model("fine_tuning.keras") 489 | result <- evaluate(model, test_dataset) 490 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 491 | -------------------------------------------------------------------------------- /chapter17_image-generation.R: 
-------------------------------------------------------------------------------- 1 | library(keras3) 2 | reticulate::py_require("keras-hub==0.18.1") 3 | 4 | 5 | latent_dim <- 2 # <1> 6 | 7 | encoder_inputs <- keras_input(shape = c(28, 28, 1)) 8 | x <- encoder_inputs |> 9 | layer_conv_2d(32, 3, activation = "relu", strides = 2, padding = "same") |> 10 | layer_conv_2d(64, 3, activation = "relu", strides = 2, padding = "same") |> 11 | layer_flatten() |> 12 | layer_dense(16, activation = "relu") 13 | z_mean <- x |> layer_dense(latent_dim, name="z_mean") # <2> 14 | z_log_var <- x |> layer_dense(latent_dim, name="z_log_var") # <2> 15 | encoder <- keras_model(encoder_inputs, list(z_mean, z_log_var), 16 | name="encoder") 17 | 18 | 19 | encoder 20 | 21 | 22 | layer_sampler <- new_layer_class( 23 | classname = "Sampler", 24 | initialize = function(...) { 25 | super$initialize(...) 26 | self$seed_generator <- random_seed_generator() # <1> 27 | self$built <- TRUE 28 | }, 29 | call = function(z_mean, z_log_var) { 30 | .[batch_size, z_size] <- op_shape(z_mean) 31 | epsilon <- random_normal(shape = op_shape(z_mean), # <2> 32 | seed = self$seed_generator) # <2> 33 | z_mean + (op_exp(0.5 * z_log_var) * epsilon) # <3> 34 | } 35 | ) 36 | 37 | 38 | latent_inputs <- keras_input(shape = c(latent_dim)) # <1> 39 | decoder_outputs <- latent_inputs |> 40 | layer_dense(7 * 7 * 64, activation = "relu") |> # <2> 41 | layer_reshape(c(7, 7, 64)) |> # <3> 42 | layer_conv_2d_transpose(64, 3, activation = "relu", # <4> 43 | strides = 2, padding = "same") |> # <4> 44 | layer_conv_2d_transpose(32, 3, activation = "relu", # <4> 45 | strides = 2, padding = "same") |> # <4> 46 | layer_conv_2d(1, 3, activation = "sigmoid", padding = "same") # <5> 47 | decoder <- keras_model(latent_inputs, decoder_outputs, 48 | name = "decoder") 49 | 50 | 51 | decoder 52 | 53 | 54 | model_vae <- new_model_class( 55 | classname = "VAE", 56 | 57 | initialize = function(encoder, decoder, ...) { 58 | super$initialize(...) 
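    # Assembled below: the convolutional encoder and decoder built above, a
    # Sampler layer that draws z from the predicted latent distribution, and
    # two metric_mean() trackers that report the reconstruction and KL loss
    # terms computed in compute_loss() during training.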
59 | self$encoder <- encoder 60 | self$decoder <- decoder 61 | self$sampler <- layer_sampler() 62 | self$reconstruction_loss_tracker <- # <1> 63 | metric_mean(name = "reconstruction_loss") # <1> 64 | self$kl_loss_tracker <- metric_mean(name = "kl_loss") # <1> 65 | }, 66 | 67 | call = function(inputs) { 68 | self$encoder(inputs) 69 | }, 70 | 71 | compute_loss = function(x, y, y_pred, sample_weight = NULL, training = TRUE) { 72 | original <- x # <2> 73 | .[z_mean, z_log_var] <- y_pred # <3> 74 | 75 | 76 | z <- self$sampler(z_mean, z_log_var) 77 | reconstruction <- self$decoder(z) # <4> 78 | 79 | reconstruction_loss <- # <5> 80 | loss_binary_crossentropy(original, reconstruction) |> # <5> 81 | op_sum(axis = c(2, 3)) |> # <5> 82 | op_mean() # <5> 83 | 84 | kl_loss <- -0.5 * ( # <6> 85 | 1 + z_log_var - op_square(z_mean) - op_exp(z_log_var) # <6> 86 | ) # <6> 87 | total_loss <- reconstruction_loss + op_mean(kl_loss) # <6> 88 | 89 | self$reconstruction_loss_tracker$update_state(reconstruction_loss) # <7> 90 | self$kl_loss_tracker$update_state(kl_loss) # <7> 91 | 92 | total_loss 93 | } 94 | ) 95 | 96 | 97 | .[.[x_train, ..], .[x_test, ..]] <- dataset_mnist() 98 | mnist_digits <- vctrs::vec_c(x_train, x_test) # <1> 99 | mnist_digits <- mnist_digits / 255 100 | dim(mnist_digits) <- c(dim(mnist_digits), 1) 101 | 102 | str(mnist_digits) 103 | 104 | vae <- model_vae(encoder, decoder) 105 | vae |> compile(optimizer = optimizer_adam()) # <2> 106 | vae |> fit(mnist_digits, epochs = 30, batch_size = 128) # <3> 107 | 108 | 109 | n <- 30 # <1> 110 | digit_size <- 28 111 | 112 | z <- seq(-1, 1, length.out = n) # <2> 113 | z_grid <- as.matrix(expand.grid(z, z)) # <2> 114 | 115 | decoded <- predict(vae$decoder, z_grid, verbose = 0) # <3> 116 | 117 | z_grid_i <- expand.grid(x = seq_len(n), y = seq_len(n)) # <4> 118 | figure <- array(0, c(digit_size * n, digit_size * n)) # <4> 119 | 120 | for (i in 1:nrow(z_grid_i)) { 121 | .[xi, yi] <- z_grid_i[i, ] 122 | digit <- decoded[i, , , ] 123 | figure[seq(to = (n + 1 - xi) * digit_size, length.out = digit_size), 124 | seq(to = yi * digit_size, length.out = digit_size)] <- 125 | digit 126 | } 127 | 128 | par(pty = "s") # <8> 129 | lim <- extendrange(r = c(-1, 1), f = 1 - (n / (n+.5))) # <5> 130 | plot(NULL, frame.plot = FALSE, 131 | ylim = lim, xlim = lim, 132 | xlab = ~z[1], ylab = ~z[2]) # <6> 133 | rasterImage(as.raster(1 - figure, max = 1), # <7> 134 | lim[1], lim[1], lim[2], lim[2], 135 | interpolate = FALSE) 136 | 137 | 138 | fpath <- "flowers" 139 | 140 | 141 | library(tfdatasets, exclude = "shape") 142 | batch_size <- 32 143 | image_size <- 128 144 | images_dir <- fs::path("flowers", "jpg") 145 | dataset <- image_dataset_from_directory( 146 | images_dir, 147 | labels = NULL, # <1> 148 | image_size = c(image_size, image_size), 149 | crop_to_aspect_ratio = TRUE # <2> 150 | ) 151 | dataset <- dataset |> dataset_rebatch( 152 | batch_size, 153 | drop_remainder = TRUE # <3> 154 | ) 155 | 156 | 157 | img <- dataset |> as_array_iterator() |> iter_next() |> _[1, , ,] 158 | par(mar = c(0, 0, 0, 0)) 159 | plot(as.raster(img, max = 255)) 160 | 161 | 162 | residual_block <- function(x, width) { # <1> 163 | .[.., n_features] <- op_shape(x) 164 | 165 | if (n_features == width) { 166 | residual <- x 167 | } else { 168 | residual <- x |> layer_conv_2d(filters = width, kernel_size = 1) 169 | } 170 | 171 | x <- x |> 172 | layer_batch_normalization(center = FALSE, scale = FALSE) |> 173 | layer_conv_2d(width, kernel_size = 3, padding = "same", 174 | activation = "swish") |> 175 | 
layer_conv_2d(width, kernel_size = 3, padding = "same") 176 | 177 | x + residual 178 | } 179 | 180 | 181 | get_model <- function(image_size, widths, block_depth) { 182 | noisy_images <- keras_input(shape = c(image_size, image_size, 3)) 183 | noise_rates <- keras_input(shape = c(1, 1, 1)) 184 | 185 | x <- noisy_images |> layer_conv_2d(filters = widths[1], kernel_size = 1) 186 | n <- noise_rates |> layer_upsampling_2d(size = image_size, interpolation = "nearest") 187 | x <- layer_concatenate(c(x, n)) 188 | 189 | skips <- list() 190 | 191 | for (width in head(widths, -1)) { # <2> 192 | for (i in seq_len(block_depth)) { # <2> 193 | x <- x |> residual_block(width) # <2> 194 | skips <- c(skips, x) 195 | } 196 | x <- x |> layer_average_pooling_2d(pool_size = 2) 197 | } 198 | 199 | for (i in seq_len(block_depth)) { # <3> 200 | x <- x |> residual_block(tail(widths, 1)) # <3> 201 | } 202 | 203 | for (width in rev(head(widths, -1))) { # <4> 204 | x <- x |> layer_upsampling_2d(size = 2, interpolation = "bilinear") # <4> 205 | for (i in seq_len(block_depth)) { # <4> 206 | x <- x |> layer_concatenate(tail(skips, 1)[[1]]) # <4> 207 | skips <- head(skips, -1) 208 | x <- x |> residual_block(width) 209 | } 210 | } 211 | 212 | pred_noise_masks <- x |> layer_conv_2d( # <5> 213 | filters = 3, kernel_size = 1, kernel_initializer = "zeros" # <5> 214 | ) # <5> 215 | 216 | model <- keras_model(inputs = list(noisy_images, noise_rates), # <6> 217 | outputs = pred_noise_masks) # <6> 218 | model 219 | } 220 | 221 | 222 | diffusion_schedule <- function(diffusion_times, min_signal_rate = 0.02, 223 | max_signal_rate = 0.95) { 224 | start_angle <- op_arccos(max_signal_rate) |> op_cast(dtype = "float32") 225 | end_angle <- op_arccos(min_signal_rate) |> op_cast(dtype = "float32") 226 | 227 | diffusion_angles <- start_angle + diffusion_times * (end_angle - start_angle) 228 | signal_rates <- op_cos(diffusion_angles) 229 | noise_rates <- op_sin(diffusion_angles) 230 | 231 | list(noise_rates = noise_rates, signal_rates = signal_rates) 232 | } 233 | 234 | 235 | diffusion_times <- op_arange(0, 1, 0.01) # <1> 236 | schedule <- diffusion_schedule(diffusion_times) # <1> 237 | 238 | diffusion_times <- as.array(diffusion_times) # <2> 239 | noise_rates <- as.array(schedule$noise_rates) # <2> 240 | signal_rates <- as.array(schedule$signal_rates) # <2> 241 | 242 | plot(NULL, type = "n", main = "Diffusion Schedule", # <3> 243 | ylab = "Rate", ylim = c(0, 1), 244 | xlab = "Diffusion time", xlim = c(0, 1)) 245 | lines(diffusion_times, noise_rates, col = "blue", lty = 1) # <4> 246 | lines(diffusion_times, signal_rates, col = "red", lty = 2) # <4> 247 | 248 | legend("bottomleft", # <5> 249 | legend = c("Noise rate", "Signal rate"), # <5> 250 | col = c("blue", "red"), lty = c(1, 2)) 251 | 252 | 253 | new_diffusion_model <- new_model_class( 254 | classname = "DiffusionModel", 255 | 256 | initialize = function(image_size, widths, block_depth, ...) { 257 | super$initialize(...) 
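    # Assembled below: the U-Net-style denoiser returned by get_model(), a
    # seed generator for reproducible noise, a mean absolute error loss that
    # compute_loss() applies to (true noise mask, predicted noise mask) pairs,
    # and a normalization layer whose statistics are adapted to the training
    # images and inverted again inside generate().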
258 | self$image_size <- shape(image_size) 259 | self$denoising_model <- get_model(image_size, widths, block_depth) 260 | self$seed_generator <- random_seed_generator() 261 | self$loss <- loss_mean_absolute_error() # <1> 262 | self$normalizer <- layer_normalization() # <2> 263 | }, 264 | 265 | denoise = function(noisy_images, noise_rates, signal_rates) { 266 | pred_noise_masks <- self$denoising_model(list(noisy_images, noise_rates)) # <3> 267 | pred_images <- 268 | (noisy_images - noise_rates * pred_noise_masks) / 269 | signal_rates # <4> 270 | list(pred_images = pred_images, pred_noise_masks = pred_noise_masks) 271 | }, 272 | 273 | call = function(images) { 274 | images <- self$normalizer(images) 275 | .[batch_size, ..] <- op_shape(images) 276 | 277 | noise_masks <- random_normal( # <5> 278 | shape = c(batch_size, self$image_size, self$image_size, 3), 279 | seed = self$seed_generator 280 | ) 281 | 282 | diffusion_times <- random_uniform( # <6> 283 | shape = c(batch_size, 1, 1, 1), 284 | minval = 0.0, maxval = 1.0, 285 | seed = self$seed_generator 286 | ) 287 | 288 | .[noise_rates, signal_rates] <- diffusion_schedule(diffusion_times) 289 | noisy_images <- signal_rates * images + noise_rates * noise_masks # <7> 290 | 291 | .[pred_images, pred_noise_masks] <- 292 | self$denoise(noisy_images, noise_rates, signal_rates) # <8> 293 | 294 | list(pred_images, pred_noise_masks, noise_masks) 295 | }, 296 | 297 | compute_loss = function(x, y, y_pred, sample_weight = NULL, training = TRUE) { 298 | .[.., pred_noise_masks, noise_masks] <- y_pred 299 | self$loss(noise_masks, pred_noise_masks) 300 | }, 301 | 302 | generate = function(num_images, diffusion_steps) { 303 | noisy_images <- random_normal( # <9> 304 | shape = c(num_images, self$image_size, self$image_size, 3), 305 | seed = self$seed_generator 306 | ) 307 | 308 | diffusion_times <- seq(1, 0, length.out = diffusion_steps) 309 | 310 | for (i in seq_len(diffusion_steps - 1)) { 311 | diffusion_time <- diffusion_times[i] 312 | next_diffusion_time <- diffusion_times[i + 1] 313 | 314 | .[noise_rates, signal_rates] <- diffusion_time |> 315 | op_broadcast_to(c(num_images, 1, 1, 1)) |> 316 | diffusion_schedule() # <10> 317 | 318 | .[pred_images, pred_noises] <- 319 | self$denoise(noisy_images, noise_rates, signal_rates) # <11> 320 | 321 | .[next_noise_rates, next_signal_rates] <- next_diffusion_time |> 322 | op_broadcast_to(c(num_images, 1, 1, 1)) |> 323 | diffusion_schedule() # <12> 324 | 325 | noisy_images <- # <13> 326 | (next_signal_rates * pred_images) + 327 | (next_noise_rates * pred_noises) 328 | } 329 | 330 | images <- # <14> 331 | self$normalizer$mean + pred_images * 332 | op_sqrt(self$normalizer$variance) 333 | 334 | op_clip(images, 0, 255) # <15> 335 | } 336 | ) 337 | 338 | 339 | callback_visualization <- new_callback_class( 340 | classname = "VisualizationCallback", 341 | initialize = function(diffusion_steps = 20, num_rows = 3, num_cols = 6) { 342 | self$diffusion_steps <- diffusion_steps 343 | self$num_rows <- num_rows 344 | self$num_cols <- num_cols 345 | }, 346 | 347 | on_epoch_end = function(epoch = NULL, logs = NULL) { 348 | generated_images <- self$model$generate( 349 | num_images = self$num_rows * self$num_cols, 350 | diffusion_steps = self$diffusion_steps 351 | ) |> as.array() 352 | 353 | par(mfrow = c(self$num_rows, self$num_cols), 354 | mar = c(0, 0, 0, 0)) 355 | 356 | for (i in seq_len(self$num_rows * self$num_cols)) { 357 | plot(as.raster(generated_images[i, , , ], max = 255)) 358 | } 359 | } 360 | ) 361 | 362 | 363 | model <- 
new_diffusion_model(image_size, widths = c(32, 64, 96, 128), 364 | block_depth = 2) 365 | model$normalizer$adapt(dataset) # <1> 366 | 367 | 368 | model |> compile( 369 | optimizer = optimizer_adam_w( # <1> 370 | learning_rate = learning_rate_schedule_inverse_time_decay( # <1> 371 | initial_learning_rate = 1e-3, # <1> 372 | decay_steps = 1000, # <1> 373 | decay_rate = 0.1 # <1> 374 | ), # <1> 375 | use_ema = TRUE, # <2> 376 | ema_overwrite_frequency = 100 # <3> 377 | ) 378 | ) 379 | 380 | 381 | library(keras3) 382 | reticulate::py_require("keras-hub==0.18.1") 383 | keras_hub <- reticulate::import("keras_hub") 384 | 385 | model <- keras_hub$models$TextToImage$from_preset( 386 | "stable_diffusion_3_medium", 387 | image_shape = shape(512, 512, 3), 388 | dtype = "float16" 389 | ) 390 | 391 | image <- model$generate( 392 | "photograph of an astronaut riding a horse, detailed, 8k", 393 | guidance_scale = 1 394 | ) 395 | 396 | 397 | par(mar = c(0, 0, 0, 0)) 398 | plot(as.raster(image, max = max(image))) 399 | 400 | 401 | prompts <- c( 402 | "A photograph of a cat wearing a top hat, photorealistic", 403 | "A neon sci-fi skyline at night, illustration" 404 | ) 405 | 406 | images <- model$generate(prompts, num_steps = 25L, guidance_scale = 7.5) 407 | 408 | for(i in seq_len(nrow(images))) { 409 | image <- images[i, , , ] 410 | plot(as.raster(image, max = 255L)) 411 | image_array_save(image, sprintf("generated_image_%i.png", i), scale = FALSE) 412 | } 413 | 414 | 415 | 416 | --------------------------------------------------------------------------------
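## A hedged sketch, not from the repo, of how the flower diffusion model
## compiled in chapter17_image-generation.R could be trained. It assumes the
## compiled DiffusionModel is still bound to a variable (called
## `diffusion_model` here; in the script it was `model` before that name was
## reused for the keras-hub TextToImage preset) and reuses the `dataset` and
## VisualizationCallback defined in the same file. The epoch count is an
## arbitrary placeholder.
diffusion_model |> fit(
  dataset,
  epochs = 100,
  callbacks = list(callback_visualization(diffusion_steps = 20))
)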