├── 2e ├── ch01.R ├── ch06.R ├── install-r-tensorflow.R ├── README.md ├── ch13.R ├── ch14.R ├── ch04.R ├── ch05.R ├── ch03.R └── ch08.R ├── chapter01_what-is-deep-learning.R ├── chapter01_what-is-deep-learning_tmp.R ├── chapter06_universal-workflow-of-ml.R ├── .gitignore ├── README.md ├── deep-learning-with-r-code.Rproj ├── chapter19_future_of_ai.R ├── chapter09_convnet-architecture-patterns.R ├── chapter12_object-detection.R ├── chapter18_best-practices-for-the-real-world.R ├── chapter10_interpreting-what-convnets-learn.R ├── chapter11_image-segmentation.R ├── chapter04_classification-and-regression.R ├── chapter05_fundamentals-of-ml.R ├── chapter13_timeseries-forecasting.R ├── chapter08_image-classification.R ├── chapter02_mathematical-building-blocks.R ├── chapter16_text-generation.R └── chapter17_image-generation.R /2e/ch01.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter01_what-is-deep-learning.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter01_what-is-deep-learning_tmp.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /chapter06_universal-workflow-of-ml.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /2e/ch06.R: -------------------------------------------------------------------------------- 1 | ## ---- eval = FALSE-------------------------------------------------------- 2 | ## x <- scale(x) 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | cats_vs_dogs_small 6 | dogs-vs-cats 7 | pets_dataset 8 | *.keras -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Deep Learning with R (Code Only) 2 | 3 | The repository contains code from the book "Deep Learning with R," 2nd and 3rd editions. 4 | Code for the 2nd edition can be found in the "2e" directory. 
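For example, assuming the R packages and Python backend used by the book are already installed (see `2e/README.md` for setup notes), any chapter script can be run end to end by sourcing it from the repository root. The chapter file below is just an illustrative choice; every script in the listing above works the same way:

```r
# Illustrative usage: run the 3rd-edition classification/regression chapter.
source("chapter04_classification-and-regression.R", echo = TRUE)
```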
5 | -------------------------------------------------------------------------------- /deep-learning-with-r-code.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | -------------------------------------------------------------------------------- /2e/install-r-tensorflow.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | if(!requireNamespace("remotes")) install.packages("remotes") 4 | 5 | remotes::update_packages() 6 | remotes::install_cran(c("readr", "tibble", "zip", "fs", "listarrays", "keras")) 7 | 8 | envname <- "r-keras" 9 | 10 | if("--fresh" %in% commandArgs(TRUE)) { 11 | reticulate::miniconda_uninstall() 12 | unlink("~/.pyenv", recursive = TRUE) 13 | unlink(paste0("~/.virtualenvs/", envname), recursive = TRUE) 14 | } 15 | 16 | 17 | python <- reticulate::install_python("3.9:latest") 18 | reticulate::virtualenv_create(envname, python = python) 19 | 20 | keras::install_keras( 21 | envname = envname, 22 | extra_packages = c("keras-tuner", "ipython", "kaggle") 23 | ) 24 | -------------------------------------------------------------------------------- /2e/README.md: -------------------------------------------------------------------------------- 1 | ## Deep Learning with R, 2nd Edition (Code Only) 2 | 3 | The folder contains just the code from the book "Deep Learning with R, 2nd Edition". 4 | 5 | You can install all the dependencies by cloning this repo and sourcing the `"install-r-tensorflow.R"` script, 6 | either at the R console or the terminal: 7 | 8 | ```bash 9 | Rscript install-r-tensorflow.R 10 | ``` 11 | 12 | The script creates a "r-keras" virtual environment that will automatically be 13 | discovered by the `keras` R package. 14 | 15 | Note: the install script assumes that R and CUDA drivers are already installed. 16 | 17 | Modern reticulate no longer requires setting up a manual virtual environment. 
18 | Instead of running the `install-r-tensorflow.R` script, you can instead 19 | request that reticulate use an ephemeral python installation by running this 20 | at the start of the R session: 21 | 22 | ```r 23 | reticulate::py_require(c( 24 | 25 | "tensorflow", 26 | # If you are on Linux and want to use a GPU, you can instead install: 27 | # "tensorflow[and-cuda]", or to force cpu only, "tensorflow-cpu" 28 | 29 | # install legacy (v2) keras 30 | "tf-keras", 31 | 32 | # additional packages used in the book 33 | "keras-tuner", "ipython", "kaggle" 34 | )) 35 | ``` 36 | -------------------------------------------------------------------------------- /chapter19_future_of_ai.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | num_input_features <- 10 3 | num_classes <- 10 4 | num_values <- 10 5 | num_features <- 10 6 | height <- width <- 512 7 | channels <- 3 8 | num_timesteps <- 100 9 | sequence_length <- 100 10 | 11 | 12 | inputs <- keras_input(shape = c(num_input_features)) 13 | outputs <- inputs |> 14 | layer_dense(32, activation = "relu") |> 15 | layer_dense(32, activation = "relu") |> 16 | layer_dense(1, activation = "sigmoid") 17 | model <- keras_model(inputs, outputs) 18 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 19 | 20 | 21 | inputs <- keras_input(shape = c(num_input_features)) 22 | outputs <- inputs |> 23 | layer_dense(32, activation = "relu") |> 24 | layer_dense(32, activation = "relu") |> 25 | layer_dense(num_classes, activation = "softmax") 26 | model <- keras_model(inputs, outputs) 27 | model |> compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 28 | 29 | 30 | inputs <- keras_input(shape = c(num_input_features)) 31 | outputs <- inputs |> 32 | layer_dense(32, activation = "relu") |> 33 | layer_dense(32, activation = "relu") |> 34 | layer_dense(num_classes, activation = "sigmoid") 35 | model <- keras_model(inputs, outputs) 36 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 37 | 38 | 39 | inputs <- keras_input(shape = c(num_input_features)) 40 | outputs <- inputs |> 41 | layer_dense(32, activation = "relu") |> 42 | layer_dense(32, activation = "relu") |> 43 | layer_dense(num_values) 44 | model <- keras_model(inputs, outputs) 45 | model |> compile(optimizer = "rmsprop", loss = "mse") 46 | 47 | 48 | inputs <- keras_input(shape = c(height, width, channels)) 49 | outputs <- inputs |> 50 | layer_separable_conv_2d(32, 3, activation = "relu") |> 51 | layer_separable_conv_2d(64, 3, activation = "relu") |> 52 | layer_max_pooling_2d(2) |> 53 | layer_separable_conv_2d(64, 3, activation = "relu") |> 54 | layer_separable_conv_2d(128, 3, activation = "relu") |> 55 | layer_max_pooling_2d(2) |> 56 | layer_separable_conv_2d(64, 3, activation = "relu") |> 57 | layer_separable_conv_2d(128, 3, activation = "relu") |> 58 | layer_global_average_pooling_2d() |> 59 | layer_dense(32, activation = "relu") |> 60 | layer_dense(num_classes, activation = "softmax") 61 | model <- keras_model(inputs, outputs) 62 | model |> compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 63 | 64 | 65 | inputs <- keras_input(shape = c(num_timesteps, num_features)) 66 | outputs <- inputs |> 67 | layer_lstm(32) |> 68 | layer_dense(num_classes, activation = "sigmoid") 69 | model <- keras_model(inputs, outputs) 70 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 71 | 72 | 73 | inputs <- keras_input(shape = c(num_timesteps, num_features)) 74 | outputs <- inputs |> 75 | 
layer_lstm(32, return_sequences = TRUE) |> 76 | layer_lstm(32, return_sequences = TRUE) |> 77 | layer_lstm(32) |> 78 | layer_dense(num_classes, activation = "sigmoid") 79 | model <- keras_model(inputs, outputs) 80 | model |> compile(optimizer = "rmsprop", loss = "binary_crossentropy") 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /chapter09_convnet-architecture-patterns.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | inputs <- keras_input(shape = c(32, 32, 3)) 5 | x <- inputs |> layer_conv_2d(32, 3, activation = "relu") 6 | residual <- x # <1> 7 | x <- x |> layer_conv_2d(64, 3, activation = "relu", padding = "same") # <2> 8 | residual <- residual |> layer_conv_2d(64, 1) # <3> 9 | x <- layer_add(c(x, residual)) # <4> 10 | 11 | 12 | inputs <- keras_input(shape = c(32, 32, 3)) 13 | x <- inputs |> layer_conv_2d(32, 3, activation = "relu") 14 | residual <- x # <1> 15 | x <- x |> 16 | layer_conv_2d(64, 3, activation = "relu", padding = "same") |> # <2> 17 | layer_max_pooling_2d(2, padding = "same") # <2> 18 | residual <- residual |> 19 | layer_conv_2d(64, 1, strides = 2) # <3> 20 | x <- layer_add(list(x, residual)) # <4> 21 | 22 | 23 | inputs <- keras_input(shape = c(32, 32, 3)) 24 | x <- inputs |> layer_rescaling(scale = 1/255) 25 | 26 | residual_block <- function(x, filters, pooling = FALSE) { # <1> 27 | residual <- x 28 | x <- x |> 29 | layer_conv_2d(filters, 3, activation = "relu", padding = "same") |> 30 | layer_conv_2d(filters, 3, activation = "relu", padding = "same") 31 | 32 | if (pooling) { 33 | x <- x |> layer_max_pooling_2d(pool_size = 2, padding = "same") 34 | residual <- residual |> layer_conv_2d(filters, 1, strides = 2) # <2> 35 | } else if (filters != op_shape(residual)[[4]]) { 36 | residual <- residual |> layer_conv_2d(filters, 1) # <3> 37 | } 38 | 39 | layer_add(list(x, residual)) 40 | } 41 | 42 | outputs <- x |> 43 | residual_block(filters = 32, pooling = TRUE) |> # <4> 44 | residual_block(filters = 64, pooling = TRUE) |> # <5> 45 | residual_block(filters = 128, pooling = FALSE) |> # <6> 46 | layer_global_average_pooling_2d() |> 47 | layer_dense(units = 1, activation = "sigmoid") 48 | 49 | model <- keras_model(inputs = inputs, outputs = outputs) 50 | 51 | 52 | model 53 | 54 | 55 | image_size <- c(180, 180) 56 | batch_size <- 32 57 | data_dir <- fs::path("dogs_vs_cats_small") 58 | 59 | train_dataset <- 60 | image_dataset_from_directory(data_dir / "train", 61 | image_size = image_size, 62 | batch_size = batch_size) 63 | validation_dataset <- 64 | image_dataset_from_directory(data_dir / "validation", 65 | image_size = image_size, 66 | batch_size = batch_size) 67 | test_dataset <- 68 | image_dataset_from_directory(data_dir / "test", 69 | image_size = image_size, 70 | batch_size = batch_size) 71 | 72 | 73 | data_augmentation_layers <- list( 74 | layer_random_flip(, "horizontal"), 75 | layer_random_rotation(, 0.1), 76 | layer_random_zoom(, 0.2) 77 | ) 78 | 79 | data_augmentation <- function(images, targets) { 80 | for (layer in data_augmentation_layers) { 81 | images <- layer(images) 82 | } 83 | list(images, targets) 84 | } 85 | 86 | augmented_train_dataset <- train_dataset |> 87 | tfdatasets::dataset_map(data_augmentation, num_parallel_calls = 8) |> 88 | tfdatasets::dataset_prefetch(4) 89 | 90 | 91 | inputs <- keras_input(shape = c(180, 180, 3)) 92 | 93 | x <- inputs |> 94 | layer_rescaling(scale = 1 / 255) |> # <1> 95 | layer_conv_2d(filters = 32, kernel_size = 
5, use_bias = FALSE) # <2> 96 | 97 | for (size in c(32, 64, 128, 256, 512)) { # <3> 98 | residual <- x 99 | 100 | x <- x |> 101 | layer_batch_normalization() |> 102 | layer_activation("relu") |> 103 | layer_separable_conv_2d(size, 3, padding = "same", use_bias = FALSE) |> 104 | 105 | layer_batch_normalization() |> 106 | layer_activation("relu") |> 107 | layer_separable_conv_2d(size, 3, padding = "same", use_bias = FALSE) |> 108 | 109 | layer_max_pooling_2d(pool_size = 3, strides = 2, padding = "same") 110 | 111 | residual <- residual |> 112 | layer_conv_2d(size, 1, strides = 2, padding = "same", use_bias = FALSE) 113 | 114 | x <- layer_add(x, residual) 115 | } 116 | 117 | outputs <- x |> 118 | layer_global_average_pooling_2d() |> 119 | layer_dropout(0.25) |> # <4> 120 | layer_dense(1, activation = "sigmoid") 121 | 122 | model <- keras_model(inputs, outputs) 123 | 124 | 125 | compile(model, 126 | loss = "binary_crossentropy", 127 | optimizer = "adam", 128 | metrics = "accuracy") 129 | history <- fit(model, augmented_train_dataset, 130 | epochs = 100, 131 | validation_data = validation_dataset) 132 | 133 | 134 | plot(history) 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /chapter12_object-detection.R: -------------------------------------------------------------------------------- 1 | library(dplyr, warn.conflicts = FALSE) 2 | library(stringr) 3 | library(xml2) 4 | library(reticulate) 5 | library(tensorflow, exclude = c("shape", "set_random_seed")) 6 | library(tfdatasets, exclude = "shape") 7 | library(keras3) 8 | py_require("matplotlib") 9 | py_require("opencv-python") 10 | py_require("keras-hub==0.18.1") 11 | 12 | use_backend("tensorflow") 13 | 14 | 15 | VOC_url <- \(data) sprintf( 16 | "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOC%s_06-Nov-2007.tar", data 17 | ) 18 | VOC_train_val <- get_file(origin = VOC_url("trainval")) 19 | VOC_test <- get_file(origin = VOC_url("test")) 20 | 21 | 22 | BASE_DIR <- fs::path("VOCdevkit", "VOC2007") 23 | IMAGE_DIR <- BASE_DIR / "JPEGImages" 24 | ANNOTATION_DIR <- BASE_DIR / "Annotations" 25 | IMAGESET_DIR <- BASE_DIR / "ImageSets" / "Main" 26 | 27 | CLASSES <- c( 28 | "aeroplane", 29 | "bicycle", 30 | "bird", 31 | "boat", 32 | "bottle", 33 | "bus", 34 | "car", 35 | "cat", 36 | "chair", 37 | "cow", 38 | "diningtable", 39 | "dog", 40 | "horse", 41 | "motorbike", 42 | "person", 43 | "pottedplant", 44 | "sheep", 45 | "sofa", 46 | "train", 47 | "tvmonitor" 48 | ) 49 | 50 | 51 | list.files(IMAGESET_DIR) 52 | 53 | 54 | read_example <- function(image_id) { 55 | image_path <- tf$strings$join(list(IMAGE_DIR, "/", image_id, ".jpg")) 56 | csv_path <- tf$strings$join(list(ANNOTATION_DIR, "/", image_id, ".csv")) 57 | 58 | image <- image_path |> 59 | tf$io$read_file() |> 60 | tf$io$decode_jpeg(channels = 3L) |> 61 | tf$ensure_shape(shape(NA, NA, 3)) 62 | 63 | csv_data <- csv_path |> 64 | tf$io$read_file() |> 65 | tf$strings$regex_replace("\n$", "") |> #<1> drop last newline 66 | tf$strings$split("\n") |> 67 | tf$io$decode_csv(list( 68 | tf$constant(list(), dtype = tf$int32), # class_idx 69 | tf$constant(list(), dtype = tf$float32), # ymin 70 | tf$constant(list(), dtype = tf$float32), # xmin 71 | tf$constant(list(), dtype = tf$float32), # ymax 72 | tf$constant(list(), dtype = tf$float32) # xmax 73 | )) 74 | 75 | .[class_idx, ymin, xmin, ymax, xmax] <- csv_data 76 | 77 | bbox <- tf$stack(list(ymin, xmin, ymax, xmax), axis = -1L) |> 78 | tf$ensure_shape(shape(NA, 4)) 79 | 80 | label <- class_idx |> 
tf$ensure_shape(shape(NA)) 81 | 82 | list(image = image, 83 | objects = list(bbox = bbox, label = label)) 84 | } 85 | 86 | 87 | split <- "test" 88 | split_file <- fs::path(IMAGESET_DIR, split, ext = "txt") 89 | image_id <- readLines(split_file, 1) |> tf$constant() 90 | 91 | read_example(image_id) |> str() 92 | 93 | 94 | get_dataset <- function(split, shuffle_files = TRUE, shuffle_buffer_size = 1000) { 95 | 96 | split_file <- fs::path(IMAGESET_DIR, split, ext = "txt") 97 | ds <- text_line_dataset(split_file, num_parallel_reads = 12) 98 | 99 | if (shuffle_files) 100 | ds <- ds |> dataset_shuffle(shuffle_buffer_size) 101 | 102 | ds <- ds |> 103 | dataset_map(read_example, num_parallel_calls = 12) |> 104 | dataset_prefetch() 105 | 106 | ds 107 | } 108 | 109 | train_ds <- get_dataset("trainval", shuffle_files = TRUE) 110 | eval_ds <- get_dataset("test", shuffle_files = TRUE) 111 | 112 | train_ds |> 113 | as_iterator() |> iter_next() |> str() 114 | 115 | 116 | example <- train_ds |> dataset_batch(1) |> as_iterator() |> iter_next() 117 | 118 | par(mar = c(0,0,0,0)) 119 | 120 | keras$visualization$plot_bounding_box_gallery( 121 | example$image, 122 | bounding_box_format = "rel_yxyx", 123 | y_true = list( 124 | boxes = example$objects$bbox, #|> op_convert_to_numpy(), 125 | labels = example$objects$label #|> op_convert_to_numpy() 126 | ), 127 | scale = 8, 128 | class_mapping = CLASSES 129 | ) 130 | 131 | 132 | BBOX_FORMAT <- "yxyx" 133 | 134 | parse_record <- function(record) { 135 | image <- record$image 136 | .[h, w, depth] <- tf$shape(image) |> tf$unstack(3L) 137 | rel_boxes <- record$objects$bbox 138 | abs_boxes <- keras$utils$bounding_boxes$convert_format( 139 | rel_boxes, 140 | source = "rel_yxyx", 141 | target = BBOX_FORMAT, 142 | height = h, 143 | width = w 144 | ) 145 | labels <- record$objects$label 146 | 147 | list(images = image, 148 | bounding_boxes = list(boxes = abs_boxes, labels = labels)) 149 | } 150 | 151 | record <- train_ds |> as_iterator() |> iter_next() 152 | 153 | 154 | envir::import_from(keras$visualization, plot_bounding_box_gallery) 155 | IMAGE_SIZE <- shape(640, 640) 156 | BATCH_SIZE <- 4 157 | 158 | resizing <- layer_resizing( 159 | height = IMAGE_SIZE[[1]], 160 | width = IMAGE_SIZE[[2]], 161 | interpolation = "bilinear", 162 | pad_to_aspect_ratio = TRUE, 163 | bounding_box_format = BBOX_FORMAT, 164 | ) 165 | 166 | max_box_layer <- layer_max_num_bounding_boxes( 167 | max_number = 100, 168 | bounding_box_format = BBOX_FORMAT 169 | ) 170 | 171 | data_augmentation_layers <- list( 172 | layer_random_flip(mode = "horizontal", bounding_box_format = BBOX_FORMAT) 173 | ) 174 | 175 | 176 | prepare_dataset <- function(ds, batch_size = 4) { 177 | 178 | ds <- ds |> 179 | dataset_map(parse_record, num_parallel_calls = 8) |> 180 | dataset_map(resizing, num_parallel_calls = 8) 181 | 182 | for (layer in data_augmentation_layers) 183 | ds <- ds |> dataset_map(layer, num_parallel_calls = NULL) 184 | 185 | ds <- ds |> 186 | dataset_map(max_box_layer, num_parallel_calls = 8) |> 187 | dataset_batch(batch_size, drop_remainder = TRUE) |> 188 | dataset_prefetch() 189 | 190 | ds 191 | 192 | } 193 | 194 | train_ds_prepared <- prepare_dataset(train_ds, batch_size=BATCH_SIZE) 195 | eval_ds_prepared <- prepare_dataset(eval_ds, batch_size=BATCH_SIZE) 196 | 197 | first_images_prepared <- train_ds_prepared |> as_iterator() |> iter_next() 198 | 199 | plot_bounding_box_gallery( 200 | first_images_prepared$images, 201 | bounding_box_format = BBOX_FORMAT, 202 | y_true = first_images_prepared$bounding_boxes, 
203 | scale = 4, 204 | class_mapping = py_dict(seq_along(CLASSES), CLASSES) 205 | ) 206 | 207 | 208 | library(keras3) 209 | 210 | keras_hub <- reticulate::import("keras_hub") 211 | model <- keras_hub$models$ImageObjectDetector$ 212 | from_preset("retinanet_resnet50_fpn_coco") 213 | 214 | 215 | model_with_random_head <- keras_hub$models$ImageObjectDetector$from_preset( 216 | "retinanet_resnet50_fpn_coco", 217 | num_classes = length(CLASSES) 218 | ) 219 | 220 | 221 | split_labels <- function(x) { 222 | list(x$images, 223 | list( 224 | boxes = x$bounding_boxes$boxes, 225 | classes = x$bounding_boxes$labels 226 | )) 227 | } 228 | 229 | train_ds_prepared <- train_ds_prepared |> dataset_map(split_labels) 230 | eval_ds_prepared <- eval_ds_prepared |> dataset_map(split_labels) 231 | 232 | callbacks <- list( 233 | callback_model_checkpoint( 234 | "pascal_voc_detection.keras", 235 | save_best_only = TRUE, 236 | monitor = "val_loss" 237 | ) 238 | ) 239 | 240 | 241 | model <- load_model("pascal_voc_detection.keras") 242 | 243 | .[images, gt_boxes] <- iter_next(as_iterator(eval_ds_prepared)) 244 | predictions <- model |> predict(images) 245 | 246 | 247 | plot_bounding_box_gallery( 248 | images, 249 | bounding_box_format = BBOX_FORMAT, 250 | y_true = list(boxes = gt_boxes$boxes, labels = gt_boxes$classes), 251 | y_pred = list(boxes = predictions$boxes, labels = predictions$classes), 252 | scale = 8, 253 | class_mapping = py_dict(seq_along(CLASSES), CLASSES) 254 | ) 255 | 256 | -------------------------------------------------------------------------------- /chapter18_best-practices-for-the-real-world.R: -------------------------------------------------------------------------------- 1 | Sys.unsetenv("CUDA_VISIBLE_DEVICES") 2 | Sys.setenv("XLA_FLAGS" = "--xla_force_host_platform_device_count=8") 3 | library(reticulate) 4 | library(keras3) 5 | use_backend("jax") 6 | py_require(c("keras-tuner", "scikit-learn")) 7 | 8 | 9 | library(keras3) 10 | library(reticulate) 11 | use_backend("jax") 12 | py_require(c("keras-tuner", "scikit-learn")) 13 | 14 | 15 | build_model <- function(hp, num_classes = 10) { 16 | units <- hp$Int( # <1> 17 | name = "units", # <1> 18 | min_value = 16L, # <1> 19 | max_value = 64L, # <1> 20 | step = 16L) # <1> 21 | model <- keras_model_sequential() |> 22 | layer_dense(units, activation = "relu") |> 23 | layer_dense(num_classes, activation = "softmax") 24 | 25 | optimizer <- hp$Choice(name = "optimizer", # <2> 26 | values = c("rmsprop", "adam")) # <2> 27 | model |> compile(optimizer = optimizer, 28 | loss = "sparse_categorical_crossentropy", 29 | metrics = "accuracy") 30 | model # <3> 31 | } 32 | 33 | 34 | kt <- import("keras_tuner") 35 | 36 | SimpleMLP(kt$HyperModel) %py_class% { 37 | `__init__` <- function(self, num_classes) { # <2> 38 | self$num_classes <- num_classes 39 | } 40 | 41 | build <- function(self, hp) { 42 | build_model(hp, self$num_classes) 43 | } 44 | } 45 | 46 | hypermodel <- SimpleMLP(num_classes=10) 47 | 48 | 49 | tuner <- kt$BayesianOptimization( 50 | build_model, # <1> 51 | objective = "val_accuracy", # <2> 52 | max_trials = 100L, # <3> 53 | executions_per_trial = 2L, # <4> 54 | directory = "mnist_kt_test", # <5> 55 | overwrite = TRUE # <6> 56 | ) 57 | 58 | 59 | tuner$search_space_summary() 60 | 61 | 62 | .[.[x_train_full, y_train_full], .[x_test, y_test]] <- dataset_mnist() # <1> 63 | x_train_full <- x_train_full |> array_reshape(c(-1, 28 * 28)) # <1> 64 | x_train_full <- x_train_full / 255 # <1> 65 | x_test <- x_test |> array_reshape(c(-1, 28 * 28)) # <1> 66 | x_test 
<- x_test / 255 # <1> 67 | 68 | num_val_samples <- 10000 # <2> 69 | val_i <- seq_len(num_val_samples) 70 | x_val <- x_train_full[val_i, ] # <2> 71 | x_train <- x_train_full[-val_i, ] # <2> 72 | y_val <- y_train_full[val_i] |> as.matrix() # <2> 73 | y_train <- y_train_full[-val_i] |> as.matrix() # <2> 74 | 75 | callbacks <- list( # <3> 76 | callback_early_stopping(monitor = "val_loss", patience = 5) # <3> 77 | ) 78 | 79 | 80 | top_n <- 4L 81 | best_hps <- tuner$get_best_hyperparameters(top_n) # <1> 82 | 83 | 84 | get_best_epoch <- function(hp) { 85 | model <- build_model(hp) 86 | callbacks <- list( 87 | callback_early_stopping( 88 | monitor = "val_loss", 89 | mode = "min", 90 | patience = 10 # <1> 91 | ) 92 | ) 93 | 94 | history <- model |> fit( 95 | x_train, y_train, 96 | validation_data = list(x_val, y_val), 97 | epochs = 100, 98 | batch_size = 128, 99 | callbacks = callbacks 100 | ) 101 | 102 | best_epoch <- history |> 103 | tibble::as_tibble() |> 104 | dplyr::filter(metric == "loss" & value == min(value)) |> 105 | _$epoch 106 | glue::glue("Best epoch: {best_epoch}") 107 | best_epoch 108 | } 109 | 110 | 111 | get_best_trained_model <- function(hp) { 112 | best_epoch <- get_best_epoch(hp) 113 | model <- build_model(hp) 114 | model |> fit( 115 | x_train_full, y_train_full, 116 | batch_size=128L, epochs=as.integer(best_epoch * 1.2)) 117 | model 118 | } 119 | 120 | best_models <- py_eval("[]", convert = FALSE) 121 | for (hp in best_hps) { 122 | model <- get_best_trained_model(hp) 123 | model |> evaluate(x_test, y_test) |> print() 124 | best_models$append(model) 125 | } 126 | 127 | 128 | best_models <- tuner$get_best_models(top_n) 129 | 130 | 131 | model <- keras_model_sequential(input_shape = c(16000)) |> 132 | layer_dense(64000, activation = "relu") |> 133 | layer_dense(8000, activation = "sigmoid") 134 | 135 | 136 | keras$distribution$list_devices() 137 | 138 | 139 | devices <- paste0("gpu:", 0:7) 140 | device_mesh <- keras$distribution$DeviceMesh( 141 | shape = shape(2, 4), 142 | axis_names = c("data", "model"), 143 | devices = devices 144 | ) 145 | 146 | 147 | for (v in model$variables) 148 | cat(v$path, "\n") 149 | 150 | 151 | layout_map <- keras$distribution$LayoutMap(device_mesh) 152 | layout_map["sequential/dense/kernel"] <- tuple(NULL, "model") 153 | layout_map["sequential/dense/bias"] <- tuple("model") 154 | layout_map["sequential/dense_1/kernel"] <- tuple(NULL, "model") 155 | layout_map["sequential/dense_1/bias"] <- tuple("model") 156 | 157 | 158 | model_parallel <- keras$distribution$ModelParallel( 159 | layout_map=layout_map, 160 | batch_dim_name="data" # <1> 161 | ) 162 | keras$distribution$set_distribution(model_parallel) 163 | 164 | 165 | model$layers[[1]]$kernel$value$sharding 166 | 167 | 168 | jax <- reticulate::import("jax") 169 | value <- model$layers[[1]]$kernel$value 170 | jax$debug$visualize_sharding(value$shape, value$sharding) 171 | 172 | 173 | keras$distribution$set_distribution( 174 | keras$distribution$DataParallel(devices = c("gpu:0", "gpu:1")) 175 | ) 176 | 177 | 178 | config_set_dtype_policy("float16") 179 | 180 | 181 | config_set_dtype_policy("mixed_float16") 182 | 183 | 184 | optimizer <- optimizer_adam(learning_rate = 1e-3, loss_scale_factor = 10) 185 | 186 | 187 | optimizer <- 188 | optimizer_adam(learning_rate = 1e-3) |> 189 | optimizer_loss_scale() 190 | 191 | 192 | x <- op_array(rbind(c(0.1, 0.9), c(1.2, -0.8))) 193 | kernel <- op_array(rbind(c(-0.1, -2.2), c(1.1, 0.7))) 194 | 195 | 196 | abs_max_quantize <- function(value) { 197 | abs_max <- 
op_max(op_abs(value), keepdims = TRUE) # <1> 198 | scale <- op_divide(127, abs_max + 1e-7) # <2> 199 | scaled_value <- value * scale # <3> 200 | scaled_value <- op_clip(op_round(scaled_value), -127, 127) # <4> 201 | scaled_value <- op_cast(scaled_value, dtype = "int8") # <5> 202 | list(scaled_value, scale) 203 | } 204 | 205 | .[int_x, x_scale] <- abs_max_quantize(x) 206 | .[int_kernel, kernel_scale] <- abs_max_quantize(kernel) 207 | 208 | 209 | int_y <- op_matmul(int_x, int_kernel) 210 | y <- op_cast(int_y, dtype = "float32") / (x_scale * kernel_scale) 211 | 212 | 213 | y 214 | op_matmul(x, kernel) 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /2e/ch13.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include=FALSE------------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tensorflow::as_tensor(1) 5 | 6 | 7 | ## ---- eval = FALSE-------------------------------------------------------- 8 | ## reticulate::py_install("keras-tuner", pip = TRUE) 9 | 10 | 11 | ## ------------------------------------------------------------------------- 12 | 13 | build_model <- function(hp, num_classes = 10) { 14 | 15 | units <- hp$Int(name = "units", 16 | min_value = 16L, max_value = 64L, step = 16L) 17 | 18 | model <- keras_model_sequential() %>% 19 | layer_dense(units, activation = "relu") %>% 20 | layer_dense(num_classes, activation = "softmax") 21 | 22 | optimizer <- hp$Choice(name = "optimizer", 23 | values = c("rmsprop", "adam")) 24 | 25 | model %>% compile(optimizer = optimizer, 26 | loss = "sparse_categorical_crossentropy", 27 | metrics = c("accuracy")) 28 | model 29 | } 30 | 31 | 32 | ## ------------------------------------------------------------------------- 33 | kt <- reticulate::import("kerastuner") 34 | 35 | SimpleMLP(kt$HyperModel) %py_class% { 36 | 37 | `__init__` <- function(num_classes) { 38 | self$num_classes <- num_classes 39 | } 40 | 41 | build <- function(hp) { 42 | build_model(hp, self$num_classes) 43 | } 44 | 45 | } 46 | 47 | hypermodel <- SimpleMLP(num_classes = 10) 48 | 49 | 50 | ## import kerastuner as kt 51 | ## 52 | ## class SimpleMLP(kt.HyperModel): 53 | ## def __init__(self, num_classes): 54 | ## self.num_classes = num_classes 55 | ## 56 | ## def build(self, hp): 57 | ## build_model(hp, self.num_classes) 58 | ## 59 | ## hypermodel = SimpleMLP(num_classes = 10) 60 | 61 | 62 | ## ------------------------------------------------------------------------- 63 | tuner <- kt$BayesianOptimization( 64 | build_model, 65 | objective = "val_accuracy", 66 | max_trials = 100L, 67 | executions_per_trial = 2L, 68 | directory = "mnist_kt_test", 69 | overwrite = TRUE 70 | ) 71 | 72 | 73 | ## ------------------------------------------------------------------------- 74 | tuner$search_space_summary() 75 | 76 | 77 | ## ---- eval = FALSE-------------------------------------------------------- 78 | ## objective <- kt$Objective( 79 | ## name = "val_accuracy", 80 | ## direction = "max" 81 | ## ) 82 | ## 83 | ## tuner <- kt$BayesianOptimization( 84 | ## build_model, 85 | ## objective = objective, 86 | ## ... 
87 | ## ) 88 | 89 | 90 | ## ------------------------------------------------------------------------- 91 | c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist() 92 | x_train %<>% { array_reshape(., c(-1, 28 * 28)) / 255 } 93 | x_test %<>% { array_reshape(., c(-1, 28 * 28)) / 255 } 94 | x_train_full <- x_train 95 | y_train_full <- y_train 96 | num_val_samples <- 10000 97 | c(x_train, x_val) %<-% list(x_train[seq(num_val_samples), ], 98 | x_train[-seq(num_val_samples), ]) 99 | c(y_train, y_val) %<-% list(y_train[seq(num_val_samples)], 100 | y_train[-seq(num_val_samples)]) 101 | 102 | callbacks <- c( 103 | callback_early_stopping(monitor="val_loss", patience=5) 104 | ) 105 | 106 | tuner$search( 107 | x_train, y_train, 108 | batch_size = 128L, 109 | epochs = 100L, 110 | validation_data = list(x_val, y_val), 111 | callbacks = callbacks, 112 | verbose = 2L 113 | ) 114 | 115 | 116 | ## ------------------------------------------------------------------------- 117 | top_n <- 4L 118 | best_hps <- tuner$get_best_hyperparameters(top_n) 119 | str(best_hps) 120 | 121 | 122 | ## ------------------------------------------------------------------------- 123 | get_best_epoch <- function(hp) { 124 | model <- build_model(hp) 125 | 126 | callbacks <- c( 127 | callback_early_stopping(monitor = "val_loss", mode = "min", 128 | patience = 10)) 129 | 130 | history <- model %>% fit( 131 | x_train, y_train, 132 | validation_data = list(x_val, y_val), 133 | epochs = 100, 134 | batch_size = 128, 135 | callbacks = callbacks 136 | ) 137 | 138 | best_epoch <- which.min(history$metrics$val_loss) 139 | print(glue::glue("Best epoch: {best_epoch}")) 140 | invisible(best_epoch) 141 | } 142 | 143 | 144 | ## ------------------------------------------------------------------------- 145 | get_best_trained_model <- function(hp) { 146 | best_epoch <- get_best_epoch(hp) 147 | model <- build_model(hp) 148 | model %>% fit( 149 | x_train_full, 150 | y_train_full, 151 | batch_size = 128, 152 | epochs = round(best_epoch * 1.2) 153 | ) 154 | model 155 | } 156 | 157 | best_models <- best_hps %>% 158 | lapply(get_best_trained_model) 159 | 160 | 161 | ## ------------------------------------------------------------------------- 162 | best_models <- tuner$get_best_models(top_n) 163 | 164 | 165 | ## ---- eval = FALSE-------------------------------------------------------- 166 | ## preds_a <- model_a %>% predict(x_val) 167 | ## preds_b <- model_b %>% predict(x_val) 168 | ## preds_c <- model_c %>% predict(x_val) 169 | ## preds_d <- model_d %>% predict(x_val) 170 | ## final_preds <- 0.25 * (preds_a + preds_b + preds_c + preds_d) 171 | 172 | 173 | ## ---- eval = FALSE-------------------------------------------------------- 174 | ## preds_a <- model_a %>% predict(x_val) 175 | ## preds_b <- model_b %>% predict(x_val) 176 | ## preds_c <- model_c %>% predict(x_val) 177 | ## preds_d <- model_d %>% predict(x_val) 178 | ## final_preds <- 179 | ## 0.5 * preds_a + 0.25 * preds_b + 0.1 * preds_c + 0.15 * preds_d 180 | 181 | 182 | ## ---- eval = FALSE-------------------------------------------------------- 183 | ## * (2 ^ ( - 127)) * 1. 
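## Added note (not in the original script): a float32 value is stored as a
## sign bit, an 8-bit exponent biased by 127, and a 23-bit mantissa, so a
## normalized value decodes as (-1)^sign * (2 ^ (exponent - 127)) * 1.mantissa,
## which is the kind of factor shown in the fragment above.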
184 | 185 | 186 | ## ------------------------------------------------------------------------- 187 | r_array <- base::array(0, dim = c(2, 2)) 188 | tf_tensor <- tensorflow::as_tensor(r_array) 189 | tf_tensor$dtype 190 | 191 | 192 | ## ------------------------------------------------------------------------- 193 | r_array <- base::array(0, dim = c(2, 2)) 194 | tf_tensor <- tensorflow::as_tensor(r_array, dtype = "float32") 195 | tf_tensor$dtype 196 | 197 | 198 | ## ------------------------------------------------------------------------- 199 | keras::keras$mixed_precision$set_global_policy("mixed_float16") 200 | 201 | 202 | ## ---- eval = FALSE-------------------------------------------------------- 203 | ## library(tensorflow) 204 | ## strategy <- tf$distribute$MirroredStrategy() 205 | ## cat("Number of devices:", strategy$num_replicas_in_sync, "\n") 206 | ## with(strategy$scope(), { 207 | ## model <- get_compiled_model() 208 | ## }) 209 | ## model %>% fit( 210 | ## train_dataset, 211 | ## epochs = 100, 212 | ## validation_data = val_dataset, 213 | ## callbacks = callbacks 214 | ## ) 215 | 216 | 217 | ## ------------------------------------------------------------------------- 218 | build_model <- function(input_size) { 219 | resnet <- application_resnet50(weights = NULL, 220 | include_top = FALSE, 221 | pooling = "max") 222 | 223 | inputs <- layer_input(c(input_size, 3)) 224 | 225 | outputs <- inputs %>% 226 | resnet_preprocess_input() %>% 227 | resnet() %>% 228 | layer_dense(10, activation = "softmax") 229 | 230 | model <- keras_model(inputs, outputs) 231 | 232 | model %>% compile( 233 | optimizer = "rmsprop", 234 | loss = "sparse_categorical_crossentropy", 235 | metrics = "accuracy" 236 | ) 237 | 238 | model 239 | } 240 | 241 | strategy <- tf$distribute$MirroredStrategy() 242 | cat("Number of replicas:", strategy$num_replicas_in_sync, "\n") 243 | 244 | with(strategy$scope(), { 245 | model <- build_model(input_size = c(32, 32)) 246 | }) 247 | 248 | 249 | ## ---- eval = FALSE-------------------------------------------------------- 250 | ## c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_cifar10() 251 | ## model %>% fit(x_train, y_train, batch_size = 1024) 252 | 253 | 254 | ## ---- eval = FALSE-------------------------------------------------------- 255 | ## tpu <- tf$distribute$cluster_resolver$TPUClusterResolver$connect() 256 | ## cat("Device:", tpu$master(), "\n") 257 | ## strategy <- tf$distribute$TPUStrategy(tpu) 258 | ## with(strategy$scope(), { ... 
}) 259 | -------------------------------------------------------------------------------- /chapter10_interpreting-what-convnets-learn.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | library(keras3) 5 | model <- load_model("convnet_from_scratch_with_augmentation.keras") 6 | 7 | 8 | img_path <- get_file( # <1> 9 | fname="cat.jpg", # <1> 10 | origin="https://img-datasets.s3.amazonaws.com/cat.jpg" # <1> 11 | ) # <1> 12 | 13 | get_img_array <- function(img_path, target_size) { 14 | image <- img_path |> 15 | image_load(target_size = target_size) |> # <2> 16 | image_to_array() # <3> 17 | dim(image) <- c(1, dim(image)) # <4> 18 | image 19 | } 20 | 21 | img <- get_img_array(img_path, target_size = c(180, 180)) 22 | str(img) 23 | 24 | 25 | display_image <- function(x, ..., max = 255, margin = 0) { 26 | par(mar = rep(margin, 4)) 27 | 28 | x |> as.array() |> drop() |> 29 | as.raster(max = max) |> 30 | plot(..., interpolate = FALSE) 31 | } 32 | 33 | img |> display_image() 34 | 35 | 36 | is_conv_layer <- function(x) inherits(x, keras$layers$Conv2D) 37 | is_pooling_layer <- function(x) inherits(x, keras$layers$MaxPool2D) 38 | 39 | layer_outputs <- list() 40 | for (layer in model$layers) # <1> 41 | if (is_conv_layer(layer) || is_pooling_layer(layer)) # <1> 42 | layer_outputs[[layer$name]] <- layer$output # <1> 43 | 44 | activation_model <- # <2> 45 | keras_model(inputs = model$input, 46 | outputs = layer_outputs) 47 | 48 | 49 | activations <- predict(activation_model, img) # <1> 50 | str(activations) 51 | 52 | 53 | first_layer_activation <- activations[[ names(layer_outputs)[1] ]] 54 | dim(first_layer_activation) 55 | 56 | 57 | plot_activations <- function(x, ...) { 58 | withr::local_par(list(mar = c(0,0,0,0))) 59 | 60 | x <- drop(as.array(x)) 61 | if (sum(x) == 0) 62 | return(plot(as.raster("gray"))) 63 | 64 | rotate <- function(x) t(apply(x, 2, rev)) 65 | graphics::image( 66 | rotate(x), asp = 1, axes = FALSE, useRaster = TRUE, 67 | col = viridis::viridis(256), ... 
68 | ) 69 | } 70 | 71 | plot_activations(first_layer_activation[, , , 1]) 72 | plot_activations(first_layer_activation[, , , 2]) 73 | plot_activations(first_layer_activation[, , , 3]) 74 | plot_activations(first_layer_activation[, , , 4]) 75 | plot_activations(first_layer_activation[, , , 5]) 76 | plot_activations(first_layer_activation[, , , 6]) 77 | plot_activations(first_layer_activation[, , , 7]) 78 | plot_activations(first_layer_activation[, , , 8]) 79 | 80 | 81 | for (layer_name in names(activations)) { # <1> 82 | layer_activation <- activations[[layer_name]] # <1> 83 | 84 | n_features <- dim(layer_activation)[[4]] # <2> 85 | 86 | par(mfrow = n2mfrow(n_features, asp = 1.75), # <3> 87 | mar = rep(.1, 4), oma = c(0, 0, 1.5, 0)) # <3> 88 | 89 | for (j in 1:n_features) # <4> 90 | plot_activations(layer_activation[, , , j]) # <4> 91 | title(main = layer_name, outer = TRUE) # <5> 92 | } 93 | 94 | 95 | model <- application_xception( 96 | weights = "imagenet", 97 | include_top = FALSE # <1> 98 | ) 99 | 100 | 101 | unlist(lapply(model$layers, \(layer) { 102 | if (inherits(layer, keras$layers$Conv2D) || 103 | inherits(layer, keras$layers$SeparableConv2D)) 104 | layer$name 105 | })) 106 | 107 | 108 | layer_name <- "block3_sepconv1" # <1> 109 | layer <- model |> get_layer(name = layer_name) # <2> 110 | feature_extractor <- 111 | keras_model(inputs = model$input, # <3> 112 | outputs = layer$output) 113 | 114 | 115 | activation <- img |> 116 | application_preprocess_inputs(model = model) |> 117 | feature_extractor() 118 | 119 | 120 | compute_loss <- function(image, filter_index) { # <1> 121 | activation <- feature_extractor(image) 122 | filter_activation <- activation@r[, 3:-3, 3:-3, filter_index] # <2> 123 | op_mean(filter_activation) # <3> 124 | } 125 | 126 | 127 | model <- application_xception(weights = "imagenet", include_top = FALSE) 128 | 129 | layer_name <- "block3_sepconv1" 130 | layer <- model |> get_layer(name = layer_name) 131 | feature_extractor <- keras_model(inputs = model$input, outputs = layer$output) 132 | 133 | compute_loss <- function(image, filter_index) { 134 | activation <- feature_extractor(image) 135 | filter_activation <- activation@r[, 3:-3, 3:-3, filter_index] 136 | op_mean(filter_activation) 137 | } 138 | 139 | 140 | jax <- reticulate::import("jax") 141 | 142 | grad_fn <- jax$grad(compute_loss) 143 | 144 | gradient_ascent_step <- jax$jit(function(image, filter_index, learning_rate) { 145 | grads <- grad_fn(image, filter_index) 146 | grads <- op_normalize(grads) 147 | image + (learning_rate * grads) 148 | }) 149 | 150 | 151 | img_height <- img_width <- 200 152 | 153 | generate_filter_pattern <- function(filter_index) { 154 | iterations <- 30 # <1> 155 | learning_rate <- 10 # <2> 156 | image <- random_uniform( # <3> 157 | minval = 0.4, maxval = 0.6, # <3> 158 | shape = shape(1, img_width, img_height, 3) # <3> 159 | ) 160 | 161 | for (i in seq(iterations)) # <4> 162 | image <- gradient_ascent_step(image, filter_index, learning_rate) # <4> 163 | 164 | image 165 | } 166 | 167 | 168 | deprocess_image <- function(image, crop = TRUE) { 169 | image <- op_squeeze(image, axis = 1) # <1> 170 | image <- image - op_mean(image) # <2> 171 | image <- image / op_std(image) # <2> 172 | image <- (image * 64) + 128 # <2> 173 | image <- op_clip(image, 0, 255) # <2> 174 | if (crop) { 175 | image <- image@r[26:-26, 26:-26, ] # <3> 176 | } 177 | op_cast(image, "uint8") 178 | } 179 | 180 | 181 | generate_filter_pattern(filter_index = 2L) |> 182 | deprocess_image() |> 183 | display_image() 184 | 
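# The block below repeats the gradient-ascent visualization for the first 64
# filters of the target layer and tiles the resulting patterns in an 8 x 8 grid.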
185 | 186 | par(mfrow = c(8, 8)) 187 | for (i in seq_len(64)) { 188 | generate_filter_pattern(filter_index = i) |> 189 | deprocess_image() |> 190 | display_image(margin = .1) 191 | } 192 | 193 | 194 | model <- application_xception(weights = "imagenet") # <1> 195 | preprocess_input <- application_preprocess_inputs(model) 196 | 197 | 198 | img_path <- get_file( 199 | fname = "elephant.jpg", # <1> 200 | origin = "https://img-datasets.s3.amazonaws.com/elephant.jpg" # <1> 201 | ) 202 | 203 | get_image_tensor <- function(img_path, target_size = NULL) { 204 | img_path |> 205 | image_load(target_size = target_size) |> # <2> 206 | image_to_array(dtype = "float32") |> # <3> 207 | op_expand_dims(1) |> # <4> 208 | preprocess_input() # <5> 209 | } 210 | 211 | image_tensor <- get_image_tensor(img_path, target_size = c(299, 299)) 212 | 213 | 214 | preds <- predict(model, image_tensor) 215 | application_decode_predictions(model, preds, top = 3) 216 | 217 | 218 | which.max(preds[1, ]) 219 | 220 | 221 | last_conv_layer_name <- "block14_sepconv2_act" 222 | classifier_layer_names <- c("avg_pool", "predictions") 223 | last_conv_layer <- model |> get_layer(last_conv_layer_name) 224 | last_conv_layer_model <- keras_model(model$inputs, 225 | last_conv_layer$output) 226 | 227 | 228 | classifier_input <- keras_input(batch_shape = last_conv_layer$output$shape) 229 | 230 | x <- classifier_input 231 | for (layer_name in classifier_layer_names) 232 | x <- get_layer(model, layer_name)(x) 233 | 234 | classifier_model <- keras_model(classifier_input, x) 235 | 236 | 237 | torch <- reticulate::import("jax") 238 | 239 | loss_fn <- function(last_conv_layer_output) { # <1> 240 | preds <- classifier_model(last_conv_layer_output) 241 | top_pred_index <- op_argmax(preds@r[1]) 242 | top_class_channel <- preds[, top_pred_index] 243 | top_class_channel@r[1] # <2> 244 | } 245 | grad_fn <- jax$grad(loss_fn) # <3> 246 | 247 | get_top_class_gradients <- function(image_tensor) { 248 | last_conv_layer_output <- last_conv_layer_model(image_tensor) 249 | grads <- grad_fn(last_conv_layer_output) # <4> 250 | list(grads, last_conv_layer_output) 251 | } 252 | 253 | 254 | .[grads, last_conv_layer_output] <- get_top_class_gradients(image_tensor) 255 | 256 | pooled_grads <- op_mean(grads, axis = c(1, 2, 3), keepdims = TRUE) # <1> 257 | output <- last_conv_layer_output * pooled_grads # <2> 258 | heatmap <- op_mean(output@r[1], axis = -1) # <3> 259 | 260 | 261 | plot_activations(heatmap) 262 | 263 | 264 | palette <- hcl.colors(256, palette = "Spectral", alpha = .4, rev = TRUE) 265 | heatmap <- as.array(heatmap) 266 | heatmap[] <- palette[cut(heatmap, 256)] 267 | heatmap <- as.raster(heatmap) 268 | 269 | img <- image_load(img_path) |> image_to_array() 270 | display_image(img) 271 | rasterImage(heatmap, 0, 0, ncol(img), nrow(img), interpolate = FALSE) 272 | 273 | 274 | 275 | -------------------------------------------------------------------------------- /chapter11_image-segmentation.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("XLA_PYTHON_CLIENT_PREALLOCATE"="false") 2 | Sys.setenv("XLA_PYTHON_CLIENT_MEM_FRACTION"="1") #.xx fraction 3 | 4 | library(reticulate) 5 | library(fs) 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(tensorflow, exclude = c("shape", "set_random_seed")) 8 | library(tfdatasets, exclude = "shape") 9 | library(keras3) 10 | 11 | py_require("keras-hub==0.18.1") 12 | py_require("keras-hub") 13 | display_image <- function(x, ..., max = 255, margin = 0) { 14 | par(mar = 
rep(margin, 4)) 15 | 16 | x |> as.array() |> drop() |> 17 | as.raster(max = max) |> 18 | plot(..., interpolate = FALSE) 19 | } 20 | 21 | 22 | library(fs) 23 | data_dir <- path("pets_dataset") 24 | dir_create(data_dir) 25 | 26 | 27 | library(dplyr, warn.conflicts = FALSE) 28 | input_dir <- data_dir / "images" 29 | target_dir <- data_dir / "annotations/trimaps/" 30 | 31 | all_image_paths <- tibble( 32 | input = sort(dir_ls(input_dir, glob = "*.jpg")), 33 | target = sort(dir_ls(target_dir, glob = "*.png", all = FALSE)) # <1> 34 | ) 35 | 36 | 37 | glimpse(all_image_paths) 38 | 39 | 40 | display_image <- function(x, ..., max = 255, margin = 0) { 41 | par(mar = rep(margin, 4)) 42 | 43 | x |> as.array() |> drop() |> 44 | as.raster(max = max) |> 45 | plot(..., interpolate = FALSE) 46 | } 47 | 48 | 49 | all_image_paths$input[10] |> # <1> 50 | image_load() |> image_to_array() |> 51 | display_image() 52 | 53 | 54 | display_target <- function(target, ..., offset = TRUE) { 55 | if (offset) 56 | target <- target - 1L 57 | display_image(target, max = 2L, ...) 58 | } 59 | 60 | all_image_paths$target[10] |> 61 | image_load(color_mode = "grayscale") |> image_to_array() |> # <1> 62 | display_target() 63 | 64 | 65 | library(tfdatasets, exclude = "shape") 66 | 67 | img_size <- shape(200, 200) 68 | 69 | tf_image_load <- function(path, target_size = NULL, ...) { 70 | img <- path |> 71 | tf$io$read_file() |> 72 | tf$io$decode_image(..., expand_animations = FALSE) 73 | 74 | if (!is.null(target_size)) 75 | img <- img |> tf$image$resize(target_size) 76 | 77 | img 78 | } 79 | 80 | make_dataset <- function(image_paths) { 81 | stopifnot(is.data.frame(image_paths), 82 | names(image_paths) == c("input", "target")) 83 | 84 | image_paths |> 85 | tensor_slices_dataset() |> 86 | dataset_map(function(example_paths) { 87 | .[input_path = input, target_path = target] <- example_paths 88 | 89 | input_image <- input_path |> 90 | tf_image_load(channels = 3L, target_size = img_size) # <1> 91 | 92 | target <- target_path |> 93 | tf_image_load(channels = 1L, target_size = img_size) # <2> 94 | 95 | target <- tf$cast(target, "uint8") - 1L # <3> 96 | 97 | list(input_image, target) 98 | }) |> 99 | dataset_cache() |> # <4> 100 | dataset_shuffle(buffer_size = nrow(image_paths)) |> # <5> 101 | dataset_batch(32) 102 | } 103 | 104 | num_val_samples <- 1000 # <6> 105 | 106 | image_paths <- all_image_paths |> 107 | dplyr::mutate(use = ifelse(sample.int(n()) > num_val_samples, 108 | "train", "val")) |> # <7> 109 | tidyr::nest(.by = use) |> 110 | tibble::deframe() 111 | 112 | image_paths 113 | 114 | .[train_ds = train, val_ds = val] <- image_paths |> lapply(make_dataset) # <8> 115 | 116 | 117 | batch <- train_ds |> as_iterator() |> iter_next() 118 | str(batch) 119 | 120 | .[images, targets] <- batch 121 | par(mfrow = c(4, 8)) 122 | for (i in 1:16) { 123 | images@r[i] |> display_image() 124 | targets@r[i] |> display_target(offset = FALSE) # <1> 125 | } 126 | 127 | 128 | get_model <- function(img_size, num_classes) { 129 | 130 | conv <- function(..., padding = "same", activation = "relu") # <1> 131 | layer_conv_2d(..., padding = padding, activation = activation) 132 | 133 | conv_transpose <- function(..., padding = "same", activation = "relu") # <1> 134 | layer_conv_2d_transpose(..., padding = padding, activation = activation) 135 | 136 | input <- keras_input(shape = c(img_size, 3)) 137 | output <- input |> 138 | layer_rescaling(scale = 1/255) |> # <2> 139 | conv(64, 3, strides = 2) |> 140 | conv(64, 3) |> 141 | conv(128, 3, strides = 2) |> 142 | 
conv(128, 3) |> 143 | conv(256, 3, strides = 2) |> 144 | conv(256, 3) |> 145 | conv_transpose(256, 3) |> 146 | conv_transpose(256, 3, strides = 2) |> 147 | conv_transpose(128, 3) |> 148 | conv_transpose(128, 3, strides = 2) |> 149 | conv_transpose(64, 3) |> 150 | conv_transpose(64, 3, strides = 2) |> 151 | conv(num_classes, 3, activation = "softmax") # <3> 152 | 153 | keras_model(input, output) 154 | } 155 | 156 | model <- get_model(img_size = img_size, num_classes = 3) 157 | model 158 | 159 | 160 | foreground_iou <- metric_iou( 161 | num_classes = 3, # <1> 162 | target_class_ids = c(0), # <2> 163 | name = "foreground_iou", 164 | sparse_y_true = TRUE, # <3> 165 | sparse_y_pred = FALSE, # <4> 166 | ) 167 | 168 | 169 | model |> compile( 170 | optimizer = "adam", 171 | loss = "sparse_categorical_crossentropy", 172 | metrics = foreground_iou 173 | ) 174 | 175 | callbacks <- list( 176 | callback_model_checkpoint("oxford_segmentation.keras", 177 | save_best_only = TRUE) 178 | ) 179 | 180 | 181 | history <- readRDS("ch11-history.rds") 182 | 183 | 184 | plot(history, metrics = "loss") 185 | 186 | 187 | model <- load_model("oxford_segmentation.keras") 188 | 189 | 190 | for (i in 1:10) { 191 | 192 | test_image <- image_paths$val$input[i] |> 193 | tf_image_load(channels = 3L, target_size = img_size) 194 | 195 | test_mask <- image_paths$val$target[i] |> 196 | tf_image_load(channels = 1L, target_size = img_size) |> 197 | tf$subtract(1) 198 | 199 | predicted_mask_probs <- model(test_image@r[newaxis]) 200 | predicted_mask <- op_argmax(predicted_mask_probs, axis = -1) - 1 201 | 202 | par(mfrow = c(1, 3)) 203 | display_image(test_image); title("image") 204 | display_target(predicted_mask, offset = FALSE) 205 | display_target(test_mask, offset = FALSE) 206 | } 207 | 208 | 209 | library(reticulate) 210 | py_require("keras-hub==0.18.1") # <1> 211 | 212 | keras_hub <- import("keras_hub") # <2> 213 | 214 | 215 | model <- keras_hub$models$ImageSegmenter$from_preset("sam_huge_sa1b") 216 | 217 | 218 | count_params(model) 219 | 220 | 221 | path <- get_file( 222 | origin = "https://s3.amazonaws.com/keras.io/img/book/fruits.jpg" # <1> 223 | ) 224 | pil_image <- image_load(path) # <2> 225 | image_array <- image_to_array(pil_image, dtype = "float32") # <3> 226 | display_image(image_array) # <4> 227 | 228 | 229 | image_size <- c(1024, 1024) 230 | 231 | resize_and_pad <- function(x) { 232 | op_image_resize(x, image_size, pad_to_aspect_ratio = TRUE) 233 | } 234 | 235 | image <- resize_and_pad(image_array) 236 | op_shape(image) 237 | 238 | 239 | display_points <- function(coords, color = "white") { 240 | stopifnot(is.matrix(coords), ncol(coords) == 2) 241 | coords[, 2] <- image_size[1] - coords[, 2] # <1> 242 | points(coords, col = color, pch = 8, cex = 2, lwd = 2) 243 | } 244 | 245 | display_mask <- function(mask, index = 1, color = "dodgerblue", alpha = 0.6) { 246 | .[r, g, b] <- col2rgb(color) 247 | color <- rgb(r, g, b, alpha * 255, maxColorValue = 255) 248 | 249 | mask <- mask |> as.array() |> drop() |> _[index, , ] 250 | mask[] <- ifelse(mask > 1, color, rgb(0, 0, 0, 0)) 251 | 252 | .[h, w] <- image_size 253 | rasterImage(mask, 0, 0, h, w, interpolate = FALSE) 254 | } 255 | 256 | display_box <- function(box, ..., color = "red", lwd = 2) { 257 | stopifnot(is.matrix(box), dim(box) == c(2, 2)) 258 | box[, 2] <- image_size[1] - box[, 2] # <1> 259 | rect(xleft = box[1, 1], ytop = box[1, 2], 260 | xright = box[2, 1], ybottom = box[2, 2], 261 | ..., border = color, lwd = 2) 262 | } 263 | 264 | 265 | input_point <- 
rbind(c(580, 480)) # <1> 266 | input_label <- 1 # <2> 267 | 268 | display_image(image) 269 | display_points(input_point) 270 | 271 | 272 | str(model$input) # <1> 273 | 274 | 275 | np <- import("numpy", convert = FALSE) # <1> 276 | 277 | image |> 278 | np_array("float32") |> 279 | np$expand_dims(0L) |> # <2> 280 | str() 281 | 282 | 283 | outputs <- model |> predict(list( 284 | images = image |> np_array("float32") |> np$expand_dims(0L), 285 | points = input_point |> np_array("float32") |> np$expand_dims(0L), 286 | labels = input_label |> np_array("float32") |> np$expand_dims(0L) 287 | )) 288 | 289 | str(outputs) 290 | 291 | 292 | display_image(image) 293 | display_mask(outputs$masks) 294 | display_points(input_point) 295 | 296 | 297 | input_label <- 1 298 | input_point <- rbind(c(300, 550)) 299 | 300 | outputs <- model |> predict(list( 301 | images = image |> np_array("float32") |> np$expand_dims(0L), 302 | points = input_point |> np_array("float32") |> np$expand_dims(0L), 303 | labels = input_label |> np_array("float32") |> np$expand_dims(0L) 304 | )) 305 | 306 | 307 | display_image(image) 308 | display_mask(outputs$masks) 309 | display_points(input_point) 310 | 311 | 312 | par(mfrow = c(1, 3)) 313 | for (i in 2:4) { 314 | display_image(image) 315 | display_mask(outputs$masks, index = i) 316 | display_points(input_point) 317 | } 318 | 319 | 320 | input_box <- rbind(c(520, 180), # <1> 321 | c(770, 420)) # <2> 322 | 323 | display_image(image) 324 | display_box(input_box) 325 | 326 | 327 | outputs <- model |> predict(list( 328 | images = image |> np_array("float32") |> np$expand_dims(0L), 329 | boxes = input_box |> np_array("float32") |> np$expand_dims(c(0L, 1L)) # <1> 330 | )) 331 | 332 | display_image(image) 333 | display_box(input_box) 334 | display_mask(outputs$masks) 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /chapter04_classification-and-regression.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | .[.[train_data, train_labels], .[test_data, test_labels]] <- 5 | dataset_imdb(num_words = 10000) 6 | 7 | 8 | str(train_data) 9 | 10 | 11 | str(train_labels) 12 | 13 | max(sapply(train_data, max)) 14 | 15 | 16 | word_index <- dataset_imdb_word_index() # <1> 17 | 18 | reverse_word_index <- names(word_index) # <2> 19 | names(reverse_word_index) <- as.character(word_index) # <2> 20 | 21 | decoded_words <- train_data[[1]] |> 22 | sapply(function(i) { 23 | if (i > 3) reverse_word_index[[as.character(i - 3)]] # <3> 24 | else "?" 
25 | }) 26 | decoded_review <- paste0(decoded_words, collapse = " ") 27 | 28 | 29 | decoded_review |> substr(1, 200) |> strwrap(70) |> cat(sep = "\n") 30 | 31 | 32 | multi_hot_encode <- function(sequences, num_classes) { 33 | results <- matrix(0, nrow = length(sequences), ncol = num_classes) # <1> 34 | for (i in seq_along(sequences)) { 35 | results[i, sequences[[i]]] <- 1 # <2> 36 | } 37 | results 38 | } 39 | x_train <- multi_hot_encode(train_data, num_classes = 10000) # <3> 40 | x_test <- multi_hot_encode(test_data, num_classes = 10000) # <4> 41 | 42 | 43 | x_train[1, ] |> str() 44 | 45 | 46 | str(x_train) 47 | 48 | y_train <- as.numeric(train_labels) 49 | y_test <- as.numeric(test_labels) 50 | 51 | 52 | model <- keras_model_sequential() |> 53 | layer_dense(16, activation = "relu") |> 54 | layer_dense(16, activation = "relu") |> 55 | layer_dense(1, activation = "sigmoid") 56 | 57 | 58 | sigmoid <- function(x) 1 / (1 + exp(-1 * x)) 59 | withr::with_par(list(pty = "s"), { 60 | plot(sigmoid, -4, 4, 61 | main = "sigmoid", 62 | ylim = c(-1, 2), 63 | ylab = ~ sigmoid(x), xlab = ~ x, 64 | panel.first = grid()) 65 | }) 66 | 67 | 68 | relu <- function(x) pmax(0, x) 69 | withr::with_par(list(pty = "s"), { 70 | plot(relu, -4, 4, 71 | main = "relu", 72 | ylim = c(-1, 2), 73 | ylab = ~ relu(x), xlab = ~ x, 74 | panel.first = grid()) 75 | }) 76 | 77 | 78 | model |> compile( 79 | optimizer = "adam", 80 | loss = "binary_crossentropy", 81 | metrics = c("accuracy") 82 | ) 83 | 84 | 85 | val_indices <- 1:1000 86 | 87 | x_val <- x_train[val_indices, ] 88 | partial_x_train <- x_train[-val_indices, ] 89 | 90 | y_val <- y_train[val_indices] 91 | partial_y_train <- y_train[-val_indices] 92 | 93 | 94 | history <- model |> fit( 95 | partial_x_train, partial_y_train, 96 | epochs = 20, 97 | batch_size = 512, 98 | validation_data = list(x_val, y_val) 99 | ) 100 | 101 | 102 | history <- model |> fit( 103 | x_train, y_train, 104 | epochs = 20, 105 | batch_size = 512, 106 | validation_split = 0.2 107 | ) 108 | 109 | 110 | str(history$metrics) 111 | 112 | 113 | library(ggplot2) 114 | plot(history) + ggtitle("[IMDB] Training history") 115 | 116 | 117 | model <- keras_model_sequential() |> 118 | layer_dense(16, activation = "relu") |> 119 | layer_dense(16, activation = "relu") |> 120 | layer_dense(1, activation = "sigmoid") 121 | 122 | model |> compile( 123 | optimizer = "adam", 124 | loss = "binary_crossentropy", 125 | metrics = "accuracy" 126 | ) 127 | 128 | model |> fit(x_train, y_train, epochs = 4, batch_size = 512) 129 | 130 | results <- model |> evaluate(x_test, y_test) 131 | 132 | 133 | results 134 | 135 | 136 | preds <- model |> predict(x_test) 137 | str(preds) 138 | 139 | 140 | .[.[train_data, train_labels], .[test_data, test_labels]] <- 141 | dataset_reuters(num_words = 10000) 142 | 143 | 144 | str(train_data) 145 | 146 | 147 | str(test_data) 148 | 149 | 150 | train_data[[11]] |> str() 151 | 152 | 153 | word_index <- dataset_reuters_word_index() 154 | reverse_word_index <- setNames(object = names(word_index), 155 | nm = unlist(word_index) - 3L) # <1> 156 | decoded_newswire <- train_data[[1]] |> 157 | sapply(function(i) { 158 | if (i > 3) reverse_word_index[[as.character(i)]] 159 | else "?" 
160 | }) |> 161 | paste0(collapse = " ") 162 | decoded_newswire 163 | 164 | 165 | train_labels[[11]] 166 | 167 | 168 | x_train <- multi_hot_encode(train_data, num_classes = 10000) # <1> 169 | x_test <- multi_hot_encode(test_data, num_classes = 10000) # <2> 170 | 171 | 172 | one_hot_encode <- function(labels, num_classes = 46) { 173 | results <- matrix(0, nrow = length(labels), ncol = num_classes) 174 | for (i in seq_along(labels)) { 175 | label_position <- labels[[i]] + 1 # <1> 176 | results[i, label_position] <- 1 177 | } 178 | results 179 | } 180 | 181 | y_train <- one_hot_encode(train_labels) # <2> 182 | y_test <- one_hot_encode(test_labels) # <3> 183 | 184 | 185 | y_train <- to_categorical(train_labels) 186 | y_test <- to_categorical(test_labels) 187 | 188 | 189 | model <- keras_model_sequential() |> 190 | layer_dense(64, activation = "relu") |> 191 | layer_dense(64, activation = "relu") |> 192 | layer_dense(46, activation = "softmax") 193 | 194 | 195 | model |> compile( 196 | optimizer = "adam", 197 | loss = "categorical_crossentropy", 198 | metrics = c("accuracy", metric_top_k_categorical_accuracy(k = 3)) 199 | ) 200 | 201 | 202 | val_indices <- 1:1000 203 | 204 | x_val <- x_train[val_indices,] 205 | partial_x_train <- x_train[-val_indices,] 206 | 207 | y_val <- y_train[val_indices,] 208 | partial_y_train <- y_train[-val_indices,] 209 | 210 | 211 | history <- model |> fit( 212 | partial_x_train, partial_y_train, 213 | epochs = 20, 214 | batch_size = 512, 215 | validation_data = list(x_val, y_val) 216 | ) 217 | 218 | 219 | plot(history) + ggtitle("Training and validation metrics") 220 | 221 | 222 | model <- keras_model_sequential() |> 223 | layer_dense(64, activation = "relu") |> 224 | layer_dense(64, activation = "relu") |> 225 | layer_dense(46, activation = "softmax") 226 | 227 | model |> compile( 228 | optimizer = "adam", 229 | loss = "categorical_crossentropy", 230 | metrics = "accuracy" 231 | ) 232 | 233 | model |> fit(x_train, y_train, epochs = 9, batch_size = 512) 234 | 235 | results <- model |> evaluate(x_test, y_test) 236 | 237 | 238 | results 239 | 240 | 241 | mean(test_labels == sample(test_labels)) 242 | 243 | 244 | predictions <- model |> predict(x_test) 245 | 246 | 247 | str(predictions) 248 | 249 | 250 | sum(predictions[1, ]) 251 | 252 | 253 | which.max(predictions[1,]) 254 | 255 | 256 | y_train <- train_labels 257 | y_test <- test_labels 258 | 259 | 260 | model |> compile( 261 | optimizer = "adam", 262 | loss = "sparse_categorical_crossentropy", 263 | metrics = "accuracy" 264 | ) 265 | 266 | 267 | model <- keras_model_sequential() |> 268 | layer_dense(64, activation = "relu") |> 269 | layer_dense(4, activation = "relu") |> 270 | layer_dense(46, activation = "softmax") 271 | 272 | model |> compile( 273 | optimizer = "adam", 274 | loss = "categorical_crossentropy", 275 | metrics = "accuracy" 276 | ) 277 | model |> fit( 278 | partial_x_train, partial_y_train, 279 | epochs = 20, 280 | batch_size = 128, 281 | validation_data = list(x_val, y_val) 282 | ) 283 | 284 | 285 | .[.[train_data, train_targets], .[test_data, test_targets]] <- 286 | dataset_california_housing(version = "small") # <1> 287 | 288 | 289 | str(train_data) 290 | 291 | 292 | str(test_data) 293 | 294 | 295 | train_mean <- apply(train_data, 2, mean) 296 | train_sd <- apply(train_data, 2, sd) 297 | x_train <- scale(train_data, center = train_mean, scale = train_sd) 298 | x_test <- scale(test_data, center = train_mean, scale = train_sd) 299 | 300 | 301 | y_train <- train_targets / 100000 302 | y_test <- 
test_targets / 100000 303 | 304 | 305 | get_model <- function() { # <1> 306 | model <- keras_model_sequential() |> 307 | layer_dense(64, activation = "relu") |> 308 | layer_dense(64, activation = "relu") |> 309 | layer_dense(1) 310 | model |> compile( 311 | optimizer = "adam", 312 | loss = "mean_squared_error", 313 | metrics = "mean_absolute_error" 314 | ) 315 | model 316 | } 317 | 318 | 319 | k <- 4 320 | fold_id <- sample(rep(1:k, length.out = nrow(train_data))) 321 | num_epochs <- 50 322 | all_scores <- numeric(k) 323 | 324 | for (i in 1:k) { 325 | cat(sprintf("Processing fold #%i\n", i)) 326 | 327 | fold_val_indices <- which(fold_id == i) 328 | fold_x_val <- x_train[fold_val_indices, ] # <1> 329 | fold_y_val <- y_train[fold_val_indices] # <1> 330 | fold_x_train <- x_train[-fold_val_indices, ] # <2> 331 | fold_y_train <- y_train[-fold_val_indices] # <2> 332 | 333 | model <- get_model() # <3> 334 | model |> fit( # <4> 335 | fold_x_train, fold_y_train, 336 | epochs = num_epochs, batch_size = 16, verbose = 0 337 | ) 338 | results <- model |> evaluate(fold_x_val, fold_y_val, verbose = 0) # <5> 339 | all_scores[i] <- results$mean_absolute_error 340 | } 341 | 342 | 343 | round(all_scores, 3) 344 | 345 | mean(all_scores) 346 | 347 | 348 | k <- 4 349 | num_epochs <- 200 350 | all_mae_histories <- list() 351 | 352 | for (i in 1:k) { 353 | cat(sprintf("Processing fold #%i\n", i)) 354 | 355 | fold_val_indices <- which(fold_id == i) # <1> 356 | fold_x_val <- x_train[fold_val_indices, ] # <1> 357 | fold_y_val <- y_train[fold_val_indices] # <1> 358 | fold_x_train <- x_train[-fold_val_indices, ] # <2> 359 | fold_y_train <- y_train[-fold_val_indices] # <2> 360 | 361 | model <- get_model() # <3> 362 | history <- model |> fit( # <4> 363 | fold_x_train, fold_y_train, 364 | validation_data = list(fold_x_val, fold_y_val), 365 | epochs = num_epochs, batch_size = 16, verbose = 0 366 | ) 367 | mae_history <- history$metrics$val_mean_absolute_error 368 | all_mae_histories[[i]] <- mae_history 369 | } 370 | 371 | all_mae_histories <- do.call(cbind, all_mae_histories) 372 | 373 | 374 | average_mae_history <- rowMeans(all_mae_histories) 375 | 376 | 377 | plot(average_mae_history, ylab = "Validation MAE", xlab = "Epoch", type = 'l') 378 | 379 | 380 | truncated_mae_history <- average_mae_history[-(1:10)] 381 | plot(average_mae_history, xlab = "epoch", type = 'l', 382 | ylim = range(truncated_mae_history)) 383 | 384 | 385 | model <- get_model() # <1> 386 | model |> fit(x_train, y_train, # <2> 387 | epochs = 130, batch_size = 16, verbose = 0) 388 | .[test_mean_squared_error, test_mean_absolute_error] <- 389 | model |> evaluate(x_test, y_test) 390 | 391 | 392 | test_mean_absolute_error 393 | 394 | 395 | predictions <- model |> predict(x_test) 396 | predictions[1, ] 397 | 398 | -------------------------------------------------------------------------------- /2e/ch14.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include=FALSE------------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tensorflow::as_tensor(1) 5 | 6 | 7 | ## ---- include = FALSE----------------------------------------------------- 8 | ## Define objects so all the visible code is runnable 9 | num_classes <- 10 10 | num_values <- 4 11 | num_inputs_features <- 20 12 | num_timesteps <- 20 13 | height <- width <- 200 14 | channels <- 3 15 | num_features <- 20 16 | sequence_length <- 200 17 | embed_dim <- 256 18 | dense_dim <- 32 19 | num_heads <- 4 20 | vocab_size <-
15000 21 | 22 | layer_transformer_encoder <- new_layer_class( 23 | classname = "TransformerEncoder", 24 | initialize = function(embed_dim, dense_dim, num_heads, ...) { 25 | super$initialize(...) 26 | self$embed_dim <- embed_dim 27 | self$dense_dim <- dense_dim 28 | self$num_heads <- num_heads 29 | self$attention <- 30 | layer_multi_head_attention(num_heads = num_heads, 31 | key_dim = embed_dim) 32 | 33 | self$dense_proj <- keras_model_sequential() %>% 34 | layer_dense(dense_dim, activation = "relu") %>% 35 | layer_dense(embed_dim) 36 | 37 | self$layernorm_1 <- layer_layer_normalization() 38 | self$layernorm_2 <- layer_layer_normalization() 39 | }, 40 | 41 | call = function(inputs, mask = NULL) { 42 | if (!is.null(mask)) 43 | mask <- mask[, tf$newaxis, ] 44 | 45 | inputs %>% 46 | { self$attention(., ., attention_mask = mask) + . } %>% 47 | self$layernorm_1() %>% 48 | { self$dense_proj(.) + . } %>% 49 | self$layernorm_2() 50 | }, 51 | 52 | get_config = function() { 53 | config <- super$get_config() 54 | for(name in c("embed_dim", "num_heads", "dense_dim")) 55 | config[[name]] <- self[[name]] 56 | config 57 | } 58 | ) 59 | 60 | layer_transformer_decoder <- new_layer_class( 61 | classname = "TransformerDecoder", 62 | 63 | initialize = function(embed_dim, dense_dim, num_heads, ...) { 64 | super$initialize(...) 65 | self$embed_dim <- embed_dim 66 | self$dense_dim <- dense_dim 67 | self$num_heads <- num_heads 68 | self$attention_1 <- layer_multi_head_attention(num_heads = num_heads, 69 | key_dim = embed_dim) 70 | self$attention_2 <- layer_multi_head_attention(num_heads = num_heads, 71 | key_dim = embed_dim) 72 | self$dense_proj <- keras_model_sequential() %>% 73 | layer_dense(dense_dim, activation = "relu") %>% 74 | layer_dense(embed_dim) 75 | 76 | self$layernorm_1 <- layer_layer_normalization() 77 | self$layernorm_2 <- layer_layer_normalization() 78 | self$layernorm_3 <- layer_layer_normalization() 79 | self$supports_masking <- TRUE 80 | }, 81 | 82 | get_config = function() { 83 | config <- super$get_config() 84 | for (name in c("embed_dim", "num_heads", "dense_dim")) 85 | config[[name]] <- self[[name]] 86 | config 87 | }, 88 | 89 | get_causal_attention_mask = function(inputs) { 90 | c(batch_size, sequence_length, encoding_length) %<-% 91 | tf$unstack(tf$shape(inputs)) 92 | 93 | x <- tf$range(sequence_length) 94 | i <- x[, tf$newaxis] 95 | j <- x[tf$newaxis, ] 96 | mask <- tf$cast(i >= j, "int32") 97 | 98 | tf$tile(mask[tf$newaxis, , ], 99 | tf$stack(c(batch_size, 1L, 1L))) 100 | }, 101 | 102 | call = function(inputs, encoder_outputs, mask = NULL) { 103 | 104 | causal_mask <- self$get_causal_attention_mask(inputs) 105 | 106 | if (is.null(mask)) 107 | mask <- causal_mask 108 | else 109 | mask %<>% { tf$minimum(tf$cast(.[, tf$newaxis, ], "int32"), 110 | causal_mask) } 111 | 112 | inputs %>% 113 | { self$attention_1(query = ., value = ., key = ., 114 | attention_mask = causal_mask) + . } %>% 115 | self$layernorm_1() %>% 116 | 117 | { self$attention_2(query = ., 118 | value = encoder_outputs, 119 | key = encoder_outputs, 120 | attention_mask = mask) + . } %>% 121 | self$layernorm_2() %>% 122 | 123 | { self$dense_proj(.) + . } %>% 124 | self$layernorm_3() 125 | 126 | } 127 | ) 128 | 129 | layer_positional_embedding <- new_layer_class( 130 | classname = "PositionalEmbedding", 131 | 132 | initialize = function(sequence_length, input_dim, output_dim, ...) { 133 | super$initialize(...) 
134 | self$token_embeddings <- 135 | layer_embedding(input_dim = input_dim, 136 | output_dim = output_dim) 137 | self$position_embeddings <- 138 | layer_embedding(input_dim = sequence_length, 139 | output_dim = output_dim) 140 | self$sequence_length <- sequence_length 141 | self$input_dim <- input_dim 142 | self$output_dim <- output_dim 143 | }, 144 | 145 | call = function(inputs) { 146 | length <- tf$shape(inputs)[-1] 147 | positions <- tf$range(start = 0L, limit = length, delta = 1L) 148 | embedded_tokens <- self$token_embeddings(inputs) 149 | embedded_positions <- self$position_embeddings(positions) 150 | embedded_tokens + embedded_positions 151 | }, 152 | 153 | compute_mask = function(inputs, mask = NULL) { 154 | inputs != 0 155 | }, 156 | 157 | get_config = function() { 158 | config <- super$get_config() 159 | for(name in c("output_dim", "sequence_length", "input_dim")) 160 | config[[name]] <- self[[name]] 161 | config 162 | } 163 | ) 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | inputs <- layer_input(shape = c(num_inputs_features)) 168 | outputs <- inputs %>% 169 | layer_dense(32, activation = "relu") %>% 170 | layer_dense(32, activation = "relu") %>% 171 | layer_dense(1, activation = "sigmoid") 172 | model <- keras_model(inputs, outputs) 173 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 174 | 175 | 176 | ## ------------------------------------------------------------------------- 177 | inputs <- layer_input(shape = c(num_inputs_features)) 178 | outputs <- inputs %>% 179 | layer_dense(32, activation = "relu") %>% 180 | layer_dense(32, activation = "relu") %>% 181 | layer_dense(num_classes, activation = "softmax") 182 | model <- keras_model(inputs, outputs) 183 | model %>% compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 184 | 185 | 186 | ## ------------------------------------------------------------------------- 187 | inputs <- layer_input(shape = c(num_inputs_features)) 188 | outputs <- inputs %>% 189 | layer_dense(32, activation = "relu") %>% 190 | layer_dense(32, activation = "relu") %>% 191 | layer_dense(num_classes, activation = "sigmoid") 192 | model <- keras_model(inputs, outputs) 193 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 194 | 195 | 196 | ## ------------------------------------------------------------------------- 197 | inputs <- layer_input(shape = c(num_inputs_features)) 198 | outputs <- inputs %>% 199 | layer_dense(32, activation = "relu") %>% 200 | layer_dense(32, activation = "relu") %>% 201 | layer_dense(num_values) 202 | model <- keras_model(inputs, outputs) 203 | model %>% compile(optimizer = "rmsprop", loss = "mse") 204 | 205 | 206 | ## ------------------------------------------------------------------------- 207 | inputs <- layer_input(shape = c(height, width, channels)) 208 | outputs <- inputs %>% 209 | layer_separable_conv_2d(32, 3, activation = "relu") %>% 210 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 211 | layer_max_pooling_2d(2) %>% 212 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 213 | layer_separable_conv_2d(128, 3, activation = "relu") %>% 214 | layer_max_pooling_2d(2) %>% 215 | layer_separable_conv_2d(64, 3, activation = "relu") %>% 216 | layer_separable_conv_2d(128, 3, activation = "relu") %>% 217 | layer_global_average_pooling_2d() %>% 218 | layer_dense(32, activation = "relu") %>% 219 | layer_dense(num_classes, activation = "softmax") 220 | model <- keras_model(inputs, outputs) 221 | model %>%
compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 222 | 223 | 224 | ## ------------------------------------------------------------------------- 225 | inputs <- layer_input(shape = c(num_timesteps, num_features)) 226 | outputs <- inputs %>% 227 | layer_lstm(32) %>% 228 | layer_dense(num_classes, activation = "sigmoid") 229 | model <- keras_model(inputs, outputs) 230 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 231 | 232 | 233 | ## ------------------------------------------------------------------------- 234 | inputs <- layer_input(shape = c(num_timesteps, num_features)) 235 | outputs <- inputs %>% 236 | layer_lstm(32, return_sequences = TRUE) %>% 237 | layer_lstm(32, return_sequences = TRUE) %>% 238 | layer_lstm(32) %>% 239 | layer_dense(num_classes, activation = "sigmoid") 240 | model <- keras_model(inputs, outputs) 241 | model %>% compile(optimizer = "rmsprop", loss = "binary_crossentropy") 242 | 243 | 244 | ## ------------------------------------------------------------------------- 245 | encoder_inputs <- layer_input(shape = c(sequence_length), dtype = "int64") 246 | encoder_outputs <- encoder_inputs %>% 247 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 248 | layer_transformer_encoder(embed_dim, dense_dim, num_heads) 249 | 250 | decoder <- layer_transformer_decoder(NULL, embed_dim, dense_dim, num_heads) 251 | decoder_inputs <- layer_input(shape = c(NA), dtype = "int64") 252 | decoder_outputs <- decoder_inputs %>% 253 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 254 | decoder(., encoder_outputs) %>% 255 | layer_dense(vocab_size, activation = "softmax") 256 | 257 | transformer <- keras_model(list(encoder_inputs, decoder_inputs), 258 | decoder_outputs) 259 | transformer %>% 260 | compile(optimizer = "rmsprop", loss = "categorical_crossentropy") 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | inputs <- layer_input(shape=c(sequence_length), dtype="int64") 265 | outputs <- inputs %>% 266 | layer_positional_embedding(sequence_length, vocab_size, embed_dim) %>% 267 | layer_transformer_encoder(embed_dim, dense_dim, num_heads) %>% 268 | layer_global_max_pooling_1d() %>% 269 | layer_dense(1, activation = "sigmoid") 270 | model <- keras_model(inputs, outputs) 271 | model %>% compile(optimizer="rmsprop", loss="binary_crossentropy") 272 | 273 | 274 | 275 | ## ---- eval = FALSE-------------------------------------------------------- 276 | ## # Instance 277 | ## l <- as.list(obj) 278 | ## #---- 279 | ## l_sum <- 0 280 | ## l_entries <- 0 281 | ## for (e in l) { 282 | ## if (!is.null(e)) { 283 | ## l_sum <- l_sum + e 284 | ## l_entries <- l_entries + 1 285 | ## } 286 | ## } 287 | ## avg <- l_sum / l_entries 288 | ## #----- 289 | ## print('avg:', avg) 290 | ## 291 | ## 292 | ## # Instance 293 | ## my_list <- get_data() 294 | ## ---- 295 | ## total <- 0 296 | ## num_elems <- 0 297 | ## for (n in my_list) { 298 | ## if (!is.null(n)) { 299 | ## total <- total + n 300 | ## num_elems <- num_elems + 1 301 | ## } 302 | ## } 303 | ## mean <- total / num_elems 304 | ## ---- 305 | ## update_mean(mean) 306 | ## 307 | ## 308 | ## # Shared abstraction 309 | ## compute_mean <- function(x) { 310 | ## num_elems <- total <- 0 311 | ## for (e in x) 312 | ## if (!is.null(e)) { 313 | ## total <- total + e 314 | ## num_elems <- num_elems + 1 315 | ## } 316 | ## total / num_elems 317 | ## } 318 | --------------------------------------------------------------------------------
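## Added sanity-check sketch (not book code): it assumes only the tensorflow R package
## with eager execution, as set up at the top of 2e/ch14.R, and mirrors the expressions
## used inside TransformerDecoder$get_causal_attention_mask() above. The causal mask is
## a lower-triangular 0/1 matrix, so each position attends only to itself and to
## earlier positions.
library(tensorflow)
x <- tf$range(4L)                                                   # positions 0..3
causal_mask <- tf$cast(x[, tf$newaxis] >= x[tf$newaxis, ], "int32") # shape (4, 4)
as.array(causal_mask)                                               # lower-triangular matrix of 1s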
/chapter05_fundamentals-of-ml.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | library(keras3) 5 | 6 | .[.[train_images, train_labels], .] <- dataset_mnist() 7 | train_images <- array_reshape(train_images / 255, c(60000, 28 * 28)) 8 | 9 | runif_array <- \(dim) array(runif(prod(dim)), dim) 10 | 11 | noise_channels <- runif_array(dim(train_images)) 12 | train_images_with_noise_channels <- cbind(train_images, noise_channels) 13 | 14 | zeros_channels <- array(0, dim(train_images)) 15 | train_images_with_zeros_channels <- cbind(train_images, zeros_channels) 16 | 17 | 18 | get_model <- function() { 19 | model <- keras_model_sequential() |> 20 | layer_dense(512, activation = "relu") |> 21 | layer_dense(10, activation = "softmax") 22 | 23 | model |> compile( 24 | optimizer = "adam", 25 | loss = "sparse_categorical_crossentropy", 26 | metrics = c("accuracy") 27 | ) 28 | 29 | model 30 | } 31 | 32 | model <- get_model() 33 | history_noise <- model |> fit( 34 | train_images_with_noise_channels, train_labels, 35 | epochs = 10, 36 | batch_size = 128, 37 | validation_split = 0.2 38 | ) 39 | 40 | model <- get_model() 41 | history_zeros <- model |> fit( 42 | train_images_with_zeros_channels, train_labels, 43 | epochs = 10, 44 | batch_size = 128, 45 | validation_split = 0.2 46 | ) 47 | 48 | 49 | plot(NULL, 50 | main = "Effect of Noise Channels on Validation Accuracy", 51 | xlab = "Epochs", xlim = c(1, history_noise$params$epochs), 52 | ylab = "Validation Accuracy", ylim = c(0.9, 1), las = 1) 53 | lines(history_zeros$metrics$val_accuracy, lty = 1, type = "o") 54 | lines(history_noise$metrics$val_accuracy, lty = 2, type = "o") 55 | legend("bottomright", lty = 1:2, 56 | legend = c("Validation accuracy with zeros channels", 57 | "Validation accuracy with noise channels")) 58 | 59 | 60 | .[.[train_images, train_labels], .] <- dataset_mnist() 61 | train_images <- array_reshape(train_images / 255, 62 | c(60000, 28 * 28)) 63 | 64 | random_train_labels <- sample(train_labels) # <1> 65 | 66 | model <- keras_model_sequential() |> 67 | layer_dense(512, activation = "relu") |> 68 | layer_dense(10, activation = "softmax") 69 | 70 | model |> compile(optimizer = "rmsprop", 71 | loss = "sparse_categorical_crossentropy", 72 | metrics = "accuracy") 73 | 74 | history <- model |> fit( 75 | train_images, random_train_labels, 76 | epochs = 100, batch_size = 128, 77 | validation_split = 0.2 78 | ) 79 | 80 | 81 | .[.[train_images, train_labels], .] 
<- dataset_mnist() 82 | train_images <- array_reshape(train_images / 255, 83 | c(60000, 28 * 28)) 84 | 85 | model <- keras_model_sequential() |> 86 | layer_dense(512, activation = "relu") |> 87 | layer_dense(10, activation = "softmax") 88 | 89 | model |> compile( 90 | optimizer = optimizer_rmsprop(1), 91 | loss = "sparse_categorical_crossentropy", 92 | metrics = "accuracy" 93 | ) 94 | 95 | history <- model |> fit( 96 | train_images, train_labels, 97 | epochs = 10, batch_size = 128, 98 | validation_split = 0.2 99 | ) 100 | 101 | 102 | model <- keras_model_sequential() |> 103 | layer_dense(512, activation = "relu") |> 104 | layer_dense(10, activation = "softmax") 105 | 106 | model |> compile( 107 | optimizer = optimizer_rmsprop(1e-2), 108 | loss = "sparse_categorical_crossentropy", 109 | metrics = "accuracy" 110 | ) 111 | 112 | history <- model |> fit( 113 | train_images, train_labels, 114 | epochs = 10, batch_size = 128, 115 | validation_split = 0.2 116 | ) 117 | 118 | 119 | model <- keras_model_sequential() |> 120 | layer_dense(10, activation = "softmax") 121 | 122 | model |> compile( 123 | optimizer = "rmsprop", 124 | loss = "sparse_categorical_crossentropy", 125 | metrics = "accuracy" 126 | ) 127 | 128 | history_small_model <- model |> fit( 129 | train_images, train_labels, 130 | epochs = 20, 131 | batch_size = 128, 132 | validation_split = 0.2 133 | ) 134 | 135 | 136 | plot(history_small_model$metrics$val_loss, type = 'o', 137 | main = "Effect of Insufficient Model Capacity on Validation Loss", 138 | xlab = "Epochs", ylab = "Validation Loss") 139 | 140 | 141 | model <- keras_model_sequential() |> 142 | layer_dense(128, activation="relu") |> 143 | layer_dense(128, activation="relu") |> 144 | layer_dense(10, activation="softmax") 145 | 146 | model |> compile( 147 | optimizer="rmsprop", 148 | loss="sparse_categorical_crossentropy", 149 | metrics="accuracy" 150 | ) 151 | 152 | history_large_model <- model |> fit( 153 | train_images, train_labels, 154 | epochs = 20, 155 | batch_size = 128, 156 | validation_split = 0.2 157 | ) 158 | 159 | 160 | plot(history_large_model$metrics$val_loss, type = 'o', 161 | main = "Validation Loss for a Model with Appropriate Capacity", 162 | xlab = "Epochs", ylab = "Validation Loss") 163 | 164 | 165 | model <- keras_model_sequential() |> 166 | layer_dense(2048, activation = "relu") |> 167 | layer_dense(2048, activation = "relu") |> 168 | layer_dense(2048, activation = "relu") |> 169 | layer_dense(10, activation = "softmax") 170 | 171 | model |> compile( 172 | optimizer = "rmsprop", 173 | loss = "sparse_categorical_crossentropy", 174 | metrics = "accuracy" 175 | ) 176 | 177 | history_very_large_model <- model |> fit( 178 | train_images, train_labels, 179 | epochs = 20, 180 | batch_size = 32, # <1> 181 | validation_split = 0.2 182 | ) 183 | 184 | 185 | plot(history_very_large_model$metrics$val_loss, type = 'o', 186 | main = "Validation Loss for a Model with Too Much Capacity", 187 | xlab = "Epochs", ylab = "Validation Loss") 188 | 189 | 190 | .[.[train_data, train_labels], .] 
<- dataset_imdb(num_words = 10000) 191 | 192 | vectorize_sequences <- function(sequences, dimension = 10000) { 193 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 194 | for (i in seq_along(sequences)) 195 | results[i, sequences[[i]]] <- 1 196 | results 197 | } 198 | 199 | train_data <- vectorize_sequences(train_data) 200 | 201 | model <- keras_model_sequential() |> 202 | layer_dense(16, activation="relu") |> 203 | layer_dense(16, activation="relu") |> 204 | layer_dense(1, activation="sigmoid") 205 | 206 | model |> compile( 207 | optimizer = "rmsprop", 208 | loss = "binary_crossentropy", 209 | metrics = "accuracy" 210 | ) 211 | 212 | history_original <- model |> fit( 213 | train_data, train_labels, 214 | epochs = 20, batch_size = 512, validation_split = 0.4 215 | ) 216 | 217 | 218 | model <- keras_model_sequential() |> 219 | layer_dense(4, activation = "relu") |> 220 | layer_dense(4, activation = "relu") |> 221 | layer_dense(1, activation = "sigmoid") 222 | 223 | model |> compile( 224 | optimizer = "rmsprop", 225 | loss = "binary_crossentropy", 226 | metrics = "accuracy" 227 | ) 228 | 229 | history_smaller_model <- model |> fit( 230 | train_data, train_labels, 231 | epochs = 20, batch_size = 512, validation_split = 0.4 232 | ) 233 | 234 | 235 | plot( 236 | NULL, 237 | main = "Original Model vs. Smaller Model on IMDB Review Classification", 238 | xlab = "Epochs", 239 | xlim = c(1, history_original$params$epochs), 240 | ylab = "Validation Loss", 241 | ylim = extendrange(c(history_original$metrics$val_loss, 242 | history_smaller_model$metrics$val_loss)), 243 | panel.first = abline(v = 1:history_original$params$epochs, 244 | lty = "dotted", col = "lightgrey") 245 | ) 246 | 247 | lines(history_original $metrics$val_loss, lty = 2) 248 | lines(history_smaller_model$metrics$val_loss, lty = 1) 249 | legend("topleft", lty = 2:1, 250 | legend = c("Validation loss of original model", 251 | "Validation loss of smaller model")) 252 | 253 | 254 | model <- keras_model_sequential() |> 255 | layer_dense(512, activation = "relu") |> 256 | layer_dense(512, activation = "relu") |> 257 | layer_dense(1, activation = "sigmoid") 258 | 259 | model |> compile( 260 | optimizer = "rmsprop", 261 | loss = "binary_crossentropy", 262 | metrics = "accuracy" 263 | ) 264 | 265 | history_larger_model <- model |> fit( 266 | train_data, train_labels, 267 | epochs = 20, batch_size = 512, validation_split = 0.4 268 | ) 269 | 270 | 271 | plot( 272 | NULL, 273 | main = "Original Model vs. 
Much Larger Model on IMDB Review Classification", 274 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 275 | ylab = "Validation Loss", 276 | ylim = range(c(history_original$metrics$val_loss, 277 | history_larger_model$metrics$val_loss)), 278 | panel.first = abline(v = 1:history_original$params$epochs, 279 | lty = "dotted", col = "lightgrey") 280 | ) 281 | lines(history_original $metrics$val_loss, lty = 2) 282 | lines(history_larger_model$metrics$val_loss, lty = 1) 283 | legend("topleft", lty = 2:1, 284 | legend = c("Validation loss of original model", 285 | "Validation loss of larger model")) 286 | 287 | 288 | model <- keras_model_sequential() |> 289 | layer_dense(16, activation = "relu", 290 | kernel_regularizer = regularizer_l2(0.002)) |> 291 | layer_dense(16, activation = "relu", 292 | kernel_regularizer = regularizer_l2(0.002)) |> 293 | layer_dense(1, activation = "sigmoid") 294 | 295 | model |> compile( 296 | optimizer = "rmsprop", 297 | loss = "binary_crossentropy", 298 | metrics = "accuracy" 299 | ) 300 | 301 | history_l2_reg <- model |> fit( 302 | train_data, train_labels, 303 | epochs = 20, batch_size = 512, validation_split = 0.4 304 | ) 305 | 306 | 307 | plot(NULL, 308 | main = "Effect of L2 Weight Regularization on Validation Loss", 309 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 310 | ylab = "Validation Loss", 311 | ylim = range(c(history_original$metrics$val_loss, 312 | history_l2_reg $metrics$val_loss)), 313 | panel.first = abline(v = 1:history_original$params$epochs, 314 | lty = "dotted", col = "lightgrey")) 315 | lines(history_original$metrics$val_loss, lty = 2) 316 | lines(history_l2_reg $metrics$val_loss, lty = 1) 317 | legend("topleft", lty = 2:1, 318 | legend = c("Validation loss of original model", 319 | "Validation loss of L2-regularized model")) 320 | 321 | 322 | regularizer_l1(0.001) # <1> 323 | regularizer_l1_l2(l1 = 0.001, l2 = 0.001) # <2> 324 | 325 | 326 | model <- keras_model_sequential() |> 327 | layer_dense(16, activation = "relu") |> 328 | layer_dropout(0.5) |> 329 | layer_dense(16, activation = "relu") |> 330 | layer_dropout(0.5) |> 331 | layer_dense(1, activation = "sigmoid") 332 | 333 | model |> compile( 334 | optimizer = "rmsprop", 335 | loss = "binary_crossentropy", 336 | metrics = "accuracy" 337 | ) 338 | 339 | history_dropout <- model |> fit( 340 | train_data, train_labels, 341 | epochs = 20, batch_size = 512, 342 | validation_split = 0.4 343 | ) 344 | 345 | 346 | plot(NULL, 347 | main = "Effect of Dropout on Validation Loss", 348 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 349 | ylab = "Validation Loss", 350 | ylim = range(c(history_original$metrics$val_loss, 351 | history_dropout $metrics$val_loss)), 352 | panel.first = abline(v = 1:history_original$params$epochs, 353 | lty = "dotted", col = "lightgrey")) 354 | lines(history_original$metrics$val_loss, lty = 2) 355 | lines(history_dropout $metrics$val_loss, lty = 1) 356 | legend("topleft", lty = 2:1, 357 | legend = c("Validation loss of original model", 358 | "Validation loss of dropout-regularized model")) 359 | 360 | -------------------------------------------------------------------------------- /chapter13_timeseries-forecasting.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | library(tensorflow, exclude = c("shape", "set_random_seed")) 5 | library(tfdatasets, exclude = "shape") 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(keras3) 8 | use_backend("jax") 9 | 10 | 11 | 
"https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip" |> 12 | get_file(origin = _) |> 13 | zip::unzip("jena_climate_2009_2016.csv") 14 | 15 | 16 | writeLines(readLines("jena_climate_2009_2016.csv", 3)) 17 | 18 | 19 | withr::with_package("readr", { 20 | full_df <- read_csv( 21 | "jena_climate_2009_2016.csv", 22 | locale = locale(tz = "Etc/GMT+1"), 23 | col_types = cols( 24 | `Date Time` = col_datetime("%d.%m.%Y %H:%M:%S"), 25 | .default = col_double() 26 | ) 27 | ) 28 | }) 29 | 30 | 31 | tibble::glimpse(full_df) 32 | 33 | 34 | plot(`T (degC)` ~ `Date Time`, data = full_df, pch = 20, cex = .3) 35 | 36 | 37 | plot(`T (degC)` ~ `Date Time`, data = full_df[1:1440, ]) 38 | 39 | 40 | num_train_samples <- round(nrow(full_df) * .5) 41 | num_val_samples <- round(nrow(full_df) * 0.25) 42 | num_test_samples <- nrow(full_df) - num_train_samples - num_val_samples 43 | 44 | train_df <- full_df[seq(num_train_samples), ] 45 | 46 | val_df <- full_df[seq(from = nrow(train_df) + 1, 47 | length.out = num_val_samples), ] 48 | 49 | test_df <- full_df[seq(to = nrow(full_df), 50 | length.out = num_test_samples), ] 51 | 52 | cat("num_train_samples:", nrow(train_df), "\n") 53 | cat("num_val_samples:", nrow(val_df), "\n") 54 | cat("num_test_samples:", nrow(test_df), "\n") 55 | 56 | 57 | input_data_colnames <- names(full_df) |> setdiff(c("Date Time")) 58 | normalization_values <- train_df[input_data_colnames] |> 59 | lapply(\(col) list(mean = mean(col), sd = sd(col))) 60 | 61 | str(normalization_values) 62 | 63 | normalize_input_data <- function(df) { 64 | purrr::map2(df, normalization_values[names(df)], \(col, nv) { 65 | (col - nv$mean) / nv$sd 66 | }) |> as_tibble() 67 | } 68 | 69 | 70 | int_sequence <- seq(10) 71 | dummy_dataset <- timeseries_dataset_from_array( 72 | data = head(int_sequence, -3), 73 | targets = tail(int_sequence, -3), 74 | sequence_length = 3, 75 | batch_size = 2 76 | ) 77 | 78 | dummy_dataset_iterator <- as_array_iterator(dummy_dataset) 79 | 80 | repeat { 81 | batch <- iter_next(dummy_dataset_iterator) 82 | if (is.null(batch)) 83 | break 84 | .[inputs, targets] <- batch 85 | for (r in 1:nrow(inputs)) 86 | cat(sprintf("input: [ %s ] target: %s\n", 87 | paste(inputs[r, ], collapse = " "), targets[r])) 88 | cat(strrep("-", 27), "\n") 89 | } 90 | 91 | 92 | sampling_rate <- 6 93 | sequence_length <- 120 94 | delay <- sampling_rate * (sequence_length + 24 - 1) 95 | batch_size <- 256 96 | 97 | df_to_inputs_and_targets <- function(df) { 98 | inputs <- df[input_data_colnames] |> 99 | normalize_input_data() |> 100 | as.matrix() 101 | 102 | targets <- as.array(df$`T (degC)`) 103 | 104 | list( 105 | head(inputs, -delay), 106 | tail(targets, -delay) 107 | ) 108 | } 109 | 110 | make_dataset <- function(df) { 111 | .[inputs, targets] <- df_to_inputs_and_targets(df) 112 | 113 | timeseries_dataset_from_array( 114 | inputs, targets, 115 | sampling_rate = sampling_rate, 116 | sequence_length = sequence_length, 117 | shuffle = TRUE, 118 | batch_size = batch_size 119 | ) 120 | } 121 | 122 | train_dataset <- make_dataset(train_df) 123 | val_dataset <- make_dataset(val_df) 124 | test_dataset <- make_dataset(test_df) 125 | 126 | 127 | .[samples, targets] <- iter_next(as_iterator(train_dataset)) 128 | cat("samples shape: ", format(samples$shape), "\n", 129 | "targets shape: ", format(targets$shape), "\n", sep = "") 130 | 131 | 132 | evaluate_naive_method <- function(dataset) { 133 | 134 | .[temp_sd = sd, temp_mean = mean] <- normalization_values$`T (degC)` 135 | unnormalize_temperature <- function(x) { 
136 | (x * temp_sd) + temp_mean 137 | } 138 | 139 | temp_col_idx <- match("T (degC)", input_data_colnames) 140 | 141 | reduction <- dataset |> 142 | dataset_unbatch() |> 143 | dataset_map(function(samples, target) { 144 | last_temp_in_input <- samples@r[-1, temp_col_idx] # <1> 145 | pred <- unnormalize_temperature(last_temp_in_input) # <2> 146 | abs(pred - target) 147 | }) |> 148 | dataset_reduce( 149 | initial_state = list(total_samples_seen = 0L, 150 | total_abs_error = 0), 151 | reduce_func = function(state, element) { 152 | `add<-` <- `+` 153 | add(state$total_samples_seen) <- 1L 154 | add(state$total_abs_error) <- element 155 | state 156 | } 157 | ) |> 158 | lapply(as.numeric) # <3> 159 | 160 | mae <- with(reduction, total_abs_error / total_samples_seen) # <4> 161 | mae 162 | } 163 | 164 | sprintf("Validation MAE: %.2f", evaluate_naive_method(val_dataset)) 165 | sprintf("Test MAE: %.2f", evaluate_naive_method(test_dataset)) 166 | 167 | 168 | ncol_input_data <- length(input_data_colnames) 169 | 170 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 171 | outputs <- inputs |> 172 | layer_flatten() |> 173 | layer_dense(16, activation="relu") |> 174 | layer_dense(1) 175 | 176 | model <- keras_model(inputs, outputs) 177 | 178 | callbacks = list( 179 | callback_model_checkpoint("jena_dense.keras", save_best_only = TRUE) # <1> 180 | ) 181 | 182 | model |> compile( 183 | optimizer = "rmsprop", 184 | loss = "mse", 185 | metrics = "mae" 186 | ) 187 | 188 | history <- model |> fit( 189 | train_dataset, 190 | epochs = 10, 191 | validation_data = val_dataset, 192 | callbacks = callbacks 193 | ) 194 | 195 | model <- load_model("jena_dense.keras") # <2> 196 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 197 | 198 | 199 | plot(history, metrics = "mae") 200 | 201 | 202 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 203 | outputs <- inputs |> 204 | layer_conv_1d(8, 24, activation = "relu") |> 205 | layer_max_pooling_1d(2) |> 206 | layer_conv_1d(8, 12, activation = "relu") |> 207 | layer_max_pooling_1d(2) |> 208 | layer_conv_1d(8, 6, activation = "relu") |> 209 | layer_global_average_pooling_1d() |> 210 | layer_dense(1) 211 | model <- keras_model(inputs, outputs) 212 | 213 | callbacks <- list( 214 | callback_model_checkpoint("jena_conv.keras", save_best_only = TRUE) 215 | ) 216 | 217 | model |> compile( 218 | optimizer = "rmsprop", 219 | loss = "mse", 220 | metrics = "mae" 221 | ) 222 | 223 | 224 | history <- model |> fit( 225 | train_dataset, 226 | epochs = 10, 227 | validation_data = val_dataset, 228 | callbacks = callbacks 229 | ) 230 | 231 | 232 | model <- load_model("jena_conv.keras") 233 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 234 | 235 | 236 | plot(history, metrics = "mae") 237 | 238 | 239 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 240 | outputs <- inputs |> 241 | layer_lstm(16) |> 242 | layer_dense(1) 243 | model <- keras_model(inputs, outputs) 244 | 245 | callbacks <- list( 246 | callback_model_checkpoint("jena_lstm.keras", save_best_only = TRUE) 247 | ) 248 | 249 | compile(model, optimizer = "rmsprop", loss = "mse", metrics = "mae") 250 | 251 | 252 | history <- model |> fit( 253 | train_dataset, 254 | epochs = 10, 255 | validation_data = val_dataset, 256 | callbacks = callbacks 257 | ) 258 | 259 | 260 | local({ 261 | p <- plot(history, metrics = "mae") 262 | p$data %<>% .[.$epoch > 1, ] 263 | print(p) 264 | }) 265 | 266 | 267 | model <- load_model("jena_lstm.keras") 268 |
sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 269 | 270 | 271 | runif_array <- function(dim) array(runif(prod(dim)), dim) 272 | 273 | timesteps <- 100 # <1> 274 | input_features <- 32 # <2> 275 | output_features <- 64 # <3> 276 | 277 | inputs <- runif_array(c(timesteps, input_features)) # <4> 278 | state_t <- array(0, dim = output_features) # <5> 279 | W <- runif_array(c(output_features, input_features)) # <6> 280 | U <- runif_array(c(output_features, output_features)) # <6> 281 | b <- runif_array(c(output_features, 1)) # <6> 282 | outputs <- array(0, dim = c(timesteps, output_features)) 283 | 284 | for(ts in 1:timesteps) { 285 | input_t <- inputs[ts, ] # <7> 286 | output_t <- tanh( (W %*% input_t) + (U %*% state_t) + b ) # <8> 287 | outputs[ts, ] <- state_t <- output_t # <9> 288 | } 289 | 290 | final_output_sequence <- outputs # <10> 291 | 292 | 293 | num_features <- 14 294 | inputs <- keras_input(shape = c(NA, num_features)) 295 | outputs <- inputs |> layer_simple_rnn(16) 296 | 297 | 298 | num_features <- 14 299 | steps <- 120 300 | inputs <- keras_input(shape = c(steps, num_features)) 301 | outputs <- inputs |> layer_simple_rnn(16, return_sequences = FALSE) # <1> 302 | op_shape(outputs) 303 | 304 | 305 | num_features <- 14 306 | steps <- 120 307 | inputs <- keras_input(shape = c(steps, num_features)) 308 | outputs <- inputs |> layer_simple_rnn(16, return_sequences = TRUE) # <1> 309 | op_shape(outputs) 310 | 311 | 312 | inputs <- keras_input(shape = c(steps, num_features)) 313 | outputs <- inputs |> 314 | layer_simple_rnn(16, return_sequences = TRUE) |> 315 | layer_simple_rnn(16, return_sequences = TRUE) |> 316 | layer_simple_rnn(16) 317 | 318 | 319 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 320 | outputs <- inputs |> 321 | layer_lstm(32, recurrent_dropout = 0.25) |> 322 | layer_dropout(0.5) |> # <1> 323 | layer_dense(1) 324 | model <- keras_model(inputs, outputs) 325 | 326 | callbacks = list( 327 | callback_model_checkpoint("jena_lstm_dropout.keras", save_best_only = TRUE) 328 | ) 329 | 330 | compile(model, optimizer = "rmsprop", loss = "mse", metrics = "mae") 331 | 332 | 333 | history <- model |> fit( 334 | train_dataset, 335 | epochs = 50, 336 | validation_data = val_dataset, 337 | callbacks = callbacks 338 | ) 339 | 340 | 341 | local({ 342 | p <- plot(history, metrics = "mae") 343 | p$data %<>% .[.$epoch > 1, ] 344 | print(p) 345 | }) 346 | 347 | 348 | inputs <- keras_input(shape = c(sequence_length, num_features)) # <1> 349 | x <- inputs |> layer_lstm(32, recurrent_dropout = 0.2, unroll = TRUE) # <2> 350 | 351 | 352 | inputs <- keras_input(shape = c(sequence_length, ncol_input_data)) 353 | outputs <- inputs |> 354 | layer_gru(32, recurrent_dropout = 0.5, return_sequences = TRUE) |> 355 | layer_gru(32, recurrent_dropout = 0.5) |> 356 | layer_dropout(0.5) |> 357 | layer_dense(1) 358 | model <- keras_model(inputs, outputs) 359 | 360 | callbacks <- list( 361 | callback_model_checkpoint( 362 | "jena_stacked_gru_dropout.keras", save_best_only = TRUE 363 | ) 364 | ) 365 | 366 | model |> compile(optimizer = "rmsprop", loss = "mse", metrics = "mae") 367 | 368 | 369 | history <- model |> fit( 370 | train_dataset, 371 | epochs = 50, 372 | validation_data = val_dataset, 373 | callbacks = callbacks 374 | ) 375 | 376 | 377 | plot(history) 378 | 379 | 380 | model <- load_model("jena_stacked_gru_dropout.keras") 381 | sprintf("Test MAE: %.2f", evaluate(model, test_dataset)[["mae"]]) 382 | 383 | 384 | inputs <- keras_input(shape = c(sequence_length, 
ncol_input_data)) 385 | outputs <- inputs |> 386 | layer_bidirectional(layer_lstm(units = 16)) |> 387 | layer_dense(1) 388 | model <- keras_model(inputs, outputs) 389 | 390 | model |> compile(optimizer = "rmsprop", loss = "mse", metrics = "mae") 391 | 392 | history <- model |> fit( 393 | train_dataset, 394 | epochs = 10, 395 | validation_data = val_dataset 396 | ) 397 | 398 | plot(history) 399 | 400 | 401 | 402 | -------------------------------------------------------------------------------- /chapter08_image-classification.R: -------------------------------------------------------------------------------- 1 | library(keras3) 2 | 3 | 4 | inputs <- keras_input(shape = c(28, 28, 1)) 5 | outputs <- inputs |> 6 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 7 | layer_max_pooling_2d(pool_size = 2) |> 8 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 9 | layer_max_pooling_2d(pool_size = 2) |> 10 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 11 | layer_global_average_pooling_2d() |> 12 | layer_dense(10, activation = "softmax") 13 | model <- keras_model(inputs = inputs, outputs = outputs) 14 | 15 | 16 | model 17 | 18 | 19 | .[.[train_images, train_labels], 20 | .[test_images, test_labels]] <- dataset_mnist() 21 | train_images <- array_reshape(train_images, c(60000, 28, 28, 1)) / 255 22 | test_images <- array_reshape(test_images, c(10000, 28, 28, 1)) / 255 23 | 24 | model |> compile( 25 | optimizer = "rmsprop", 26 | loss = "sparse_categorical_crossentropy", 27 | metrics = c("accuracy") 28 | ) 29 | model |> fit(train_images, train_labels, epochs = 5, batch_size = 64) 30 | 31 | 32 | result <- evaluate(model, test_images, test_labels) 33 | cat("Test accuracy:", result$accuracy, "\n") 34 | 35 | 36 | inputs <- keras_input(shape = c(28, 28, 1)) 37 | outputs <- inputs |> 38 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 39 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 40 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 41 | layer_global_average_pooling_2d() |> 42 | layer_dense(10, activation = "softmax") 43 | 44 | model_no_max_pool <- keras_model(inputs = inputs, outputs = outputs) 45 | 46 | 47 | model_no_max_pool 48 | 49 | 50 | unlink("dogs-vs-cats", recursive = TRUE) 51 | unlink("dogs_vs_cats_small", recursive = TRUE) 52 | 53 | 54 | zip::unzip('dogs-vs-cats.zip', exdir = "dogs-vs-cats", files = "train.zip") 55 | 56 | 57 | zip::unzip("dogs-vs-cats/train.zip", exdir = "dogs-vs-cats") 58 | 59 | 60 | library(fs) 61 | original_dir <- path("dogs-vs-cats/train") # <1> 62 | new_base_dir <- path("dogs_vs_cats_small") # <2> 63 | 64 | make_subset <- function(subset_name, start_index, end_index) { # <3> 65 | for (category in c("dog", "cat")) { 66 | file_name <- glue::glue("{category}.{ start_index:end_index }.jpg") 67 | dir_create(new_base_dir / subset_name / category) 68 | file_copy(original_dir / file_name, 69 | new_base_dir / subset_name / category / file_name) 70 | } 71 | } 72 | 73 | make_subset("train", start_index = 1, end_index = 1000) # <4> 74 | make_subset("validation", start_index = 1001, end_index = 1500) # <5> 75 | make_subset("test", start_index = 1501, end_index = 2500) # <6> 76 | 77 | 78 | inputs <- keras_input(shape = c(180, 180, 3)) # <1> 79 | outputs <- inputs |> 80 | layer_rescaling(1 / 255) |> # <2> 81 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |> 82 | layer_max_pooling_2d(pool_size = 2) |> 83 | layer_conv_2d(filters = 64, 
kernel_size = 3, activation = "relu") |> 84 | layer_max_pooling_2d(pool_size = 2) |> 85 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 86 | layer_max_pooling_2d(pool_size = 2) |> 87 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 88 | layer_max_pooling_2d(pool_size = 2) |> 89 | layer_conv_2d(filters = 512, kernel_size = 3, activation = "relu") |> 90 | layer_global_average_pooling_2d() |> # <3> 91 | layer_dense(1, activation = "sigmoid") 92 | model <- keras_model(inputs, outputs) 93 | 94 | 95 | model 96 | 97 | 98 | model |> compile(loss = "binary_crossentropy", 99 | optimizer = "adam", 100 | metrics = "accuracy") 101 | 102 | 103 | image_size <- shape(180, 180) 104 | batch_size <- 32 105 | 106 | train_dataset <- 107 | image_dataset_from_directory(new_base_dir / "train", 108 | image_size = image_size, 109 | batch_size = batch_size) 110 | validation_dataset <- 111 | image_dataset_from_directory(new_base_dir / "validation", 112 | image_size = image_size, 113 | batch_size = batch_size) 114 | test_dataset <- 115 | image_dataset_from_directory(new_base_dir / "test", 116 | image_size = image_size, 117 | batch_size = batch_size) 118 | 119 | 120 | reticulate::import("tensorflow")$constant(1L) 121 | 122 | 123 | library(tfdatasets, exclude = c("shape")) 124 | 125 | example_array <- array(seq(100*6), c(100, 6)) 126 | head(example_array) 127 | dataset <- tensor_slices_dataset(example_array) # <1> 128 | 129 | 130 | dataset_iterator <- as_iterator(dataset) 131 | for (i in 1:3) { 132 | element <- iter_next(dataset_iterator) 133 | print(element) 134 | } 135 | 136 | 137 | dataset_iterator <- as_iterator(dataset) 138 | for (i in 1:3) { 139 | element <- iter_next(dataset_iterator) 140 | print(element) 141 | } 142 | 143 | 144 | batched_dataset <- dataset |> dataset_batch(3) 145 | batched_dataset_iterator <- as_iterator(batched_dataset) 146 | for (i in 1:3) { 147 | element <- iter_next(batched_dataset_iterator) 148 | print(element) 149 | } 150 | 151 | 152 | reshaped_dataset <- dataset |> 153 | dataset_map(\(element) tf$reshape(element, shape(2, 3))) 154 | 155 | reshaped_dataset_iterator <- as_iterator(reshaped_dataset) 156 | for (i in 1:3) { 157 | element <- iter_next(reshaped_dataset_iterator) 158 | print(element) 159 | } 160 | 161 | 162 | .[data_batch, labels_batch] <- train_dataset |> as_iterator() |> iter_next() 163 | op_shape(data_batch) 164 | op_shape(labels_batch) 165 | 166 | 167 | callbacks <- list( 168 | callback_model_checkpoint( 169 | filepath = "convnet_from_scratch.keras", 170 | save_best_only = TRUE, 171 | monitor = "val_loss" 172 | ) 173 | ) 174 | 175 | history <- model |> 176 | fit( 177 | train_dataset, 178 | epochs = 50, 179 | validation_data = validation_dataset, 180 | callbacks = callbacks 181 | ) 182 | 183 | 184 | plot(history) 185 | 186 | 187 | test_model <- load_model("convnet_from_scratch.keras") 188 | result <- evaluate(test_model, test_dataset) 189 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 190 | 191 | 192 | data_augmentation_layers <- list( # <1> 193 | layer_random_flip(, "horizontal"), #< 194 | layer_random_rotation(, 0.1), 195 | layer_random_zoom(, 0.2) 196 | ) 197 | 198 | data_augmentation <- function(images, targets) { # <2> 199 | for (layer in data_augmentation_layers) 200 | images <- layer(images) 201 | list(images, targets) 202 | } 203 | 204 | augmented_train_dataset <- train_dataset |> 205 | dataset_map(data_augmentation, num_parallel_calls = 8) |> # <3> 206 | dataset_prefetch() # <4> 207 | 208 | 209 | batch <- 
train_dataset |> as_iterator() |> iter_next() 210 | .[images, labels] <- batch 211 | 212 | par(mfrow = c(3, 3), mar = rep(.5, 4)) 213 | 214 | image <- images[1, , , ] 215 | plot(as.raster(image, max = 255)) # <1> 216 | 217 | for (i in 2:9) { 218 | .[augmented_images, ..] <- data_augmentation(images, NULL) # <2> 219 | augmented_image <- augmented_images@r[1] |> as.array() # <3> 220 | plot(as.raster(augmented_image, max = 255)) # <3> 221 | } 222 | 223 | 224 | inputs <- keras_input(shape = c(180, 180, 3)) 225 | outputs <- inputs |> 226 | layer_rescaling(1 / 255) |> 227 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |> 228 | layer_max_pooling_2d(pool_size = 2) |> 229 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") |> 230 | layer_max_pooling_2d(pool_size = 2) |> 231 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") |> 232 | layer_max_pooling_2d(pool_size = 2) |> 233 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") |> 234 | layer_max_pooling_2d(pool_size = 2) |> 235 | layer_conv_2d(filters = 512, kernel_size = 3, activation = "relu") |> 236 | layer_global_average_pooling_2d() |> 237 | layer_dropout(0.25) |> 238 | layer_dense(1, activation = "sigmoid") 239 | 240 | model <- keras_model(inputs, outputs) 241 | 242 | model |> compile( 243 | loss = "binary_crossentropy", 244 | optimizer = "adam", 245 | metrics = "accuracy" 246 | ) 247 | 248 | 249 | callbacks <- list( 250 | callback_model_checkpoint( 251 | filepath = "convnet_from_scratch_with_augmentation.keras", 252 | save_best_only = TRUE, 253 | monitor = "val_loss" 254 | ) 255 | ) 256 | 257 | history <- model |> fit( 258 | augmented_train_dataset, 259 | epochs = 100, # <1> 260 | validation_data = validation_dataset, 261 | callbacks = callbacks 262 | ) 263 | 264 | 265 | plot(history) 266 | 267 | 268 | test_model <- load_model("convnet_from_scratch_with_augmentation.keras") 269 | result <- evaluate(test_model, test_dataset) 270 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 271 | 272 | 273 | conv_base <- application_xception( 274 | weights = "imagenet", 275 | include_top = FALSE, 276 | input_shape = c(180, 180, 3) 277 | ) 278 | 279 | 280 | preprocess_inputs <- application_preprocess_inputs(conv_base) # <1> 281 | get_features_and_labels <- function(dataset) { 282 | dataset |> 283 | as_array_iterator() |> 284 | iterate(function(batch) { 285 | .[images, labels] <- batch 286 | preprocessed_images <- preprocess_inputs(images) 287 | features <- conv_base |> predict(preprocessed_images, verbose = 0) 288 | tibble::tibble(features, labels) 289 | }) |> 290 | dplyr::bind_rows() 291 | } 292 | 293 | .[train_features, train_labels] <- get_features_and_labels(train_dataset) 294 | .[val_features, val_labels] <- get_features_and_labels(validation_dataset) 295 | .[test_features, test_labels] <- get_features_and_labels(test_dataset) 296 | 297 | 298 | dim(train_features) 299 | 300 | 301 | inputs <- keras_input(shape = c(6, 6, 2048)) 302 | outputs <- inputs |> 303 | layer_global_average_pooling_2d() |> # <1> 304 | layer_dense(256, activation = "relu") |> 305 | layer_dropout(0.25) |> 306 | layer_dense(1, activation = "sigmoid") 307 | 308 | model <- keras_model(inputs, outputs) 309 | 310 | model |> compile( 311 | loss = "binary_crossentropy", 312 | optimizer = "adam", 313 | metrics = "accuracy" 314 | ) 315 | 316 | callbacks <- list( 317 | callback_model_checkpoint( 318 | filepath = "feature_extraction.keras", 319 | save_best_only = TRUE, 320 | monitor = "val_loss" 321 | ) 322 | ) 323 
| 324 | history <- model |> fit( 325 | train_features, train_labels, 326 | epochs = 2, 327 | validation_data = list(val_features, val_labels), 328 | callbacks = callbacks 329 | ) 330 | 331 | 332 | plot(history) 333 | 334 | 335 | test_model <- load_model("feature_extraction.keras") 336 | result <- evaluate(test_model, test_features, test_labels) 337 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 338 | 339 | 340 | conv_base <- application_xception( 341 | weights = "imagenet", 342 | include_top = FALSE, 343 | input_shape = c(180, 180, 3) 344 | ) 345 | freeze_weights(conv_base) 346 | 347 | 348 | unfreeze_weights(conv_base) # <1> 349 | length(conv_base$trainable_weights) 350 | 351 | freeze_weights(conv_base) # <2> 352 | length(conv_base$trainable_weights) 353 | 354 | 355 | inputs <- keras_input(shape=c(180, 180, 3)) 356 | outputs <- inputs |> 357 | preprocess_inputs() |> 358 | conv_base() |> 359 | layer_global_average_pooling_2d() |> 360 | layer_dense(256) |> 361 | layer_dropout(0.25) |> 362 | layer_dense(1, activation = "sigmoid") 363 | model <- keras_model(inputs, outputs) 364 | model |> compile( 365 | loss = "binary_crossentropy", 366 | optimizer = "adam", 367 | metrics = "accuracy" 368 | ) 369 | 370 | 371 | callbacks <- list( 372 | callback_model_checkpoint( 373 | filepath = "feature_extraction_with_data_augmentation.keras", 374 | save_best_only = TRUE, 375 | monitor = "val_loss" 376 | ) 377 | ) 378 | 379 | history <- model |> fit( 380 | augmented_train_dataset, 381 | epochs = 30, 382 | validation_data = validation_dataset, 383 | callbacks = callbacks 384 | ) 385 | 386 | 387 | plot(history) 388 | 389 | 390 | test_model <- load_model( 391 | "feature_extraction_with_data_augmentation.keras") 392 | result <- evaluate(test_model, test_dataset) 393 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 394 | 395 | 396 | unfreeze_weights(conv_base, from = -4) 397 | conv_base 398 | 399 | 400 | model |> compile( 401 | loss = "binary_crossentropy", 402 | optimizer = optimizer_adam(learning_rate = 1e-5), 403 | metrics = "accuracy" 404 | ) 405 | 406 | callbacks <- list( 407 | callback_model_checkpoint( 408 | filepath = "fine_tuning.keras", 409 | save_best_only = TRUE, 410 | monitor = "val_loss" 411 | ) 412 | ) 413 | 414 | history <- model |> fit( 415 | augmented_train_dataset, 416 | epochs = 30, 417 | validation_data = validation_dataset, 418 | callbacks = callbacks 419 | ) 420 | 421 | 422 | model <- load_model("fine_tuning.keras") 423 | result <- evaluate(model, test_dataset) 424 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 425 | 426 | -------------------------------------------------------------------------------- /2e/ch04.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | tensorflow::as_tensor(1) 3 | 4 | 5 | ## ------------------------------------------------------------------------- 6 | library(keras3) 7 | 8 | imdb <- dataset_imdb(num_words = 10000) 9 | c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb 10 | 11 | 12 | ## ------------------------------------------------------------------------- 13 | str(train_data) 14 | str(train_labels) 15 | 16 | 17 | ## ------------------------------------------------------------------------- 18 | max(sapply(train_data, max)) 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | word_index <- dataset_imdb_word_index() 23 | 24 | reverse_word_index <- 
names(word_index) 25 | names(reverse_word_index) <- as.character(word_index) 26 | 27 | decoded_words <- train_data[[1]] %>% 28 | sapply(function(i) { 29 | if (i > 3) reverse_word_index[[as.character(i - 3)]] 30 | else "?" 31 | }) 32 | decoded_review <- paste0(decoded_words, collapse = " ") 33 | decoded_review 34 | 35 | 36 | ## ------------------------------------------------------------------------- 37 | vectorize_sequences <- function(sequences, dimension = 10000) { 38 | results <- array(0, dim = c(length(sequences), dimension)) 39 | for (i in seq_along(sequences)) { 40 | sequence <- sequences[[i]] 41 | for (j in sequence) 42 | results[i, j] <- 1 43 | } 44 | results 45 | } 46 | 47 | x_train <- vectorize_sequences(train_data) 48 | x_test <- vectorize_sequences(test_data) 49 | 50 | 51 | ## ------------------------------------------------------------------------- 52 | str(x_train) 53 | 54 | 55 | ## ------------------------------------------------------------------------- 56 | y_train <- as.numeric(train_labels) 57 | y_test <- as.numeric(test_labels) 58 | 59 | 60 | ## ------------------------------------------------------------------------- 61 | model <- keras_model_sequential() %>% 62 | layer_dense(16, activation = "relu") %>% 63 | layer_dense(16, activation = "relu") %>% 64 | layer_dense(1, activation = "sigmoid") 65 | 66 | 67 | ## ---- eval = FALSE-------------------------------------------------------- 68 | ## output <- relu(dot(input, W) + b) 69 | 70 | 71 | ## ---- eval = FALSE-------------------------------------------------------- 72 | ## output <- dot(input, W) + b 73 | 74 | 75 | ## ------------------------------------------------------------------------- 76 | model %>% compile(optimizer = "rmsprop", 77 | loss = "binary_crossentropy", 78 | metrics = "accuracy") 79 | 80 | 81 | ## ------------------------------------------------------------------------- 82 | x_val <- x_train[seq(10000), ] 83 | partial_x_train <- x_train[-seq(10000), ] 84 | y_val <- y_train[seq(10000)] 85 | partial_y_train <- y_train[-seq(10000)] 86 | 87 | 88 | ## ------------------------------------------------------------------------- 89 | history <- model %>% fit( 90 | partial_x_train, 91 | partial_y_train, 92 | epochs = 20, 93 | batch_size = 512, 94 | validation_data = list(x_val, y_val) 95 | ) 96 | 97 | 98 | ## ------------------------------------------------------------------------- 99 | str(history$metrics) 100 | 101 | 102 | ## ---- message=FALSE------------------------------------------------------- 103 | plot(history) 104 | 105 | 106 | ## ------------------------------------------------------------------------- 107 | history_df <- as.data.frame(history) 108 | str(history_df) 109 | 110 | 111 | ## ------------------------------------------------------------------------- 112 | model <- keras_model_sequential() %>% 113 | layer_dense(16, activation = "relu") %>% 114 | layer_dense(16, activation = "relu") %>% 115 | layer_dense(1, activation = "sigmoid") 116 | 117 | model %>% compile(optimizer = "rmsprop", 118 | loss = "binary_crossentropy", 119 | metrics = "accuracy") 120 | 121 | model %>% fit(x_train, y_train, epochs = 4, batch_size = 512) 122 | results <- model %>% evaluate(x_test, y_test) 123 | 124 | 125 | ## ------------------------------------------------------------------------- 126 | results 127 | 128 | 129 | ## ---- include = FALSE----------------------------------------------------- 130 | model %>% predict(x_test) 131 | 132 | 133 | ## ------------------------------------------------------------------------- 
134 | reuters <- dataset_reuters(num_words = 10000) 135 | c(c(train_data, train_labels), c(test_data, test_labels)) %<-% reuters 136 | 137 | 138 | ## ------------------------------------------------------------------------- 139 | length(train_data) 140 | length(test_data) 141 | 142 | 143 | ## ------------------------------------------------------------------------- 144 | str(train_data) 145 | 146 | 147 | ## ------------------------------------------------------------------------- 148 | word_index <- dataset_reuters_word_index() 149 | 150 | reverse_word_index <- names(word_index) 151 | names(reverse_word_index) <- as.character(word_index) 152 | 153 | decoded_words <- train_data[[1]] %>% 154 | sapply(function(i) { 155 | if (i > 3) reverse_word_index[[as.character(i - 3)]] 156 | else "?" 157 | }) 158 | decoded_review <- paste0(decoded_words, collapse = " ") 159 | decoded_review 160 | 161 | 162 | ## ------------------------------------------------------------------------- 163 | str(train_labels) 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | vectorize_sequences <- function(sequences, dimension = 10000) { 168 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 169 | for (i in seq_along(sequences)) 170 | results[i, sequences[[i]]] <- 1 171 | results 172 | } 173 | 174 | x_train <- vectorize_sequences(train_data) 175 | x_test <- vectorize_sequences(test_data) 176 | 177 | 178 | ## ------------------------------------------------------------------------- 179 | to_one_hot <- function(labels, dimension = 46) { 180 | results <- matrix(0, nrow = length(labels), ncol = dimension) 181 | labels <- labels + 1 182 | for(i in seq_along(labels)) { 183 | j <- labels[[i]] 184 | results[i, j] <- 1 185 | } 186 | results 187 | } 188 | y_train <- to_one_hot(train_labels) 189 | y_test <- to_one_hot(test_labels) 190 | 191 | 192 | ## ------------------------------------------------------------------------- 193 | y_train <- to_categorical(train_labels) 194 | y_test <- to_categorical(test_labels) 195 | 196 | 197 | ## ------------------------------------------------------------------------- 198 | model <- keras_model_sequential() %>% 199 | layer_dense(64, activation = "relu") %>% 200 | layer_dense(64, activation = "relu") %>% 201 | layer_dense(46, activation = "softmax") 202 | 203 | 204 | ## ------------------------------------------------------------------------- 205 | model %>% compile(optimizer = "rmsprop", 206 | loss = "categorical_crossentropy", 207 | metrics = "accuracy") 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | val_indices <- 1:1000 212 | 213 | x_val <- x_train[val_indices, ] 214 | partial_x_train <- x_train[-val_indices, ] 215 | 216 | y_val <- y_train[val_indices, ] 217 | partial_y_train <- y_train[-val_indices, ] 218 | 219 | 220 | ## ------------------------------------------------------------------------- 221 | history <- model %>% fit( 222 | partial_x_train, 223 | partial_y_train, 224 | epochs = 20, 225 | batch_size = 512, 226 | validation_data = list(x_val, y_val) 227 | ) 228 | 229 | 230 | ## ------------------------------------------------------------------------- 231 | plot(history) 232 | 233 | 234 | ## ------------------------------------------------------------------------- 235 | model <- keras_model_sequential() %>% 236 | layer_dense(64, activation = "relu") %>% 237 | layer_dense(64, activation = "relu") %>% 238 | layer_dense(46, activation = "softmax") 239 | 240 | model %>% 
compile(optimizer = "rmsprop", 241 | loss = "categorical_crossentropy", 242 | metrics = "accuracy") 243 | 244 | model %>% fit(x_train, y_train, epochs = 9, batch_size = 512) 245 | 246 | results <- model %>% evaluate(x_test, y_test) 247 | 248 | 249 | ## ------------------------------------------------------------------------- 250 | results 251 | 252 | 253 | ## ------------------------------------------------------------------------- 254 | mean(test_labels == sample(test_labels)) 255 | 256 | 257 | ## ------------------------------------------------------------------------- 258 | predictions <- model %>% predict(x_test) 259 | 260 | 261 | ## ------------------------------------------------------------------------- 262 | str(predictions) 263 | 264 | 265 | ## ------------------------------------------------------------------------- 266 | sum(predictions[1, ]) 267 | 268 | 269 | ## ------------------------------------------------------------------------- 270 | which.max(predictions[1, ]) 271 | 272 | 273 | ## ------------------------------------------------------------------------- 274 | y_train <- train_labels 275 | y_test <- test_labels 276 | 277 | 278 | ## ------------------------------------------------------------------------- 279 | model %>% compile( 280 | optimizer = "rmsprop", 281 | loss = "sparse_categorical_crossentropy", 282 | metrics = "accuracy") 283 | 284 | 285 | ## ------------------------------------------------------------------------- 286 | model <- keras_model_sequential() %>% 287 | layer_dense(64, activation = "relu") %>% 288 | layer_dense(4, activation = "relu") %>% 289 | layer_dense(46, activation = "softmax") 290 | 291 | model %>% compile(optimizer = "rmsprop", 292 | loss = "categorical_crossentropy", 293 | metrics = "accuracy") 294 | 295 | model %>% fit( 296 | partial_x_train, 297 | partial_y_train, 298 | epochs = 20, 299 | batch_size = 128, 300 | validation_data = list(x_val, y_val) 301 | ) 302 | 303 | 304 | ## ------------------------------------------------------------------------- 305 | boston <- dataset_boston_housing() 306 | c(c(train_data, train_targets), c(test_data, test_targets)) %<-% boston 307 | 308 | 309 | ## ------------------------------------------------------------------------- 310 | str(train_data) 311 | str(test_data) 312 | 313 | 314 | ## ------------------------------------------------------------------------- 315 | str(train_targets) 316 | 317 | 318 | ## ------------------------------------------------------------------------- 319 | mean <- apply(train_data, 2, mean) 320 | sd <- apply(train_data, 2, sd) 321 | train_data <- scale(train_data, center = mean, scale = sd) 322 | test_data <- scale(test_data, center = mean, scale = sd) 323 | 324 | 325 | ## ------------------------------------------------------------------------- 326 | build_model <- function() { 327 | 328 | model <- keras_model_sequential() %>% 329 | layer_dense(64, activation = "relu") %>% 330 | layer_dense(64, activation = "relu") %>% 331 | layer_dense(1) 332 | 333 | model %>% compile(optimizer = "rmsprop", 334 | loss = "mse", 335 | metrics = "mae") 336 | model 337 | } 338 | 339 | 340 | ## ------------------------------------------------------------------------- 341 | k <- 4 342 | fold_id <- sample(rep(1:k, length.out = nrow(train_data))) 343 | num_epochs <- 100 344 | all_scores <- numeric() 345 | 346 | for (i in 1:k) { 347 | cat("Processing fold #", i, "\n") 348 | 349 | val_indices <- which(fold_id == i) 350 | 351 | val_data <- train_data[val_indices, ] 352 | val_targets <- 
train_targets[val_indices] 353 | 354 | partial_train_data <- train_data[-val_indices, ] 355 | partial_train_targets <- train_targets[-val_indices] 356 | 357 | model <- build_model() 358 | 359 | model %>% fit( 360 | partial_train_data, 361 | partial_train_targets, 362 | epochs = num_epochs, 363 | batch_size = 16, 364 | verbose = 0 365 | ) 366 | 367 | results <- model %>% evaluate(val_data, val_targets, verbose = 0) 368 | all_scores[[i]] <- results[['mae']] 369 | } 370 | 371 | 372 | ## ------------------------------------------------------------------------- 373 | all_scores 374 | mean(all_scores) 375 | 376 | 377 | ## ------------------------------------------------------------------------- 378 | num_epochs <- 500 379 | all_mae_histories <- list() 380 | for (i in 1:k) { 381 | cat("Processing fold #", i, "\n") 382 | 383 | val_indices <- which(fold_id == i) 384 | val_data <- train_data[val_indices, ] 385 | val_targets <- train_targets[val_indices] 386 | 387 | partial_train_data <- train_data[-val_indices, ] 388 | partial_train_targets <- train_targets[-val_indices] 389 | 390 | model <- build_model() 391 | history <- model %>% fit( 392 | partial_train_data, partial_train_targets, 393 | validation_data = list(val_data, val_targets), 394 | epochs = num_epochs, batch_size = 16, verbose = 0 395 | ) 396 | mae_history <- history$metrics$val_mae 397 | all_mae_histories[[i]] <- mae_history 398 | } 399 | 400 | all_mae_histories <- do.call(cbind, all_mae_histories) 401 | 402 | 403 | ## ------------------------------------------------------------------------- 404 | average_mae_history <- rowMeans(all_mae_histories) 405 | 406 | 407 | ## ------------------------------------------------------------------------- 408 | plot(average_mae_history, xlab = "epoch", type = 'l') 409 | 410 | 411 | ## ------------------------------------------------------------------------- 412 | truncated_mae_history <- average_mae_history[-(1:10)] 413 | plot(average_mae_history, xlab = "epoch", type = 'l', 414 | ylim = range(truncated_mae_history)) 415 | 416 | 417 | ## ------------------------------------------------------------------------- 418 | model <- build_model() 419 | model %>% fit(train_data, train_targets, 420 | epochs = 130, batch_size = 16, verbose = 0) 421 | result <- model %>% evaluate(test_data, test_targets) 422 | 423 | 424 | ## ------------------------------------------------------------------------- 425 | result$mae 426 | 427 | 428 | ## ------------------------------------------------------------------------- 429 | predictions <- model %>% predict(test_data) 430 | predictions[1, ] 431 | -------------------------------------------------------------------------------- /chapter02_mathematical-building-blocks.R: -------------------------------------------------------------------------------- 1 | keras3::use_backend("tensorflow") 2 | 3 | 4 | library(keras3) 5 | .[.[train_images, train_labels], .[test_images, test_labels]] <- dataset_mnist() 6 | 7 | 8 | str(train_images) 9 | str(train_labels) 10 | 11 | 12 | str(test_images) 13 | str(test_labels) 14 | 15 | 16 | model <- keras_model_sequential(layers = list( 17 | layer_dense(units = 512, activation = "relu"), 18 | layer_dense(units = 10, activation = "softmax") 19 | )) 20 | 21 | 22 | model |> compile( 23 | optimizer = "adam", 24 | loss = "sparse_categorical_crossentropy", 25 | metrics = "accuracy" 26 | ) 27 | 28 | 29 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) 30 | train_images <- train_images / 255 31 | test_images <- array_reshape(test_images, 
c(10000, 28 * 28)) 32 | test_images <- test_images / 255 33 | 34 | test_labels <- matrix(test_labels, ncol = 1) 35 | train_labels <- matrix(train_labels, ncol = 1) 36 | 37 | 38 | fit(model, train_images, train_labels, epochs = 5, batch_size = 128) 39 | 40 | 41 | test_digits <- test_images[1:10, ] 42 | predictions <- predict(model, test_digits) 43 | str(predictions) 44 | predictions[1, ] 45 | 46 | 47 | which.max(predictions[1,]) 48 | predictions[1, 8] 49 | 50 | 51 | test_labels[1] 52 | 53 | 54 | metrics <- evaluate(model, test_images, test_labels) 55 | metrics$accuracy 56 | 57 | 58 | x <- as.array(c(12, 3, 6, 14, 7)) 59 | str(x) 60 | dim(x) 61 | 62 | 63 | x <- np_array(x) 64 | x 65 | x$shape 66 | x$ndim 67 | 68 | 69 | x <- array(seq(3 * 5), dim = c(3, 5)) 70 | x 71 | dim(x) 72 | 73 | np_array(x) 74 | 75 | 76 | x <- array(seq(2 * 3 * 4), dim = c(2, 3, 4)) 77 | str(x) 78 | dim(x) 79 | length(dim(x)) 80 | 81 | 82 | x 83 | np_array(x) 84 | 85 | 86 | .[.[train_images, train_labels], .[test_images, test_labels]] <- 87 | dataset_mnist() # <1> 88 | 89 | 90 | length(dim(train_images)) 91 | 92 | 93 | dim(train_images) 94 | 95 | 96 | typeof(train_images) 97 | 98 | 99 | knitr::knit_hooks$set(no_mar = function(before, ...) { 100 | if (before) par(mar = c(0, 0, 0, 0)) 101 | }) 102 | 103 | 104 | digit <- train_images[5, , ] 105 | plot(as.raster(abs(255 - digit), max = 255)) 106 | 107 | 108 | train_labels[5] 109 | 110 | 111 | x <- np_array(1:10) 112 | x@r[1] 113 | x@py[1] 114 | 115 | 116 | x@r[1:3] 117 | x@py[1:3] 118 | 119 | 120 | x@r[NA:5] 121 | x@r[6:NA] 122 | 123 | 124 | images <- np_array(train_images, "float32") 125 | my_slice <- images@r[10:99] # <1> 126 | my_slice <- images@r[10:99, , ] # <2> 127 | my_slice <- images@r[10:99, NA:NA, NA:NA] # <3> 128 | my_slice <- images@r[10:99, 1:28, 1:28] # <4> 129 | my_slice$shape 130 | 131 | 132 | my_slice <- images@r[, 15:NA, 15:NA] 133 | my_slice$shape 134 | 135 | 136 | images$shape # <1> 137 | images@r[newaxis] |> _$shape # <2> 138 | images@r[, newaxis] |> _$shape # <3> 139 | images@r[.., newaxis] |> _$shape # <4> 140 | images@r[.., newaxis, ] |> _$shape # <5> 141 | 142 | 143 | x <- np_array(1:10) 144 | x@r[-1] 145 | x@r[-3:-1] 146 | x@r[-3:NA] 147 | 148 | 149 | my_slice <- images@r[, 8:-8, 8:-8] 150 | my_slice$shape 151 | 152 | 153 | batch <- train_images[1:128, , ] # <1> 154 | 155 | 156 | batch <- train_images[129:256, , ] 157 | 158 | 159 | n <- 3 160 | ids <- seq(to = 128 * n, length.out = 128) 161 | batch <- train_images[ids, , ] 162 | 163 | 164 | naive_relu <- function(x) { 165 | stopifnot(is.array(x), length(dim(x)) == 2) # <1> 166 | for (i in 1:nrow(x)) 167 | for (j in 1:ncol(x)) 168 | x[i, j] <- max(x[i, j], 0) # <2> 169 | x 170 | } 171 | 172 | 173 | naive_add <- function(x, y) { 174 | stopifnot(is.array(x), is.array(y), 175 | length(dim(x)) == 2, dim(x) == dim(y)) # <1> 176 | for (i in 1:nrow(x)) 177 | for (j in 1:ncol(x)) 178 | x[i, j] <- x[i, j] + y[i, j] 179 | x 180 | } 181 | 182 | 183 | runif_array <- function(dim) { 184 | array(runif(prod(dim)), dim) 185 | } 186 | 187 | x <- runif_array(c(20, 100)) 188 | y <- runif_array(c(20, 100)) 189 | 190 | system.time({ 191 | for (i in seq_len(1000)) { 192 | z <- x + y 193 | z <- pmax(z, 0) 194 | } 195 | })[["elapsed"]] 196 | 197 | 198 | x <- runif_array(c(20, 100)) 199 | y <- runif_array(c(20, 100)) 200 | 201 | system.time({ 202 | for (i in seq_len(1000)) { 203 | z <- naive_add(x, y) 204 | z <- naive_relu(z) 205 | } 206 | })[["elapsed"]] 207 | 208 | 209 | X <- runif_array(c(32, 10)) # <1> 210 | y <- 
runif_array(c(10)) # <2> 211 | 212 | 213 | dim(y) <- c(1, 10) 214 | str(y) # <1> 215 | 216 | 217 | Y <- y[rep(1, 32), ] 218 | dim(Y) # <1> 219 | 220 | 221 | naive_add_matrix_and_vector <- function(x, y) { 222 | stopifnot(length(dim(x)) == 2, # <1> 223 | length(dim(y)) == 1, # <2> 224 | ncol(x) == dim(y)) 225 | for (i in seq_len(dim(x)[1])) 226 | for (j in seq_len(dim(x)[2])) 227 | x[i, j] <- x[i, j] + y[j] 228 | x 229 | } 230 | 231 | 232 | x <- np_array(runif_array(c(64, 3, 32, 10))) # <1> 233 | y <- np_array(runif_array(c(32, 10))) # <2> 234 | z <- x + y # <3> 235 | 236 | 237 | x <- np_array(runif_array(32)) 238 | y <- np_array(runif_array(32)) 239 | 240 | z <- x %*% y # <1> 241 | 242 | np <- reticulate::import("numpy", convert = FALSE) 243 | z <- np$matmul(x, y) # <2> 244 | 245 | 246 | naive_vector_dot <- function(x, y) { 247 | stopifnot(length(dim(x)) == 1, # <1> 248 | length(dim(y)) == 1, # <1> 249 | dim(x) == dim(y)) # # <1> 250 | z <- 0 251 | for (i in seq_along(x)) 252 | z <- z + x[i] * y[i] 253 | z 254 | } 255 | 256 | 257 | naive_matrix_vector_dot <- function(x, y) { 258 | stopifnot(length(dim(x)) == 2, # <1> 259 | length(dim(y)) == 1, # <2> 260 | nrow(x) == dim(y)) # <3> 261 | z <- array(0, dim = nrow(x)) # <4> 262 | for (i in 1:nrow(x)) 263 | for (j in 1:ncol(x)) 264 | z[i] <- z[i] + x[i, j] * y[j] 265 | z 266 | } 267 | 268 | 269 | naive_matrix_vector_dot <- function(x, y) { 270 | z <- array(0, dim = c(nrow(x))) 271 | for (i in 1:nrow(x)) 272 | z[i] <- naive_vector_dot(x[i, ], y) 273 | z 274 | } 275 | 276 | 277 | naive_matrix_dot <- function(x, y) { 278 | stopifnot(length(dim(x)) == 2, # <1> 279 | length(dim(y)) == 2, # <1> 280 | ncol(x) == nrow(y)) # <2> 281 | z <- array(0, dim = c(nrow(x), ncol(y))) # <3> 282 | for (i in 1:nrow(x)) # <4> 283 | for (j in 1:ncol(y)) { # <5> 284 | row_x <- x[i, ] 285 | column_y <- y[, j] 286 | z[i, j] <- naive_vector_dot(row_x, column_y) 287 | } 288 | z 289 | } 290 | 291 | 292 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) 293 | 294 | 295 | x <- array(1:6) 296 | x 297 | array_reshape(x, dim = c(3, 2)) 298 | array_reshape(x, dim = c(2, 3)) 299 | 300 | 301 | x <- array(1:6, dim = c(3, 2)) 302 | x 303 | t(x) 304 | 305 | 306 | .[.[train_images, train_labels], .[test_images, test_labels]] <- 307 | dataset_mnist() 308 | 309 | train_images <- array_reshape(train_images, c(60000, 28 * 28)) / 255 310 | test_images <- array_reshape(test_images, c(10000, 28 * 28)) / 255 311 | 312 | test_labels <- matrix(test_labels, ncol = 1) 313 | train_labels <- matrix(train_labels, ncol = 1) 314 | 315 | 316 | model <- keras_model_sequential() |> 317 | layer_dense(units = 512, activation = "relu") |> 318 | layer_dense(units = 10, activation = "softmax") 319 | 320 | 321 | compile(model, 322 | optimizer = "rmsprop", 323 | loss = "sparse_categorical_crossentropy", 324 | metrics = c("accuracy")) 325 | 326 | 327 | fit(model, train_images, train_labels, epochs = 5, batch_size = 128) 328 | 329 | 330 | layer_naive_dense <- function(input_size, output_size, activation = NULL) { 331 | self <- new.env(parent = emptyenv()) # <1> 332 | attr(self, "class") <- "NaiveDense" 333 | 334 | self$activation <- activation 335 | 336 | self$W <- keras_variable(shape = shape(input_size, output_size), # <2> 337 | initializer = "uniform", dtype = "float32") # <2> 338 | 339 | self$b <- keras_variable(shape = shape(output_size), # <3> 340 | initializer = "zeros", dtype = "float32") # <3> 341 | 342 | self$weights <- list(self$W, self$b) # <4> 343 | 344 | self$call <- function(inputs) { 
# <5> 345 | x <- (inputs %*% self$W) + self$b # <5> 346 | if (is.function(self$activation)) # <5> 347 | x <- self$activation(x) # <5> 348 | x # <5> 349 | } # <5> 350 | 351 | self 352 | } 353 | 354 | 355 | naive_sequential_model <- function(layers) { 356 | self <- new.env(parent = emptyenv()) 357 | attr(self, "class") <- "NaiveSequential" 358 | 359 | self$layers <- layers 360 | 361 | self$weights <- lapply(layers, \(layer) layer$weights) |> unlist() 362 | 363 | self$call <- function(inputs) { 364 | x <- inputs 365 | for (layer in self$layers) 366 | x <- layer$call(x) 367 | x 368 | } 369 | 370 | self 371 | } 372 | 373 | 374 | model <- naive_sequential_model(list( 375 | layer_naive_dense(input_size = 28 * 28, output_size = 512, 376 | activation = op_relu), 377 | layer_naive_dense(input_size = 512, output_size = 10, 378 | activation = op_softmax) 379 | )) 380 | stopifnot(length(model$weights) == 4) 381 | 382 | 383 | new_batch_generator <- function(images, labels, batch_size = 128) { 384 | 385 | stopifnot(nrow(images) == nrow(labels)) 386 | index <- 1 387 | 388 | function() { # <1> 389 | start <- index 390 | if(start > nrow(images)) 391 | return(NULL) # <2> 392 | 393 | end <- start + batch_size - 1 394 | if(end > nrow(images)) 395 | end <- nrow(images) # <3> 396 | 397 | index <<- end + 1 398 | list(images = images[start:end, , drop = FALSE], 399 | labels = labels[start:end, , drop = FALSE]) 400 | } 401 | } 402 | 403 | 404 | learning_rate <- 1e-3 405 | 406 | update_weights <- function(gradients, weights) { 407 | mapply(function(w, g) { 408 | w$assign(w - g * learning_rate) # <1> 409 | }, weights, gradients) 410 | } 411 | 412 | 413 | optimizer <- optimizer_sgd(learning_rate = 1e-3) 414 | 415 | update_weights <- function(gradients, weights) { 416 | optimizer$apply(gradients, weights) 417 | } 418 | 419 | 420 | library(tensorflow) 421 | 422 | x <- tf$zeros(shape = shape()) # <1> 423 | with(tf$GradientTape() %as% tape, { # <2> 424 | y <- 2 * x + 3 # <3> 425 | }) 426 | grad_of_y_wrt_x <- tape$gradient(y, x) # <4> 427 | 428 | 429 | one_training_step <- function(model, images_batch, labels_batch) { 430 | with(tf$GradientTape() %as% tape, { 431 | predictions <- model$call(images_batch) 432 | per_sample_losses <- 433 | op_sparse_categorical_crossentropy(labels_batch, predictions) 434 | average_loss <- op_mean(per_sample_losses) 435 | }) 436 | gradients <- tape$gradient(average_loss, model$weights) 437 | update_weights(gradients, model$weights) 438 | average_loss 439 | } 440 | 441 | 442 | fit <- function(model, images, labels, epochs, batch_size = 128) { 443 | for (epoch_counter in seq_len(epochs)) { 444 | cat("Epoch ", epoch_counter, "\n") 445 | batch_generator <- new_batch_generator(images, labels, batch_size) 446 | batch_counter <- 0 447 | repeat { 448 | batch <- batch_generator() 449 | if (is.null(batch)) 450 | break 451 | batch_counter <- batch_counter + 1 452 | loss <- one_training_step(model, batch$images, batch$labels) 453 | if (batch_counter %% 100 == 0) 454 | cat(sprintf("loss at batch %s: %.2f\n", batch_counter, loss)) 455 | } 456 | } 457 | } 458 | 459 | 460 | mnist <- dataset_mnist() 461 | train_images <- array_reshape(mnist$train$x, c(60000, 28 * 28)) / 255 462 | test_images <- array_reshape(mnist$test$x, c(10000, 28 * 28)) / 255 463 | train_labels <- matrix(mnist$train$y) 464 | test_labels <- matrix(mnist$test$y) 465 | 466 | fit(model, train_images, train_labels, epochs = 10, batch_size = 128) 467 | 468 | 469 | predictions <- model$call(test_images) 470 | predictions <- as.array(predictions) 
# convert Tensorflow Tensor to R array 471 | predicted_labels <- max.col(predictions) - 1 472 | matches <- predicted_labels == test_labels 473 | cat(sprintf("accuracy: %.2f\n", mean(matches))) 474 | 475 | -------------------------------------------------------------------------------- /chapter16_text-generation.R: -------------------------------------------------------------------------------- 1 | library(fs) 2 | library(stringr) 3 | library(keras3) 4 | reticulate::py_require("keras-hub==0.18.1") 5 | reticulate::py_require("tensorflow-text") 6 | config_set_dtype_policy("float16") 7 | 8 | 9 | zipfile <- get_file( 10 | origin = "https://huggingface.co/datasets/mattdangerw/mini-c4/resolve/main/mini-c4.zip" 11 | ) 12 | 13 | 14 | unzip(zipfile, list = TRUE) # <1> 15 | 16 | 17 | extract_dir <- fs::path("./mini-c4") 18 | 19 | 20 | fs::dir_info(extract_dir) 21 | fs::path(extract_dir, "shard0.txt") |> 22 | readLines(n = 1) |> 23 | str_replace_all(r"(\\n)", "\n") |> 24 | str_split_1("\n") |> 25 | str_wrap(width = 76) |> 26 | cat(sep = "\n--\n") 27 | 28 | 29 | keras_hub <- reticulate::import("keras_hub") 30 | tokenizer <- keras_hub$tokenizers$SentencePieceTokenizer("mini_gpt.model") 31 | 32 | 33 | tokenized <- tokenizer$tokenize("The quick brown fox.") 34 | tokenized 35 | tokenizer$detokenize(tokenized) 36 | 37 | 38 | library(tfdatasets, exclude = "shape") 39 | library(tensorflow, exclude = c("shape", "set_random_seed")) 40 | 41 | batch_size <- 128L 42 | sequence_length <- 256L 43 | suffix <- 44 | tokenizer$token_to_id("<|endoftext|>") |> 45 | tf$constant(shape = shape(1)) 46 | 47 | files <- dir_ls(extract_dir) 48 | ds <- 49 | text_line_dataset(files, num_parallel_reads = 12) |> 50 | dataset_map(\(x) tf$strings$regex_replace(x, r"(\\n)", "\n"), # <1> 51 | num_parallel_calls = 12) |> 52 | dataset_map(tokenizer, num_parallel_calls = 12) |> # <2> 53 | dataset_map(\(x) tf$concat(c(x, suffix), -1L), num_parallel_calls = 12) |> # <3> 54 | dataset_rebatch(sequence_length + 1, drop_remainder = TRUE) |> # <4> 55 | dataset_map(\(x) list(x@r[NA:-2], x@r[2:NA]), num_parallel_calls = 12) |> # <5> 56 | dataset_batch(batch_size, num_parallel_calls = 12) |> # <6> 57 | dataset_cache() 58 | 59 | 60 | num_batches <- 38581 61 | num_batches 62 | 63 | 64 | num_val_batches <- 500 65 | num_train_batches <- num_batches - num_val_batches 66 | val_ds <- ds |> dataset_take(num_val_batches) 67 | train_ds <- ds |> dataset_skip(num_val_batches) |> dataset_repeat() 68 | 69 | 70 | layer_transformer_decoder <- new_layer_class( 71 | "TransformerDecoder", 72 | initialize = function(hidden_dim, intermediate_dim, num_heads) { 73 | super$initialize() 74 | key_dim <- hidden_dim %/% num_heads 75 | self$self_attention <- layer_multi_head_attention( 76 | num_heads = num_heads, 77 | key_dim = key_dim, 78 | dropout = 0.1 79 | ) # <1> 80 | self$self_attention_layernorm <- layer_layer_normalization() # <1> 81 | self$feed_forward_1 <- layer_dense(units = intermediate_dim, 82 | activation = "relu") # <2> 83 | self$feed_forward_2 <- layer_dense(units = hidden_dim) # <2> 84 | self$feed_forward_layernorm <- layer_layer_normalization() # <2> 85 | self$dropout <- layer_dropout(rate = 0.1) # <2> 86 | }, 87 | call = function(inputs) { 88 | residual <- x <- inputs # <3> 89 | x <- self$self_attention(query = x, key = x, value = x, 90 | use_causal_mask = TRUE) # <3> 91 | x <- x |> self$dropout() # <3> 92 | x <- x + residual # <3> 93 | x <- x |> self$self_attention_layernorm() # <3> 94 | 95 | residual <- x # <4> 96 | x <- x |> 97 | self$feed_forward_1() |> # 
<4> 98 | self$feed_forward_2() |> # <4> 99 | self$dropout() 100 | x <- x + residual # <4> 101 | x <- x |> self$feed_forward_layernorm() 102 | 103 | x 104 | } 105 | ) 106 | 107 | 108 | layer_positional_embedding <- new_layer_class( 109 | "PositionalEmbedding", 110 | initialize = function(sequence_length, input_dim, output_dim) { 111 | super$initialize() 112 | self$token_embeddings <- layer_embedding( 113 | input_dim = input_dim, output_dim = output_dim 114 | ) 115 | self$position_embeddings <- layer_embedding( 116 | input_dim = sequence_length, output_dim = output_dim 117 | ) 118 | }, 119 | call = function(inputs, reverse = FALSE) { 120 | if (reverse) { 121 | token_embeddings <- self$token_embeddings$embeddings 122 | return(op_matmul(inputs, op_transpose(token_embeddings))) 123 | } 124 | .[.., sequence_length] <- op_shape(inputs) 125 | positions <- 126 | op_arange(0, sequence_length - 1, dtype = "int32") |> 127 | op_expand_dims(1) 128 | embedded_tokens <- self$token_embeddings(inputs) 129 | embedded_positions <- self$position_embeddings(positions) 130 | embedded_tokens + embedded_positions 131 | } 132 | ) 133 | 134 | 135 | vocab_size <- tokenizer$vocabulary_size() 136 | hidden_dim <- 128 137 | intermediate_dim <- 512 138 | num_heads <- 4 139 | num_layers <- 4 140 | 141 | inputs <- keras_input(shape = c(NA), dtype = "int32", name = "inputs") 142 | embedding <- 143 | layer_positional_embedding(, sequence_length, vocab_size, hidden_dim) 144 | 145 | x <- inputs |> 146 | embedding() |> 147 | layer_layer_normalization() 148 | 149 | for (i in seq_len(num_layers)) { 150 | x <- x |> 151 | layer_transformer_decoder(hidden_dim, intermediate_dim, num_heads) 152 | } 153 | 154 | outputs <- x |> embedding(reverse = TRUE) 155 | mini_gpt <- keras_model(inputs, outputs) 156 | 157 | 158 | hidden_dim <- 512 159 | intermediate_dim <- 2056 160 | num_heads <- 8 161 | num_layers <- 8 162 | 163 | 164 | warmup_schedule <- new_learning_rate_schedule_class( 165 | classname = "WarmupSchedule", 166 | 167 | initialize = function() { 168 | self$rate <- 1e-4 # <1> 169 | self$warmup_steps <- 1000 170 | }, 171 | 172 | call = function(step) { 173 | step <- step |> op_cast(dtype = "float32") 174 | scale <- op_minimum(step / self$warmup_steps, 1) 175 | self$rate * scale 176 | } 177 | ) 178 | 179 | 180 | schedule <- warmup_schedule() 181 | x <- seq(0, 5000, by = 100) 182 | y <- sapply(x, \(step) as.array(schedule(step))) 183 | plot(x, y, type = "l", 184 | main = "Warmup Schedule", 185 | xlab = "Train Step", ylab = "Learning Rate", 186 | bty = "n", panel.first = grid()) 187 | 188 | 189 | load_model_weights(mini_gpt, "mini_gpt.weights.h5") 190 | 191 | 192 | prompt <- "A piece of advice" 193 | generate <- function(prompt, max_length = 64) { 194 | tokens <- as.array(tokenizer(prompt)) 195 | prompt_length <- length(tokens) 196 | for (i in seq(from = prompt_length + 1, to = max_length)) { 197 | prediction <- mini_gpt(matrix(tokens, nrow = 1)) 198 | prediction <- prediction@r[1, -1] 199 | next_token <- op_argmax(prediction, zero_indexed = TRUE) 200 | tokens[i] <- as.array(next_token) 201 | } 202 | tokenizer$detokenize(tokens) 203 | } 204 | generate("A piece of advice") 205 | 206 | 207 | compiled_generate <- function(prompt, max_length = 64) { 208 | tokens <- as.array(tokenizer(prompt)) 209 | prompt_length <- length(tokens) 210 | tokens[seq(prompt_length + 1, max_length)] <- 0 # <1> 211 | dim(tokens) <- c(1, max_length) 212 | for (i in seq(prompt_length, max_length - 1)) { 213 | prediction <- predict(mini_gpt, tokens, verbose = 0) 214 | 
prediction <- prediction[, i, ] 215 | next_token <- which.max(prediction) - 1L 216 | tokens[, i + 1] <- next_token 217 | } 218 | tokenizer$detokenize(tokens) 219 | } 220 | 221 | 222 | system.time(compiled_generate(prompt, 64))[["elapsed"]] 223 | 224 | 225 | compiled_generate <- function(prompt, sample_fn, max_length = 64) { 226 | tokens <- as.array(tokenizer(prompt)) 227 | prompt_length <- length(tokens) 228 | tokens[seq(prompt_length + 1, max_length)] <- 0 229 | dim(tokens) <- c(1, max_length) 230 | for (i in seq(prompt_length, max_length - 1)) { 231 | prediction <- predict(mini_gpt, tokens, verbose = 0) 232 | prediction <- prediction[, i, ] 233 | next_token <- sample_fn(prediction) - 1L 234 | tokens[, i + 1] <- as.array(next_token) 235 | } 236 | tokenizer$detokenize(tokens) 237 | } 238 | 239 | 240 | greedy_search <- function(preds) { 241 | op_argmax(preds) 242 | } 243 | 244 | compiled_generate(prompt, greedy_search) 245 | 246 | 247 | random_sample <- function(preds, temperature = 1) { 248 | preds <- preds / temperature 249 | preds <- op_reshape(preds, c(1, -1)) 250 | random_categorical(preds, num_samples = 1) |> 251 | op_squeeze() 252 | } 253 | 254 | 255 | compiled_generate(prompt, random_sample) 256 | 257 | 258 | compiled_generate(prompt, \(x) random_sample(x, temperature = 2)) 259 | compiled_generate(prompt, \(x) random_sample(x, temperature = 0.8)) 260 | compiled_generate(prompt, \(x) random_sample(x, temperature = 0.2)) 261 | 262 | 263 | prompt <- "A piece of advice" 264 | top_k <- function(preds, k = 5, temperature = 1) { 265 | preds <- preds / temperature 266 | .[top_preds, top_indices] <- op_top_k(preds, k = k, sorted = FALSE) 267 | choice <- random_sample(top_preds) 268 | op_take(top_indices, choice) 269 | } 270 | 271 | 272 | compiled_generate(prompt, \(x) top_k(x, k = 5, temperature = 0.5)) 273 | 274 | 275 | rm(list = ls()); gc(TRUE); reticulate::import("gc")$collect() 276 | rm(list = ls()); gc(TRUE); reticulate::import("gc")$collect() 277 | 278 | 279 | library(keras3) 280 | library(reticulate) 281 | py_require("keras_hub") 282 | 283 | config_set_dtype_policy("float16") 284 | keras_hub <- import("keras_hub") 285 | kaggle_credentials <- jsonlite::read_json("~/.kaggle/kaggle.json") 286 | withr::with_envvar(c( 287 | KAGGLE_USERNAME = kaggle_credentials$username, 288 | KAGGLE_KEY = kaggle_credentials$key), { 289 | gemma_lm <- keras_hub$models$GemmaCausalLM$from_preset("gemma2_2b_en") 290 | } 291 | ) 292 | 293 | 294 | gemma_lm 295 | 296 | 297 | gemma_lm$compile(sampler = "greedy") 298 | gemma_lm$generate("A piece of advice", max_length = 64L) 299 | gemma_lm$generate("How can I make brownies?", max_length = 64L) 300 | 301 | 302 | gemma_lm$generate( 303 | paste0( 304 | "The following brownie recipe is easy to make in just a few steps.", 305 | "\n\nYou can start by" 306 | ), 307 | max_length = 64L 308 | ) 309 | 310 | 311 | gemma_lm$generate( 312 | "Tell me about the 61st president of the United States.", 313 | max_length = 64L 314 | ) 315 | 316 | 317 | TEMPLATE = glue::trim(r"---( 318 | [instruction] 319 | {instruction}[end] 320 | [reponse] 321 | {response}[end] 322 | )---") 323 | 324 | dataset_path <- get_file(origin = paste0( 325 | "https://huggingface.co/datasets/databricks/", 326 | "databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl" 327 | )) 328 | 329 | data <- readr::read_lines(dataset_path) |> 330 | lapply(jsonlite::parse_json) |> 331 | dplyr::bind_rows() 332 | 333 | data 334 | 335 | data <- data |> 336 | dplyr::filter(context != "") |> 337 | glue::glue_data(TEMPLATE) 
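# (Added sketch, not from the book.) glue_data() fills the {instruction} and
# {response} slots of TEMPLATE for every row of the data; applied to a single
# made-up record it yields one formatted training string:
glue::glue_data(
  list(instruction = "Name a primary color.", response = "Red."),
  TEMPLATE
)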
338 | 339 | 340 | writeLines(data[[1]]) 341 | 342 | 343 | library(tfdatasets) 344 | ds <- tensor_slices_dataset(data) |> 345 | dataset_shuffle(2000) |> 346 | dataset_batch(1) 347 | val_ds <- ds |> dataset_take(100) 348 | train_ds <- ds |> dataset_skip(100) 349 | 350 | 351 | preprocessor <- gemma_lm$preprocessor 352 | preprocessor$sequence_length <- 512L 353 | batch <- iter_next(as_iterator(train_ds)) 354 | str(batch) 355 | .[x, y, sample_weight] <- preprocessor(batch) 356 | str(x) 357 | str(y) 358 | str(sample_weight) 359 | 360 | 361 | x$token_ids |> as.array() |> _[1, 1:5] 362 | y |> as.array() |> _[1, 1:5] 363 | 364 | 365 | layer_linear <- new_layer_class( 366 | classname = "Linear", 367 | initialize = function(input_dim, output_dim) { 368 | super$initialize() 369 | self$kernel <- self$add_weight(shape = shape(input_dim, output_dim)) 370 | }, 371 | call = function(inputs) { 372 | op_matmul(inputs, self$kernel) 373 | } 374 | ) 375 | 376 | 377 | layer_lora_linear <- new_layer_class( 378 | classname = "LoraLinear", 379 | 380 | initialize = function(input_dim, output_dim, rank) { 381 | super$initialize() 382 | self$kernel <- self$add_weight(shape(input_dim, output_dim), 383 | trainable = FALSE) 384 | self$alpha <- self$add_weight(shape(input_dim, rank)) 385 | self$beta <- self$add_weight(shape(rank, output_dim)) 386 | }, 387 | 388 | call = function(inputs) { 389 | frozen <- inputs |> op_matmul(self$kernel) 390 | update <- inputs |> op_matmul(self$alpha) |> op_matmul(self$beta) 391 | frozen + update 392 | } 393 | ) 394 | 395 | 396 | rank <- 2L 397 | gemma_lm$backbone$trainable <- FALSE # <1> 398 | 399 | for (i in seq_len(gemma_lm$backbone$num_layers) - 1) { # <3> 400 | layer <- get_layer(gemma_lm$backbone, sprintf("decoder_block_%d", i)) # <2> 401 | 402 | layer$attention$key_dense$trainable <- TRUE 403 | layer$attention$key_dense$enable_lora(rank = rank) 404 | 405 | layer$attention$query_dense$trainable <- TRUE 406 | layer$attention$query_dense$enable_lora(rank = rank) 407 | } 408 | 409 | 410 | gemma_lm 411 | 412 | 413 | gemma_lm |> compile( 414 | loss = loss_sparse_categorical_crossentropy(from_logits = TRUE), 415 | optimizer = optimizer_adam(5e-5), 416 | weighted_metrics = metric_sparse_categorical_accuracy() 417 | ) 418 | 419 | 420 | rm(list = ls()); gc(); reticulate::import("gc")$collect() 421 | 422 | 423 | image_url <- paste0("https://github.com/mattdangerw/keras-nlp-scripts/", 424 | "blob/main/learned-python.png?raw=true") 425 | image_path <- get_file(origin = image_url) 426 | 427 | image <- image_path |> image_load() |> image_to_array() 428 | par(mar = c(0, 0, 0, 0)) 429 | plot(as.raster(image, max = 255L)) 430 | 431 | 432 | library(keras3) 433 | config_set_dtype_policy("float16") 434 | keras_hub <- import("keras_hub") 435 | pali_gemma_lm <- keras_hub$models$PaliGemmaCausalLM$from_preset( 436 | "pali_gemma_3b_mix_448" 437 | ) 438 | 439 | 440 | pali_gemma_lm 441 | 442 | 443 | pali_gemma_lm$generate(list( 444 | images = image, 445 | prompts = "cap en\n" 446 | )) 447 | pali_gemma_lm$generate(list( 448 | images = image, 449 | prompts = "answer en where is the snake doing?\n" 450 | )) 451 | pali_gemma_lm$generate(list( 452 | images = image, 453 | prompts = "detect glasses\n" 454 | )) 455 | 456 | 457 | library(stringr) 458 | 459 | response <- " glasses" 460 | box <- as.numeric(unlist(str_extract_all(response, "\\d+"))) 461 | 462 | .[height, ..] 
<- dim(image) 463 | box <- box * height / 1024 464 | 465 | .[ytop, xleft, ybottom, xright] <- box 466 | ytop <- height - ytop 467 | ybottom <- height - ybottom 468 | 469 | par(mar = c(0,0,0,0)) 470 | plot(as.raster(image, max = 255)) 471 | rect(xleft, ybottom, xright, ytop, 472 | border = "red", lwd = 4) 473 | 474 | -------------------------------------------------------------------------------- /2e/ch05.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | tensorflow::as_tensor(1) 3 | library(keras3) 4 | 5 | 6 | 7 | ## ------------------------------------------------------------------------- 8 | library(keras3) 9 | 10 | mnist <- dataset_mnist() 11 | train_labels <- mnist$train$y 12 | train_images <- array_reshape(mnist$train$x / 255, 13 | c(60000, 28 * 28)) 14 | 15 | random_array <- function(dim) array(runif(prod(dim)), dim) 16 | 17 | noise_channels <- random_array(dim(train_images)) 18 | train_images_with_noise_channels <- cbind(train_images, noise_channels) 19 | 20 | zeros_channels <- array(0, dim(train_images)) 21 | train_images_with_zeros_channels <- cbind(train_images, zeros_channels) 22 | 23 | 24 | ## ------------------------------------------------------------------------- 25 | get_model <- function() { 26 | model <- keras_model_sequential() %>% 27 | layer_dense(512, activation = "relu") %>% 28 | layer_dense(10, activation = "softmax") 29 | 30 | model %>% compile( 31 | optimizer = "rmsprop", 32 | loss = "sparse_categorical_crossentropy", 33 | metrics = "accuracy") 34 | 35 | model 36 | } 37 | 38 | 39 | ## ------------------------------------------------------------------------- 40 | model <- get_model() 41 | history_noise <- model %>% fit( 42 | train_images_with_noise_channels, train_labels, 43 | epochs = 10, 44 | batch_size = 128, 45 | validation_split = 0.2) 46 | 47 | model <- get_model() 48 | history_zeros <- model %>% fit( 49 | train_images_with_zeros_channels, train_labels, 50 | epochs = 10, 51 | batch_size = 128, 52 | validation_split = 0.2) 53 | 54 | 55 | ## ------------------------------------------------------------------------- 56 | plot(NULL, 57 | main = "Effect of Noise Channels on Validation Accuracy", 58 | xlab = "Epochs", xlim = c(1, history_noise$params$epochs), 59 | ylab = "Validation Accuracy", ylim = c(0.9, 1), las = 1) 60 | lines(history_zeros$metrics$val_accuracy, lty = 1, type = "o") 61 | lines(history_noise$metrics$val_accuracy, lty = 2, type = "o") 62 | legend("bottomright", lty = 1:2, 63 | legend = c("Validation accuracy with zeros channels", 64 | "Validation accuracy with noise channels")) 65 | 66 | 67 | ## ------------------------------------------------------------------------- 68 | c(c(train_images, train_labels), .) 
%<-% dataset_mnist() 69 | 70 | train_images <- array_reshape(train_images / 255, 71 | c(60000, 28 * 28)) 72 | 73 | random_train_labels <- sample(train_labels) 74 | 75 | model <- keras_model_sequential() %>% 76 | layer_dense(512, activation = "relu") %>% 77 | layer_dense(10, activation = "softmax") 78 | 79 | model %>% compile(optimizer = "rmsprop", 80 | loss = "sparse_categorical_crossentropy", 81 | metrics = "accuracy") 82 | 83 | 84 | ## ------------------------------------------------------------------------- 85 | history <- model %>% fit(train_images, random_train_labels, 86 | epochs = 100, 87 | batch_size = 128, 88 | validation_split = 0.2) 89 | 90 | ## ------------------------------------------------------------------------- 91 | plot(history) 92 | 93 | 94 | ## ---- eval = FALSE-------------------------------------------------------- 95 | ## num_validation_samples <- 10000 96 | ## val_indices <- sample.int(num_validation_samples, nrow(data)) 97 | ## validation_data <- data[val_indices, ] 98 | ## training_data <- data[-val_indices, ] 99 | ## model <- get_model() 100 | ## fit(model, training_data, ...) 101 | ## validation_score <- evaluate(model, validation_data, ...) 102 | ## 103 | ## ... 104 | ## 105 | ## model <- get_model() 106 | ## fit(model, data, ...) 107 | ## test_score <- evaluate(model, test_data, ...) 108 | 109 | 110 | ## ---- eval = FALSE-------------------------------------------------------- 111 | ## k <- 3 112 | ## fold_id <- sample(rep(1:k, length.out = nrow(data))) 113 | ## validation_scores <- numeric() 114 | ## 115 | ## for (fold in seq_len(k)) { 116 | ## validation_idx <- which(fold_id == fold) 117 | ## 118 | ## validation_data <- data[validation_idx, ] 119 | ## training_data <- data[-validation_idx, ] 120 | ## model <- get_model() 121 | ## fit(model, training_data, ...) 122 | ## validation_score <- evaluate(model, validation_data, ...) 123 | ## validation_scores[[fold]] <- validation_score 124 | ## } 125 | ## 126 | ## validation_score <- mean(validation_scores) 127 | ## model <- get_model() 128 | ## fit(model, data, ...) 129 | ## test_score <- evaluate(model, test_data, ...) 130 | 131 | 132 | ## ------------------------------------------------------------------------- 133 | c(c(train_images, train_labels), .) 
%<-% dataset_mnist() 134 | train_images <- array_reshape(train_images / 255, 135 | c(60000, 28 * 28)) 136 | 137 | model <- keras_model_sequential() %>% 138 | layer_dense(units = 512, activation = "relu") %>% 139 | layer_dense(units = 10, activation = "softmax") 140 | 141 | model %>% compile(optimizer = optimizer_rmsprop(1), 142 | loss = "sparse_categorical_crossentropy", 143 | metrics = "accuracy") 144 | 145 | 146 | ## ------------------------------------------------------------------------- 147 | history <- model %>% fit(train_images, train_labels, 148 | epochs = 10, batch_size = 128, 149 | validation_split = 0.2) 150 | 151 | 152 | ## ------------------------------------------------------------------------- 153 | plot(history) 154 | 155 | 156 | ## ------------------------------------------------------------------------- 157 | model <- keras_model_sequential() %>% 158 | layer_dense(units = 512, activation = "relu") %>% 159 | layer_dense(units = 10, activation = "softmax") 160 | 161 | model %>% compile(optimizer = optimizer_rmsprop(1e-2), 162 | loss = "sparse_categorical_crossentropy", 163 | metrics = "accuracy") 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | model %>% 168 | fit(train_images, train_labels, 169 | epochs = 10, batch_size = 128, 170 | validation_split = 0.2) -> 171 | history 172 | 173 | 174 | ## ------------------------------------------------------------------------- 175 | plot(history) 176 | 177 | 178 | ## ------------------------------------------------------------------------- 179 | model <- keras_model_sequential() %>% 180 | layer_dense(10, activation = "softmax") 181 | 182 | model %>% compile(optimizer = "rmsprop", 183 | loss = "sparse_categorical_crossentropy", 184 | metrics = "accuracy") 185 | 186 | history_small_model <- model %>% 187 | fit(train_images, train_labels, 188 | epochs = 20, 189 | batch_size = 128, 190 | validation_split = 0.2) 191 | 192 | 193 | ## ------------------------------------------------------------------------- 194 | plot(history_small_model$metrics$val_loss, type = 'o', 195 | main = "Effect of Insufficient Model Capacity on Validation Loss", 196 | xlab = "Epochs", ylab = "Validation Loss") 197 | 198 | 199 | ## ------------------------------------------------------------------------- 200 | model <- keras_model_sequential() %>% 201 | layer_dense(96, activation="relu") %>% 202 | layer_dense(96, activation="relu") %>% 203 | layer_dense(10, activation="softmax") 204 | 205 | model %>% compile(optimizer="rmsprop", 206 | loss="sparse_categorical_crossentropy", 207 | metrics="accuracy") 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | history_large_model <- model %>% 212 | fit(train_images, train_labels, 213 | epochs = 20, 214 | batch_size = 128, 215 | validation_split = 0.2) 216 | 217 | 218 | ## ------------------------------------------------------------------------- 219 | plot(history_large_model$metrics$val_loss, type = 'o', 220 | main = "Validation Loss for a Model with Appropriate Capacity", 221 | xlab = "Epochs", ylab = "Validation Loss") 222 | 223 | 224 | ## ------------------------------------------------------------------------- 225 | c(c(train_data, train_labels), .) 
%<-% dataset_imdb(num_words = 10000) 226 | 227 | vectorize_sequences <- function(sequences, dimension=10000) { 228 | results <- matrix(0, nrow = length(sequences), ncol = dimension) 229 | for(i in seq_along(sequences)) 230 | results[i, sequences[[i]]] <- 1 231 | results 232 | } 233 | 234 | train_data <- vectorize_sequences(train_data) 235 | 236 | model <- keras_model_sequential() %>% 237 | layer_dense(16, activation="relu") %>% 238 | layer_dense(16, activation="relu") %>% 239 | layer_dense(1, activation="sigmoid") 240 | 241 | model %>% compile(optimizer="rmsprop", 242 | loss="binary_crossentropy", 243 | metrics="accuracy") 244 | 245 | 246 | ## ------------------------------------------------------------------------- 247 | history_original <- model %>% 248 | fit(train_data, train_labels, 249 | epochs = 20, batch_size = 512, validation_split = 0.4) 250 | 251 | 252 | ## ------------------------------------------------------------------------- 253 | model <- keras_model_sequential() %>% 254 | layer_dense(4, activation = "relu") %>% 255 | layer_dense(4, activation = "relu") %>% 256 | layer_dense(1, activation = "sigmoid") 257 | 258 | model %>% compile(optimizer = "rmsprop", 259 | loss = "binary_crossentropy", 260 | metrics = "accuracy") 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | history_smaller_model <- model %>% 265 | fit(train_data, train_labels, 266 | epochs = 20, batch_size = 512, validation_split = 0.4) 267 | 268 | 269 | ## ------------------------------------------------------------------------- 270 | plot( 271 | NULL, 272 | main = "Original Model vs. Smaller Model on IMDB Review Classification", 273 | xlab = "Epochs", 274 | xlim = c(1, history_original$params$epochs), 275 | ylab = "Validation Loss", 276 | ylim = extendrange(history_original$metrics$val_loss), 277 | panel.first = abline(v = 1:history_original$params$epochs, 278 | lty = "dotted", col = "lightgrey") 279 | ) 280 | 281 | lines(history_original $metrics$val_loss, lty = 2) 282 | lines(history_smaller_model$metrics$val_loss, lty = 1) 283 | legend("topleft", lty = 2:1, 284 | legend = c("Validation loss of original model", 285 | "Validation loss of smaller model")) 286 | 287 | 288 | ## ------------------------------------------------------------------------- 289 | model <- keras_model_sequential() %>% 290 | layer_dense(512, activation="relu") %>% 291 | layer_dense(512, activation="relu") %>% 292 | layer_dense(1, activation="sigmoid") 293 | 294 | model %>% compile(optimizer="rmsprop", 295 | loss="binary_crossentropy", 296 | metrics="accuracy") 297 | 298 | 299 | ## ------------------------------------------------------------------------- 300 | history_larger_model <- model %>% 301 | fit(train_data, train_labels, 302 | epochs = 20, batch_size = 512, validation_split = 0.4) 303 | 304 | 305 | ## ------------------------------------------------------------------------- 306 | plot( 307 | NULL, 308 | main = 309 | "Original Model vs. 
Much Larger Model on IMDB Review Classification", 310 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 311 | ylab = "Validation Loss", 312 | ylim = range(c(history_original$metrics$val_loss, 313 | history_larger_model$metrics$val_loss)), 314 | panel.first = abline(v = 1:history_original$params$epochs, 315 | lty = "dotted", col = "lightgrey") 316 | ) 317 | lines(history_original $metrics$val_loss, lty = 2) 318 | lines(history_larger_model$metrics$val_loss, lty = 1) 319 | legend("topleft", lty = 2:1, 320 | legend = c("Validation loss of original model", 321 | "Validation loss of larger model")) 322 | 323 | 324 | ## ------------------------------------------------------------------------- 325 | model <- keras_model_sequential() %>% 326 | layer_dense(16, activation = "relu", 327 | kernel_regularizer = regularizer_l2(0.002)) %>% 328 | layer_dense(16, activation = "relu", 329 | kernel_regularizer = regularizer_l2(0.002)) %>% 330 | layer_dense(1, activation = "sigmoid") 331 | 332 | model %>% compile(optimizer="rmsprop", 333 | loss="binary_crossentropy", 334 | metrics="accuracy") 335 | 336 | 337 | ## ------------------------------------------------------------------------- 338 | history_l2_reg <- model %>% fit( 339 | train_data, train_labels, 340 | epochs = 20, batch_size = 512, validation_split = 0.4) 341 | 342 | ## ------------------------------------------------------------------------- 343 | plot(history_l2_reg) 344 | 345 | 346 | ## ------------------------------------------------------------------------- 347 | plot(NULL, 348 | main = "Effect of L2 Weight Regularization on Validation Loss", 349 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 350 | ylab = "Validation Loss", 351 | ylim = range(c(history_original$metrics$val_loss, 352 | history_l2_reg $metrics$val_loss)), 353 | panel.first = abline(v = 1:history_original$params$epochs, 354 | lty = "dotted", col = "lightgrey")) 355 | lines(history_original$metrics$val_loss, lty = 2) 356 | lines(history_l2_reg $metrics$val_loss, lty = 1) 357 | legend("topleft", lty = 2:1, 358 | legend = c("Validation loss of original model", 359 | "Validation loss of L2-regularized model")) 360 | 361 | 362 | ## ------------------------------------------------------------------------- 363 | regularizer_l1(0.001) 364 | regularizer_l1_l2(l1 = 0.001, l2 = 0.001) 365 | 366 | 367 | ## ---- eval = FALSE-------------------------------------------------------- 368 | ## zero_out <- random_array(dim(layer_output)) < .5 369 | ## layer_output[zero_out] <- 0 370 | 371 | 372 | ## ---- eval = FALSE-------------------------------------------------------- 373 | ## layer_output <- layer_output * .5 374 | 375 | 376 | ## ---- eval = FALSE-------------------------------------------------------- 377 | ## layer_output[random_array(dim(layer_output)) < dropout_rate] <- 0 378 | ## layer_output <- layer_output / .5 379 | 380 | 381 | ## ------------------------------------------------------------------------- 382 | model <- keras_model_sequential() %>% 383 | layer_dense(16, activation = "relu") %>% 384 | layer_dropout(0.5) %>% 385 | layer_dense(16, activation = "relu") %>% 386 | layer_dropout(0.5) %>% 387 | layer_dense(1, activation = "sigmoid") 388 | 389 | model %>% compile(optimizer = "rmsprop", 390 | loss = "binary_crossentropy", 391 | metrics = "accuracy") 392 | 393 | 394 | ## ------------------------------------------------------------------------- 395 | history_dropout <- model %>% fit( 396 | train_data, train_labels, 397 | epochs = 20, batch_size = 512, 
398 | validation_split = 0.4 399 | ) 400 | 401 | 402 | ## ------------------------------------------------------------------------- 403 | plot(history_dropout) 404 | 405 | 406 | ## ------------------------------------------------------------------------- 407 | plot(NULL, 408 | main = "Effect of Dropout on Validation Loss", 409 | xlab = "Epochs", xlim = c(1, history_original$params$epochs), 410 | ylab = "Validation Loss", 411 | ylim = range(c(history_original$metrics$val_loss, 412 | history_dropout $metrics$val_loss)), 413 | panel.first = abline(v = 1:history_original$params$epochs, 414 | lty = "dotted", col = "lightgrey")) 415 | lines(history_original$metrics$val_loss, lty = 2) 416 | lines(history_dropout $metrics$val_loss, lty = 1) 417 | legend("topleft", lty = 1:2, 418 | legend = c("Validation loss of dropout-regularized model", 419 | "Validation loss of original model")) 420 | -------------------------------------------------------------------------------- /2e/ch03.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | library(tensorflow) 3 | library(keras3) 4 | tf_function(function(x) x+1)(as_tensor(1)) 5 | 6 | 7 | ## ---- eval = FALSE-------------------------------------------------------- 8 | ## install.packages("keras") 9 | ## 10 | ## library(reticulate) 11 | ## virtualenv_create("r-reticulate", python = install_python()) 12 | ## 13 | ## library(keras3) 14 | ## install_keras(envname = "r-reticulate") 15 | 16 | 17 | ## ------------------------------------------------------------------------- 18 | tensorflow::tf_config() 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | r_array <- array(1:6, c(2, 3)) 23 | tf_tensor <- as_tensor(r_array) 24 | tf_tensor 25 | 26 | 27 | ## ------------------------------------------------------------------------- 28 | dim(tf_tensor) 29 | tf_tensor + tf_tensor 30 | 31 | 32 | ## ------------------------------------------------------------------------- 33 | methods(class = "tensorflow.tensor") 34 | 35 | 36 | ## ------------------------------------------------------------------------- 37 | tf_tensor$ndim 38 | 39 | 40 | ## ------------------------------------------------------------------------- 41 | as_tensor(1)$ndim 42 | as_tensor(1:2)$ndim 43 | 44 | 45 | ## ------------------------------------------------------------------------- 46 | tf_tensor$shape 47 | 48 | 49 | ## ------------------------------------------------------------------------- 50 | methods(class = class(shape())[1]) 51 | 52 | 53 | ## ------------------------------------------------------------------------- 54 | shape(2, 3) 55 | 56 | 57 | ## ------------------------------------------------------------------------- 58 | tf_tensor$dtype 59 | 60 | 61 | ## ------------------------------------------------------------------------- 62 | r_array <- array(1) 63 | typeof(r_array) 64 | as_tensor(r_array)$dtype 65 | 66 | 67 | ## ------------------------------------------------------------------------- 68 | as_tensor(r_array, dtype = "float32") 69 | 70 | 71 | ## ------------------------------------------------------------------------- 72 | as_tensor(0, shape = c(2, 3)) 73 | 74 | 75 | ## ------------------------------------------------------------------------- 76 | as_tensor(1:6, shape = c(2, 3)) 77 | 78 | 79 | ## ------------------------------------------------------------------------- 80 | array(1:6, dim = c(2, 3)) 81 | 82 | 83 | ## 
------------------------------------------------------------------------- 84 | array_reshape(1:6, c(2, 3), order = "C") 85 | array_reshape(1:6, c(2, 3), order = "F") 86 | 87 | 88 | ## ------------------------------------------------------------------------- 89 | array_reshape(1:6, c(-1, 3)) 90 | as_tensor(1:6, shape = c(NA, 3)) 91 | 92 | 93 | ## ------------------------------------------------------------------------- 94 | train_images <- as_tensor(dataset_mnist()$train$x) 95 | my_slice <- train_images[, 15:NA, 15:NA] 96 | 97 | 98 | ## ------------------------------------------------------------------------- 99 | my_slice <- train_images[, 8:-8, 8:-8] 100 | 101 | 102 | ## ------------------------------------------------------------------------- 103 | my_slice <- train_images[1:100, all_dims()] 104 | 105 | 106 | ## ------------------------------------------------------------------------- 107 | my_slice <- train_images[1:100, , ] 108 | 109 | 110 | ## ------------------------------------------------------------------------- 111 | x <- as_tensor(1, shape = c(64, 3, 32, 10)) 112 | y <- as_tensor(2, shape = c(32, 10)) 113 | z <- x + y 114 | 115 | 116 | ## ------------------------------------------------------------------------- 117 | z <- x + y[tf$newaxis, tf$newaxis, , ] 118 | 119 | 120 | ## ------------------------------------------------------------------------- 121 | library(tensorflow) 122 | tf$ones(shape(1, 3)) 123 | tf$zeros(shape(1, 3)) 124 | tf$random$normal(shape(1, 3), mean = 0, stddev = 1) 125 | tf$random$uniform(shape(1, 3)) 126 | 127 | 128 | ## ---- error = TRUE-------------------------------------------------------- 129 | tf$ones(c(2, 1)) 130 | 131 | 132 | ## ------------------------------------------------------------------------- 133 | tf$ones(c(2L, 1L)) 134 | 135 | 136 | ## ------------------------------------------------------------------------- 137 | m <- as_tensor(1:12, shape = c(3, 4)) 138 | tf$reduce_mean(m, axis = 0L, keepdims = TRUE) 139 | 140 | 141 | ## ------------------------------------------------------------------------- 142 | mean(m, axis = 1, keepdims = TRUE) 143 | 144 | 145 | ## ------------------------------------------------------------------------- 146 | x <- array(1, dim = c(2, 2)) 147 | x[1, 1] <- 0 148 | 149 | 150 | ## ---- error = TRUE-------------------------------------------------------- 151 | x <- as_tensor(1, shape = c(2, 2)) 152 | x[1, 1] <- 0 153 | 154 | 155 | ## ------------------------------------------------------------------------- 156 | v <- tf$Variable(initial_value = tf$random$normal(shape(3, 1))) 157 | v 158 | 159 | 160 | ## ------------------------------------------------------------------------- 161 | v$assign(tf$ones(shape(3, 1))) 162 | 163 | 164 | ## ------------------------------------------------------------------------- 165 | v[1, 1]$assign(3) 166 | 167 | 168 | ## ------------------------------------------------------------------------- 169 | v$assign_add(tf$ones(shape(3, 1))) 170 | 171 | 172 | ## ------------------------------------------------------------------------- 173 | a <- tf$ones(c(2L, 2L)) 174 | b <- tf$square(a) 175 | c <- tf$sqrt(a) 176 | d <- b + c 177 | e <- tf$matmul(a, b) 178 | e <- e * d 179 | 180 | 181 | ## ------------------------------------------------------------------------- 182 | input_var <- tf$Variable(initial_value = 3) 183 | with(tf$GradientTape() %as% tape, { 184 | result <- tf$square(input_var) 185 | }) 186 | gradient <- tape$gradient(result, input_var) 187 | 188 | 189 | ## 
------------------------------------------------------------------------- 190 | input_const <- as_tensor(3) 191 | with(tf$GradientTape() %as% tape, { 192 | tape$watch(input_const) 193 | result = tf$square(input_const) 194 | }) 195 | gradient <- tape$gradient(result, input_const) 196 | 197 | 198 | ## ------------------------------------------------------------------------- 199 | time <- tf$Variable(0) 200 | with(tf$GradientTape() %as% outer_tape, { 201 | with(tf$GradientTape() %as% inner_tape, { 202 | position <- 4.9 * time ^ 2 203 | }) 204 | speed <- inner_tape$gradient(position, time) 205 | }) 206 | acceleration <- outer_tape$gradient(speed, time) 207 | acceleration 208 | 209 | 210 | ## ------------------------------------------------------------------------- 211 | num_samples_per_class <- 1000 212 | Sigma <- rbind(c(1, 0.5), 213 | c(0.5, 1)) 214 | negative_samples <- MASS::mvrnorm(n = num_samples_per_class, 215 | mu = c(0, 3), 216 | Sigma = Sigma) 217 | positive_samples <- MASS::mvrnorm(n = num_samples_per_class, 218 | mu = c(3, 0), 219 | Sigma = Sigma) 220 | 221 | 222 | ## ------------------------------------------------------------------------- 223 | inputs <- rbind(negative_samples, positive_samples) 224 | 225 | 226 | ## ------------------------------------------------------------------------- 227 | targets <- rbind(array(0, dim = c(num_samples_per_class, 1)), 228 | array(1, dim = c(num_samples_per_class, 1))) 229 | 230 | 231 | ## ------------------------------------------------------------------------- 232 | plot(x = inputs[, 1], y = inputs[, 2], 233 | col = ifelse(targets[,1] == 0, "purple", "green")) 234 | 235 | 236 | ## ------------------------------------------------------------------------- 237 | input_dim <- 2 238 | output_dim <- 1 239 | W <- tf$Variable(initial_value = 240 | tf$random$uniform(shape(input_dim, output_dim))) 241 | b <- tf$Variable(initial_value = tf$zeros(shape(output_dim))) 242 | 243 | 244 | ## ------------------------------------------------------------------------- 245 | model <- function(inputs) 246 | tf$matmul(inputs, W) + b 247 | 248 | 249 | ## ------------------------------------------------------------------------- 250 | square_loss <- function(targets, predictions) { 251 | per_sample_losses <- (targets - predictions)^2 252 | mean(per_sample_losses) 253 | } 254 | 255 | 256 | ## ------------------------------------------------------------------------- 257 | square_loss <- function(targets, predictions) { 258 | per_sample_losses <- tf$square(targets - predictions) 259 | tf$reduce_mean(per_sample_losses) 260 | } 261 | 262 | 263 | ## ------------------------------------------------------------------------- 264 | learning_rate <- 0.1 265 | 266 | training_step <- function(inputs, targets) { 267 | with(tf$GradientTape() %as% tape, { 268 | predictions <- model(inputs) 269 | loss <- square_loss(predictions, targets) 270 | }) 271 | grad_loss_wrt <- tape$gradient(loss, list(W = W, b = b)) 272 | W$assign_sub(grad_loss_wrt$W * learning_rate) 273 | b$assign_sub(grad_loss_wrt$b * learning_rate) 274 | loss 275 | } 276 | 277 | 278 | ## ------------------------------------------------------------------------- 279 | inputs <- as_tensor(inputs, dtype = "float32") 280 | for (step in seq(40)) { 281 | loss <- training_step(inputs, targets) 282 | cat(sprintf("Loss at step %s: %.4f\n", step, loss)) 283 | } 284 | 285 | 286 | ## ------------------------------------------------------------------------- 287 | predictions <- model(inputs) 288 | 289 | inputs <- as.array(inputs) 290 | 
predictions <- as.array(predictions) 291 | plot(inputs[, 1], inputs[, 2], 292 | col = ifelse(predictions[, 1] <= 0.5, "purple", "green")) 293 | 294 | 295 | ## ------------------------------------------------------------------------- 296 | plot(x = inputs[, 1], y = inputs[, 2], 297 | col = ifelse(predictions[, 1] <= 0.5, "purple", "green")) 298 | 299 | slope <- -W[1, ] / W[2, ] 300 | intercept <- (0.5 - b) / W[2, ] 301 | abline(as.array(intercept), as.array(slope), col = "red") 302 | 303 | 304 | ## ------------------------------------------------------------------------- 305 | layer_simple_dense <- new_layer_class( 306 | classname = "SimpleDense", 307 | 308 | initialize = function(units, activation = NULL) { 309 | super$initialize() 310 | self$units <- as.integer(units) 311 | self$activation <- activation 312 | }, 313 | 314 | build = function(input_shape) { 315 | input_dim <- input_shape[length(input_shape)] 316 | self$W <- self$add_weight(shape = c(input_dim, self$units), 317 | initializer = "random_normal") 318 | self$b <- self$add_weight(shape = c(self$units), 319 | initializer = "zeros") 320 | }, 321 | 322 | call = function(inputs) { 323 | y <- tf$matmul(inputs, self$W) + self$b 324 | if (!is.null(self$activation)) 325 | y <- self$activation(y) 326 | y 327 | } 328 | ) 329 | 330 | 331 | ## ------------------------------------------------------------------------- 332 | my_dense <- layer_simple_dense(units = 32, activation = tf$nn$relu) 333 | input_tensor <- as_tensor(1, shape = c(2, 784)) 334 | output_tensor <- my_dense(input_tensor) 335 | output_tensor$shape 336 | 337 | 338 | ## ------------------------------------------------------------------------- 339 | layer <- layer_dense(units = 32, activation = "relu") 340 | 341 | 342 | ## ------------------------------------------------------------------------- 343 | model <- keras_model_sequential(list( 344 | layer_dense(units = 32, activation="relu"), 345 | layer_dense(units = 32) 346 | )) 347 | 348 | 349 | ## ---- eval = FALSE-------------------------------------------------------- 350 | ## model <- model_naive_sequential(list( 351 | ## layer_naive_dense(input_size = 784, output_size = 32, 352 | ## activation = "relu"), 353 | ## layer_naive_dense(input_size = 32, output_size = 64, 354 | ## activation = "relu"), 355 | ## layer_naive_dense(input_size = 64, output_size = 32, 356 | ## activation = "relu"), 357 | ## layer_naive_dense(input_size = 32, output_size = 10, 358 | ## activation = "softmax") 359 | ## )) 360 | 361 | 362 | ## ------------------------------------------------------------------------- 363 | layer <- function(inputs) { 364 | if(!self$built) { 365 | self$build(inputs$shape) 366 | self$built <- TRUE 367 | } 368 | self$call(inputs) 369 | } 370 | 371 | 372 | ## ------------------------------------------------------------------------- 373 | model <- keras_model_sequential(list( 374 | layer_simple_dense(units = 32, activation = "relu"), 375 | layer_simple_dense(units = 64, activation = "relu"), 376 | layer_simple_dense(units = 32, activation = "relu"), 377 | layer_simple_dense(units = 10, activation = "softmax") 378 | )) 379 | 380 | 381 | ## ------------------------------------------------------------------------- 382 | model <- keras_model_sequential() 383 | layer_simple_dense(model, 32, activation = "relu") 384 | layer_simple_dense(model, 64, activation = "relu") 385 | layer_simple_dense(model, 32, activation = "relu") 386 | layer_simple_dense(model, 10, activation = "softmax") 387 | 388 | 389 | ## 
------------------------------------------------------------------------- 390 | length(model$layers) 391 | 392 | 393 | ## ------------------------------------------------------------------------- 394 | model <- keras_model_sequential() %>% 395 | layer_simple_dense(32, activation = "relu") %>% 396 | layer_simple_dense(64, activation = "relu") %>% 397 | layer_simple_dense(32, activation = "relu") %>% 398 | layer_simple_dense(10, activation = "softmax") 399 | 400 | 401 | ## ------------------------------------------------------------------------- 402 | model <- keras_model_sequential() %>% layer_dense(1) 403 | model %>% compile(optimizer = "rmsprop", 404 | loss = "mean_squared_error", 405 | metrics = "accuracy") 406 | 407 | 408 | ## ------------------------------------------------------------------------- 409 | model %>% compile( 410 | optimizer = optimizer_rmsprop(), 411 | loss = loss_mean_squared_error(), 412 | metrics = metric_binary_accuracy() 413 | ) 414 | 415 | 416 | ## ---- eval = FALSE-------------------------------------------------------- 417 | ## model %>% compile( 418 | ## optimizer = optimizer_rmsprop(learning_rate = 1e-4), 419 | ## loss = my_custom_loss, 420 | ## metrics = c(my_custom_metric_1, my_custom_metric_2) 421 | ## ) 422 | 423 | 424 | ## ------------------------------------------------------------------------- 425 | ls(pattern = "^optimizer_", "package:keras") 426 | 427 | 428 | ## ------------------------------------------------------------------------- 429 | ls(pattern = "^loss_", "package:keras") 430 | 431 | 432 | ## ------------------------------------------------------------------------- 433 | ls(pattern = "^metric_", "package:keras") 434 | 435 | 436 | ## ------------------------------------------------------------------------- 437 | history <- model %>% 438 | fit(inputs, targets, 439 | epochs = 5, batch_size = 128) 440 | 441 | 442 | ## ------------------------------------------------------------------------- 443 | str(history$metrics) 444 | 445 | 446 | ## ------------------------------------------------------------------------- 447 | model <- keras_model_sequential() %>% 448 | layer_dense(1) 449 | 450 | model %>% compile(optimizer_rmsprop(learning_rate = 0.1), 451 | loss = loss_mean_squared_error(), 452 | metrics = metric_binary_accuracy()) 453 | 454 | n_cases <- dim(inputs)[1] 455 | num_validation_samples <- round(0.3 * n_cases) 456 | val_indices <- sample.int(n_cases, num_validation_samples) 457 | 458 | val_inputs <- inputs[val_indices, ] 459 | val_targets <- targets[val_indices, , drop = FALSE] 460 | training_inputs <- inputs[-val_indices, ] 461 | training_targets <- targets[-val_indices, , drop = FALSE] 462 | 463 | model %>% fit( 464 | training_inputs, 465 | training_targets, 466 | epochs = 5, 467 | batch_size = 16, 468 | validation_data = list(val_inputs, val_targets) 469 | ) 470 | 471 | 472 | ## ------------------------------------------------------------------------- 473 | loss_and_metrics <- evaluate(model, val_inputs, val_targets, 474 | batch_size = 128) 475 | 476 | 477 | ## ---- eval = FALSE-------------------------------------------------------- 478 | ## predictions <- model(new_inputs) 479 | 480 | 481 | ## ---- eval = FALSE-------------------------------------------------------- 482 | ## predictions <- model %>% 483 | ## predict(new_inputs, batch_size=128) 484 | 485 | 486 | ## ------------------------------------------------------------------------- 487 | predictions <- model %>% 488 | predict(val_inputs, batch_size=128) 489 | head(predictions, 10) 490 | 
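## A minimal sketch, not from the book, of what the `my_custom_loss` and
## `my_custom_metric_1` placeholders passed to compile() above could look
## like: a custom loss can be a plain R function of (y_true, y_pred) that
## returns per-sample losses, and a custom metric can be wrapped with
## custom_metric(). Names and the choice of MSE/MAE here are illustrative.
my_custom_loss <- function(y_true, y_pred) {
  tf$reduce_mean(tf$square(y_true - y_pred), axis = -1L)  # per-sample MSE
}

my_custom_metric_1 <- custom_metric("mean_absolute_error", function(y_true, y_pred) {
  tf$reduce_mean(tf$abs(y_true - y_pred))
})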
-------------------------------------------------------------------------------- /2e/ch08.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include = FALSE----------------------------------------------- 2 | library(keras3) 3 | tensorflow::tf_function(function(x) x + 1)(1) 4 | 5 | 6 | ## ------------------------------------------------------------------------- 7 | inputs <- layer_input(shape = c(28, 28, 1)) 8 | 9 | outputs <- inputs %>% 10 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 11 | layer_max_pooling_2d(pool_size = 2) %>% 12 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 13 | layer_max_pooling_2d(pool_size = 2) %>% 14 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 15 | layer_flatten() %>% 16 | layer_dense(10, activation = "softmax") 17 | 18 | model <- keras_model(inputs, outputs) 19 | 20 | 21 | ## ------------------------------------------------------------------------- 22 | model 23 | 24 | 25 | ## ------------------------------------------------------------------------- 26 | c(c(train_images, train_labels), c(test_images, test_labels)) %<-% 27 | dataset_mnist() 28 | train_images <- array_reshape(train_images, c(60000, 28, 28, 1)) / 255 29 | test_images <- array_reshape(test_images, c(10000, 28, 28, 1)) / 255 30 | 31 | model %>% compile(optimizer = "rmsprop", 32 | loss = "sparse_categorical_crossentropy", 33 | metrics = c("accuracy")) 34 | model %>% fit(train_images, train_labels, epochs = 5, batch_size = 64) 35 | 36 | 37 | ## ------------------------------------------------------------------------- 38 | result <- evaluate(model, test_images, test_labels) 39 | cat("Test accuracy:", result$accuracy, "\n") 40 | 41 | 42 | ## ------------------------------------------------------------------------- 43 | inputs <- layer_input(shape = c(28, 28, 1)) 44 | outputs <- inputs %>% 45 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 46 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 47 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 48 | layer_flatten() %>% 49 | layer_dense(10, activation = "softmax") 50 | model_no_max_pool <- keras_model(inputs = inputs, outputs = outputs) 51 | 52 | 53 | ## ------------------------------------------------------------------------- 54 | model_no_max_pool 55 | 56 | 57 | ## ---- eval = FALSE-------------------------------------------------------- 58 | ## library(fs) 59 | ## dir_create("~/.kaggle") 60 | ## file_move("~/Downloads/kaggle.json", "~/.kaggle/") 61 | ## file_chmod("~/.kaggle/kaggle.json", "0600") 62 | 63 | 64 | ## ---- eval = FALSE-------------------------------------------------------- 65 | ## reticulate::py_install("kaggle", pip = TRUE) 66 | 67 | 68 | ## ---- eval = FALSE-------------------------------------------------------- 69 | ## system('kaggle competitions download -c dogs-vs-cats') 70 | 71 | 72 | ## ---- include = FALSE----------------------------------------------------- 73 | # just to make the following chunks reproducible 74 | unlink("dogs-vs-cats", recursive = TRUE) 75 | # note bene, we use the {zip} package instead of base::unzip 76 | # because the latter raises an error on this file. 
77 | 78 | 79 | ## ------------------------------------------------------------------------- 80 | zip::unzip('dogs-vs-cats.zip', exdir = "dogs-vs-cats", files = "train.zip") 81 | zip::unzip("dogs-vs-cats/train.zip", exdir = "dogs-vs-cats") 82 | 83 | 84 | ## ---- include = FALSE----------------------------------------------------- 85 | unlink("cats_vs_dogs_small", recursive = TRUE) 86 | library(fs) 87 | 88 | 89 | ## ------------------------------------------------------------------------- 90 | library(fs) 91 | original_dir <- path("dogs-vs-cats/train") 92 | new_base_dir <- path("cats_vs_dogs_small") 93 | 94 | make_subset <- function(subset_name, start_index, end_index) { 95 | for (category in c("dog", "cat")) { 96 | file_name <- glue::glue("{category}.{ start_index:end_index }.jpg") 97 | dir_create(new_base_dir / subset_name / category) 98 | file_copy(original_dir / file_name, 99 | new_base_dir / subset_name / category / file_name) 100 | } 101 | } 102 | 103 | make_subset("train", start_index = 1, end_index = 1000) 104 | make_subset("validation", start_index = 1001, end_index = 1500) 105 | make_subset("test", start_index = 1501, end_index = 2500) 106 | 107 | 108 | ## ------------------------------------------------------------------------- 109 | inputs <- layer_input(shape = c(180, 180, 3)) 110 | outputs <- inputs %>% 111 | layer_rescaling(1 / 255) %>% 112 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 113 | layer_max_pooling_2d(pool_size = 2) %>% 114 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 115 | layer_max_pooling_2d(pool_size = 2) %>% 116 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 117 | layer_max_pooling_2d(pool_size = 2) %>% 118 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 119 | layer_max_pooling_2d(pool_size = 2) %>% 120 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 121 | layer_flatten() %>% 122 | layer_dense(1, activation = "sigmoid") 123 | model <- keras_model(inputs, outputs) 124 | 125 | 126 | ## ------------------------------------------------------------------------- 127 | model 128 | 129 | 130 | ## ------------------------------------------------------------------------- 131 | model %>% compile(loss = "binary_crossentropy", 132 | optimizer = "rmsprop", 133 | metrics = "accuracy") 134 | 135 | 136 | ## ------------------------------------------------------------------------- 137 | train_dataset <- 138 | image_dataset_from_directory(new_base_dir / "train", 139 | image_size = c(180, 180), 140 | batch_size = 32) 141 | validation_dataset <- 142 | image_dataset_from_directory(new_base_dir / "validation", 143 | image_size = c(180, 180), 144 | batch_size = 32) 145 | test_dataset <- 146 | image_dataset_from_directory(new_base_dir / "test", 147 | image_size = c(180, 180), 148 | batch_size = 32) 149 | 150 | 151 | ## ------------------------------------------------------------------------- 152 | library(tfdatasets, exclude = "shape") 153 | example_array <- array(seq(100*6), c(100, 6)) 154 | head(example_array) 155 | dataset <- tensor_slices_dataset(example_array) 156 | 157 | 158 | ## ------------------------------------------------------------------------- 159 | dataset_iterator <- as_iterator(dataset) 160 | for(i in 1:3) { 161 | element <- iter_next(dataset_iterator) 162 | print(element) 163 | } 164 | 165 | 166 | ## ------------------------------------------------------------------------- 167 | dataset_array_iterator <- as_array_iterator(dataset) 168 | 
for(i in 1:3) { 169 | element <- iter_next(dataset_array_iterator) 170 | str(element) 171 | } 172 | 173 | 174 | ## ------------------------------------------------------------------------- 175 | batched_dataset <- dataset %>% 176 | dataset_batch(3) 177 | batched_dataset_iterator <- as_iterator(batched_dataset) 178 | for(i in 1:3) { 179 | element <- iter_next(batched_dataset_iterator) 180 | print(element) 181 | } 182 | 183 | 184 | ## ------------------------------------------------------------------------- 185 | reshaped_dataset <- dataset %>% 186 | dataset_map(function(element) tf$reshape(element, shape(2, 3))) 187 | 188 | reshaped_dataset_iterator <- as_iterator(reshaped_dataset) 189 | for(i in 1:3) { 190 | element <- iter_next(reshaped_dataset_iterator) 191 | print(element) 192 | } 193 | 194 | 195 | ## ------------------------------------------------------------------------- 196 | c(data_batch, labels_batch) %<-% iter_next(as_iterator(train_dataset)) 197 | data_batch$shape 198 | labels_batch$shape 199 | 200 | 201 | ## ------------------------------------------------------------------------- 202 | callbacks <- list( 203 | callback_model_checkpoint( 204 | filepath = "convnet_from_scratch.keras", 205 | save_best_only = TRUE, 206 | monitor = "val_loss" 207 | ) 208 | ) 209 | 210 | history <- model %>% 211 | fit( 212 | train_dataset, 213 | epochs = 30, 214 | validation_data = validation_dataset, 215 | callbacks = callbacks 216 | ) 217 | 218 | 219 | ## ------------------------------------------------------------------------- 220 | plot(history) 221 | 222 | 223 | ## ------------------------------------------------------------------------- 224 | test_model <- load_model("convnet_from_scratch.keras") 225 | result <- evaluate(test_model, test_dataset) 226 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 227 | 228 | 229 | ## ------------------------------------------------------------------------- 230 | data_augmentation <- keras_model_sequential() %>% 231 | layer_random_flip("horizontal") %>% 232 | layer_random_rotation(0.1) %>% 233 | layer_random_zoom(0.2) 234 | 235 | 236 | ## ------------------------------------------------------------------------- 237 | 238 | 239 | 240 | ## ------------------------------------------------------------------------- 241 | library(tfdatasets, exclude = "shape") 242 | batch <- train_dataset %>% 243 | as_iterator() %>% 244 | iter_next() 245 | 246 | c(images, labels) %<-% batch 247 | 248 | par(mfrow = c(3, 3), mar = rep(.5, 4)) 249 | 250 | image <- images[1, , , ] 251 | plot(as.raster(as.array(image), max = 255)) 252 | 253 | # plot augmented images 254 | for (i in 2:9) { 255 | augmented_images <- data_augmentation(images) 256 | augmented_image <- augmented_images[1, , , ] 257 | plot(as.raster(as.array(augmented_image), max = 255)) 258 | } 259 | 260 | 261 | ## ------------------------------------------------------------------------- 262 | inputs <- layer_input(shape = c(180, 180, 3)) 263 | outputs <- inputs %>% 264 | data_augmentation() %>% 265 | layer_rescaling(1 / 255) %>% 266 | layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") %>% 267 | layer_max_pooling_2d(pool_size = 2) %>% 268 | layer_conv_2d(filters = 64, kernel_size = 3, activation = "relu") %>% 269 | layer_max_pooling_2d(pool_size = 2) %>% 270 | layer_conv_2d(filters = 128, kernel_size = 3, activation = "relu") %>% 271 | layer_max_pooling_2d(pool_size = 2) %>% 272 | layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 273 | layer_max_pooling_2d(pool_size = 2) %>% 274 | 
layer_conv_2d(filters = 256, kernel_size = 3, activation = "relu") %>% 275 | layer_flatten() %>% 276 | layer_dropout(0.5) %>% 277 | layer_dense(1, activation = "sigmoid") 278 | 279 | model <- keras_model(inputs, outputs) 280 | 281 | model %>% compile(loss = "binary_crossentropy", 282 | optimizer = "rmsprop", 283 | metrics = "accuracy") 284 | 285 | 286 | ## ------------------------------------------------------------------------- 287 | callbacks <- list( 288 | callback_model_checkpoint( 289 | filepath = "convnet_from_scratch_with_augmentation.keras", 290 | save_best_only = TRUE, 291 | monitor = "val_loss" 292 | ) 293 | ) 294 | 295 | history <- model %>% fit( 296 | train_dataset, 297 | epochs = 100, 298 | validation_data = validation_dataset, 299 | callbacks = callbacks 300 | ) 301 | 302 | 303 | ## ------------------------------------------------------------------------- 304 | plot(history) 305 | 306 | 307 | ## ------------------------------------------------------------------------- 308 | test_model <- load_model("convnet_from_scratch_with_augmentation.keras") 309 | result <- evaluate(test_model, test_dataset) 310 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 311 | 312 | 313 | ## ------------------------------------------------------------------------- 314 | conv_base <- application_vgg16( 315 | weights = "imagenet", 316 | include_top = FALSE, 317 | input_shape = c(180, 180, 3) 318 | ) 319 | 320 | 321 | ## ------------------------------------------------------------------------- 322 | conv_base 323 | 324 | 325 | ## ------------------------------------------------------------------------- 326 | get_features_and_labels <- function(dataset) { 327 | n_batches <- length(dataset) 328 | all_features <- vector("list", n_batches) 329 | all_labels <- vector("list", n_batches) 330 | iterator <- as_array_iterator(dataset) 331 | for (i in 1:n_batches) { 332 | c(images, labels) %<-% iter_next(iterator) 333 | preprocessed_images <- imagenet_preprocess_input(images) 334 | features <- conv_base %>% predict(preprocessed_images) 335 | 336 | all_labels[[i]] <- labels 337 | all_features[[i]] <- features 338 | } 339 | 340 | all_features <- listarrays::bind_on_rows(all_features) 341 | all_labels <- listarrays::bind_on_rows(all_labels) 342 | 343 | list(all_features, all_labels) 344 | } 345 | 346 | c(train_features, train_labels) %<-% get_features_and_labels(train_dataset) 347 | c(val_features, val_labels) %<-% get_features_and_labels(validation_dataset) 348 | c(test_features, test_labels) %<-% get_features_and_labels(test_dataset) 349 | 350 | 351 | ## ------------------------------------------------------------------------- 352 | dim(train_features) 353 | 354 | 355 | ## ------------------------------------------------------------------------- 356 | inputs <- layer_input(shape = c(5, 5, 512)) 357 | outputs <- inputs %>% 358 | layer_flatten() %>% 359 | layer_dense(256) %>% 360 | layer_dropout(.5) %>% 361 | layer_dense(1, activation = "sigmoid") 362 | 363 | model <- keras_model(inputs, outputs) 364 | 365 | model %>% compile(loss = "binary_crossentropy", 366 | optimizer = "rmsprop", 367 | metrics = "accuracy") 368 | 369 | callbacks <- list( 370 | callback_model_checkpoint( 371 | filepath = "feature_extraction.keras", 372 | save_best_only = TRUE, 373 | monitor = "val_loss" 374 | ) 375 | ) 376 | 377 | history <- model %>% fit( 378 | train_features, train_labels, 379 | epochs = 20, 380 | validation_data = list(val_features, val_labels), 381 | callbacks = callbacks 382 | ) 383 | 384 | 385 | ## 
------------------------------------------------------------------------- 386 | plot(history) 387 | 388 | 389 | ## ------------------------------------------------------------------------- 390 | conv_base <- application_vgg16( 391 | weights = "imagenet", 392 | include_top = FALSE) 393 | freeze_weights(conv_base) 394 | 395 | 396 | ## ------------------------------------------------------------------------- 397 | unfreeze_weights(conv_base) 398 | cat("This is the number of trainable weights", 399 | "before freezing the conv base:", 400 | length(conv_base$trainable_weights), "\n") 401 | 402 | 403 | ## ------------------------------------------------------------------------- 404 | freeze_weights(conv_base) 405 | cat("This is the number of trainable weights", 406 | "after freezing the conv base:", 407 | length(conv_base$trainable_weights), "\n") 408 | 409 | 410 | ## ------------------------------------------------------------------------- 411 | data_augmentation <- keras_model_sequential() %>% 412 | layer_random_flip("horizontal") %>% 413 | layer_random_rotation(0.1) %>% 414 | layer_random_zoom(0.2) 415 | 416 | inputs <- layer_input(shape = c(180, 180, 3)) 417 | outputs <- inputs %>% 418 | data_augmentation() %>% 419 | imagenet_preprocess_input() %>% 420 | conv_base() %>% 421 | layer_flatten() %>% 422 | layer_dense(256) %>% 423 | layer_dropout(0.5) %>% 424 | layer_dense(1, activation = "sigmoid") 425 | model <- keras_model(inputs, outputs) 426 | model %>% compile(loss = "binary_crossentropy", 427 | optimizer = "rmsprop", 428 | metrics = "accuracy") 429 | 430 | 431 | ## ------------------------------------------------------------------------- 432 | callbacks <- list( 433 | callback_model_checkpoint( 434 | filepath = "feature_extraction_with_data_augmentation.keras", 435 | save_best_only = TRUE, 436 | monitor = "val_loss" 437 | ) 438 | ) 439 | 440 | history <- model %>% fit( 441 | train_dataset, 442 | epochs = 50, 443 | validation_data = validation_dataset, 444 | callbacks = callbacks 445 | ) 446 | 447 | 448 | ## ------------------------------------------------------------------------- 449 | test_model <- load_model( 450 | "feature_extraction_with_data_augmentation.keras") 451 | result <- evaluate(test_model, test_dataset) 452 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 453 | 454 | 455 | ## ------------------------------------------------------------------------- 456 | conv_base 457 | 458 | 459 | ## ------------------------------------------------------------------------- 460 | unfreeze_weights(conv_base, from = -4) 461 | conv_base 462 | 463 | 464 | ## ------------------------------------------------------------------------- 465 | model %>% compile( 466 | loss = "binary_crossentropy", 467 | optimizer = optimizer_rmsprop(learning_rate = 1e-5), 468 | metrics = "accuracy" 469 | ) 470 | 471 | callbacks <- list( 472 | callback_model_checkpoint( 473 | filepath = "fine_tuning.keras", 474 | save_best_only = TRUE, 475 | monitor = "val_loss" 476 | ) 477 | ) 478 | 479 | history <- model %>% fit( 480 | train_dataset, 481 | epochs = 30, 482 | validation_data = validation_dataset, 483 | callbacks = callbacks 484 | ) 485 | 486 | 487 | ## ------------------------------------------------------------------------- 488 | model <- load_model("fine_tuning.keras") 489 | result <- evaluate(model, test_dataset) 490 | cat(sprintf("Test accuracy: %.3f\n", result$accuracy)) 491 | -------------------------------------------------------------------------------- /chapter17_image-generation.R: 
-------------------------------------------------------------------------------- 1 | library(keras3) 2 | reticulate::py_require("keras-hub==0.18.1") 3 | 4 | 5 | latent_dim <- 2 # <1> 6 | 7 | encoder_inputs <- keras_input(shape = c(28, 28, 1)) 8 | x <- encoder_inputs |> 9 | layer_conv_2d(32, 3, activation = "relu", strides = 2, padding = "same") |> 10 | layer_conv_2d(64, 3, activation = "relu", strides = 2, padding = "same") |> 11 | layer_flatten() |> 12 | layer_dense(16, activation = "relu") 13 | z_mean <- x |> layer_dense(latent_dim, name="z_mean") # <2> 14 | z_log_var <- x |> layer_dense(latent_dim, name="z_log_var") # <2> 15 | encoder <- keras_model(encoder_inputs, list(z_mean, z_log_var), 16 | name="encoder") 17 | 18 | 19 | encoder 20 | 21 | 22 | layer_sampler <- new_layer_class( 23 | classname = "Sampler", 24 | initialize = function(...) { 25 | super$initialize(...) 26 | self$seed_generator <- random_seed_generator() # <1> 27 | self$built <- TRUE 28 | }, 29 | call = function(z_mean, z_log_var) { 30 | .[batch_size, z_size] <- op_shape(z_mean) 31 | epsilon <- random_normal(shape = op_shape(z_mean), # <2> 32 | seed = self$seed_generator) # <2> 33 | z_mean + (op_exp(0.5 * z_log_var) * epsilon) # <3> 34 | } 35 | ) 36 | 37 | 38 | latent_inputs <- keras_input(shape = c(latent_dim)) # <1> 39 | decoder_outputs <- latent_inputs |> 40 | layer_dense(7 * 7 * 64, activation = "relu") |> # <2> 41 | layer_reshape(c(7, 7, 64)) |> # <3> 42 | layer_conv_2d_transpose(64, 3, activation = "relu", # <4> 43 | strides = 2, padding = "same") |> # <4> 44 | layer_conv_2d_transpose(32, 3, activation = "relu", # <4> 45 | strides = 2, padding = "same") |> # <4> 46 | layer_conv_2d(1, 3, activation = "sigmoid", padding = "same") # <5> 47 | decoder <- keras_model(latent_inputs, decoder_outputs, 48 | name = "decoder") 49 | 50 | 51 | decoder 52 | 53 | 54 | model_vae <- new_model_class( 55 | classname = "VAE", 56 | 57 | initialize = function(encoder, decoder, ...) { 58 | super$initialize(...) 
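    # Assembled below: the convolutional encoder and decoder built above, a
    # Sampler layer that draws z from the predicted latent distribution, and
    # two metric_mean() trackers that report the reconstruction and KL loss
    # terms computed in compute_loss() during training.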
59 | self$encoder <- encoder 60 | self$decoder <- decoder 61 | self$sampler <- layer_sampler() 62 | self$reconstruction_loss_tracker <- # <1> 63 | metric_mean(name = "reconstruction_loss") # <1> 64 | self$kl_loss_tracker <- metric_mean(name = "kl_loss") # <1> 65 | }, 66 | 67 | call = function(inputs) { 68 | self$encoder(inputs) 69 | }, 70 | 71 | compute_loss = function(x, y, y_pred, sample_weight = NULL, training = TRUE) { 72 | original <- x # <2> 73 | .[z_mean, z_log_var] <- y_pred # <3> 74 | 75 | 76 | z <- self$sampler(z_mean, z_log_var) 77 | reconstruction <- self$decoder(z) # <4> 78 | 79 | reconstruction_loss <- # <5> 80 | loss_binary_crossentropy(original, reconstruction) |> # <5> 81 | op_sum(axis = c(2, 3)) |> # <5> 82 | op_mean() # <5> 83 | 84 | kl_loss <- -0.5 * ( # <6> 85 | 1 + z_log_var - op_square(z_mean) - op_exp(z_log_var) # <6> 86 | ) # <6> 87 | total_loss <- reconstruction_loss + op_mean(kl_loss) # <6> 88 | 89 | self$reconstruction_loss_tracker$update_state(reconstruction_loss) # <7> 90 | self$kl_loss_tracker$update_state(kl_loss) # <7> 91 | 92 | total_loss 93 | } 94 | ) 95 | 96 | 97 | .[.[x_train, ..], .[x_test, ..]] <- dataset_mnist() 98 | mnist_digits <- vctrs::vec_c(x_train, x_test) # <1> 99 | mnist_digits <- mnist_digits / 255 100 | dim(mnist_digits) <- c(dim(mnist_digits), 1) 101 | 102 | str(mnist_digits) 103 | 104 | vae <- model_vae(encoder, decoder) 105 | vae |> compile(optimizer = optimizer_adam()) # <2> 106 | vae |> fit(mnist_digits, epochs = 30, batch_size = 128) # <3> 107 | 108 | 109 | n <- 30 # <1> 110 | digit_size <- 28 111 | 112 | z <- seq(-1, 1, length.out = n) # <2> 113 | z_grid <- as.matrix(expand.grid(z, z)) # <2> 114 | 115 | decoded <- predict(vae$decoder, z_grid, verbose = 0) # <3> 116 | 117 | z_grid_i <- expand.grid(x = seq_len(n), y = seq_len(n)) # <4> 118 | figure <- array(0, c(digit_size * n, digit_size * n)) # <4> 119 | 120 | for (i in 1:nrow(z_grid_i)) { 121 | .[xi, yi] <- z_grid_i[i, ] 122 | digit <- decoded[i, , , ] 123 | figure[seq(to = (n + 1 - xi) * digit_size, length.out = digit_size), 124 | seq(to = yi * digit_size, length.out = digit_size)] <- 125 | digit 126 | } 127 | 128 | par(pty = "s") # <8> 129 | lim <- extendrange(r = c(-1, 1), f = 1 - (n / (n+.5))) # <5> 130 | plot(NULL, frame.plot = FALSE, 131 | ylim = lim, xlim = lim, 132 | xlab = ~z[1], ylab = ~z[2]) # <6> 133 | rasterImage(as.raster(1 - figure, max = 1), # <7> 134 | lim[1], lim[1], lim[2], lim[2], 135 | interpolate = FALSE) 136 | 137 | 138 | fpath <- "flowers" 139 | 140 | 141 | library(tfdatasets, exclude = "shape") 142 | batch_size <- 32 143 | image_size <- 128 144 | images_dir <- fs::path("flowers", "jpg") 145 | dataset <- image_dataset_from_directory( 146 | images_dir, 147 | labels = NULL, # <1> 148 | image_size = c(image_size, image_size), 149 | crop_to_aspect_ratio = TRUE # <2> 150 | ) 151 | dataset <- dataset |> dataset_rebatch( 152 | batch_size, 153 | drop_remainder = TRUE # <3> 154 | ) 155 | 156 | 157 | img <- dataset |> as_array_iterator() |> iter_next() |> _[1, , ,] 158 | par(mar = c(0, 0, 0, 0)) 159 | plot(as.raster(img, max = 255)) 160 | 161 | 162 | residual_block <- function(x, width) { # <1> 163 | .[.., n_features] <- op_shape(x) 164 | 165 | if (n_features == width) { 166 | residual <- x 167 | } else { 168 | residual <- x |> layer_conv_2d(filters = width, kernel_size = 1) 169 | } 170 | 171 | x <- x |> 172 | layer_batch_normalization(center = FALSE, scale = FALSE) |> 173 | layer_conv_2d(width, kernel_size = 3, padding = "same", 174 | activation = "swish") |> 175 | 
layer_conv_2d(width, kernel_size = 3, padding = "same") 176 | 177 | x + residual 178 | } 179 | 180 | 181 | get_model <- function(image_size, widths, block_depth) { 182 | noisy_images <- keras_input(shape = c(image_size, image_size, 3)) 183 | noise_rates <- keras_input(shape = c(1, 1, 1)) 184 | 185 | x <- noisy_images |> layer_conv_2d(filters = widths[1], kernel_size = 1) 186 | n <- noise_rates |> layer_upsampling_2d(size = image_size, interpolation = "nearest") 187 | x <- layer_concatenate(c(x, n)) 188 | 189 | skips <- list() 190 | 191 | for (width in head(widths, -1)) { # <2> 192 | for (i in seq_len(block_depth)) { # <2> 193 | x <- x |> residual_block(width) # <2> 194 | skips <- c(skips, x) 195 | } 196 | x <- x |> layer_average_pooling_2d(pool_size = 2) 197 | } 198 | 199 | for (i in seq_len(block_depth)) { # <3> 200 | x <- x |> residual_block(tail(widths, 1)) # <3> 201 | } 202 | 203 | for (width in rev(head(widths, -1))) { # <4> 204 | x <- x |> layer_upsampling_2d(size = 2, interpolation = "bilinear") # <4> 205 | for (i in seq_len(block_depth)) { # <4> 206 | x <- x |> layer_concatenate(tail(skips, 1)[[1]]) # <4> 207 | skips <- head(skips, -1) 208 | x <- x |> residual_block(width) 209 | } 210 | } 211 | 212 | pred_noise_masks <- x |> layer_conv_2d( # <5> 213 | filters = 3, kernel_size = 1, kernel_initializer = "zeros" # <5> 214 | ) # <5> 215 | 216 | model <- keras_model(inputs = list(noisy_images, noise_rates), # <6> 217 | outputs = pred_noise_masks) # <6> 218 | model 219 | } 220 | 221 | 222 | diffusion_schedule <- function(diffusion_times, min_signal_rate = 0.02, 223 | max_signal_rate = 0.95) { 224 | start_angle <- op_arccos(max_signal_rate) |> op_cast(dtype = "float32") 225 | end_angle <- op_arccos(min_signal_rate) |> op_cast(dtype = "float32") 226 | 227 | diffusion_angles <- start_angle + diffusion_times * (end_angle - start_angle) 228 | signal_rates <- op_cos(diffusion_angles) 229 | noise_rates <- op_sin(diffusion_angles) 230 | 231 | list(noise_rates = noise_rates, signal_rates = signal_rates) 232 | } 233 | 234 | 235 | diffusion_times <- op_arange(0, 1, 0.01) # <1> 236 | schedule <- diffusion_schedule(diffusion_times) # <1> 237 | 238 | diffusion_times <- as.array(diffusion_times) # <2> 239 | noise_rates <- as.array(schedule$noise_rates) # <2> 240 | signal_rates <- as.array(schedule$signal_rates) # <2> 241 | 242 | plot(NULL, type = "n", main = "Diffusion Schedule", # <3> 243 | ylab = "Rate", ylim = c(0, 1), 244 | xlab = "Diffusion time", xlim = c(0, 1)) 245 | lines(diffusion_times, noise_rates, col = "blue", lty = 1) # <4> 246 | lines(diffusion_times, signal_rates, col = "red", lty = 2) # <4> 247 | 248 | legend("bottomleft", # <5> 249 | legend = c("Noise rate", "Signal rate"), # <5> 250 | col = c("blue", "red"), lty = c(1, 2)) 251 | 252 | 253 | new_diffusion_model <- new_model_class( 254 | classname = "DiffusionModel", 255 | 256 | initialize = function(image_size, widths, block_depth, ...) { 257 | super$initialize(...) 
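    # Assembled below: the U-Net-style denoiser returned by get_model(), a
    # seed generator for reproducible noise, a mean absolute error loss that
    # compute_loss() applies to (true noise mask, predicted noise mask) pairs,
    # and a normalization layer whose statistics are adapted to the training
    # images and inverted again inside generate().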
258 | self$image_size <- shape(image_size) 259 | self$denoising_model <- get_model(image_size, widths, block_depth) 260 | self$seed_generator <- random_seed_generator() 261 | self$loss <- loss_mean_absolute_error() # <1> 262 | self$normalizer <- layer_normalization() # <2> 263 | }, 264 | 265 | denoise = function(noisy_images, noise_rates, signal_rates) { 266 | pred_noise_masks <- self$denoising_model(list(noisy_images, noise_rates)) # <3> 267 | pred_images <- 268 | (noisy_images - noise_rates * pred_noise_masks) / 269 | signal_rates # <4> 270 | list(pred_images = pred_images, pred_noise_masks = pred_noise_masks) 271 | }, 272 | 273 | call = function(images) { 274 | images <- self$normalizer(images) 275 | .[batch_size, ..] <- op_shape(images) 276 | 277 | noise_masks <- random_normal( # <5> 278 | shape = c(batch_size, self$image_size, self$image_size, 3), 279 | seed = self$seed_generator 280 | ) 281 | 282 | diffusion_times <- random_uniform( # <6> 283 | shape = c(batch_size, 1, 1, 1), 284 | minval = 0.0, maxval = 1.0, 285 | seed = self$seed_generator 286 | ) 287 | 288 | .[noise_rates, signal_rates] <- diffusion_schedule(diffusion_times) 289 | noisy_images <- signal_rates * images + noise_rates * noise_masks # <7> 290 | 291 | .[pred_images, pred_noise_masks] <- 292 | self$denoise(noisy_images, noise_rates, signal_rates) # <8> 293 | 294 | list(pred_images, pred_noise_masks, noise_masks) 295 | }, 296 | 297 | compute_loss = function(x, y, y_pred, sample_weight = NULL, training = TRUE) { 298 | .[.., pred_noise_masks, noise_masks] <- y_pred 299 | self$loss(noise_masks, pred_noise_masks) 300 | }, 301 | 302 | generate = function(num_images, diffusion_steps) { 303 | noisy_images <- random_normal( # <9> 304 | shape = c(num_images, self$image_size, self$image_size, 3), 305 | seed = self$seed_generator 306 | ) 307 | 308 | diffusion_times <- seq(1, 0, length.out = diffusion_steps) 309 | 310 | for (i in seq_len(diffusion_steps - 1)) { 311 | diffusion_time <- diffusion_times[i] 312 | next_diffusion_time <- diffusion_times[i + 1] 313 | 314 | .[noise_rates, signal_rates] <- diffusion_time |> 315 | op_broadcast_to(c(num_images, 1, 1, 1)) |> 316 | diffusion_schedule() # <10> 317 | 318 | .[pred_images, pred_noises] <- 319 | self$denoise(noisy_images, noise_rates, signal_rates) # <11> 320 | 321 | .[next_noise_rates, next_signal_rates] <- next_diffusion_time |> 322 | op_broadcast_to(c(num_images, 1, 1, 1)) |> 323 | diffusion_schedule() # <12> 324 | 325 | noisy_images <- # <13> 326 | (next_signal_rates * pred_images) + 327 | (next_noise_rates * pred_noises) 328 | } 329 | 330 | images <- # <14> 331 | self$normalizer$mean + pred_images * 332 | op_sqrt(self$normalizer$variance) 333 | 334 | op_clip(images, 0, 255) # <15> 335 | } 336 | ) 337 | 338 | 339 | callback_visualization <- new_callback_class( 340 | classname = "VisualizationCallback", 341 | initialize = function(diffusion_steps = 20, num_rows = 3, num_cols = 6) { 342 | self$diffusion_steps <- diffusion_steps 343 | self$num_rows <- num_rows 344 | self$num_cols <- num_cols 345 | }, 346 | 347 | on_epoch_end = function(epoch = NULL, logs = NULL) { 348 | generated_images <- self$model$generate( 349 | num_images = self$num_rows * self$num_cols, 350 | diffusion_steps = self$diffusion_steps 351 | ) |> as.array() 352 | 353 | par(mfrow = c(self$num_rows, self$num_cols), 354 | mar = c(0, 0, 0, 0)) 355 | 356 | for (i in seq_len(self$num_rows * self$num_cols)) { 357 | plot(as.raster(generated_images[i, , , ], max = 255)) 358 | } 359 | } 360 | ) 361 | 362 | 363 | model <- 
new_diffusion_model(image_size, widths = c(32, 64, 96, 128), 364 | block_depth = 2) 365 | model$normalizer$adapt(dataset) # <1> 366 | 367 | 368 | model |> compile( 369 | optimizer = optimizer_adam_w( # <1> 370 | learning_rate = learning_rate_schedule_inverse_time_decay( # <1> 371 | initial_learning_rate = 1e-3, # <1> 372 | decay_steps = 1000, # <1> 373 | decay_rate = 0.1 # <1> 374 | ), # <1> 375 | use_ema = TRUE, # <2> 376 | ema_overwrite_frequency = 100 # <3> 377 | ) 378 | ) 379 | 380 | 381 | library(keras3) 382 | reticulate::py_require("keras-hub==0.18.1") 383 | keras_hub <- reticulate::import("keras_hub") 384 | 385 | model <- keras_hub$models$TextToImage$from_preset( 386 | "stable_diffusion_3_medium", 387 | image_shape = shape(512, 512, 3), 388 | dtype = "float16" 389 | ) 390 | 391 | image <- model$generate( 392 | "photograph of an astronaut riding a horse, detailed, 8k", 393 | guidance_scale = 1 394 | ) 395 | 396 | 397 | par(mar = c(0, 0, 0, 0)) 398 | plot(as.raster(image, max = max(image))) 399 | 400 | 401 | prompts <- c( 402 | "A photograph of a cat wearing a top hat, photorealistic", 403 | "A neon sci-fi skyline at night, illustration" 404 | ) 405 | 406 | images <- model$generate(prompts, num_steps = 25L, guidance_scale = 7.5) 407 | 408 | for(i in seq_len(nrow(images))) { 409 | image <- images[i, , , ] 410 | plot(as.raster(image, max = 255L)) 411 | image_array_save(image, sprintf("generated_image_%i.png", i), scale = FALSE) 412 | } 413 | 414 | 415 | 416 | --------------------------------------------------------------------------------
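## A hedged sketch, not from the repo, of how the flower diffusion model
## compiled in chapter17_image-generation.R could be trained. It assumes the
## compiled DiffusionModel is still bound to a variable (called
## `diffusion_model` here; in the script it was `model` before that name was
## reused for the keras-hub TextToImage preset) and reuses the `dataset` and
## VisualizationCallback defined in the same file. The epoch count is an
## arbitrary placeholder.
diffusion_model |> fit(
  dataset,
  epochs = 100,
  callbacks = list(callback_visualization(diffusion_steps = 20))
)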