├── LICENSE
├── README.md
├── recsys-binary-implicit-keras.R
├── tensorflow-wide-n-deep.R
└── triplet-loss-keras
    ├── data-movielens.R
    ├── metric-auc.R
    ├── plot-model.R
    ├── triplet-loss-bpr-keras.R
    └── triplet-loss-margin-keras.R

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018, Nan Xiao

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# deep-learning-recipes

[![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)
![License: MIT](https://img.shields.io/github/license/nanxstats/deep-learning-recipes.svg)

R implementations of selected machine learning methods, built with deep learning frameworks (Keras, TensorFlow).

Principles: clean, self-contained, minimal dependencies, works with the latest framework versions.

## Contents

- Triplet losses for implicit feedback recommender systems. [[blog post](https://nanx.me/blog/post/triplet-loss-r-keras/)] [[code](triplet-loss-keras)]
- Matrix factorization for binary implicit feedback data. [[blog post](https://nanx.me/blog/post/recsys-binary-implicit-feedback-r-keras/)] [[code](recsys-binary-implicit-keras.R)]
- "Wide and deep" model for regression/classification. [[blog post](https://nanx.me/blog/post/building-my-first-deep-learning-machine/)] [[code](tensorflow-wide-n-deep.R)]
--------------------------------------------------------------------------------
/recsys-binary-implicit-keras.R:
--------------------------------------------------------------------------------
library("keras")
library("magrittr")
library("progress")
library("hrbrthemes")
library("ggsci")

use_virtualenv("~/tensorflow/venv/")

# 1. preprocess data ------------------------------------------------------------

# see the full dataset at https://github.com/nanxstats/MEF/
url_drug <- "https://raw.githubusercontent.com/nanxstats/MEF/master/data/drug.txt"
url_adr <- "https://raw.githubusercontent.com/nanxstats/MEF/master/data/adr.txt"
url_pair <- "https://raw.githubusercontent.com/nanxstats/MEF/master/data/association.txt"

df_drug <- read.table(url_drug, sep = "\t", as.is = TRUE)
df_adr <- read.table(url_adr, sep = "\t", as.is = TRUE)
df_pair <- read.table(url_pair, sep = "\t", as.is = TRUE)

names(df_drug) <- c("drug_id", "drugbank_id", "kegg_id", "name", "smiles")
names(df_adr) <- c("adr_id", "name")
names(df_pair) <- c("drug_id", "adr_id")

# clean up drug id
clean_drug_id <- function(x) gsub("-", "", as.character(x))
df_drug$drug_id <- clean_drug_id(df_drug$drug_id)
df_pair$drug_id <- clean_drug_id(df_pair$drug_id)

# map drug id and adr id to 0-based integers (for the embedding inputs)
for (i in 1L:nrow(df_pair)) {
  df_pair$"drug_id"[i] <- which(df_pair$"drug_id"[i] == df_drug$"drug_id") - 1L
  df_pair$"adr_id"[i] <- which(df_pair$"adr_id"[i] == df_adr$"adr_id") - 1L
}

# complete the unobserved rows as class 0
pair <- expand.grid(
  "drug_id" = unique(df_pair$drug_id),
  "adr_id" = unique(df_pair$adr_id),
  stringsAsFactors = FALSE
)
pair$"class" <- 0L

# set known drug-ADR association pairs as class 1
pb <- progress_bar$new(
  format = "[:bar] :percent eta: :eta", total = nrow(df_pair)
)
for (i in 1L:nrow(df_pair)) {
  pb$tick()
  pair[intersect(
    which(df_pair$drug_id[i] == pair$drug_id),
    which(df_pair$adr_id[i] == pair$adr_id)
  ), "class"] <- 1L
}

# convert to integer
pair$drug_id <- as.integer(pair$drug_id)
pair$adr_id <- as.integer(pair$adr_id)

# shuffle rows
set.seed(42)
pair <- pair[sample(1L:nrow(pair)), ]
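
# sanity check (an illustrative addition, not in the original workflow):
# the completed pair grid is heavily imbalanced, which is why class_weight
# is set when fitting the model below
prop_pos <- mean(pair$class == 1L)
cat(sprintf(
  "positive rate: %.4f (about one positive per %.0f pairs)\n",
  prop_pos, 1 / prop_pos
))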

# 2. matrix factorization with Keras --------------------------------------------

# basic settings
n_drug <- length(unique(df_pair$drug_id))
n_adr <- length(unique(df_pair$adr_id))
k <- 10 # number of latent factors to learn

# input layers
input_drug <- layer_input(shape = c(1))
input_adr <- layer_input(shape = c(1))

# embedding and flatten layers
embed_drug <- input_drug %>%
  layer_embedding(input_dim = n_drug, output_dim = k, input_length = 1) %>%
  layer_flatten()
embed_adr <- input_adr %>%
  layer_embedding(input_dim = n_adr, output_dim = k, input_length = 1) %>%
  layer_flatten()

# dot product and output layer (can be replaced by an arbitrary DNN architecture)
pred <- layer_dot(list(embed_drug, embed_adr), axes = -1) %>%
  layer_dense(units = 1, activation = "sigmoid")

# define model inputs/outputs
model <- keras_model(inputs = c(input_drug, input_adr), outputs = pred)
model %>% compile(
  loss = "binary_crossentropy",
  metrics = "binary_accuracy",
  optimizer = optimizer_rmsprop() # the most stable optimizer here
)

# inspect the model
summary(model)

# train the model
history <- model %>% fit(
  x = list(
    matrix(pair$drug_id, ncol = 1),
    matrix(pair$adr_id, ncol = 1)
  ),
  y = matrix(pair$class, ncol = 1),
  class_weight = list("1" = 50.0, "0" = 1.0), # deal with imbalanced classes
  epochs = 20,
  batch_size = 2000, # needs some tuning
  validation_split = 0.2
)

# plot training history
plot(history) +
  theme_ipsum() +
  scale_color_startrek() +
  scale_fill_startrek()
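
# a hedged usage sketch (an illustrative addition, not in the original
# script): score a few drug-ADR pairs with the fitted model; predict()
# returns the estimated association probability for each pair
new_pairs <- pair[1:5, ]
model %>% predict(
  list(
    matrix(new_pairs$drug_id, ncol = 1),
    matrix(new_pairs$adr_id, ncol = 1)
  )
)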
--------------------------------------------------------------------------------
/tensorflow-wide-n-deep.R:
--------------------------------------------------------------------------------
library("msaenet")
library("tensorflow")
library("tfestimators")

# load tensorflow-gpu in the virtualenv folder
use_virtualenv("~/tensorflow/venv/")

# generate synthetic data for binary classification:
# 1 million observations x 100 features (20 useful)
n <- 1e+6L
p <- 1e+2L
sim <- msaenet.sim.binomial(n = n, p = p, snr = 1, coef = rep(1, 20), p.train = 0.5)

# create input data: features
df_tr <- as.data.frame(sim$x.tr)
df_te <- as.data.frame(sim$x.te)

# declare the feature types so tensorflow recognizes them
feat <- vector("list", ncol(df_tr))
for (i in 1L:length(feat)) feat[[i]] <- column_numeric(paste0("V", i))

wide_columns <- feature_columns(feat)
deep_columns <- feature_columns(feat)

# define the "wide and deep" model
model <- dnn_linear_combined_classifier(
  linear_feature_columns = wide_columns,
  linear_optimizer = "Ftrl",
  dnn_feature_columns = deep_columns,
  dnn_optimizer = "Adam",
  dnn_hidden_units = c(50, 20, 10),
  dnn_dropout = 0.5
)

# add the response to the input data
df_tr$y <- sim$y.tr
df_te$y <- sim$y.te

# input data constructor function
constructed_input_fn <- function(dataset)
  input_fn(dataset, features = -y, response = y)

train_input_fn <- constructed_input_fn(df_tr)
eval_input_fn <- constructed_input_fn(df_te)

# train the model
train(model, input_fn = train_input_fn, steps = 1e+4)
# 2018-08-19 20:50:13.846538: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1484] Adding visible gpu devices: 0
# 2018-08-19 20:50:13.846567: I tensorflow/core/common_runtime/gpu/gpu_device.cc:965] Device interconnect StreamExecutor with strength 1 edge matrix:
# 2018-08-19 20:50:13.846572: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971]      0
# 2018-08-19 20:50:13.846575: I tensorflow/core/common_runtime/gpu/gpu_device.cc:984] 0:   N
# 2018-08-19 20:50:13.846661: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1097] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9492 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1)
# Training 3907/10000 [============>....................] - ETA: 2m - loss: 12.08
# Training completed after 3907 steps but 10000 steps was specified
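
# conceptual note (a toy sketch with stand-in numbers, not part of the
# original script): the wide & deep classifier adds a linear (wide) logit
# to a DNN (deep) logit, then applies the sigmoid
sigmoid <- function(z) 1 / (1 + exp(-z))
x_new <- rnorm(p) # one synthetic observation
w_wide <- rnorm(p) # stand-in linear weights, not the fitted ones
logit_wide <- sum(w_wide * x_new)
logit_deep <- 0.3 # stand-in output of the DNN tower
sigmoid(logit_wide + logit_deep) # combined predicted probability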

# evaluate the model on the test set
metrics <- evaluate(model, input_fn = eval_input_fn, steps = 1e+4)
str(metrics)
# Classes 'tbl_df', 'tbl' and 'data.frame': 1 obs. of 11 variables:
#  $ accuracy            : num 0.745
#  $ accuracy_baseline   : num 0.501
#  $ auc                 : num 0.828
#  $ auc_precision_recall: num 0.828
#  $ average_loss        : num 0.507
#  $ label/mean          : num 0.501
#  $ loss                : num 64.9
#  $ precision           : num 0.744
#  $ prediction/mean     : num 0.502
#  $ recall              : num 0.748
#  $ global_step         : num 3907
--------------------------------------------------------------------------------
/triplet-loss-keras/data-movielens.R:
--------------------------------------------------------------------------------
read_movielens <- function() {
  url_uabase <- "http://files.grouplens.org/datasets/movielens/ml-100k/ua.base"
  url_uatest <- "http://files.grouplens.org/datasets/movielens/ml-100k/ua.test"
  train <- read.table(url_uabase)
  test <- read.table(url_uatest)
  names(train) <- names(test) <- c("user_id", "item_id", "rating", "timestamp")
  train$"timestamp" <- test$"timestamp" <- NULL
  list("train" = train, "test" = test)
}

binarize_ratings <- function(df) {
  idx_pos <- df$rating >= 4L
  idx_neg <- df$rating < 4L
  df$rating[idx_pos] <- 1L
  df$rating[idx_neg] <- 0L
  df
}

get_movielens_data <- function() {
  movielens <- read_movielens()
  train <- movielens$train
  test <- movielens$test

  # binarize the 1 to 5 star ratings
  train <- binarize_ratings(train)
  test <- binarize_ratings(test)

  list("train" = train, "test" = test)
}

# generate triplets
# (a random non-positive item serves as the negative item)
get_triplets <- function(df) {
  # first, simply extract all positive pairs
  idx_pos <- which(df$rating == 1L)
  user_id <- df$user_id[idx_pos]
  item_id_pos <- df$item_id[idx_pos]

  df_ret <- data.frame(user_id, item_id_pos)
  df_ret$item_id_neg <- NA

  # sample from the non-positive items for each unique user id
  # (assumes each user has more candidate negatives than positives)
  for (i in unique(df_ret$user_id)) {
    idx_user <- which(df_ret$user_id == i)
    df_user <- df_ret[idx_user, ]
    item_id_np <- setdiff(unique(df$item_id), df_user$item_id_pos)
    df_ret$item_id_neg[idx_user] <- sample(item_id_np, size = length(idx_user))
  }
  df_ret
}
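
# usage sketch (illustrative; commented out since this file is sourced
# as a helper by the triplet-loss scripts):
# movielens <- get_movielens_data()
# head(get_triplets(movielens$train))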
--------------------------------------------------------------------------------
/triplet-loss-keras/metric-auc.R:
--------------------------------------------------------------------------------
library("keras")
library("magrittr")
library("pROC")

# predict outcome probabilities from the learned embeddings
predict_prob <- function(model, user_id, item_id) {
  user_matrix <- model %>% get_layer("embed_user") %>% get_weights() %>% extract2(1L)
  user_vector <- user_matrix[user_id + 1L, , drop = FALSE]
  item_matrix <- model %>% get_layer("embed_item") %>% get_weights() %>% extract2(1L)
  item_vectors <- item_matrix[item_id + 1L, , drop = FALSE]

  sigmoid <- function(x) 1 / (1 + exp(-x))
  scores <- user_vector %*% t(item_vectors) %>% sigmoid() %>% as.vector()
  scores
}

# compute an "average AUC" for predictions
auc_avg <- function(model, df) {

  # all user ids in the set to evaluate on
  user_id <- unique(df$user_id)
  n_users <- length(user_id)

  # all possible items in the test set
  n_items <- max(df$item_id)
  item_id <- 1L:n_items

  # for each user, make predictions on all possible items and compute
  # the AUC, until the AUC values for all users are computed;
  # then average all the AUC values
  scores <- rep(NA, n_users)

  for (i in 1L:n_users) {
    prob <- predict_prob(model, user_id[i], item_id)
    label <- rep(0L, n_items)
    # fill in positive labels
    item_id_pos <- df$item_id[which(df$user_id == user_id[i] & df$rating == 1L)]
    if (length(item_id_pos) >= 1L) {
      label[item_id_pos] <- 1L
      # the argument `direction` is essential here:
      # unlike sklearn.metrics.roc_auc_score, pROC always reports
      # an AUC > 0.5 when direction = "auto", so the mean of these
      # AUC values would be biased significantly above 0.5
      scores[i] <- as.numeric(auc(label, prob, direction = "<"))
    } else {
      # all-zero exception handling: AUC requires labels with two levels
      scores[i] <- NA
    }
  }

  mean(scores, na.rm = TRUE)
}
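
# a quick numeric illustration of the direction pitfall (toy values,
# commented out since this file is sourced as a helper): with
# direction = "auto", pROC flips the comparison to whichever side
# gives the larger AUC
# auc(c(0, 1, 1, 0), c(0.9, 0.1, 0.2, 0.8))                  # reports 1
# auc(c(0, 1, 1, 0), c(0.9, 0.1, 0.2, 0.8), direction = "<") # reports 0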
--------------------------------------------------------------------------------
/triplet-loss-keras/plot-model.R:
--------------------------------------------------------------------------------
library("reticulate")

# assumes `model` exists in the workspace,
# built by one of the triplet-loss scripts

# write to png (low-res)
k <- import("keras")

plot_model <- k$utils$plot_model
plot_model(model, to_file = "triplet-loss-model-keras.png", show_shapes = TRUE, show_layer_names = TRUE)

# write to pdf
pydot <- import("pydot")

model_to_dot <- k$utils$vis_utils$model_to_dot
g <- model_to_dot(model)
g$write_pdf("triplet-loss-model-keras.pdf")
--------------------------------------------------------------------------------
/triplet-loss-keras/triplet-loss-bpr-keras.R:
--------------------------------------------------------------------------------
# Matrix factorization with BPR triplet loss and Keras --------------------------

library("keras")
library("magrittr")
library("reshape2")
library("ggplot2")
library("gridExtra")
library("hrbrthemes")
library("ggsci")

use_virtualenv("~/tensorflow/venv/")

# 1. Define BPR triplet loss ----------------------------------------------------

# identity loss: a workaround for the Keras loss definition so a custom
# triplet loss can be used -- there is no true label; we just want to
# minimize the BPR triplet loss to learn the embeddings
loss_identity <- function(y_true, y_pred) k_mean(y_pred - 0 * y_true)

# BPR triplet loss
loss_bpr_triplet <- function(x) {
  embed_user <- x[[1]]
  embed_item_positive <- x[[2]]
  embed_item_negative <- x[[3]]

  loss <- 1.0 - k_sigmoid(
    k_sum(embed_user * embed_item_positive, axis = -1, keepdims = TRUE) -
      k_sum(embed_user * embed_item_negative, axis = -1, keepdims = TRUE)
  )

  loss
}
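
# numeric intuition (toy vectors added for illustration, not learned
# embeddings): the loss shrinks as the positive score pulls ahead of
# the negative score
u <- c(0.5, -0.2)
v_pos <- c(0.4, 0.1)
v_neg <- c(-0.3, 0.6)
1 - 1 / (1 + exp(-(sum(u * v_pos) - sum(u * v_neg)))) # ~0.39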

# build and compile the model with BPR triplet loss
build_model <- function(n_user, n_item, n_factor) {

  # input layer for users
  input_user <- layer_input(shape = c(1), name = "input_user")

  # input layers for items (positive and negative)
  input_item_positive <- layer_input(shape = c(1), name = "input_item_positive")
  input_item_negative <- layer_input(shape = c(1), name = "input_item_negative")

  # embedding layer for users
  embed_user <- input_user %>%
    layer_embedding(
      input_dim = n_user, output_dim = n_factor,
      input_length = 1, name = "embed_user"
    ) %>%
    layer_flatten()

  # embedding layer shared by positive and negative items
  layer_embed_item <- layer_embedding(
    input_dim = n_item, output_dim = n_factor,
    input_length = 1, name = "embed_item"
  )

  embed_item_positive <- input_item_positive %>%
    layer_embed_item() %>%
    layer_flatten()
  embed_item_negative <- input_item_negative %>%
    layer_embed_item() %>%
    layer_flatten()

  # BPR triplet loss is the output
  loss <- list(embed_user, embed_item_positive, embed_item_negative) %>%
    layer_lambda(loss_bpr_triplet, output_shape = c(1))

  # define model inputs/outputs
  model <- keras_model(
    inputs = c(input_user, input_item_positive, input_item_negative),
    outputs = loss
  )

  # compile model
  model %>% compile(loss = loss_identity, optimizer = optimizer_nadam())

  model
}

# 2. Prepare the data ------------------------------------------------------------

# set model parameters

k <- 100 # number of latent factors to learn
n_epochs <- 20 # number of epochs

# read data
source("data-movielens.R")
movielens <- get_movielens_data()

# prepare training and test data
data_train <- movielens$train
data_test <- movielens$test

# model constants: remember to include all ids from train and test
n_user <- length(unique(c(data_train$user_id, data_test$user_id))) + 1L
n_item <- length(unique(c(data_train$item_id, data_test$item_id))) + 1L

# prepare the test triplets
triplets_test <- get_triplets(data_test)

# 3. Train the model -------------------------------------------------------------

# build the model
model <- build_model(n_user, n_item, k)

# inspect the model
summary(model)

# sanity check: untrained model's auc should be around 0.5
source("metric-auc.R")
auc_avg(model, data_test)

# training loop
train_loss <- test_loss <- train_auc <- test_auc <- rep(NA, n_epochs)

for (epoch in 1L:n_epochs) {
  cat("Epoch", epoch, "\n")

  # sample triplets from the training data
  triplets_train <- get_triplets(data_train)

  history_train <- model %>%
    fit(
      x = list(
        "input_user" = matrix(triplets_train$user_id, ncol = 1),
        "input_item_positive" = matrix(triplets_train$item_id_pos, ncol = 1),
        "input_item_negative" = matrix(triplets_train$item_id_neg, ncol = 1)
      ),
      y = matrix(1, nrow = length(triplets_train$user_id), ncol = 1),
      batch_size = 64, epochs = 1, verbose = 1, shuffle = TRUE
    )

  train_loss[epoch] <- history_train$metrics$loss
  train_auc[epoch] <- auc_avg(model, data_train)
  cat("AUC train:", train_auc[epoch], "\n")

  history_test <- model %>%
    evaluate(
      x = list(
        "input_user" = matrix(triplets_test$user_id, ncol = 1),
        "input_item_positive" = matrix(triplets_test$item_id_pos, ncol = 1),
        "input_item_negative" = matrix(triplets_test$item_id_neg, ncol = 1)
      ),
      y = matrix(1.0, nrow = length(triplets_test$user_id), ncol = 1),
      batch_size = 64, verbose = 0
    )

  test_loss[epoch] <- unname(history_test)
  test_auc[epoch] <- auc_avg(model, data_test)
  cat("AUC test:", test_auc[epoch], "\n")
}
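
# a hedged sketch (an illustrative addition): rank all items for one user
# by the learned embedding scores and list the top 10 item ids
user <- data_test$user_id[1]
prob <- predict_prob(model, user, 1L:(n_item - 1L))
head(order(prob, decreasing = TRUE), 10)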

# 4. Plot loss and user-averaged AUC ---------------------------------------------

df_loss <-
  data.frame("epoch" = 1L:n_epochs, "train" = train_loss, "test" = test_loss)
df_loss <-
  melt(df_loss, id.vars = "epoch", variable.name = "data", value.name = "loss")
p_loss <- ggplot(df_loss, aes(x = epoch, y = loss)) +
  geom_point(aes(fill = data), shape = 21, colour = "#333333") +
  geom_smooth(aes(colour = data), span = 0.5, se = FALSE, show.legend = FALSE) +
  theme_ipsum() +
  theme(plot.margin = unit(c(1, 1, 0, 1), "cm")) +
  scale_fill_tron() +
  scale_color_tron()

df_auc <-
  data.frame("epoch" = 1L:n_epochs, "train" = train_auc, "test" = test_auc)
df_auc <-
  melt(df_auc, id.vars = "epoch", variable.name = "data", value.name = "auc")
p_auc <- ggplot(df_auc, aes(x = epoch, y = auc)) +
  geom_point(aes(fill = data), shape = 21, colour = "#333333") +
  geom_smooth(aes(colour = data), span = 0.5, se = FALSE, show.legend = FALSE) +
  theme_ipsum() +
  theme(plot.margin = unit(c(0, 1, 1, 1), "cm")) +
  scale_fill_tron() +
  scale_color_tron()

p <- grid.arrange(p_loss, p_auc, nrow = 2)

ggsave("triplet-loss-bpr-movielens.png", p, width = 9, height = 6)
--------------------------------------------------------------------------------
/triplet-loss-keras/triplet-loss-margin-keras.R:
--------------------------------------------------------------------------------
# Matrix factorization with margin-based triplet loss and Keras -----------------

library("keras")
library("magrittr")
library("reshape2")
library("ggplot2")
library("gridExtra")
library("hrbrthemes")
library("ggsci")

use_virtualenv("~/tensorflow/venv/")

# 1. Define margin-based triplet loss --------------------------------------------

# identity loss: a workaround for the Keras loss definition so a custom
# triplet loss can be used -- there is no true label; we just want to
# minimize the margin-based triplet loss to learn the embeddings
loss_identity <- function(y_true, y_pred) k_mean(y_pred - 0 * y_true)

# margin-based triplet loss
loss_margin_triplet <- function(x) {
  embed_user <- x[[1]]
  embed_item_positive <- x[[2]]
  embed_item_negative <- x[[3]]

  loss <- k_maximum(
    0.0,
    k_sum(embed_user * embed_item_negative, axis = -1, keepdims = TRUE) -
      k_sum(embed_user * embed_item_positive, axis = -1, keepdims = TRUE) +
      10.0 # the margin
  )

  loss
}
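
# numeric intuition (toy vectors added for illustration): the loss hits
# zero only once the positive score beats the negative score by the
# margin of 10
u <- c(0.5, -0.2)
v_pos <- c(0.4, 0.1)
v_neg <- c(-0.3, 0.6)
max(0, sum(u * v_neg) - sum(u * v_pos) + 10) # 9.55, margin not yet met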

# build and compile the model with margin-based triplet loss
build_model <- function(n_user, n_item, n_factor) {

  # input layer for users
  input_user <- layer_input(shape = c(1), name = "input_user")

  # input layers for items (positive and negative)
  input_item_positive <- layer_input(shape = c(1), name = "input_item_positive")
  input_item_negative <- layer_input(shape = c(1), name = "input_item_negative")

  # embedding layer for users
  embed_user <- input_user %>%
    layer_embedding(
      input_dim = n_user, output_dim = n_factor,
      input_length = 1, name = "embed_user"
    ) %>%
    layer_flatten()

  # embedding layer shared by positive and negative items
  layer_embed_item <- layer_embedding(
    input_dim = n_item, output_dim = n_factor,
    input_length = 1, name = "embed_item"
  )

  embed_item_positive <- input_item_positive %>%
    layer_embed_item() %>%
    layer_flatten()
  embed_item_negative <- input_item_negative %>%
    layer_embed_item() %>%
    layer_flatten()

  # margin-based triplet loss is the output
  loss <- list(embed_user, embed_item_positive, embed_item_negative) %>%
    layer_lambda(loss_margin_triplet, output_shape = c(1))

  # define model inputs/outputs
  model <- keras_model(
    inputs = c(input_user, input_item_positive, input_item_negative),
    outputs = loss
  )

  # compile model
  model %>% compile(loss = loss_identity, optimizer = optimizer_nadam())

  model
}

# 2. Prepare the data ------------------------------------------------------------

# set model parameters

k <- 100 # number of latent factors to learn
n_epochs <- 20 # number of epochs

# read data
source("data-movielens.R")
movielens <- get_movielens_data()

# prepare training and test data
data_train <- movielens$train
data_test <- movielens$test

# model constants: remember to include all ids from train and test
n_user <- length(unique(c(data_train$user_id, data_test$user_id))) + 1L
n_item <- length(unique(c(data_train$item_id, data_test$item_id))) + 1L

# prepare the test triplets
triplets_test <- get_triplets(data_test)

# 3. Train the model -------------------------------------------------------------

# build the model
model <- build_model(n_user, n_item, k)

# inspect the model
summary(model)

# sanity check: untrained model's auc should be around 0.5
source("metric-auc.R")
auc_avg(model, data_test)

# training loop
train_loss <- test_loss <- train_auc <- test_auc <- rep(NA, n_epochs)

for (epoch in 1L:n_epochs) {
  cat("Epoch", epoch, "\n")

  # sample triplets from the training data
  triplets_train <- get_triplets(data_train)

  history_train <- model %>%
    fit(
      x = list(
        "input_user" = matrix(triplets_train$user_id, ncol = 1),
        "input_item_positive" = matrix(triplets_train$item_id_pos, ncol = 1),
        "input_item_negative" = matrix(triplets_train$item_id_neg, ncol = 1)
      ),
      y = matrix(1, nrow = length(triplets_train$user_id), ncol = 1),
      batch_size = 64, epochs = 1, verbose = 1, shuffle = TRUE
    )

  train_loss[epoch] <- history_train$metrics$loss
  train_auc[epoch] <- auc_avg(model, data_train)
  cat("AUC train:", train_auc[epoch], "\n")

  history_test <- model %>%
    evaluate(
      x = list(
        "input_user" = matrix(triplets_test$user_id, ncol = 1),
        "input_item_positive" = matrix(triplets_test$item_id_pos, ncol = 1),
        "input_item_negative" = matrix(triplets_test$item_id_neg, ncol = 1)
      ),
      y = matrix(1.0, nrow = length(triplets_test$user_id), ncol = 1),
      batch_size = 64, verbose = 0
    )

  test_loss[epoch] <- unname(history_test)
  test_auc[epoch] <- auc_avg(model, data_test)
  cat("AUC test:", test_auc[epoch], "\n")
}

# 4. Plot loss and user-averaged AUC ---------------------------------------------

df_loss <-
  data.frame("epoch" = 1L:n_epochs, "train" = train_loss, "test" = test_loss)
df_loss <-
  melt(df_loss, id.vars = "epoch", variable.name = "data", value.name = "loss")
p_loss <- ggplot(df_loss, aes(x = epoch, y = loss)) +
  geom_point(aes(fill = data), shape = 21, colour = "#333333") +
  geom_smooth(aes(colour = data), span = 0.5, se = FALSE, show.legend = FALSE) +
  theme_ipsum() +
  theme(plot.margin = unit(c(1, 1, 0, 1), "cm")) +
  scale_fill_startrek() +
  scale_color_startrek()

df_auc <-
  data.frame("epoch" = 1L:n_epochs, "train" = train_auc, "test" = test_auc)
df_auc <-
  melt(df_auc, id.vars = "epoch", variable.name = "data", value.name = "auc")
p_auc <- ggplot(df_auc, aes(x = epoch, y = auc)) +
  geom_point(aes(fill = data), shape = 21, colour = "#333333") +
  geom_smooth(aes(colour = data), span = 0.5, se = FALSE, show.legend = FALSE) +
  theme_ipsum() +
  theme(plot.margin = unit(c(0, 1, 1, 1), "cm")) +
  scale_fill_startrek() +
  scale_color_startrek()

p <- grid.arrange(p_loss, p_auc, nrow = 2)

ggsave("triplet-loss-margin-movielens.png", p, width = 9, height = 6)
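
# a hedged follow-up sketch (an illustrative addition): persist the learned
# item embeddings for downstream use, e.g., nearest-neighbor lookups
item_embeddings <- model %>%
  get_layer("embed_item") %>%
  get_weights() %>%
  extract2(1L)
saveRDS(item_embeddings, "item-embeddings-margin.rds")
--------------------------------------------------------------------------------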