├── README.md ├── gru4rec_BP ├── evaluation.py ├── main.py └── model.py └── gru4rec_BPTT ├── model.py ├── test.py ├── train.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # GRU4Rec_TensorFlow 2 | This is a TensorFlow implementation of *GRU4Rec*, described in the ICLR 2016 paper "Session-based Recommendations With Recurrent Neural Networks". See the paper: http://arxiv.org/abs/1511.06939. 3 | 4 | * Code under gru4rec\_BPTT trains the RNNs with back-propagation through time (BPTT). This usually performs better than the original GRU4Rec. 5 | * Code under gru4rec\_BP trains the RNNs with plain back-propagation only, which is the optimization method adopted in the original paper. 6 | 7 | # External Link 8 | We have developed a general sequence-aware recommendation library, available [here](https://github.com/DeepGraphLearning/RecommenderSystems). 9 | 10 | 11 | # Requirements 12 | Python: 2.7 13 | 14 | Pandas < 0.17 15 | 16 | Numpy 1.12.1 or later 17 | 18 | TensorFlow: 0.12.1 19 | 20 | # Usage 21 | Train/test files should consist of three columns: 22 | 23 | First column: SessionId 24 | Second column: ItemId 25 | Third column: Timestamp 26 | 27 | To train a model with default parameter settings: 28 | 29 | $ python main.py 30 | 31 | Other optional parameters include: 32 | --layer: Number of GRU layers. Default is 1. 33 | --size: Number of hidden units in the GRU model. Default is 100. 34 | --epoch: Number of training epochs. Default is 3. 35 | --lr: Initial learning rate. Default is 0.001. 36 | --train: Specify whether to train (1) or evaluate (0). Default is 1. 37 | --hidden_act: Activation function used in GRU units. Default is tanh. 38 | --final_act: Final activation function. Default is softmax. 39 | --loss: Loss function: cross-entropy, bpr or top1. Default is cross-entropy. 40 | --dropout: Dropout rate. Default is 0.5. 41 | 42 | To evaluate a trained model: 43 | 44 | $ python main.py --train 0 45 | 46 | One optional parameter is: 47 | --test: Specify which saved model to evaluate (only used when --train is 0). Default is 2. 48 | 49 | # Acknowledgement 50 | This repository draws heavily on the original [Theano implementation](https://github.com/hidasib/GRU4Rec). 51 | -------------------------------------------------------------------------------- /gru4rec_BP/evaluation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 27 2017 4 | Author: Weiping Song 5 | """ 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | def evaluate_sessions_batch(model, train_data, test_data, cut_off=20, batch_size=50, session_key='SessionId', item_key='ItemId', time_key='Time'): 11 | 12 | ''' 13 | Evaluates the GRU4Rec network wrt. recommendation accuracy measured by recall@N and MRR@N. 14 | 15 | Parameters 16 | -------- 17 | model : A trained GRU4Rec model. 18 | train_data : It contains the transactions of the train set. In the evaluation phase, this is used to build the item-to-id map. 19 | test_data : It contains the transactions of the test set. It has one column for session IDs, one for item IDs and one for the timestamp of the events (unix timestamps). 20 | cut_off : int 21 | Cut-off value (i.e. the length of the recommendation list; N for recall@N and MRR@N). Default value is 20. 22 | batch_size : int 23 | Number of events bundled into a batch during evaluation. Speeds up evaluation. If it is set high, the memory consumption increases. Default value is 50.
24 | session_key : string 25 | Header of the session ID column in the input file (default: 'SessionId') 26 | item_key : string 27 | Header of the item ID column in the input file (default: 'ItemId') 28 | time_key : string 29 | Header of the timestamp column in the input file (default: 'Time') 30 | 31 | Returns 32 | -------- 33 | out : tuple 34 | (Recall@N, MRR@N) 35 | 36 | ''' 37 | model.predict = False 38 | # Build itemidmap from train data. 39 | itemids = train_data[item_key].unique() 40 | itemidmap = pd.Series(data=np.arange(len(itemids)), index=itemids) 41 | 42 | test_data.sort([session_key, time_key], inplace=True) 43 | offset_sessions = np.zeros(test_data[session_key].nunique()+1, dtype=np.int32) 44 | offset_sessions[1:] = test_data.groupby(session_key).size().cumsum() 45 | evalutation_point_count = 0 46 | mrr, recall = 0.0, 0.0 47 | if len(offset_sessions) - 1 < batch_size: 48 | batch_size = len(offset_sessions) - 1 49 | iters = np.arange(batch_size).astype(np.int32) 50 | maxiter = iters.max() 51 | start = offset_sessions[iters] 52 | end = offset_sessions[iters+1] 53 | in_idx = np.zeros(batch_size, dtype=np.int32) 54 | np.random.seed(42) 55 | while True: 56 | valid_mask = iters >= 0 57 | if valid_mask.sum() == 0: 58 | break 59 | start_valid = start[valid_mask] 60 | minlen = (end[valid_mask]-start_valid).min() 61 | in_idx[valid_mask] = test_data[item_key].values[start_valid] 62 | for i in xrange(minlen-1): 63 | out_idx = test_data[item_key].values[start_valid+i+1] 64 | preds = model.predict_next_batch(iters, in_idx, itemidmap, batch_size) 65 | preds.fillna(0, inplace=True) 66 | in_idx[valid_mask] = out_idx 67 | ranks = (preds.values.T[valid_mask].T > np.diag(preds.ix[in_idx].values)[valid_mask]).sum(axis=0) + 1 68 | rank_ok = ranks < cut_off 69 | recall += rank_ok.sum() 70 | mrr += (1.0 / ranks[rank_ok]).sum() 71 | evalutation_point_count += len(ranks) 72 | start = start+minlen-1 73 | mask = np.arange(len(iters))[(valid_mask) & (end-start<=1)] 74 | for idx in mask: 75 | maxiter += 1 76 | if maxiter >= len(offset_sessions)-1: 77 | iters[idx] = -1 78 | else: 79 | iters[idx] = maxiter 80 | start[idx] = offset_sessions[maxiter] 81 | end[idx] = offset_sessions[maxiter+1] 82 | return recall/evalutation_point_count, mrr/evalutation_point_count 83 | -------------------------------------------------------------------------------- /gru4rec_BP/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 26 2017 4 | Author: Weiping Song 5 | """ 6 | import os 7 | import tensorflow as tf 8 | import pandas as pd 9 | import numpy as np 10 | import argparse 11 | 12 | import model 13 | import evaluation 14 | 15 | PATH_TO_TRAIN = '/PATH/TO/rsc15_train_full.txt' 16 | PATH_TO_TEST = '/PATH/TO/rsc15_test.txt' 17 | 18 | class Args(): 19 | is_training = False 20 | layers = 1 21 | rnn_size = 100 22 | n_epochs = 3 23 | batch_size = 50 24 | dropout_p_hidden=1 25 | learning_rate = 0.001 26 | decay = 0.96 27 | decay_steps = 1e4 28 | sigma = 0 29 | init_as_normal = False 30 | reset_after_session = True 31 | session_key = 'SessionId' 32 | item_key = 'ItemId' 33 | time_key = 'Time' 34 | grad_cap = 0 35 | test_model = 2 36 | checkpoint_dir = './checkpoint' 37 | loss = 'cross-entropy' 38 | final_act = 'softmax' 39 | hidden_act = 'tanh' 40 | n_items = -1 41 | 42 | def parseArgs(): 43 | parser = argparse.ArgumentParser(description='GRU4Rec args') 44 | parser.add_argument('--layer', default=1, type=int) 45 | parser.add_argument('--size', 
default=100, type=int) 46 | parser.add_argument('--epoch', default=3, type=int) 47 | parser.add_argument('--lr', default=0.001, type=float) 48 | parser.add_argument('--train', default=1, type=int) 49 | parser.add_argument('--test', default=2, type=int) 50 | parser.add_argument('--hidden_act', default='tanh', type=str) 51 | parser.add_argument('--final_act', default='softmax', type=str) 52 | parser.add_argument('--loss', default='cross-entropy', type=str) 53 | parser.add_argument('--dropout', default='0.5', type=float) 54 | 55 | return parser.parse_args() 56 | 57 | 58 | if __name__ == '__main__': 59 | command_line = parseArgs() 60 | data = pd.read_csv(PATH_TO_TRAIN, sep='\t', dtype={'ItemId': np.int64}) 61 | valid = pd.read_csv(PATH_TO_TEST, sep='\t', dtype={'ItemId': np.int64}) 62 | args = Args() 63 | args.n_items = len(data['ItemId'].unique()) 64 | args.layers = command_line.layer 65 | args.rnn_size = command_line.size 66 | args.n_epochs = command_line.epoch 67 | args.learning_rate = command_line.lr 68 | args.is_training = command_line.train 69 | args.test_model = command_line.test 70 | args.hidden_act = command_line.hidden_act 71 | args.final_act = command_line.final_act 72 | args.loss = command_line.loss 73 | args.dropout_p_hidden = 1.0 if args.is_training == 0 else command_line.dropout 74 | print(args.dropout_p_hidden) 75 | if not os.path.exists(args.checkpoint_dir): 76 | os.mkdir(args.checkpoint_dir) 77 | gpu_config = tf.ConfigProto() 78 | gpu_config.gpu_options.allow_growth = True 79 | with tf.Session(config=gpu_config) as sess: 80 | gru = model.GRU4Rec(sess, args) 81 | if args.is_training: 82 | gru.fit(data) 83 | else: 84 | res = evaluation.evaluate_sessions_batch(gru, data, valid) 85 | print('Recall@20: {}\tMRR@20: {}'.format(res[0], res[1])) 86 | -------------------------------------------------------------------------------- /gru4rec_BP/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 26, 2017 4 | @author: Weiping Song 5 | """ 6 | import os 7 | import tensorflow as tf 8 | from tensorflow.python.ops import rnn_cell 9 | import pandas as pd 10 | import numpy as np 11 | 12 | class GRU4Rec: 13 | 14 | def __init__(self, sess, args): 15 | self.sess = sess 16 | self.is_training = args.is_training 17 | 18 | self.layers = args.layers 19 | self.rnn_size = args.rnn_size 20 | self.n_epochs = args.n_epochs 21 | self.batch_size = args.batch_size 22 | self.dropout_p_hidden = args.dropout_p_hidden 23 | self.learning_rate = args.learning_rate 24 | self.decay = args.decay 25 | self.decay_steps = args.decay_steps 26 | self.sigma = args.sigma 27 | self.init_as_normal = args.init_as_normal 28 | self.reset_after_session = args.reset_after_session 29 | self.session_key = args.session_key 30 | self.item_key = args.item_key 31 | self.time_key = args.time_key 32 | self.grad_cap = args.grad_cap 33 | self.n_items = args.n_items 34 | if args.hidden_act == 'tanh': 35 | self.hidden_act = self.tanh 36 | elif args.hidden_act == 'relu': 37 | self.hidden_act = self.relu 38 | else: 39 | raise NotImplementedError 40 | 41 | if args.loss == 'cross-entropy': 42 | if args.final_act == 'tanh': 43 | self.final_activation = self.softmaxth 44 | else: 45 | self.final_activation = self.softmax 46 | self.loss_function = self.cross_entropy 47 | elif args.loss == 'bpr': 48 | if args.final_act == 'linear': 49 | self.final_activation = self.linear 50 | elif args.final_act == 'relu': 51 | self.final_activation = self.relu 52 | else: 53 | 
self.final_activation = self.tanh 54 | self.loss_function = self.bpr 55 | elif args.loss == 'top1': 56 | if args.final_act == 'linear': 57 | self.final_activation = self.linear 58 | elif args.final_act == 'relu': 59 | self.final_activation = self.relu 60 | else: 61 | self.final_activation = self.tanh 62 | self.loss_function = self.top1 63 | else: 64 | raise NotImplementedError 65 | 66 | self.checkpoint_dir = args.checkpoint_dir 67 | if not os.path.isdir(self.checkpoint_dir): 68 | raise Exception("[!] Checkpoint Dir not found") 69 | 70 | self.build_model() 71 | self.sess.run(tf.global_variables_initializer()) 72 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10) 73 | 74 | if self.is_training: 75 | return 76 | 77 | # use self.predict_state to hold hidden states during prediction. 78 | self.predict_state = [np.zeros([self.batch_size, self.rnn_size], dtype=np.float32) for _ in xrange(self.layers)] 79 | ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir) 80 | if ckpt and ckpt.model_checkpoint_path: 81 | self.saver.restore(sess, '{}/gru-model-{}'.format(self.checkpoint_dir, args.test_model)) 82 | 83 | ########################ACTIVATION FUNCTIONS######################### 84 | def linear(self, X): 85 | return X 86 | def tanh(self, X): 87 | return tf.nn.tanh(X) 88 | def softmax(self, X): 89 | return tf.nn.softmax(X) 90 | def softmaxth(self, X): 91 | return tf.nn.softmax(tf.tanh(X)) 92 | def relu(self, X): 93 | return tf.nn.relu(X) 94 | def sigmoid(self, X): 95 | return tf.nn.sigmoid(X) 96 | 97 | ############################LOSS FUNCTIONS###################### 98 | def cross_entropy(self, yhat): 99 | return tf.reduce_mean(-tf.log(tf.diag_part(yhat)+1e-24)) 100 | def bpr(self, yhat): 101 | yhatT = tf.transpose(yhat) 102 | return tf.reduce_mean(-tf.log(tf.nn.sigmoid(tf.diag_part(yhat)-yhatT))) 103 | def top1(self, yhat): 104 | yhatT = tf.transpose(yhat) 105 | term1 = tf.reduce_mean(tf.nn.sigmoid(-tf.diag_part(yhat)+yhatT)+tf.nn.sigmoid(yhatT**2), axis=0) 106 | term2 = tf.nn.sigmoid(tf.diag_part(yhat)**2) / self.batch_size 107 | return tf.reduce_mean(term1 - term2) 108 | 109 | def build_model(self): 110 | 111 | self.X = tf.placeholder(tf.int32, [self.batch_size], name='input') 112 | self.Y = tf.placeholder(tf.int32, [self.batch_size], name='output') 113 | self.state = [tf.placeholder(tf.float32, [self.batch_size, self.rnn_size], name='rnn_state') for _ in xrange(self.layers)] 114 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 115 | 116 | with tf.variable_scope('gru_layer'): 117 | sigma = self.sigma if self.sigma != 0 else np.sqrt(6.0 / (self.n_items + self.rnn_size)) 118 | if self.init_as_normal: 119 | initializer = tf.random_normal_initializer(mean=0, stddev=sigma) 120 | else: 121 | initializer = tf.random_uniform_initializer(minval=-sigma, maxval=sigma) 122 | embedding = tf.get_variable('embedding', [self.n_items, self.rnn_size], initializer=initializer) 123 | softmax_W = tf.get_variable('softmax_w', [self.n_items, self.rnn_size], initializer=initializer) 124 | softmax_b = tf.get_variable('softmax_b', [self.n_items], initializer=tf.constant_initializer(0.0)) 125 | 126 | cell = rnn_cell.GRUCell(self.rnn_size, activation=self.hidden_act) 127 | drop_cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=self.dropout_p_hidden) 128 | stacked_cell = rnn_cell.MultiRNNCell([drop_cell] * self.layers) 129 | 130 | inputs = tf.nn.embedding_lookup(embedding, self.X) 131 | output, state = stacked_cell(inputs, tuple(self.state)) 132 | self.final_state = state 133 |
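        # At this point `output` is the top-layer GRU output, shape [batch_size, rnn_size],
        # and `self.final_state` holds the per-layer hidden states that the training and
        # prediction loops feed back in through the `self.state` placeholders.
        # In the training branch below, the softmax is restricted to the target items of the
        # current minibatch (in-batch negative sampling): `logits` has shape
        # [batch_size, batch_size] and its diagonal carries the scores of the true next items
        # (the positives consumed by the cross-entropy, BPR and TOP1 losses above).
        # The prediction branch instead scores all n_items with the full softmax layer.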
134 | if self.is_training: 135 | ''' 136 | Use other examples of the minibatch as negative samples. 137 | ''' 138 | sampled_W = tf.nn.embedding_lookup(softmax_W, self.Y) 139 | sampled_b = tf.nn.embedding_lookup(softmax_b, self.Y) 140 | logits = tf.matmul(output, sampled_W, transpose_b=True) + sampled_b 141 | self.yhat = self.final_activation(logits) 142 | self.cost = self.loss_function(self.yhat) 143 | else: 144 | logits = tf.matmul(output, softmax_W, transpose_b=True) + softmax_b 145 | self.yhat = self.final_activation(logits) 146 | 147 | if not self.is_training: 148 | return 149 | 150 | self.lr = tf.maximum(1e-5,tf.train.exponential_decay(self.learning_rate, self.global_step, self.decay_steps, self.decay, staircase=True)) 151 | 152 | ''' 153 | Try different optimizers. 154 | ''' 155 | #optimizer = tf.train.AdagradOptimizer(self.lr) 156 | optimizer = tf.train.AdamOptimizer(self.lr) 157 | #optimizer = tf.train.AdadeltaOptimizer(self.lr) 158 | #optimizer = tf.train.RMSPropOptimizer(self.lr) 159 | 160 | tvars = tf.trainable_variables() 161 | gvs = optimizer.compute_gradients(self.cost, tvars) 162 | if self.grad_cap > 0: 163 | capped_gvs = [(tf.clip_by_norm(grad, self.grad_cap), var) for grad, var in gvs] 164 | else: 165 | capped_gvs = gvs 166 | self.train_op = optimizer.apply_gradients(capped_gvs, global_step=self.global_step) 167 | 168 | def init(self, data): 169 | data.sort([self.session_key, self.time_key], inplace=True) 170 | offset_sessions = np.zeros(data[self.session_key].nunique()+1, dtype=np.int32) 171 | offset_sessions[1:] = data.groupby(self.session_key).size().cumsum() 172 | return offset_sessions 173 | 174 | def fit(self, data): 175 | self.error_during_train = False 176 | itemids = data[self.item_key].unique() 177 | self.n_items = len(itemids) 178 | self.itemidmap = pd.Series(data=np.arange(self.n_items), index=itemids) 179 | data = pd.merge(data, pd.DataFrame({self.item_key:itemids, 'ItemIdx':self.itemidmap[itemids].values}), on=self.item_key, how='inner') 180 | offset_sessions = self.init(data) 181 | print('fitting model...') 182 | for epoch in xrange(self.n_epochs): 183 | epoch_cost = [] 184 | state = [np.zeros([self.batch_size, self.rnn_size], dtype=np.float32) for _ in xrange(self.layers)] 185 | session_idx_arr = np.arange(len(offset_sessions)-1) 186 | iters = np.arange(self.batch_size) 187 | maxiter = iters.max() 188 | start = offset_sessions[session_idx_arr[iters]] 189 | end = offset_sessions[session_idx_arr[iters]+1] 190 | finished = False 191 | while not finished: 192 | minlen = (end-start).min() 193 | out_idx = data.ItemIdx.values[start] 194 | for i in range(minlen-1): 195 | in_idx = out_idx 196 | out_idx = data.ItemIdx.values[start+i+1] 197 | # prepare inputs, targeted outputs and hidden states 198 | fetches = [self.cost, self.final_state, self.global_step, self.lr, self.train_op] 199 | feed_dict = {self.X: in_idx, self.Y: out_idx} 200 | for j in xrange(self.layers): 201 | feed_dict[self.state[j]] = state[j] 202 | 203 | cost, state, step, lr, _ = self.sess.run(fetches, feed_dict) 204 | epoch_cost.append(cost) 205 | if np.isnan(cost): 206 | print(str(epoch) + ':Nan error!') 207 | self.error_during_train = True 208 | return 209 | if step == 1 or step % self.decay_steps == 0: 210 | avgc = np.mean(epoch_cost) 211 | print('Epoch {}\tStep {}\tlr: {:.6f}\tloss: {:.6f}'.format(epoch, step, lr, avgc)) 212 | start = start+minlen-1 213 | mask = np.arange(len(iters))[(end-start)<=1] 214 | for idx in mask: 215 | maxiter += 1 216 | if maxiter >= len(offset_sessions)-1: 217 | 
finished = True 218 | break 219 | iters[idx] = maxiter 220 | start[idx] = offset_sessions[session_idx_arr[maxiter]] 221 | end[idx] = offset_sessions[session_idx_arr[maxiter]+1] 222 | if len(mask) and self.reset_after_session: 223 | for i in xrange(self.layers): 224 | state[i][mask] = 0 225 | 226 | avgc = np.mean(epoch_cost) 227 | if np.isnan(avgc): 228 | print('Epoch {}: Nan error!'.format(epoch)) 229 | self.error_during_train = True 230 | return 231 | self.saver.save(self.sess, '{}/gru-model'.format(self.checkpoint_dir), global_step=epoch) 232 | 233 | def predict_next_batch(self, session_ids, input_item_ids, itemidmap, batch=50): 234 | ''' 235 | Gives prediction scores for a selected set of items. Can be used in batch mode to predict for multiple independent events (i.e. events of different sessions) at once and thus speed up evaluation. 236 | 237 | If the session ID at a given coordinate of the session_ids parameter remains the same during subsequent calls of the function, the corresponding hidden state of the network is kept intact (i.e. this is how predictions can be made for consecutive events of the same session). 238 | If it changes, the hidden state of the network is reset to zeros. 239 | 240 | Parameters 241 | -------- 242 | session_ids : 1D array 243 | Contains the session IDs of the events of the batch. Its length must be equal to the prediction batch size (batch param). 244 | input_item_ids : 1D array 245 | Contains the item IDs of the events of the batch. Every item ID must be in the training data of the network. Its length must be equal to the prediction batch size (batch param). 246 | batch : int 247 | Prediction batch size. 248 | 249 | Returns 250 | -------- 251 | out : pandas.DataFrame 252 | Prediction scores for the selected items for every event of the batch. 253 | Columns: events of the batch; rows: items. Rows are indexed by the item IDs.
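        Example
        --------
        A minimal sketch of how evaluation.py drives this method; the second line is only
        illustrative (any way of ranking the returned scores works):

            preds = model.predict_next_batch(iters, in_idx, itemidmap, batch_size)
            top20_items = preds[0].nlargest(20).index  # top-20 item IDs for the first event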
254 | 255 | ''' 256 | if batch != self.batch_size: 257 | raise Exception('Predict batch size({}) must match train batch size({})'.format(batch, self.batch_size)) 258 | if not self.predict: 259 | self.current_session = np.ones(batch) * -1 260 | self.predict = True 261 | 262 | session_change = np.arange(batch)[session_ids != self.current_session] 263 | if len(session_change) > 0: # change internal states with session changes 264 | for i in xrange(self.layers): 265 | self.predict_state[i][session_change] = 0.0 266 | self.current_session=session_ids.copy() 267 | 268 | in_idxs = itemidmap[input_item_ids] 269 | fetches = [self.yhat, self.final_state] 270 | feed_dict = {self.X: in_idxs} 271 | for i in xrange(self.layers): 272 | feed_dict[self.state[i]] = self.predict_state[i] 273 | preds, self.predict_state = self.sess.run(fetches, feed_dict) 274 | preds = np.asarray(preds).T 275 | return pd.DataFrame(data=preds, index=itemidmap.index) 276 | 277 | -------------------------------------------------------------------------------- /gru4rec_BPTT/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 26, 2017 4 | @author: Weiping Song 5 | """ 6 | import sys 7 | import tensorflow as tf 8 | from tensorflow.contrib import rnn 9 | from tensorflow.contrib import seq2seq 10 | from tensorflow.contrib import legacy_seq2seq 11 | import numpy as np 12 | 13 | class GRU4Rec: 14 | def __init__(self, args): 15 | self.args = args 16 | if not args.is_training: 17 | self.args.batch_size = 1 18 | if args.hidden_act == 'tanh': 19 | self.hidden_act = self.tanh 20 | elif args.hidden_act == 'relu': 21 | self.hidden_act = self.relu 22 | else: 23 | raise NotImplementedError 24 | 25 | self.build_model() 26 | self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=20) 27 | 28 | ########################ACTIVATION FUNCTIONS######################### 29 | def linear(self, X): 30 | return X 31 | def tanh(self, X): 32 | return tf.nn.tanh(X) 33 | def softmax(self, X): 34 | return tf.nn.softmax(X) 35 | def softmaxth(self, X): 36 | return tf.nn.softmax(tf.tanh(X)) 37 | def relu(self, X): 38 | return tf.nn.relu(X) 39 | def sigmoid(self, X): 40 | return tf.nn.sigmoid(X) 41 | 42 | def build_model(self): 43 | # input X and target Y, last state of last batch, lengths of sessions in current batch. 
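        # X and Y are [batch_size, unfold_max] integer matrices: each row is one session,
        # X holds the items at steps 1..n-1 and Y the shifted targets at steps 2..n, with
        # item index 0 reserved for padding (utils._load_data maps real items to 1..n_items
        # and pads shorter sessions with zeros). sess_len below counts the non-padding steps
        # per session, and mask, flattened to [batch_size * unfold_max], zeroes out the loss
        # on padded positions.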
44 | self.X = tf.placeholder(tf.int32, [None, None], name='input') 45 | self.Y = tf.placeholder(tf.int32, [None, None], name='output') 46 | self.sess_len = tf.count_nonzero(self.X, 1) 47 | self.mask = tf.reshape(tf.to_float(tf.not_equal(self.X, 0)), (-1,)) 48 | 49 | self.global_step = tf.Variable(0, name='global_step', trainable=False) 50 | 51 | with tf.variable_scope('LSTM_layer'): 52 | sigma = self.args.sigma if self.args.sigma != 0 else np.sqrt(6.0 / (self.args.n_items + self.args.rnn_size)) 53 | if self.args.init_as_normal: 54 | initializer = tf.random_normal_initializer(mean=0, stddev=sigma) 55 | else: 56 | initializer = tf.random_uniform_initializer(minval=-sigma, maxval=sigma) 57 | embedding = tf.get_variable('embedding', [self.args.n_items + 1, self.args.rnn_size]) 58 | softmax_W = tf.get_variable('softmax_w', [self.args.rnn_size, 1 + self.args.n_items]) 59 | softmax_b = tf.get_variable('softmax_b', [self.args.n_items + 1]) 60 | 61 | cells = [] 62 | for _ in range(self.args.layers): 63 | cell = rnn.BasicLSTMCell(self.args.rnn_size, activation=self.hidden_act) 64 | if self.args.is_training and (self.args.keep_prob < 1.0): 65 | cell = rnn.DropoutWrapper(cell, output_keep_prob=self.args.keep_prob) 66 | cells.append(cell) 67 | self.cell = cell = rnn.MultiRNNCell(cells) 68 | 69 | 70 | zero_state = cell.zero_state(self.args.batch_size, dtype=tf.float32) 71 | inputs = tf.nn.embedding_lookup(embedding, self.X) 72 | outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=self.sess_len, initial_state=zero_state) 73 | self.final_state = state 74 | output = tf.reshape(outputs, [-1, self.args.rnn_size]) 75 | 76 | if self.args.is_training: 77 | logits = tf.matmul(output, softmax_W) + softmax_b 78 | label = tf.reshape(self.Y, (-1,)) 79 | loss = legacy_seq2seq.sequence_loss_by_example([logits], [self.Y], [tf.ones([tf.shape(logits)[0]])]) 80 | self.nan = tf.reduce_sum(tf.to_float(tf.is_nan(loss))) 81 | mask_loss = loss * self.mask 82 | self.cost = tf.reduce_sum(mask_loss) / tf.reduce_sum(self.mask) 83 | else: 84 | self.prediction = logits = tf.matmul(output, softmax_W) + softmax_b 85 | self.hit_at_k, self.ndcg_at_k, self.num_target = self._metric_at_k() 86 | 87 | if not self.args.is_training: 88 | return 89 | 90 | self.lr = tf.maximum(1e-5,tf.train.exponential_decay(self.args.learning_rate, self.global_step, self.args.decay_steps, self.args.decay, staircase=True)) 91 | optimizer = tf.train.AdamOptimizer(self.lr) 92 | 93 | tvars = tf.trainable_variables() 94 | gvs = optimizer.compute_gradients(self.cost, tvars) 95 | if self.args.grad_cap > 0: 96 | capped_gvs = [(tf.clip_by_norm(grad, self.args.grad_cap), var) for grad, var in gvs] 97 | else: 98 | capped_gvs = gvs 99 | self.train_op = optimizer.apply_gradients(capped_gvs, global_step=self.global_step) 100 | 101 | 102 | def _metric_at_k(self, k=20): 103 | prediction = self.prediction 104 | prediction_transposed = tf.transpose(prediction) 105 | labels = tf.reshape(self.Y, shape=(-1,)) 106 | pred_values = tf.expand_dims(tf.diag_part(tf.nn.embedding_lookup(prediction_transposed, labels)), -1) 107 | tile_pred_values = tf.tile(pred_values, [1, self.args.n_items]) 108 | ranks = tf.reduce_sum(tf.cast(prediction[:,1:] > tile_pred_values, dtype=tf.float32), -1) + 1 109 | 110 | ndcg = 1. 
/ (log2(1.0 + ranks)) 111 | hit_at_k = tf.nn.in_top_k(prediction, labels, k=k) # also known as Recall@k 112 | hit_at_k = tf.cast(hit_at_k, dtype=tf.float32) 113 | istarget = tf.reshape(self.mask, shape=(-1,)) 114 | hit_at_k *= istarget 115 | ndcg_at_k = ndcg * istarget 116 | 117 | return (tf.reduce_sum(hit_at_k), tf.reduce_sum(ndcg_at_k), tf.reduce_sum(istarget)) 118 | def log2(x): 119 | numerator = tf.log(x) 120 | denominator = tf.log(tf.constant(2, dtype=numerator.dtype)) 121 | return numerator / denominator 122 | -------------------------------------------------------------------------------- /gru4rec_BPTT/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 27 2017 4 | Author: Weiping Song 5 | """ 6 | import sys 7 | import numpy as np 8 | import argparse 9 | import tensorflow as tf 10 | 11 | from model import GRU4Rec 12 | from utils import load_test 13 | 14 | unfold_max = 20 15 | cut_off = 20 16 | 17 | test_x, test_y, n_items = load_test(unfold_max) 18 | 19 | class Args(): 20 | is_training = False 21 | layers = 1 22 | rnn_size = 100 23 | n_epochs = 10 24 | batch_size = 50 25 | keep_prob = 1 26 | learning_rate = 0.002 27 | decay = 0.98 28 | decay_steps = 1e3*5 29 | sigma = 0.0005 30 | init_as_normal = False 31 | grad_cap = 0 32 | test_model = 9 33 | checkpoint_dir = 'save/{}'.format('lstm') 34 | loss = 'cross-entropy' 35 | final_act = 'softmax' 36 | hidden_act = 'tanh' 37 | n_items = -1 38 | 39 | def parseArgs(): 40 | args = Args() 41 | parser = argparse.ArgumentParser(description='GRU4Rec args') 42 | parser.add_argument('--layer', default=1, type=int) 43 | parser.add_argument('--size', default=100, type=int) 44 | parser.add_argument('--batch', default=256, type=int) 45 | parser.add_argument('--epoch', default=5, type=int) 46 | parser.add_argument('--lr', default=0.001, type=float) 47 | parser.add_argument('--dr', default=0.98, type=float) 48 | parser.add_argument('--ds', default=400, type=int) 49 | parser.add_argument('--keep', default='1.0', type=float) 50 | command_line = parser.parse_args() 51 | 52 | args.layers = command_line.layer 53 | args.batch_size = command_line.batch 54 | args.n_epochs = command_line.epoch 55 | args.learning_rate = command_line.lr 56 | args.rnn_size = command_line.size 57 | args.keep_prob = command_line.keep 58 | args.decay = command_line.dr 59 | args.decay_steps = command_line.ds 60 | args.checkpoint_dir += ('_p' + str(command_line.keep)) 61 | args.checkpoint_dir += ('_rnn' + str(command_line.size)) 62 | args.checkpoint_dir += ('_batch'+str(command_line.batch)) 63 | args.checkpoint_dir += ('_lr'+str(command_line.lr)) 64 | args.checkpoint_dir += ('_dr'+str(command_line.dr)) 65 | args.checkpoint_dir += ('_ds'+str(command_line.ds)) 66 | args.checkpoint_dir += ('_unfold'+str(unfold_max)) 67 | return args 68 | 69 | def evaluate(args): 70 | ''' 71 | Returns 72 | -------- 73 | out : tuple 74 | (Recall@N, MRR@N) 75 | ''' 76 | args.n_items = n_items 77 | evaluation_point_count = 0 78 | mrr_l, recall_l, ndcg20_l, ndcg_l = 0.0, 0.0, 0.0, 0.0 79 | np.random.seed(42) 80 | 81 | gpu_config = tf.ConfigProto() 82 | gpu_config.gpu_options.allow_growth = True 83 | model = GRU4Rec(args) 84 | with tf.Session(config=gpu_config) as sess: 85 | #tf.global_variables_initializer().run() 86 | saver = tf.train.Saver(tf.global_variables()) 87 | ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir) 88 | if ckpt and ckpt.model_checkpoint_path: 89 | saver.restore(sess, ckpt.model_checkpoint_path) 
90 | print('Restore model from {} successfully!'.format(args.checkpoint_dir)) 91 | else: 92 | print('Restore model from {} failed!'.format(args.checkpoint_dir)) 93 | return 94 | batch_idx = 0 95 | while batch_idx < len(test_x): 96 | batch_x = test_x[batch_idx: batch_idx + args.batch_size] 97 | batch_y = test_y[batch_idx: batch_idx + args.batch_size] 98 | feed_dict = {model.X: batch_x, model.Y: batch_y} 99 | hit, ndcg, n_target = sess.run([model.hit_at_k, model.ndcg_at_k, model.num_target], feed_dict=feed_dict) 100 | recall_l += hit 101 | ndcg_l += ndcg 102 | evaluation_point_count += n_target 103 | batch_idx += args.batch_size 104 | 105 | return recall_l / evaluation_point_count, ndcg_l / evaluation_point_count 106 | 107 | if __name__ == '__main__': 108 | args = parseArgs() 109 | res = evaluate(args) 110 | print('lr: {}\tbatch_size: {}\tdecay_steps:{}\tdecay_rate:{}\tkeep_prob:{}\tdim: {}\tlayer: {}'.format(args.learning_rate, args.batch_size, args.decay_steps, args.decay, args.keep_prob, args.rnn_size, args.layers)) 111 | print('Recall@20: {}\tNDCG: {}'.format(res[0], res[1])) 112 | sys.stdout.flush() 113 | -------------------------------------------------------------------------------- /gru4rec_BPTT/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Feb 26 2017 4 | Author: Weiping Song 5 | """ 6 | import os, sys 7 | import tensorflow as tf 8 | import numpy as np 9 | import argparse, random 10 | 11 | from model import GRU4Rec 12 | from utils import load_train, load_valid 13 | 14 | unfold_max = 20 15 | error_during_training = False 16 | 17 | train_x, train_y, n_items = load_train(unfold_max) 18 | valid_x, valid_y, _ = load_valid(unfold_max) 19 | 20 | class Args(): 21 | is_training = True 22 | layers = 1 23 | rnn_size = 100 24 | n_epochs = 10 25 | batch_size = 50 26 | keep_prob=1 27 | learning_rate = 0.001 28 | decay = 0.98 29 | decay_steps = 2*1e3 30 | sigma = 0.0001 31 | init_as_normal = False 32 | grad_cap = 0 33 | checkpoint_dir = 'save/{}'.format('lstm') 34 | loss = 'cross-entropy' 35 | final_act = 'softmax' 36 | hidden_act = 'tanh' 37 | n_items = -1 38 | n_users = 1000 39 | init_from = None 40 | eval_point = 1*1e3 41 | 42 | def parseArgs(): 43 | args = Args() 44 | parser = argparse.ArgumentParser(description='LSTM4Rec args') 45 | parser.add_argument('--layer', default=1, type=int) 46 | parser.add_argument('--size', default=100, type=int) 47 | parser.add_argument('--batch', default=256, type=int) 48 | parser.add_argument('--epoch', default=100, type=int) 49 | parser.add_argument('--lr', default=0.001, type=float) 50 | parser.add_argument('--dr', default=0.98, type=float) 51 | parser.add_argument('--ds', default=400, type=int) 52 | parser.add_argument('--keep', default='1.0', type=float) 53 | parser.add_argument('--init_from', default=None, type=str) 54 | command_line = parser.parse_args() 55 | 56 | args.layers = command_line.layer 57 | args.batch_size = command_line.batch 58 | args.n_epochs = command_line.epoch 59 | args.learning_rate = command_line.lr 60 | args.decay = command_line.dr 61 | args.decay_steps = command_line.ds 62 | args.rnn_size = command_line.size 63 | args.keep_prob = command_line.keep 64 | args.checkpoint_dir += ('_p' + str(command_line.keep)) 65 | args.checkpoint_dir += ('_rnn' + str(command_line.size)) 66 | args.checkpoint_dir += ('_batch'+str(command_line.batch)) 67 | args.checkpoint_dir += ('_lr'+str(command_line.lr)) 68 | args.checkpoint_dir += ('_dr'+str(command_line.dr)) 
69 | args.checkpoint_dir += ('_ds'+str(command_line.ds)) 70 | args.checkpoint_dir += ('_unfold'+str(unfold_max)) 71 | args.init_from = command_line.init_from 72 | return args 73 | 74 | def train(args): 75 | # Read train and test data. 76 | global n_items, train_x, train_y 77 | args.n_items = n_items 78 | print('#Items: {}'.format(n_items)) 79 | print('#Training sessions: {}'.format(len(train_x))) 80 | sys.stdout.flush() 81 | # set gpu configurations. 82 | gpu_config = tf.ConfigProto() 83 | gpu_config.gpu_options.allow_growth = True 84 | with tf.Session(config=gpu_config) as sess: 85 | model = GRU4Rec(args) 86 | if args.init_from is not None: 87 | ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir) 88 | if ckpt and ckpt.model_checkpoint_path: 89 | model.saver.restore(sess, ckpt.model_checkpoint_path) 90 | print('Restore model from {}'.format(args.checkpoint_dir)) 91 | else: 92 | sess.run(tf.global_variables_initializer()) 93 | print('Randomly initialize model') 94 | valid_losses = [] 95 | best_step = -1 96 | best_epoch = -1 97 | best_loss = 100.0 98 | error_during_train = False 99 | num_batches = len(train_x) / args.batch_size 100 | 101 | data = list(zip(train_x, train_y)) 102 | random.shuffle(data) 103 | train_x, train_y = zip(*data) 104 | patience = 10 105 | stop_cn = 0 106 | 107 | for epoch in xrange(args.n_epochs): 108 | epoch_cost = [] 109 | for k in xrange(num_batches - 1): 110 | 111 | in_data = train_x[k*args.batch_size: (k+1)*args.batch_size] 112 | out_data = train_y[k*args.batch_size: (k+1)*args.batch_size] 113 | fetches = [model.nan, model.cost, model.global_step, model.lr, model.train_op] 114 | feed_dict = {model.X: in_data, model.Y: out_data} 115 | xnan, cost, step, lr, _ = sess.run(fetches, feed_dict) 116 | epoch_cost.append(cost) 117 | if np.isnan(cost): 118 | print(str(epoch) + ':Nan error!') 119 | error_during_train = True 120 | return 121 | if step == 1 or step % args.decay_steps == 0: 122 | avgc = np.mean(epoch_cost) 123 | print('Epoch {}\tProgress {}/{}\tlr: {:.6f}\tloss: {:.6f}'.format(epoch, k, num_batches, lr, avgc)) 124 | if step % args.eval_point == 0: 125 | valid_loss = eval_validation(model, sess) 126 | valid_losses.append(valid_loss) 127 | print('Evaluation loss after step {}: {:.6f}'.format(step, valid_loss)) 128 | if valid_loss < best_loss: 129 | stop_cn = 0 130 | best_epoch = epoch 131 | best_step = step 132 | best_loss = valid_losses[-1] 133 | ckpt_path = os.path.join(args.checkpoint_dir, 'model.ckpt') 134 | model.saver.save(sess, ckpt_path, global_step=step) 135 | print("model saved to {}".format(ckpt_path)) 136 | sys.stdout.flush() 137 | else: 138 | stop_cn += 1 139 | if stop_cn >= patience: 140 | break 141 | if stop_cn >= patience: 142 | break 143 | 144 | print('Best evaluation loss appears in epoch {}, step {}. 
Lowest loss: {:.6f}'.format(best_epoch, best_step, best_loss)) 145 | return 146 | 147 | def eval_validation(model, sess): 148 | global valid_x, valid_y 149 | valid_batches = len(valid_x) / args.batch_size 150 | valid_loss = [] 151 | for k in xrange(valid_batches): 152 | in_data = valid_x[k*args.batch_size: (k+1)*args.batch_size] 153 | out_data = valid_y[k*args.batch_size: (k+1)*args.batch_size] 154 | 155 | feed_dict = {model.X: in_data, 156 | model.Y: out_data, 157 | } 158 | fetches = model.cost 159 | cost = sess.run(fetches, feed_dict) 160 | if np.isnan(cost): 161 | print('Evaluation loss Nan!') 162 | sys.exit(1) 163 | valid_loss.append(cost) 164 | return np.mean(valid_loss) 165 | 166 | if __name__ == '__main__': 167 | args = parseArgs() 168 | if not os.path.exists('save'): 169 | os.mkdir('save') 170 | if not os.path.exists(args.checkpoint_dir): 171 | os.mkdir(args.checkpoint_dir) 172 | print('rnn size: {}\tlayer: {}\tbatch: {}\tepoch: {}\tkeep: {}'.format(args.rnn_size, args.layers, args.batch_size, args.n_epochs, args.keep_prob)) 173 | train(args) 174 | -------------------------------------------------------------------------------- /gru4rec_BPTT/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | 5 | 6 | PATH = './data/' 7 | TRAINFILE = PATH + 'train.tsv' 8 | TESTFILE = PATH + 'test.tsv' 9 | VALIDFILE = PATH + 'valid.tsv' 10 | 11 | def get_item(): 12 | train = pd.read_csv(TRAINFILE, sep='\t', dtype={0:str, 1:str, 2:np.float32}) 13 | valid = pd.read_csv(VALIDFILE, sep='\t', dtype={0:str, 1:str, 2:np.float32}) 14 | test = pd.read_csv(TESTFILE, sep='\t', dtype={0:str, 1:str, 2:np.float32}) 15 | data = pd.concat([train, valid, test]) 16 | return data.ItemId.unique() 17 | 18 | 19 | def _load_data(f, max_len): 20 | """ 21 | Data format in file f: 22 | SessionId\tItemId\tTimestamp\n 23 | """ 24 | 25 | if os.path.exists('item2id.map'): 26 | item2idmap = {} 27 | for line in open('item2id.map'): 28 | k, v = line.strip().split('\t') 29 | item2idmap[k] = int(v) 30 | else: 31 | items = get_item() 32 | item2idmap = dict(zip(items, range(1, 1+items.size))) 33 | with open('item2id.map', 'w') as fout: 34 | for k, v in item2idmap.iteritems(): 35 | fout.write(str(k) + '\t' + str(v) + '\n') 36 | n_items = len(item2idmap) 37 | data = pd.read_csv(f, sep='\t', dtype={0:str, 1:str, 2:np.float32}) 38 | data['ItemId'] = data['ItemId'].map(item2idmap) 39 | data = data.sort_values(by=['Timestamp']).groupby('SessionId')['ItemId'].apply(list).to_dict() 40 | new_x = [] 41 | new_y = [] 42 | for k, v in data.items(): 43 | x = v[:-1] 44 | y = v[1:] 45 | if len(x) < 2: 46 | continue 47 | padded_len = max_len - len(x) 48 | if padded_len > 0: 49 | x.extend([0] * padded_len) 50 | y.extend([0] * padded_len) 51 | new_x.append(x[:max_len]) 52 | new_y.append(y[:max_len]) 53 | return (new_x, new_y, n_items) 54 | 55 | def load_train(max_len): 56 | return _load_data(TRAINFILE, max_len) 57 | 58 | def load_valid(max_len): 59 | return _load_data(VALIDFILE, max_len) 60 | 61 | def load_test(max_len): 62 | return _load_data(TESTFILE, max_len) 63 | 64 | if __name__ == '__main__': 65 | load_train(20) 66 | --------------------------------------------------------------------------------