├── README.md ├── metric.py ├── main.py └── drr_model.py /README.md: -------------------------------------------------------------------------------- 1 | # drr 2 | code for the paper "Personalized Context-Aware Re-ranking for E-commerce Recommendation Systems" 3 | -------------------------------------------------------------------------------- /metric.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import sys, json 3 | 4 | def calc_average_precision_at_k(labels, k): 5 | n = min(len(labels),k) 6 | labels = labels[:n] 7 | p = [] 8 | p_cnt = 0 9 | for i in range(n): 10 | if labels[i]>0: 11 | p_cnt+=1 12 | p.append(p_cnt*1.0/(i+1)) 13 | if p_cnt > 0: 14 | return sum(p)/p_cnt 15 | else: 16 | return 0.0 17 | 18 | def calc_precision_at_k(labels, k): 19 | n = min(len(labels),k) 20 | labels = labels[:n] 21 | p_cnt = 0 22 | for i in range(n): 23 | if labels[i]>0: 24 | p_cnt+=1 25 | return p_cnt*1.0/n 26 | 27 | def make_metric_dict(): 28 | return { 'p@5':0, 'p@10':0, 'p@1':0, 'map@5':0, 'map@10':0, 'map@30':0 } 29 | 30 | def main(): 31 | metric_keys = [ 'p@1', 'p@5', 'p@10', 'map@5', 'map@10', 'map@30' ] 32 | filename = sys.argv[1] 33 | print "calc metric from %s" % filename 34 | f = file(filename) 35 | cnt = 0 36 | d = {} # for stat 37 | for line in f: 38 | try: 39 | step_labels = [ json.loads(labels) for labels in line.strip().split("\t") ] 40 | except: 41 | print line 42 | continue 43 | n = len(step_labels) 44 | for i in range(n): 45 | if not d.has_key(i): 46 | d[i] = make_metric_dict() 47 | d[i]['p@1'] += calc_precision_at_k(step_labels[i], 1) 48 | d[i]['p@5'] += calc_precision_at_k(step_labels[i], 5) 49 | d[i]['p@10'] += calc_precision_at_k(step_labels[i], 10) 50 | d[i]['map@5'] += calc_average_precision_at_k(step_labels[i], 5) 51 | d[i]['map@10'] += calc_average_precision_at_k(step_labels[i], 10) 52 | d[i]['map@30'] += calc_average_precision_at_k(step_labels[i], 30) 53 | cnt+=1 54 | f.close() 55 | n = len(d) 56 
| print 'total_record_cnt=%d step_range=[0,%d]' % (cnt, n-1) 57 | for i in range(n): 58 | info = ["step=%d" % i] 59 | for key in metric_keys: 60 | info.append("%s=%0.2f" % (key, d[i][key] * 100.0/cnt)) 61 | print " ".join(info) 62 | 63 | 64 | if __name__=="__main__": 65 | main() 66 | 67 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #! -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import tensorflow as tf 4 | from drr_model import * 5 | 6 | import numpy as np 7 | import time 8 | import json 9 | import os 10 | 11 | tf.app.flags.DEFINE_boolean("train", False, "train or predict") 12 | #params setting 13 | tf.app.flags.DEFINE_string("train_set", "", "the file path of train set") 14 | tf.app.flags.DEFINE_string("validation_set", "", "the file path of validation set") 15 | tf.app.flags.DEFINE_string("test_set", "", "the file path of test set") 16 | tf.app.flags.DEFINE_string("log_dir", "./log/", "the log directory") 17 | tf.app.flags.DEFINE_string("saved_model_name", "drr_model.h5", "the saved model name") 18 | tf.app.flags.DEFINE_integer("model_type", 0, "drr model type, 0:drr_base 1:drr_personalized_v1 2:drr_personalized_v2") 19 | tf.app.flags.DEFINE_integer("batch_size", 512, "batch size for training") 20 | tf.app.flags.DEFINE_integer("seq_len", 30, "the length of input list") 21 | tf.app.flags.DEFINE_integer("train_epochs", 100, "epoch for training") 22 | tf.app.flags.DEFINE_integer("train_steps_per_epoch", 1000, "steps per epoch for training") 23 | tf.app.flags.DEFINE_integer("validation_steps", 2000, "steps for validation") 24 | tf.app.flags.DEFINE_integer("early_stop_patience", 10, "early stop when model is not improved with X epochs") 25 | tf.app.flags.DEFINE_integer("lr_per_step", 4000, "update learning rate per X step") 26 | 27 | tf.app.flags.DEFINE_integer("d_feature", 12, "the 
feature length of each item in the input list") 28 | tf.app.flags.DEFINE_integer("d_model", 64, "param used drr_model") 29 | tf.app.flags.DEFINE_integer("d_inner_hid", 128, "param used in drr_model") 30 | tf.app.flags.DEFINE_integer("n_head", 1, "param used in drr_model") 31 | tf.app.flags.DEFINE_integer("d_k", 64, "param used in drr_model") 32 | tf.app.flags.DEFINE_integer("d_v", 64, "param used in drr_model") 33 | tf.app.flags.DEFINE_integer("n_layers", 2, "param used in drr_model") 34 | tf.app.flags.DEFINE_float("dropout", 0.1, "param used in drr_model") 35 | tf.app.flags.DEFINE_integer("pos_embedding_mode", 1, "param used in drr_model") # 0:use fix PE 1:use learnable PE 2:unuse PE 36 | 37 | FLAGS = tf.app.flags.FLAGS 38 | 39 | 40 | FEATURE_INFO_MAP = { 41 | "icf" : ['icf1', 'icf2', 'icf3', 'icf4', 'icf5'], 42 | "ucf" : ['ucf1', 'ufc2', 'ucf3'], 43 | "iv" : ['iv1', 'iv2', 'iv3', 'iv4', 'iv5', 'iv6', 'iv7', 'iv8', 'iv9', 'iv10', 'iv11', 'iv12'], 44 | "pv" : ['pv1', 'pv2', 'pv3', 'pv4', 'pv5', 'pv6', 'pv7' ], 45 | "iv+pv" : ['iv1', 'iv2', 'iv3', 'iv4', 'iv5', 'iv6', 'iv7', 'iv8', 'iv9', 'iv10', 'iv11', 'iv12', 'pv1', 'pv2', 'pv3', 'pv4', 'pv5', 'pv6', 'pv7'] 46 | } 47 | 48 | #get position 49 | def get_pos(batch_size, seq_len): 50 | outputs = np.zeros((batch_size, seq_len), dtype=np.int32) 51 | i = 0 52 | for i in range(batch_size): 53 | outputs[i] = np.arange(seq_len, dtype=np.int32) 54 | i+=1 55 | return outputs 56 | 57 | #get label from raw input batch 58 | def get_label(label_batch, batch_size, seq_len): 59 | outputs = np.zeros((batch_size, seq_len)) 60 | i = 0 61 | for row in label_batch: 62 | outputs[i] = np.array(json.loads(row)) 63 | i+=1 64 | return outputs 65 | 66 | #get uid from raw input batch 67 | def get_uid(features_batch, batch_size, seq_len): 68 | outputs = np.zeros((batch_size, seq_len), dtype=np.int32) 69 | i = 0 70 | for uid in features_batch: 71 | outputs[i] = np.array([uid]*seq_len, dtype=np.int32) 72 | i+=1 73 | return outputs 74 | 75 | #get 
#get icf from raw input batch: one int32 [batch_size, seq_len] matrix per
#categorical item feature.  Each record is a JSON [seq_len, feature_len]
#matrix; transposing puts features on the first axis.
#(removed the needless `global FEATURE_INFO_MAP` declarations: the map is
#only read, never rebound)
def get_icf(features_batch, batch_size, seq_len):
    feature_len = len(FEATURE_INFO_MAP["icf"])
    outputs = [np.zeros((batch_size, seq_len), dtype=np.int32) for _ in range(feature_len)]
    for j, row in enumerate(features_batch):
        feature_data = np.array(json.loads(row), dtype=np.int32).T
        for i in range(feature_len):
            outputs[i][j] = feature_data[i, :]
    return outputs

#get ucf from raw input batch: user features are per-record scalars, tiled
#across the sequence axis; JSON "null" values are mapped to 0 before parsing
def get_ucf(features_batch, batch_size, seq_len):
    feature_len = len(FEATURE_INFO_MAP['ucf'])
    outputs = [np.zeros((batch_size, seq_len), dtype=np.int32) for _ in range(feature_len)]
    for j, row in enumerate(features_batch):
        feature_data = np.tile(np.array(json.loads(row.replace("null", "0")), dtype=np.int32), (seq_len, 1)).T
        for i in range(feature_len):
            outputs[i][j] = feature_data[i, :]
    return outputs

#get iv from input batch: dense item vectors, [batch_size, seq_len, feature_len];
#rows past len(features_batch) stay zero (short final batch)
def get_iv(features_batch, batch_size, seq_len):
    feature_len = len(FEATURE_INFO_MAP['iv'])
    outputs = np.zeros((batch_size, seq_len, feature_len))
    for i, row in enumerate(features_batch):
        outputs[i] = np.array(json.loads(row))
    return outputs

#get pv from input batch: dense page vectors, same layout as get_iv
def get_pv(features_batch, batch_size, seq_len):
    feature_len = len(FEATURE_INFO_MAP['pv'])
    outputs = np.zeros((batch_size, seq_len, feature_len))
    for i, row in enumerate(features_batch):
        outputs[i] = np.array(json.loads(row))
    return outputs

#get iv and pv from input batch, concatenated along the feature axis
def get_iv_and_pv(iv_batch, pv_batch, batch_size, seq_len):
    iv = get_iv(iv_batch, batch_size, seq_len)
    pv = get_pv(pv_batch, batch_size, seq_len)
    return np.dstack((iv, pv))
features from input batch 134 | def get_features(uid_batch, ucf_batch, icf_batch, pv_batch, iv_batch, batch_size, seq_len): 135 | if FLAGS.model_type == 0: # drr_base, see paper for more detail 136 | outputs = [] 137 | outputs.append(get_pos(batch_size, seq_len)) 138 | outputs.append(get_iv(iv_batch, batch_size, seq_len)) 139 | assert FLAGS.d_feature == len(FEATURE_INFO_MAP['iv']) 140 | return outputs 141 | elif FLAGS.model_type == 1: # drr_personalized_v1, see paper for more detail 142 | outputs = [] 143 | outputs.append(get_pos(batch_size, seq_len)) 144 | outputs.append(get_uid(uid_batch, batch_size, seq_len)) 145 | outputs.extend(get_ucf(ucf_batch, batch_size, seq_len)) 146 | outputs.extend(get_icf(icf_batch, batch_size, seq_len)) 147 | outputs.append(get_iv(iv_batch, batch_size, seq_len)) 148 | assert FLAGS.d_feature == len(FEATURE_INFO_MAP['iv']) 149 | return outputs 150 | elif FLAGS.model_type == 2: # drr_personalized_v2, see paper for more detail 151 | outputs = [] 152 | outputs.append(get_pos(batch_size, seq_len)) 153 | outputs.append(get_iv_and_pv(iv_batch, pv_batch, batch_size, seq_len)) 154 | assert FLAGS.d_feature == len(FEATURE_INFO_MAP['iv']) + len(FEATURE_INFO_MAP['pv']) 155 | return outputs 156 | 157 | def input_generator(filename, batch_size, seq_len, repeat_cnt=-1): 158 | print("data_set={0} batch_size={1} seq_len={2} repeat_cnt={3} for input_generator".format(filename, batch_size, seq_len, repeat_cnt)) 159 | dataset = tf.contrib.data.CsvDataset([filename], record_defaults=[0,"","","","",""], field_delim='\t').repeat(repeat_cnt).batch(batch_size) 160 | next_val = dataset.make_one_shot_iterator().get_next() 161 | with K.get_session().as_default() as sess: 162 | while True: 163 | uid_batch, ucf_batch, icf_batch, pv_batch, iv_batch, label_batch = sess.run(next_val) 164 | yield get_features(uid_batch, ucf_batch, icf_batch, pv_batch, iv_batch, batch_size, seq_len), get_label(label_batch, batch_size, seq_len) 165 | 166 | 167 | #get model 168 | def 
get_model(): 169 | t = DrrModel(FLAGS.seq_len, FLAGS.d_feature, d_model=FLAGS.d_model, d_inner_hid=FLAGS.d_inner_hid, n_head=FLAGS.n_head, d_k=FLAGS.d_k, d_v=FLAGS.d_v, layers=FLAGS.n_layers, dropout=FLAGS.dropout) 170 | if FLAGS.model_type == 0 or FLAGS.model_type == 2: 171 | model = t.build_model(pos_mode=FLAGS.pos_embedding_mode) 172 | elif FLAGS.model_type == 1: 173 | model = t.build_model_ex(pos_mode=FLAGS.pos_embedding_mode) 174 | model.summary() 175 | print("model_type={0}".format(FLAGS.model_type)) 176 | print("model_setting:\n\tseq_len={0}\n\td_feature={1}\n\td_model={2}\n\td_inner_hid={3}\n\tn_head={4}\n\td_k={5}\n\td_v={6}\n\tn_layers={7}\n\tdropout={8}\n\tpos_embedding_mode={9}".format(FLAGS.seq_len, FLAGS.d_feature, FLAGS.d_model, FLAGS.d_inner_hid, FLAGS.n_head,FLAGS.d_k, FLAGS.d_v, FLAGS.n_layers, FLAGS.dropout, FLAGS.pos_embedding_mode)) 177 | print("-"*98) 178 | return model 179 | 180 | class LRSchedulerPerStep(Callback): 181 | def __init__(self, d_model, warmup=4000): 182 | self.basic = d_model**-0.5 183 | self.warm = warmup**-1.5 184 | self.step_num = 0 185 | def on_batch_begin(self, batch, logs = None): 186 | self.step_num += 1 187 | lr = self.basic * min(self.step_num**-0.5, self.step_num*self.warm) 188 | K.set_value(self.model.optimizer.lr, lr) 189 | 190 | class LRSchedulerPerEpoch(Callback): 191 | def __init__(self, d_model, warmup=4000, num_per_epoch=1000): 192 | self.basic = d_model**-0.5 193 | self.warm = warmup**-1.5 194 | self.num_per_epoch = num_per_epoch 195 | self.step_num = 1 196 | def on_epoch_begin(self, epoch, logs = None): 197 | self.step_num += self.num_per_epoch 198 | lr = self.basic * min(self.step_num**-0.5, self.step_num*self.warm) 199 | K.set_value(self.model.optimizer.lr, lr) 200 | #train 201 | def train(): 202 | print("trainig....") 203 | if not os.path.exists(FLAGS.log_dir): 204 | os.mkdir(FLAGS.log_dir) 205 | print("create log directory:{0}".format(FLAGS.log_dir)) 206 | model = get_model() 207 | 
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 208 | assert FLAGS.train_set !="" 209 | assert FLAGS.validation_set !="" 210 | print("train_set={0} validation_set={1} batch_size={2} seq_len={3}".format(FLAGS.train_set, 211 | FLAGS.validation_set, FLAGS.batch_size, FLAGS.seq_len)) 212 | train_gen = input_generator(FLAGS.train_set, FLAGS.batch_size, FLAGS.seq_len) 213 | next(train_gen) 214 | validation_gen = input_generator(FLAGS.validation_set, FLAGS.batch_size, FLAGS.seq_len) 215 | next(validation_gen) 216 | print("saved_model_name={0} early_stop_patience={1} lr_per_step={2}".format(FLAGS.saved_model_name, 217 | FLAGS.early_stop_patience, FLAGS.lr_per_step)) 218 | callback_list = [ TensorBoard(log_dir=FLAGS.log_dir), 219 | ModelCheckpoint(FLAGS.saved_model_name, verbose=1, monitor='val_loss', save_weights_only=True, save_best_only=True), 220 | EarlyStopping(monitor='val_loss', patience=FLAGS.early_stop_patience, verbose=1), 221 | LRSchedulerPerStep(FLAGS.d_model, FLAGS.lr_per_step)] 222 | print("train_epochs={0} train_steps_per_epoch={1} validation_steps={2}".format(FLAGS.train_epochs, 223 | FLAGS.train_steps_per_epoch, FLAGS.validation_steps)) 224 | model.fit_generator(train_gen, epochs=FLAGS.train_epochs, steps_per_epoch=FLAGS.train_steps_per_epoch 225 | , verbose=2, callbacks=callback_list, validation_data=validation_gen, validation_steps=FLAGS.validation_steps) 226 | K.clear_session() 227 | print("finish training!") 228 | 229 | #predict 230 | def predict(): 231 | print("predicting...") 232 | if not os.path.exists(FLAGS.saved_model_name): 233 | print("the model file {0} does not exist!".format(FLAGS.saved_model_name)) 234 | return 235 | else: 236 | print("load model from {0}!".format(FLAGS.saved_model_name)) 237 | model = get_model() 238 | model.load_weights(FLAGS.saved_model_name) 239 | assert FLAGS.test_set !="" 240 | test_gen = input_generator(FLAGS.test_set, FLAGS.batch_size, FLAGS.seq_len, 1) 241 | batch_cnt = 0 242 | 
predict_output_file="%s.predict.out" % FLAGS.test_set 243 | fout = file(predict_output_file, "w") 244 | try: 245 | for test_batch in test_gen: 246 | batch_cnt+=1 247 | features_batch = test_batch[0] 248 | label_batch = test_batch[1] 249 | predict_batch = model.predict_on_batch(features_batch) 250 | print("processed {0} batchs...".format(batch_cnt)) 251 | for labels,predicts in zip(label_batch, predict_batch): 252 | if sum(labels) > 0: # predict valid labels 253 | new_ranks = np.argsort(-predicts) 254 | new_labels = labels[new_ranks] 255 | fout.write("%s\t%s\n" % (json.dumps(labels.tolist()), json.dumps(new_labels.tolist()))) 256 | except tf.errors.OutOfRangeError: 257 | print("finish predicting!") 258 | fout.close() 259 | return 0; 260 | 261 | 262 | def main(_): 263 | beg_time = time.time() 264 | if FLAGS.train: 265 | train() 266 | else: 267 | predict() 268 | #get_model() 269 | end_time = time.time() 270 | time_cost = (time.time() - beg_time)/60 271 | print("job done! time_cost={0} minutes".format(round(time_cost))) 272 | 273 | if __name__=="__main__": 274 | tf.app.run() 275 | -------------------------------------------------------------------------------- /drr_model.py: -------------------------------------------------------------------------------- 1 | #! 
-*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function 3 | import tensorflow as tf 4 | from tensorflow import keras 5 | from tensorflow.python.keras.models import Sequential,Model 6 | from tensorflow.python.keras import initializers 7 | from tensorflow.python.keras.activations import tanh, softmax 8 | from tensorflow.python.keras.layers import Add, Conv1D, Lambda, Dropout, Dense, GRU, LSTM, InputSpec, Bidirectional, TimeDistributed, Flatten, Activation, BatchNormalization 9 | from tensorflow.python.keras.layers import Layer, Input, concatenate, GlobalAveragePooling1D, Embedding, RepeatVector, Reshape 10 | from tensorflow.python.keras import backend as K 11 | from tensorflow.python.keras.metrics import top_k_categorical_accuracy 12 | from tensorflow.python.keras.estimator import model_to_estimator 13 | from tensorflow.python.keras.callbacks import Callback, EarlyStopping, TensorBoard, ModelCheckpoint 14 | from tensorflow.python.platform import tf_logging as logging 15 | from tensorflow.python.keras.initializers import Ones, Zeros 16 | import numpy as np 17 | 18 | class LayerNormalization(Layer): 19 | def __init__(self, eps=1e-6, **kwargs): 20 | self.eps = eps 21 | super(LayerNormalization, self).__init__(**kwargs) 22 | def build(self, input_shape): 23 | self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], 24 | initializer=Ones(), trainable=True) 25 | self.beta = self.add_weight(name='beta', shape=input_shape[-1:], 26 | initializer=Zeros(), trainable=True) 27 | super(LayerNormalization, self).build(input_shape) 28 | def call(self, x): 29 | mean = K.mean(x, axis=-1, keepdims=True) 30 | std = K.std(x, axis=-1, keepdims=True) 31 | return self.gamma * (x - mean) / (std + self.eps) + self.beta 32 | def compute_output_shape(self, input_shape): 33 | return input_shape 34 | 35 | class ScaledDotProductAttention(): 36 | def __init__(self, d_model, attn_dropout=0.1): 37 | self.temper = np.sqrt(d_model) 38 | self.dropout = 
Dropout(attn_dropout) 39 | def __call__(self, q, k, v, mask): 40 | attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k]) 41 | if mask is not None: 42 | mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask) 43 | attn = Add()([attn, mmask]) 44 | attn = Activation('softmax')(attn) 45 | attn = self.dropout(attn) 46 | output = Lambda(lambda x:K.batch_dot(x[0], x[1]))([attn, v]) 47 | return output, attn 48 | 49 | class MultiHeadAttention(): 50 | # mode 0 - big martixes, faster; mode 1 - more clear implementation 51 | def __init__(self, n_head, d_model, d_k, d_v, dropout, mode=0, use_norm=True): 52 | self.mode = mode 53 | self.n_head = n_head 54 | self.d_k = d_k 55 | self.d_v = d_v 56 | self.dropout = dropout 57 | if mode == 0: 58 | self.qs_layer = Dense(n_head*d_k, use_bias=False) 59 | self.ks_layer = Dense(n_head*d_k, use_bias=False) 60 | self.vs_layer = Dense(n_head*d_v, use_bias=False) 61 | elif mode == 1: 62 | self.qs_layers = [] 63 | self.ks_layers = [] 64 | self.vs_layers = [] 65 | for _ in range(n_head): 66 | self.qs_layers.append(TimeDistributed(Dense(d_k, use_bias=False))) 67 | self.ks_layers.append(TimeDistributed(Dense(d_k, use_bias=False))) 68 | self.vs_layers.append(TimeDistributed(Dense(d_v, use_bias=False))) 69 | self.attention = ScaledDotProductAttention(d_model) 70 | self.layer_norm = LayerNormalization() if use_norm else None 71 | self.w_o = TimeDistributed(Dense(d_model)) 72 | 73 | def __call__(self, q, k, v, mask=None): 74 | d_k, d_v = self.d_k, self.d_v 75 | n_head = self.n_head 76 | 77 | if self.mode == 0: 78 | qs = self.qs_layer(q) # [batch_size, len_q, n_head*d_k] 79 | ks = self.ks_layer(k) 80 | vs = self.vs_layer(v) 81 | 82 | def reshape1(x): 83 | s = tf.shape(x) # [batch_size, len_q, n_head * d_k] 84 | x = tf.reshape(x, [s[0], s[1], n_head, d_k]) 85 | x = tf.transpose(x, [2, 0, 1, 3]) 86 | x = tf.reshape(x, [-1, s[1], d_k]) # [n_head * batch_size, len_q, d_k] 87 | return x 88 | qs = Lambda(reshape1)(qs) 89 | ks = 
Lambda(reshape1)(ks) 90 | vs = Lambda(reshape1)(vs) 91 | 92 | if mask is not None: 93 | mask = Lambda(lambda x:K.repeat_elements(x, n_head, 0))(mask) 94 | head, attn = self.attention(qs, ks, vs, mask=mask) 95 | 96 | def reshape2(x): 97 | s = tf.shape(x) # [n_head * batch_size, len_v, d_v] 98 | x = tf.reshape(x, [n_head, -1, s[1], s[2]]) 99 | x = tf.transpose(x, [1, 2, 0, 3]) 100 | x = tf.reshape(x, [-1, s[1], n_head*d_v]) # [batch_size, len_v, n_head * d_v] 101 | return x 102 | head = Lambda(reshape2)(head) 103 | elif self.mode == 1: 104 | heads = []; attns = [] 105 | for i in range(n_head): 106 | qs = self.qs_layers[i](q) 107 | ks = self.ks_layers[i](k) 108 | vs = self.vs_layers[i](v) 109 | head, attn = self.attention(qs, ks, vs, mask) 110 | heads.append(head); attns.append(attn) 111 | head = Concatenate()(heads) if n_head > 1 else heads[0] 112 | attn = Concatenate()(attns) if n_head > 1 else attns[0] 113 | 114 | outputs = self.w_o(head) 115 | outputs = Dropout(self.dropout)(outputs) 116 | if not self.layer_norm: return outputs, attn 117 | outputs = Add()([outputs, q]) 118 | return self.layer_norm(outputs), attn 119 | 120 | class PositionwiseFeedForward(): 121 | def __init__(self, d_hid, d_inner_hid, dropout=0.1): 122 | self.w_1 = Conv1D(d_inner_hid, 1, activation='relu') 123 | self.w_2 = Conv1D(d_hid, 1) 124 | self.layer_norm = LayerNormalization() 125 | self.dropout = Dropout(dropout) 126 | def __call__(self, x): 127 | output = self.w_1(x) 128 | output = self.w_2(output) 129 | output = self.dropout(output) 130 | output = Add()([output, x]) 131 | return self.layer_norm(output) 132 | 133 | class EncoderLayer(): 134 | def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, dropout=0.1): 135 | self.self_att_layer = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 136 | self.pos_ffn_layer = PositionwiseFeedForward(d_model, d_inner_hid, dropout=dropout) 137 | def __call__(self, enc_input, mask=None): 138 | output, slf_attn = 
self.self_att_layer(enc_input, enc_input, enc_input, mask=mask) 139 | output = self.pos_ffn_layer(output) 140 | return output, slf_attn 141 | 142 | def GetPosEncodingMatrix(max_len, d_emb): 143 | pos_enc = np.array([ 144 | [pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)] 145 | if pos != 0 else np.zeros(d_emb) 146 | for pos in range(max_len) 147 | ]) 148 | pos_enc[1:, 0::2] = np.sin(pos_enc[1:, 0::2]) # dim 2i 149 | pos_enc[1:, 1::2] = np.cos(pos_enc[1:, 1::2]) # dim 2i+1 150 | return pos_enc 151 | 152 | def GetSubMask(s): 153 | len_s = tf.shape(s)[1] 154 | bs = tf.shape(s)[:1] 155 | mask = tf.cumsum(tf.eye(len_s, batch_shape=bs), 1) 156 | return mask 157 | 158 | class Encoder(): 159 | def __init__(self, d_model, d_inner_hid, n_head, d_k, d_v, layers=2, dropout=0.1): 160 | self.emb_dropout = Dropout(dropout) 161 | self.layers = [EncoderLayer(d_model, d_inner_hid, n_head, d_k, d_v, dropout) for _ in range(layers)] 162 | def __call__(self, x, return_att=False, mask=None, active_layers=999): 163 | x = self.emb_dropout(x) 164 | if return_att: atts = [] 165 | for enc_layer in self.layers[:active_layers]: 166 | x, att = enc_layer(x, mask) 167 | if return_att: atts.append(att) 168 | return (x, atts) if return_att else x 169 | 170 | class DrrModel: 171 | def __init__(self, seq_len, d_feature, d_model=64, d_inner_hid=128, n_head=1, d_k=64, d_v=64, layers=2, dropout=0.1): 172 | self.seq_len = seq_len 173 | self.d_feature = d_feature 174 | self.d_model = d_model 175 | self.encoder = Encoder(d_model, d_inner_hid, n_head, d_k, d_v, layers, dropout) 176 | #drr_base or drr_personalized_v2 177 | def build_model(self, pos_mode=0, use_mask=False, active_layers=999): 178 | v_input = Input(shape=(self.seq_len, self.d_feature), name='v_input') 179 | d0 = TimeDistributed(Dense(self.d_model))(v_input) 180 | pos_input = Input(shape=(self.seq_len,), dtype='int32', name='pos_input') 181 | if pos_mode == 0: # use fixed pos embedding 182 | pos_embedding = 
Embedding(self.seq_len, self.d_model, trainable=False,\ 183 | weights=[GetPosEncodingMatrix(self.seq_len, self.d_model)]) 184 | p0 = pos_embedding(pos_input) 185 | elif pos_mode == 1: # use trainable pos embedding 186 | pos_embedding = Embedding(self.seq_len, self.d_model) 187 | p0 = pos_embedding(pos_input) 188 | else: # no pos embedding 189 | p0 = None 190 | if p0 != None: 191 | combine_input = Add()([d0, p0]) 192 | else: 193 | combine_input = d0 # no pos 194 | sub_mask = None 195 | if use_mask: 196 | sub_mask = Lambda(GetSubMask)(pos_input) 197 | enc_output = self.encoder(combine_input, mask=sub_mask, active_layers=active_layers) 198 | # score 199 | time_score_dense1 = TimeDistributed(Dense(self.d_model, activation='tanh'))(enc_output) 200 | time_score_dense2 = TimeDistributed(Dense(1))(time_score_dense1) 201 | flat = Flatten()(time_score_dense2) 202 | score_output = Activation(activation='softmax')(flat) 203 | self.model = Model([pos_input, v_input], score_output) 204 | return self.model 205 | #drr_personalized_v1 206 | def build_model_ex(self, pos_mode=0, use_mask=False, active_layers=999): 207 | #define embedding layer 208 | uid_embedding = Embedding(750000, 16, name='uid_embedding') # for uid 209 | itemid_embedding = Embedding(7500000, 32, name='itemid_embedding') # for icf1 210 | f1_embedding = Embedding(8, 2, name='f1_embedding') # for ucf1 211 | f2_embedding = Embedding(4, 2, name='f2_embedding') # for ucf2 & icf3 212 | f3_embedding = Embedding(8, 2, name='f3_embedding') # for ucf3 & icf4 213 | f4_embedding = Embedding(4, 2, name='f4_embedding') # for icf5 214 | f5_embedding = Embedding(256, 4, name='f5_embedding') # icf2 215 | #define user input 216 | uid_input = Input(shape=(self.seq_len,), dtype='int32', name='uid_input') 217 | ucf1_input = Input(shape=(self.seq_len,), dtype='int32', name='ucf1_input') 218 | ucf2_input = Input(shape=(self.seq_len,), dtype='int32', name='ucf2_input') 219 | ucf3_input = Input(shape=(self.seq_len,), dtype='int32', 
name='ucf3_input') 220 | #define item input 221 | icf1_input = Input(shape=(self.seq_len,), dtype='int32', name='icf1_input') 222 | icf2_input = Input(shape=(self.seq_len,), dtype='int32', name='icf2_input') 223 | icf3_input = Input(shape=(self.seq_len,), dtype='int32', name='icf3_input') 224 | icf4_input = Input(shape=(self.seq_len,), dtype='int32', name='icf4_input') 225 | icf5_input = Input(shape=(self.seq_len,), dtype='int32', name='icf5_input') 226 | #define dense input 227 | v_input = Input(shape=(self.seq_len, self.d_feature), name='v_input') 228 | #define user embedding 229 | u0 = uid_embedding(uid_input) 230 | u1 = f1_embedding(ucf1_input) 231 | u2 = f2_embedding(ucf2_input) 232 | u3 = f3_embedding(ucf3_input) 233 | #define item embedding 234 | i1 = itemid_embedding(icf1_input) 235 | i2 = f5_embedding(icf2_input) 236 | i3 = f2_embedding(icf3_input) 237 | i4 = f3_embedding(icf4_input) 238 | i5 = f4_embedding(icf5_input) 239 | #define page embedding: 16+2+2+2+32+4+2+2+2=64 240 | page_embedding = concatenate([v_input, u0, u1, u2, u3, i1, i2, i3, i4, i5], axis=-1, name='page_embedding') 241 | d0 = TimeDistributed(Dense(self.d_model))(page_embedding) 242 | pos_input = Input(shape=(self.seq_len,), dtype='int32', name='pos_input') 243 | if pos_mode == 0: # use fix pos embedding 244 | pos_embedding = Embedding(self.seq_len, self.d_model, trainable=False,\ 245 | weights=[GetPosEncodingMatrix(self.seq_len, self.d_model)]) 246 | p0 = pos_embedding(pos_input) 247 | elif pos_mode == 1: # use trainable ebmedding 248 | pos_embedding = Embedding(self.seq_len, self.d_model) 249 | p0 = pos_embedding(pos_input) 250 | else: # not use pos embedding 251 | p0 = None 252 | if p0 != None: 253 | combine_input = Add()([d0, p0]) 254 | else: 255 | combine_input = d0 # no pos 256 | sub_mask = None 257 | if use_mask: 258 | sub_mask = Lambda(GetSubMask)(pos_input) 259 | enc_output = self.encoder(combine_input, mask=sub_mask, active_layers=active_layers) 260 | # score 261 | 
time_score_dense1 = TimeDistributed(Dense(self.d_model, activation='tanh'))(enc_output) 262 | time_score_dense2 = TimeDistributed(Dense(1))(time_score_dense1) 263 | flat = Flatten()(time_score_dense2) 264 | score_output = Activation(activation='softmax')(flat) 265 | base_input = [pos_input, uid_input, ucf1_input, ucf2_input, ucf3_input, icf1_input, icf2_input, icf3_input, icf4_input, icf5_input, v_input] 266 | self.model = Model(base_input, score_output) 267 | return self.model 268 | --------------------------------------------------------------------------------