├── .gitignore
├── README.md
├── prepare_data.sh
└── script
├── Dice.py
├── calc_ckpt.py
├── data_iterator.py
├── generate_voc.py
├── generate_voc.py.bk
├── local_aggretor.py
├── model.py
├── model_avazu.py
├── process_data.py
├── rnn.py
├── shuffle.py
├── split_by_user.py
├── train.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Co-Action Network
2 |
3 | Implementation of the paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
4 |
5 | paper: [arxiv (to be released)]()
6 |
7 | ## Installation
8 | dependencies:
9 |
10 | tensorflow: 1.4.1
11 |
12 | python: 2.7
13 |
14 | Higher versions of TensorFlow and Python 3 will be supported soon!
15 |
16 | ## Getting Started
17 | training:
18 |
19 |     CUDA_VISIBLE_DEVICES=0 python script/train.py train {model}
20 |
21 | model: CAN, Cartesion, PNN, etc. (see train.py for the full list)
22 |
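For example, to run the full pipeline on the Amazon Books data with the CAN model from the list above:

    sh prepare_data.sh
    CUDA_VISIBLE_DEVICES=0 python script/train.py train CAN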
23 | ## Citation
24 | ## Contact
25 | ## License
26 |
--------------------------------------------------------------------------------
/prepare_data.sh:
--------------------------------------------------------------------------------
1 | export PATH="$HOME/anaconda4/bin:$PATH"
2 | wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
3 | wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
4 | gunzip reviews_Books.json.gz
5 | gunzip meta_Books.json.gz
6 | python script/process_data.py meta_Books.json reviews_Books.json
7 | python script/local_aggretor.py
8 | python script/split_by_user.py
9 | python script/generate_voc.py
10 |
--------------------------------------------------------------------------------
/script/Dice.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def dice(_x, axis=-1, epsilon=0.000000001, name=''):
4 |     with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
5 |         alphas = tf.get_variable('alpha'+name, _x.get_shape()[-1],
6 |                                  initializer=tf.constant_initializer(0.0),
7 |                                  dtype=tf.float32)
8 |         input_shape = list(_x.get_shape())
9 |
10 |         reduction_axes = list(range(len(input_shape)))
11 |         del reduction_axes[axis]
12 |         broadcast_shape = [1] * len(input_shape)
13 |         broadcast_shape[axis] = input_shape[axis]
14 |
15 |     # case: train mode (uses stats of the current batch)
16 |     mean = tf.reduce_mean(_x, axis=reduction_axes)
17 |     broadcast_mean = tf.reshape(mean, broadcast_shape)
18 |     std = tf.reduce_mean(tf.square(_x - broadcast_mean) + epsilon, axis=reduction_axes)
19 |     std = tf.sqrt(std)
20 |     broadcast_std = tf.reshape(std, broadcast_shape)
21 |     x_normed = (_x - broadcast_mean) / (broadcast_std + epsilon)
22 |     # x_normed = tf.layers.batch_normalization(_x, center=False, scale=False)
23 |     x_p = tf.sigmoid(x_normed)
24 |
25 |
26 |     return alphas * (1.0 - x_p) * _x + x_p * _x
27 |
28 | def parametric_relu(_x):
29 |     alphas = tf.get_variable('alpha', _x.get_shape()[-1],
30 |                              initializer=tf.constant_initializer(0.0),
31 |                              dtype=tf.float32)
32 |     pos = tf.nn.relu(_x)
33 |     neg = alphas * (_x - abs(_x)) * 0.5
34 |
35 |     return pos + neg
36 |
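# Usage sketch (hypothetical, for illustration; the non-dice branch of model.py's
# build_fcn_net uses its own prelu from utils). Both activations create their 'alpha'
# variables in the current variable scope:
#
#     x = tf.layers.dense(inp, 200, activation=None, name='f1')
#     x = dice(x, name='dice_1')      # or: x = parametric_relu(x)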
--------------------------------------------------------------------------------
/script/calc_ckpt.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | ckpt = tf.train.get_checkpoint_state("./ckpt_path/").model_checkpoint_path
3 | saver = tf.train.import_meta_graph(ckpt+'.meta')
4 | variables = tf.trainable_variables()
5 | total_parameters = 0
6 | for variable in variables:
7 |     shape = variable.get_shape()
8 |     variable_parameters = 1
9 |     for dim in shape:
10 |         # print(dim)
11 |         variable_parameters *= dim.value
12 |     # print(variable_parameters)
13 |     total_parameters += variable_parameters
14 | print(total_parameters)
15 |
--------------------------------------------------------------------------------
/script/data_iterator.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import json
3 | import cPickle as pkl
4 | import random
5 |
6 | import gzip
7 |
8 | import shuffle
9 |
10 | def unicode_to_utf8(d):
11 |     return dict((key.encode("UTF-8"), value) for (key,value) in d.items())
12 | def dict_unicode_to_utf8(d):
13 |     return dict(((key[0].encode("UTF-8"), key[1].encode("UTF-8")), value) for (key,value) in d.items())
14 |
15 | def load_dict(filename):
16 |     try:
17 |         with open(filename, 'rb') as f:
18 |             return unicode_to_utf8(json.load(f))
19 |     except:
20 |         try:
21 |             with open(filename, 'rb') as f:
22 |                 return unicode_to_utf8(pkl.load(f))
23 |         except:
24 |             with open(filename, 'rb') as f:
25 |                 return dict_unicode_to_utf8(pkl.load(f))
26 |
27 |
28 | def fopen(filename, mode='r'):
29 |     if filename.endswith('.gz'):
30 |         return gzip.open(filename, mode)
31 |     return open(filename, mode)
32 |
33 |
34 | class DataIterator:
35 |
36 |     def __init__(self, source,
37 |                  uid_voc,
38 |                  mid_voc,
39 |                  cat_voc,
40 |                  batch_size=128,
41 |                  maxlen=100,
42 |                  skip_empty=False,
43 |                  shuffle_each_epoch=False,
44 |                  sort_by_length=True,
45 |                  max_batch_size=20,
46 |                  minlen=None,
47 |                  label_type=1):
48 |         if shuffle_each_epoch:
49 |             self.source_orig = source
50 |             self.source = shuffle.main(self.source_orig, temporary=True)
51 |         else:
52 |             self.source = fopen(source, 'r')
53 |         self.source_dicts = []
54 |         #for source_dict in [uid_voc, mid_voc, cat_voc, cat_voc, cat_voc]:# 'item_carte_voc.pkl', 'cate_carte_voc.pkl']:
55 |         for source_dict in [uid_voc, mid_voc, cat_voc, 'item_carte_voc.pkl', 'cate_carte_voc.pkl']:
56 |             self.source_dicts.append(load_dict(source_dict))
57 |
58 |         f_meta = open("item-info", "r")
59 |         meta_map = {}
60 |         for line in f_meta:
61 |             arr = line.strip().split("\t")
62 |             if arr[0] not in meta_map:
63 |                 meta_map[arr[0]] = arr[1]
64 |         self.meta_id_map = {}
65 |         for key in meta_map:
66 |             val = meta_map[key]
67 |             if key in self.source_dicts[1]:
68 |                 mid_idx = self.source_dicts[1][key]
69 |             else:
70 |                 mid_idx = 0
71 |             if val in self.source_dicts[2]:
72 |                 cat_idx = self.source_dicts[2][val]
73 |             else:
74 |                 cat_idx = 0
75 |             self.meta_id_map[mid_idx] = cat_idx
76 |
77 |         f_review = open("reviews-info", "r")
78 |         self.mid_list_for_random = []
79 |         for line in f_review:
80 |             arr = line.strip().split("\t")
81 |             tmp_idx = 0
82 |             if arr[1] in self.source_dicts[1]:
83 |                 tmp_idx = self.source_dicts[1][arr[1]]
84 |             self.mid_list_for_random.append(tmp_idx)
85 |
86 |         self.batch_size = batch_size
87 |         self.maxlen = maxlen
88 |         self.minlen = minlen
89 |         self.skip_empty = skip_empty
90 |
91 |         self.n_uid = len(self.source_dicts[0])
92 |         self.n_mid = len(self.source_dicts[1])
93 |         self.n_cat = len(self.source_dicts[2])
94 |         self.n_carte = [len(self.source_dicts[3]), len(self.source_dicts[4])]
95 |         print("n_uid=%d, n_mid=%d, n_cat=%d" % (self.n_uid, self.n_mid, self.n_cat))
96 |
97 |         self.shuffle = shuffle_each_epoch
98 |         self.sort_by_length = sort_by_length
99 |
100 |         self.source_buffer = []
101 |         self.k = batch_size * max_batch_size
102 |
103 |         self.end_of_data = False
104 |         self.label_type = label_type
105 |
106 |     def get_n(self):
107 |         return self.n_uid, self.n_mid, self.n_cat, self.n_carte
108 |
109 |     def __iter__(self):
110 |         return self
111 |
112 |     def reset(self):
113 |         if self.shuffle:
114 |             self.source= shuffle.main(self.source_orig, temporary=True)
115 |         else:
116 |             self.source.seek(0)
117 |
118 |     def next(self):
119 |         if self.end_of_data:
120 |             self.end_of_data = False
121 |             self.reset()
122 |             raise StopIteration
123 |
124 |         source = []
125 |         target = []
126 |
127 |         if len(self.source_buffer) == 0:
128 |             for k_ in xrange(self.k):
129 |                 ss = self.source.readline()
130 |                 if ss == "":
131 |                     break
132 |                 self.source_buffer.append(ss.strip("\n").split("\t"))
133 |
134 |             # sort by history behavior length
135 |             if self.sort_by_length:
136 |                 his_length = numpy.array([len(s[4].split("\x02")) for s in self.source_buffer])
137 |                 tidx = his_length.argsort()
138 |
139 |                 _sbuf = [self.source_buffer[i] for i in tidx]
140 |                 self.source_buffer = _sbuf
141 |             else:
142 |                 self.source_buffer.reverse()
143 |
144 |         if len(self.source_buffer) == 0:
145 |             self.end_of_data = False
146 |             self.reset()
147 |             raise StopIteration
148 |
149 |         try:
150 |
151 |             # actual work here
152 |             while True:
153 |
154 |                 # read from source file and map to word index
155 |                 try:
156 |                     ss = self.source_buffer.pop()
157 |                 except IndexError:
158 |                     break
159 |
160 |                 uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0
161 |                 mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0
162 |                 cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0
163 |
164 |                 tmp = []
165 |                 item_carte = []
166 |                 for fea in ss[4].split("\x02"):
167 |                     m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0
168 |                     tmp.append(m)
169 |                     i_c = self.source_dicts[3][(ss[2], fea)] if (ss[2], fea) in self.source_dicts[3] else 0
170 |                     item_carte.append(i_c)
171 |                 mid_list = tmp
172 |
173 |                 tmp1 = []
174 |                 cate_carte = []
175 |                 for fea in ss[5].split("\x02"):
176 |                     c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0
177 |                     tmp1.append(c)
178 |                     c_c = self.source_dicts[4][(ss[3], fea)] if (ss[3], fea) in self.source_dicts[4] else 0
179 |                     cate_carte.append(c_c)
180 |                 cat_list = tmp1
181 |
182 |                 # read from source file and map to word index
183 |
184 |                 if self.minlen != None:
185 |                     if len(mid_list) <= self.minlen:
186 |                         continue
187 |                 if self.skip_empty and (not mid_list):
188 |                     continue
189 |
190 |                 noclk_mid_list = []
191 |                 noclk_cat_list = []
192 |                 for pos_mid in mid_list:
193 |                     noclk_tmp_mid = []
194 |                     noclk_tmp_cat = []
195 |                     noclk_index = 0
196 |                     while True:
197 |                         noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1)
198 |                         noclk_mid = self.mid_list_for_random[noclk_mid_indx]
199 |                         if noclk_mid == pos_mid:
200 |                             continue
201 |                         noclk_tmp_mid.append(noclk_mid)
202 |                         noclk_tmp_cat.append(self.meta_id_map[noclk_mid])
203 |                         noclk_index += 1
204 |                         if noclk_index >= 5:
205 |                             break
206 |                     noclk_mid_list.append(noclk_tmp_mid)
207 |                     noclk_cat_list.append(noclk_tmp_cat)
208 |                 carte_list = [item_carte, cate_carte]
209 |                 source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list, carte_list])
210 |                 if self.label_type == 1:
211 |                     target.append([float(ss[0])])
212 |                 else:
213 |                     target.append([float(ss[0]), 1-float(ss[0])])
214 |
215 |                 if len(source) >= self.batch_size or len(target) >= self.batch_size:
216 |                     break
217 |         except IOError:
218 |             self.end_of_data = True
219 |
220 |         # all sentence pairs in maxibatch filtered out because of length
221 |         if len(source) == 0 or len(target) == 0:
222 |             source, target = self.next()
223 |
224 |         return source, target
225 |
226 |
227 |
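# Usage sketch (hypothetical arguments; train.py wires up the real ones). Besides the
# explicit vocabularies, DataIterator also reads "item-info", "reviews-info",
# "item_carte_voc.pkl" and "cate_carte_voc.pkl" from the working directory. Each yielded
# source entry is [uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list,
# carte_list]:
#
#     train_data = DataIterator("local_train_splitByUser", "uid_voc.pkl", "mid_voc.pkl",
#                               "cat_voc.pkl", batch_size=128, maxlen=100)
#     for source, target in train_data:
#         ...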
--------------------------------------------------------------------------------
/script/generate_voc.py:
--------------------------------------------------------------------------------
1 | import cPickle
2 |
3 | f_train = open("local_train_splitByUser", "r")
4 | uid_dict = {}
5 | mid_dict = {}
6 | cat_dict = {}
7 | item_carte_dict = {}
8 | cate_carte_dict = {}
9 |
10 | iddd = 0
11 | for line in f_train:
12 |     arr = line.strip("\n").split("\t")
13 |     clk = arr[0]
14 |     uid = arr[1]
15 |     mid = arr[2]
16 |     cat = arr[3]
17 |     mid_list = arr[4]
18 |     cat_list = arr[5]
19 |     if uid not in uid_dict:
20 |         uid_dict[uid] = 0
21 |     uid_dict[uid] += 1
22 |     if mid not in mid_dict:
23 |         mid_dict[mid] = 0
24 |     mid_dict[mid] += 1
25 |     if cat not in cat_dict:
26 |         cat_dict[cat] = 0
27 |     cat_dict[cat] += 1
28 |     if len(mid_list) == 0:
29 |         continue
30 |     for m in mid_list.split("\x02"):
31 |         if m not in mid_dict:
32 |             mid_dict[m] = 0
33 |         mid_dict[m] += 1
34 |         if (mid, m) not in item_carte_dict:
35 |             item_carte_dict[(mid, m)] = 0
36 |         item_carte_dict[(mid, m)] += 1
37 |     #print iddd
38 |     iddd+=1
39 |     for c in cat_list.split("\x02"):
40 |         if c not in cat_dict:
41 |             cat_dict[c] = 0
42 |         cat_dict[c] += 1
43 |         if (cat, c) not in cate_carte_dict:
44 |             cate_carte_dict[(cat, c)] = 0
45 |         cate_carte_dict[(cat, c)] += 1
46 |
47 | sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True)
48 | sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True)
49 | sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True)
50 | sorted_item_carte_dict = sorted(item_carte_dict.iteritems(), key=lambda x:x[1], reverse=True)
51 | sorted_cate_carte_dict = sorted(cate_carte_dict.iteritems(), key=lambda x:x[1], reverse=True)
52 |
53 | uid_voc = {}
54 | index = 0
55 | for key, value in sorted_uid_dict:
56 |     uid_voc[key] = index
57 |     index += 1
58 |
59 | mid_voc = {}
60 | mid_voc["default_mid"] = 0
61 | index = 1
62 | for key, value in sorted_mid_dict:
63 |     mid_voc[key] = index
64 |     index += 1
65 |
66 | cat_voc = {}
67 | cat_voc["default_cat"] = 0
68 | index = 1
69 | for key, value in sorted_cat_dict:
70 |     cat_voc[key] = index
71 |     index += 1
72 |
73 | item_carte_voc = {}
74 | item_carte_voc["default_item_carte"] = 0
75 | index = 1
76 | for key, value in sorted_item_carte_dict:
77 |     item_carte_voc[key] = index
78 |     index += 1
79 |
80 | cate_carte_voc = {}
81 | cate_carte_voc["default_cate_carte"] = 0
82 | index = 1
83 | for key, value in sorted_cate_carte_dict:
84 |     cate_carte_voc[key] = index
85 |     index += 1
86 |
87 | cPickle.dump(uid_voc, open("uid_voc.pkl", "w"))
88 | cPickle.dump(mid_voc, open("mid_voc.pkl", "w"))
89 | cPickle.dump(cat_voc, open("cat_voc.pkl", "w"))
90 | cPickle.dump(item_carte_voc, open("item_carte_voc.pkl", "w"))
91 | cPickle.dump(cate_carte_voc, open("cate_carte_voc.pkl", "w"))
92 |
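# Vocabulary layout produced above: ids are assigned by descending frequency, with index
# 0 reserved for the default/unknown entry, e.g.
#
#     mid_voc = {"default_mid": 0, "<most frequent item>": 1, "<next>": 2, ...}
#
# item_carte_voc is keyed by (target_item, history_item) pairs and cate_carte_voc by
# (target_category, history_category) pairs -- the Cartesian-product ("carte") features.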
--------------------------------------------------------------------------------
/script/generate_voc.py.bk:
--------------------------------------------------------------------------------
1 | import cPickle
2 |
3 | f_train = open("local_train_splitByUser", "r")
4 | uid_dict = {}
5 | mid_dict = {}
6 | cat_dict = {}
7 |
8 | iddd = 0
9 | for line in f_train:
10 |     arr = line.strip("\n").split("\t")
11 |     clk = arr[0]
12 |     uid = arr[1]
13 |     mid = arr[2]
14 |     cat = arr[3]
15 |     mid_list = arr[4]
16 |     cat_list = arr[5]
17 |     if uid not in uid_dict:
18 |         uid_dict[uid] = 0
19 |     uid_dict[uid] += 1
20 |     if mid not in mid_dict:
21 |         mid_dict[mid] = 0
22 |     mid_dict[mid] += 1
23 |     if cat not in cat_dict:
24 |         cat_dict[cat] = 0
25 |     cat_dict[cat] += 1
26 |     if len(mid_list) == 0:
27 |         continue
28 |     for m in mid_list.split("\x02"):
29 |         if m not in mid_dict:
30 |             mid_dict[m] = 0
31 |         mid_dict[m] += 1
32 |     #print iddd
33 |     iddd+=1
34 |     for c in cat_list.split("\x02"):
35 |         if c not in cat_dict:
36 |             cat_dict[c] = 0
37 |         cat_dict[c] += 1
38 |
39 | sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True)
40 | sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True)
41 | sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True)
42 |
43 | uid_voc = {}
44 | index = 0
45 | for key, value in sorted_uid_dict:
46 |     uid_voc[key] = index
47 |     index += 1
48 |
49 | mid_voc = {}
50 | mid_voc["default_mid"] = 0
51 | index = 1
52 | for key, value in sorted_mid_dict:
53 |     mid_voc[key] = index
54 |     index += 1
55 |
56 | cat_voc = {}
57 | cat_voc["default_cat"] = 0
58 | index = 1
59 | for key, value in sorted_cat_dict:
60 |     cat_voc[key] = index
61 |     index += 1
62 |
63 | cPickle.dump(uid_voc, open("uid_voc.pkl", "w"))
64 | cPickle.dump(mid_voc, open("mid_voc.pkl", "w"))
65 | cPickle.dump(cat_voc, open("cat_voc.pkl", "w"))
66 |
--------------------------------------------------------------------------------
/script/local_aggretor.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import hashlib
3 | import random
4 |
5 | fin = open("jointed-new-split-info", "r")
6 | ftrain = open("local_train", "w")
7 | ftest = open("local_test", "w")
8 |
9 | last_user = "0"
10 | common_fea = ""
11 | line_idx = 0
12 | for line in fin:
13 |     items = line.strip().split("\t")
14 |     ds = items[0]
15 |     clk = int(items[1])
16 |     user = items[2]
17 |     movie_id = items[3]
18 |     dt = items[5]
19 |     cat1 = items[6]
20 |
21 |     if ds=="20180118":
22 |         fo = ftrain
23 |     else:
24 |         fo = ftest
25 |     if user != last_user:
26 |         movie_id_list = []
27 |         cate1_list = []
28 |         #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + ""
29 |     else:
30 |         history_clk_num = len(movie_id_list)
31 |         cat_str = ""
32 |         mid_str = ""
33 |         for c1 in cate1_list:
34 |             cat_str += c1 + "\x02"
35 |         for mid in movie_id_list:
36 |             mid_str += mid + "\x02"
37 |         if len(cat_str) > 0: cat_str = cat_str[:-1]
38 |         if len(mid_str) > 0: mid_str = mid_str[:-1]
39 |         if history_clk_num >= 1:  # keep users with at least one prior click (8 is the average behavior length)
40 |             print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + mid_str + "\t" + cat_str
41 |     last_user = user
42 |     if clk:
43 |         movie_id_list.append(movie_id)
44 |         cate1_list.append(cat1)
45 |     line_idx += 1
46 |
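# Output format: each line written to local_train/local_test is tab-separated as
#
#     click \t user \t item \t category \t hist_item_list \t hist_cate_list
#
# with the two history lists joined by the "\x02" control character, matching the
# split("\x02") calls in generate_voc.py and data_iterator.py.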
--------------------------------------------------------------------------------
/script/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import GRUCell
3 | from tensorflow.python.ops.rnn_cell import LSTMCell
4 | from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
5 | #from tensorflow.python.ops.rnn import dynamic_rnn
6 | from rnn import dynamic_rnn
7 | from utils import *
8 | from Dice import dice
9 |
10 | #### CAN config #####
11 | weight_emb_w = [[16, 8], [8,4]]
12 | weight_emb_b = [0, 0]
13 | print(weight_emb_w, weight_emb_b)
14 | orders = 3
15 | order_indep = False # True
16 | WEIGHT_EMB_DIM = (sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b)) #* orders
17 | INDEP_NUM = 1
18 | if order_indep:
19 |     INDEP_NUM *= orders
20 | keep_fake_carte_seq = False  # referenced by gen_coaction below; model_avazu.py defines the same flag
21 | print("orders: ",orders)
22 | CALC_MODE = "can"
23 | device = '/gpu:0'
24 | #### CAN config #####
25 |
26 | def gen_coaction(ad, his_items, dim, mode="can", mask=None):
27 |     weight, bias = [], []
28 |     idx = 0
29 |     weight_orders = []
30 |     bias_orders = []
31 |     for i in range(orders):
32 |         for w, b in zip(weight_emb_w, weight_emb_b):
33 |             weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]]))
34 |             idx += w[0] * w[1]
35 |             if b == 0:
36 |                 bias.append(None)
37 |             else:
38 |                 bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b]))
39 |                 idx += b
40 |         weight_orders.append(weight)
41 |         bias_orders.append(bias)
42 |         if not order_indep:
43 |             break
44 |
45 |     if mode == "can":
46 |         out_seq = []
47 |         hh = []
48 |         for i in range(orders):
49 |             hh.append(his_items**(i+1))
50 |         #hh = [sum(hh)]
51 |         for i, h in enumerate(hh):
52 |             if order_indep:
53 |                 weight, bias = weight_orders[i], bias_orders[i]
54 |             else:
55 |                 weight, bias = weight_orders[0], bias_orders[0]
56 |             for j, (w, b) in enumerate(zip(weight, bias)):
57 |                 h = tf.matmul(h, w)
58 |                 if b is not None:
59 |                     h = h + b
60 |                 if j != len(weight)-1:
61 |                     h = tf.nn.tanh(h)
62 |             out_seq.append(h)
63 |         out_seq = tf.concat(out_seq, 2)
64 |     if mask is not None:
65 |         mask = tf.expand_dims(mask, axis=-1)
66 |         out_seq = out_seq * mask
67 |     out = tf.reduce_sum(out_seq, 1)
68 |     if keep_fake_carte_seq and mode=="emb":
69 |         return out, out_seq
70 |     return out, None
71 |
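# Worked example of the co-action parameter layout (values from the config above): with
# weight_emb_w = [[16, 8], [8, 4]] and weight_emb_b = [0, 0], each target item embeds
# into WEIGHT_EMB_DIM = 16*8 + 8*4 = 160 numbers, which gen_coaction slices into
# W1 [16, 8] and W2 [8, 4] -- a tiny per-item MLP applied to every history-item
# embedding, with tanh between layers. With orders = 3, the history input is fed in as
# h, h**2 and h**3, and the per-order outputs are concatenated, masked, and sum-pooled
# over the sequence.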
72 | class Model(object):
73 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling = False, use_softmax=True, use_coaction=False, use_cartes=False):
74 |         with tf.name_scope('Inputs'):
75 |             self.mid_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='mid_his_batch_ph')
76 |             self.cate_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_his_batch_ph')
77 |             self.uid_batch_ph = tf.placeholder(tf.int32, [None, ], name='uid_batch_ph')
78 |             self.mid_batch_ph = tf.placeholder(tf.int32, [None, ], name='mid_batch_ph')
79 |             self.cate_batch_ph = tf.placeholder(tf.int32, [None, ], name='cate_batch_ph')
80 |             self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
81 |             self.seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
82 |             self.target_ph = tf.placeholder(tf.float32, [None, None], name='target_ph')
83 |             self.carte_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='carte_ph')
84 |             self.lr = tf.placeholder(tf.float64, [])
85 |             self.use_negsampling =use_negsampling
86 |             self.use_softmax = False  # use_softmax is deliberately overridden: the sigmoid branch of build_loss is always used
87 |             self.use_coaction = use_coaction
88 |             self.use_cartes = use_cartes
89 |             print("args:")
90 |             print("negsampling: ", self.use_negsampling)
91 |             print("softmax: ", self.use_softmax)
92 |             print("co-action: ", self.use_coaction)
93 |             print("carte: ", self.use_cartes)
94 |             if use_negsampling:
95 |                 self.noclk_mid_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_mid_batch_ph') #generate 3 item IDs from negative sampling.
96 |                 self.noclk_cate_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_cate_batch_ph')
97 |
98 |         # Embedding layer
99 |         with tf.name_scope('Embedding_layer'):
100 |             self.uid_embeddings_var = tf.get_variable("uid_embedding_var", [n_uid, EMBEDDING_DIM])
101 |             tf.summary.histogram('uid_embeddings_var', self.uid_embeddings_var)
102 |             self.uid_batch_embedded = tf.nn.embedding_lookup(self.uid_embeddings_var, self.uid_batch_ph)
103 |
104 |             self.mid_embeddings_var = tf.get_variable("mid_embedding_var", [n_mid, EMBEDDING_DIM])
105 |             tf.summary.histogram('mid_embeddings_var', self.mid_embeddings_var)
106 |             self.mid_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_batch_ph)
107 |             self.mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_his_batch_ph)
108 |             if self.use_negsampling:
109 |                 self.noclk_mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.noclk_mid_batch_ph)
110 |
111 |             self.cate_embeddings_var = tf.get_variable("cate_embedding_var", [n_cate, EMBEDDING_DIM])
112 |             tf.summary.histogram('cate_embeddings_var', self.cate_embeddings_var)
113 |             self.cate_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_batch_ph)
114 |             self.cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_his_batch_ph)
115 |             if self.use_negsampling:
116 |                 self.noclk_cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.noclk_cate_batch_ph)
117 |
118 |         if self.use_cartes:
119 |             self.carte_embedding_vars = []
120 |             self.carte_batch_embedded = []
121 |             with tf.device(device):
122 |                 for i, num in enumerate(n_carte):
123 |                     print("carte num:", num)
124 |                     self.carte_embedding_vars.append(tf.get_variable("carte_embedding_var_{}".format(i), [num, EMBEDDING_DIM], trainable=True))
125 |                     self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embedding_vars[i], self.carte_batch_ph[:,i,:]))
126 |
127 |         ### co-action ###
128 |         if self.use_coaction:
129 |             ph_dict = {
130 |                 "item": [self.mid_batch_ph, self.mid_his_batch_ph, self.mid_his_batch_embedded],
131 |                 "cate": [self.cate_batch_ph, self.cate_his_batch_ph, self.cate_his_batch_embedded]
132 |             }
133 |             self.mlp_batch_embedded = []
134 |             with tf.device(device):
135 |                 self.item_mlp_embeddings_var = tf.get_variable("item_mlp_embedding_var", [n_mid, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True)
136 |                 self.cate_mlp_embeddings_var = tf.get_variable("cate_mlp_embedding_var", [n_cate, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True)
137 |
138 |                 self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.item_mlp_embeddings_var, ph_dict['item'][0]))
139 |                 self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.cate_mlp_embeddings_var, ph_dict['cate'][0]))
140 |
141 |                 self.input_batch_embedded = []
142 |                 self.item_input_embeddings_var = tf.get_variable("item_input_embedding_var", [n_mid, weight_emb_w[0][0] * INDEP_NUM], trainable=True)
143 |                 self.cate_input_embeddings_var = tf.get_variable("cate_input_embedding_var", [n_cate, weight_emb_w[0][0] * INDEP_NUM], trainable=True)
144 |                 self.input_batch_embedded.append(tf.nn.embedding_lookup(self.item_input_embeddings_var, ph_dict['item'][1]))
145 |                 self.input_batch_embedded.append(tf.nn.embedding_lookup(self.cate_input_embeddings_var, ph_dict['cate'][1]))
146 |
147 |         self.item_eb = tf.concat([self.mid_batch_embedded, self.cate_batch_embedded], 1)
148 |         self.item_his_eb = tf.concat([self.mid_his_batch_embedded, self.cate_his_batch_embedded], 2)
149 |         self.item_his_eb_sum = tf.reduce_sum(self.item_his_eb, 1)
150 |         if self.use_negsampling:
151 |             self.noclk_item_his_eb = tf.concat(
152 |                 [self.noclk_mid_his_batch_embedded[:, :, 0, :], self.noclk_cate_his_batch_embedded[:, :, 0, :]], -1)  # 0 means only the first negative item ID is used; several are fed in via noclk_mid_batch_ph.
153 |             self.noclk_item_his_eb = tf.reshape(self.noclk_item_his_eb,
154 |                 [-1, tf.shape(self.noclk_mid_his_batch_embedded)[1], 2*EMBEDDING_DIM])  # cate embedding (18) concatenated with item embedding (18).
155 |
156 |             self.noclk_his_eb = tf.concat([self.noclk_mid_his_batch_embedded, self.noclk_cate_his_batch_embedded], -1)
157 |             self.noclk_his_eb_sum_1 = tf.reduce_sum(self.noclk_his_eb, 2)
158 |             self.noclk_his_eb_sum = tf.reduce_sum(self.noclk_his_eb_sum_1, 1)
159 |
160 |         self.cross = []
161 |         if self.use_cartes:
162 |             if self.mask is not None:
163 |                 mask = tf.expand_dims(self.mask, axis=-1)
164 |             for i,emb in enumerate(self.carte_batch_embedded):
165 |                 emb = emb * mask
166 |                 carte_eb_sum = tf.reduce_sum(emb, 1)
167 |                 self.cross.append(carte_eb_sum)
168 |
169 |         if self.use_coaction:
170 |             input_batch = self.input_batch_embedded
171 |             tmp_sum, tmp_seq = [], []
172 |             if INDEP_NUM == 2:
173 |                 for i, mlp_batch in enumerate(self.mlp_batch_embedded):
174 |                     for j, input_batch in enumerate(self.input_batch_embedded):
175 |                         coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, WEIGHT_EMB_DIM * j: WEIGHT_EMB_DIM * (j+1)], input_batch[:, :, weight_emb_w[0][0] * i: weight_emb_w[0][0] * (i+1)], EMBEDDING_DIM, mode=CALC_MODE,mask=self.mask)
176 |                         tmp_sum.append(coaction_sum)
177 |                         tmp_seq.append(coaction_seq)
178 |             else:
179 |                 for i, (mlp_batch, input_batch) in enumerate(zip(self.mlp_batch_embedded, self.input_batch_embedded)):
180 |                     coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, : INDEP_NUM * WEIGHT_EMB_DIM], input_batch[:, :, : weight_emb_w[0][0]], EMBEDDING_DIM, mode=CALC_MODE, mask=self.mask)
181 |                     tmp_sum.append(coaction_sum)
182 |                     tmp_seq.append(coaction_seq)
183 |
184 |             self.coaction_sum = tf.concat(tmp_sum, axis=1)
185 |             self.cross.append(self.coaction_sum)
186 |
187 |     def build_fcn_net(self, inp, use_dice = False):
188 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
189 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
190 |         if use_dice:
191 |             dnn1 = dice(dnn1, name='dice_1')
192 |         else:
193 |             dnn1 = prelu(dnn1, 'prelu1')
194 |
195 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
196 |         if use_dice:
197 |             dnn2 = dice(dnn2, name='dice_2')
198 |         else:
199 |             dnn2 = prelu(dnn2, 'prelu2')
200 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3')
201 |         return dnn3
202 |
203 |     def build_loss(self, inp, L2=False):
204 |
205 |         with tf.name_scope('Metrics'):
206 |             # Cross-entropy loss and optimizer initialization
207 |             if self.use_softmax:
208 |                 self.y_hat = tf.nn.softmax(inp) + 0.00000001
209 |                 ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
210 |             else:
211 |                 self.y_hat = tf.nn.sigmoid(inp)
212 |                 ctr_loss = - tf.reduce_mean(tf.concat([tf.log(self.y_hat + 0.00000001) * self.target_ph, tf.log(1 - self.y_hat + 0.00000001) * (1-self.target_ph)], axis=1))
213 |             self.loss = ctr_loss
214 |             if self.use_negsampling:
215 |                 self.loss += self.aux_loss
216 |             if L2:
217 |                 self.loss += self.l2_loss
218 |
219 |             tf.summary.scalar('loss', self.loss)
220 |             self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
221 |
222 |             # Accuracy metric
223 |             if self.use_softmax:
224 |                 self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
225 |             else:
226 |                 self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
227 |             tf.summary.scalar('accuracy', self.accuracy)
228 |
229 |
230 |     def auxiliary_loss(self, h_states, click_seq, noclick_seq, mask, stag = None):
231 |         mask = tf.cast(mask, tf.float32)
232 |         click_input_ = tf.concat([h_states, click_seq], -1)
233 |         noclick_input_ = tf.concat([h_states, noclick_seq], -1)
234 |         click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0]
235 |         noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0]
236 |         click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask
237 |         noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask
238 |         loss_ = tf.reduce_mean(click_loss_ + noclick_loss_)
239 |         return loss_
240 |
241 |     def auxiliary_net(self, in_, stag='auxiliary_net'):
242 |         bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE)
243 |         dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE)
244 |         dnn1 = tf.nn.sigmoid(dnn1)
245 |         dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE)
246 |         dnn2 = tf.nn.sigmoid(dnn2)
247 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE)
248 |         if self.use_softmax:
249 |             y_hat = tf.nn.softmax(dnn3) + 0.00000001
250 |         else:
251 |             y_hat = tf.nn.sigmoid(dnn3) + 0.00000001
252 |         return y_hat
253 |
254 |
255 |     def train(self, sess, inps):
256 |         if self.use_negsampling:
257 |             loss, accuracy, aux_loss, _ = sess.run([self.loss, self.accuracy, self.aux_loss, self.optimizer], feed_dict={
258 |                 self.uid_batch_ph: inps[0],
259 |                 self.mid_batch_ph: inps[1],
260 |                 self.cate_batch_ph: inps[2],
261 |                 self.mid_his_batch_ph: inps[3],
262 |                 self.cate_his_batch_ph: inps[4],
263 |                 self.mask: inps[5],
264 |                 self.target_ph: inps[6],
265 |                 self.seq_len_ph: inps[7],
266 |                 self.lr: inps[8],
267 |                 self.noclk_mid_batch_ph: inps[9],
268 |                 self.noclk_cate_batch_ph: inps[10],
269 |                 self.carte_batch_ph: inps[11]
270 |             })
271 |             return loss, accuracy, aux_loss
272 |         else:
273 |             loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict={
274 |                 self.uid_batch_ph: inps[0],
275 |                 self.mid_batch_ph: inps[1],
276 |                 self.cate_batch_ph: inps[2],
277 |                 self.mid_his_batch_ph: inps[3],
278 |                 self.cate_his_batch_ph: inps[4],
279 |                 self.mask: inps[5],
280 |                 self.target_ph: inps[6],
281 |                 self.seq_len_ph: inps[7],
282 |                 self.lr: inps[8],
283 |                 self.carte_batch_ph: inps[11]
284 |             })
285 |             return loss, accuracy, 0
286 |
287 |     def calculate(self, sess, inps):
288 |         if self.use_negsampling:
289 |             probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict={
290 |                 self.uid_batch_ph: inps[0],
291 |                 self.mid_batch_ph: inps[1],
292 |                 self.cate_batch_ph: inps[2],
293 |                 self.mid_his_batch_ph: inps[3],
294 |                 self.cate_his_batch_ph: inps[4],
295 |                 self.mask: inps[5],
296 |                 self.target_ph: inps[6],
297 |                 self.seq_len_ph: inps[7],
298 |                 self.noclk_mid_batch_ph: inps[8],
299 |                 self.noclk_cate_batch_ph: inps[9],
300 |                 self.carte_batch_ph: inps[10]
301 |             })
302 |             return probs, loss, accuracy, aux_loss
303 |         else:
304 |             probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict={
305 |                 self.uid_batch_ph: inps[0],
306 |                 self.mid_batch_ph: inps[1],
307 |                 self.cate_batch_ph: inps[2],
308 |                 self.mid_his_batch_ph: inps[3],
309 |                 self.cate_his_batch_ph: inps[4],
310 |                 self.mask: inps[5],
311 |                 self.target_ph: inps[6],
312 |                 self.seq_len_ph: inps[7],
313 |                 self.carte_batch_ph: inps[10]
314 |             })
315 |             return probs, loss, accuracy, 0
316 |
317 |     def save(self, sess, path):
318 |         saver = tf.train.Saver()
319 |         saver.save(sess, save_path=path)
320 |
321 |     def restore(self, sess, path):
322 |         saver = tf.train.Saver()
323 |         saver.restore(sess, save_path=path)
324 |         print('model restored from %s' % path)
325 |
326 | class Model_NCF(Model):
327 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
328 |         super(Model_NCF, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
329 |                                         ATTENTION_SIZE,
330 |                                         use_negsampling, use_softmax)
331 |         with tf.name_scope('ncf_embedding'):
332 |             self.ncf_item_embedding_var = tf.get_variable("ncf_item_embedding_var", [n_mid, EMBEDDING_DIM], trainable=True)
333 |             self.ncf_cate_embedding_var = tf.get_variable("ncf_cate_embedding_var", [n_cate, EMBEDDING_DIM], trainable=True)
334 |
335 |             ncf_item_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_batch_ph)
336 |             ncf_item_his_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_his_batch_ph)
337 |             ncf_cate_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_batch_ph)
338 |             ncf_cate_his_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_his_batch_ph)
339 |
340 |             ncf_item_his_sum = tf.reduce_mean(ncf_item_his_emb, axis=1)
341 |             ncf_cate_his_sum = tf.reduce_mean(ncf_cate_his_emb, axis=1)
342 |             mf = tf.concat([ncf_item_emb * ncf_item_his_sum, ncf_cate_emb * ncf_cate_his_sum], axis=1)
343 |
344 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
345 |         logit = self.build_fcn_net(inp, mf, use_dice=False)
346 |         self.build_loss(logit)
347 |
348 |     def build_fcn_net(self, inp, mf, use_dice = False):
349 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
350 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
351 |         if use_dice:
352 |             dnn1 = dice(dnn1, name='dice_1')
353 |         else:
354 |             dnn1 = prelu(dnn1, scope='prelu_1')
355 |
356 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
357 |         if use_dice:
358 |             dnn2 = dice(dnn2, name='dice_2')
359 |         else:
360 |             dnn2 = prelu(dnn2, scope='prelu_2')
361 |
362 |         dnn2 = tf.concat([dnn2, mf], axis=1)
363 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3')
364 |         return dnn3
365 |
366 | def ProductLayer(feas, DIM, share=True):
367 |     row, col = [], []
368 |     num = len(feas)
369 |     pair = num * (num-1) / 2
370 |     for i in range(num - 1):
371 |         for j in range(i+1, num):
372 |             row.append(i)
373 |             col.append(j)
374 |     if share:
375 |         p = tf.stack([feas[i] for i in row], axis=1)
376 |         q = tf.stack([feas[i] for i in col], axis=1)
377 |     else:
378 |         tmp = []
379 |         count = {}
380 |         for i in row:
381 |             if i not in count:
382 |                 count[i] = 0
383 |             else:
384 |                 count[i] += 1
385 |             k = count[i]
386 |             tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
387 |         p = tf.stack(tmp, axis=1)
388 |         tmp = []
389 |         for i in col:
390 |             if i not in count:
391 |                 count[i] = 0
392 |             else:
393 |                 count[i] += 1
394 |             k = count[i]
395 |             tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
396 |         q = tf.stack(tmp, axis=1)
397 |
398 |     ipnn = p * q
399 |     ipnn = tf.reduce_sum(ipnn, axis=2, keep_dims=False)
400 |     p = tf.expand_dims(p, axis=1)
401 |     w = tf.get_variable("pnn_var", [DIM, pair, DIM], trainable=True)
402 |     opnn = tf.reduce_sum((tf.multiply((tf.transpose(tf.reduce_sum(tf.multiply(p, w), axis=-1), [0, 2, 1])), q)), axis=-1)
403 |     pnn = tf.concat([ipnn, opnn], axis=1)
404 |     return pnn
405 |
406 | class Model_PNN(Model):
407 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
408 |         super(Model_PNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
409 |
410 |         fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1)]
411 |         pnn = ProductLayer(fea_list, EMBEDDING_DIM)
412 |         inp = tf.concat([self.uid_batch_embedded[:, :18], self.item_eb[:, :36], self.item_his_eb_sum[:, :36], pnn], 1)
413 |         logit = self.build_fcn_net(inp, use_dice=False)
414 |         self.build_loss(logit)
415 |
416 | def FMLayer(feas, output_dim=1):
417 |     feas = tf.stack(feas, axis=1)
418 |     square_of_sum = tf.reduce_sum(feas, axis=1, keep_dims=True) ** 2
419 |     sum_of_square = tf.reduce_sum(feas ** 2, axis=1, keep_dims=True)
420 |     fm_term = 0.5 * tf.reduce_sum(square_of_sum - sum_of_square, axis=2, keep_dims=False)
421 |     if output_dim==2:
422 |         fm_term = tf.concat([fm_term, tf.zeros_like(fm_term)], axis=1)
423 |     return fm_term
424 |
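# Note: FMLayer uses the standard factorization-machine identity: the pairwise
# interaction term sum_{i<j} <v_i, v_j> equals 0.5 * ((sum_i v_i)^2 - sum_i v_i^2),
# reduced over the embedding axis, which costs O(n*k) instead of enumerating all pairs.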
tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 452 | w_cate_var = tf.get_variable("w_cate_var", [n_mid, 1], trainable=True) 453 | wx = [] 454 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 455 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 456 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 457 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 458 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 459 | 460 | wx = tf.concat(wx, axis=1) 461 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 462 | 463 | with tf.name_scope('FFM_embedding'): 464 | 465 | FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True) 466 | FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True) 467 | item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph) 468 | item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_his_batch_ph) 469 | item_his_sum = tf.reduce_sum(item_his_emb, axis=1) 470 | 471 | cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph) 472 | cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph) 473 | cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1) 474 | 475 | fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum] 476 | feas = tf.stack(fea_list, axis=1) 477 | num = len(fea_list) 478 | rows, cols = [], [] 479 | for i in range(num-1): 480 | for j in range(i+1, num): 481 | rows.append([i, j-1]) 482 | cols.append([j, i]) 483 | p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2]) 484 | q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2]) 485 | ffm_term = tf.reduce_sum(p * q, axis=2) 486 | ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True) 487 | logit = lr_term + ffm_term 488 | self.build_loss(logit) 489 | 490 | 491 | class Model_DeepFFM(Model): 492 | def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False): 493 | super(Model_DeepFFM, self).__init__(n_uid, n_mid, n_cate, n_carte,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax) 494 | 495 | w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 496 | w_cate_var = tf.get_variable("w_cate_var", [n_mid, 1], trainable=True) 497 | wx = [] 498 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 499 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 500 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 501 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 502 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 503 | 504 | wx = tf.concat(wx, axis=1) 505 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 506 | 507 | with tf.name_scope('FFM_embedding'): 508 | 509 | FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True) 510 | FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True) 511 | item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph) 512 | item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, 
self.mid_his_batch_ph) 513 | item_his_sum = tf.reduce_sum(item_his_emb, axis=1) 514 | 515 | cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph) 516 | cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph) 517 | cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1) 518 | 519 | fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum] 520 | feas = tf.stack(fea_list, axis=1) 521 | num = len(fea_list) 522 | rows, cols = [], [] 523 | for i in range(num-1): 524 | for j in range(i+1, num): 525 | rows.append([i, j-1]) 526 | cols.append([j, i]) 527 | p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2]) 528 | q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2]) 529 | ffm_term = tf.reduce_sum(p * q, axis=2) 530 | ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True) 531 | 532 | inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) 533 | dnn_term = self.build_fcn_net(inp, use_dice=False) 534 | 535 | logit = dnn_term + lr_term + ffm_term 536 | self.build_loss(logit) 537 | 538 | class Model_DeepFM(Model): 539 | def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False): 540 | super(Model_DeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax) 541 | w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 542 | w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True) 543 | wx = [] 544 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 545 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 546 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 547 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 548 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 549 | 550 | wx = tf.concat(wx, axis=1) 551 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 552 | 553 | inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) 554 | logit = self.build_fcn_net(inp, use_dice=False) 555 | 556 | fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)] 557 | fm_term = FMLayer(fea_list) 558 | logit = tf.layers.dense(tf.concat([logit, fm_term, lr_term], axis=1), 1, activation=None, name='fm_fc') 559 | #self.l2_loss = 0.01 * tf.add_n([tf.nn.l2_loss(v) for v in [wx, self.item_eb, self.item_his_eb_sum]]) 560 | self.build_loss(logit, L2=False) 561 | 562 | def ExtremeFMLayer(feas, dim, output_dim=1): 563 | num = len(feas) 564 | feas = tf.stack(feas, axis=1) # batch, field_num, emb_dim 565 | hidden_nn_layers = [] 566 | field_nums = [num] 567 | final_len = 0 568 | hidden_nn_layers.append(feas) 569 | final_result = [] 570 | cross_layers = [256, 256, 256] 571 | 572 | split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2) 573 | 574 | with tf.variable_scope("xfm", initializer=tf.contrib.layers.xavier_initializer(uniform=True)) as scope: 575 | for idx, layer_size in enumerate(cross_layers): 576 | split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2) 577 | dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True) 578 | dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, field_nums[0] * 
578 |             dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, field_nums[0] * field_nums[-1]])
579 |             dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])
580 |
581 |             filters = tf.get_variable(name="f_" + str(idx),
582 |                                       shape=[1, field_nums[-1] * field_nums[0], layer_size],
583 |                                       dtype=tf.float32)
584 |
585 |             curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID')
586 |             curr_out = tf.transpose(curr_out, perm=[0, 2, 1])
587 |
588 |             if idx != len(cross_layers) - 1:
589 |                 next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1)
590 |                 final_len += int(layer_size / 2)
591 |             else:
592 |                 direct_connect = curr_out
593 |                 next_hidden = 0
594 |                 final_len += layer_size
595 |             field_nums.append(int(layer_size / 2))
596 |
597 |             final_result.append(direct_connect)
598 |             hidden_nn_layers.append(next_hidden)
599 |
600 |
601 |         result = tf.concat(final_result, axis=1)
602 |         result = tf.reduce_sum(result, -1)
603 |
604 |         w_nn_output = tf.get_variable(name='w_nn_output',
605 |                                       shape=[final_len, 1],
606 |                                       dtype=tf.float32)
607 |         b_nn_output = tf.get_variable(name='b_nn_output',
608 |                                       shape=[1],
609 |                                       dtype=tf.float32,
610 |                                       initializer=tf.zeros_initializer())
611 |         xfm_term = tf.matmul(result, w_nn_output) + b_nn_output
612 |
613 |     if output_dim==2:
614 |         xfm_term = tf.concat([xfm_term, tf.zeros_like(xfm_term)], axis=1)
615 |     return xfm_term
616 |
617 | class Model_xDeepFM(Model):
618 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
619 |         super(Model_xDeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
620 |
621 |         w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
622 |         w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
623 |         wx = []
624 |         wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
625 |         wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
626 |         wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
627 |         wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
628 |         b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
629 |
630 |         wx = tf.concat(wx, axis=1)
631 |         lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b
632 |
633 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
634 |         mlp_term = self.build_fcn_net(inp, use_dice=False)
635 |
636 |         fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)]
637 |         fm_term = ExtremeFMLayer(fea_list, EMBEDDING_DIM)
638 |         self.build_loss(mlp_term + fm_term)
639 |
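# Reading aid for the CIN in ExtremeFMLayer above: each layer takes outer products
# between the current feature maps and the layer-0 maps along the embedding axis,
# compresses them with a 1-D convolution, and then splits the resulting maps in half --
# one half feeds the next layer, the other half is wired straight to the output
# (direct_connect); the last layer is connected to the output in full.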
640 | class Model_PIN(Model):
641 |     def __init__(self,n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
642 |         super(Model_PIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
643 |                                         HIDDEN_SIZE)  # HIDDEN_SIZE stands in for ATTENTION_SIZE, which the base class does not use
644 |
645 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
646 |         logit = self.build_fcn_net(inp, use_dice=False)
647 |
648 |         feas = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1), tf.reduce_sum(self.cate_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1)]
649 |
650 |         self.feas = feas
651 |         row, col = [], []
652 |         num = len(feas)
653 |         for i in range(num - 1):
654 |             for j in range(i+1, num):
655 |                 row.append(i)
656 |                 col.append(j)
657 |         pairs = len(row)
658 |         p = tf.stack([feas[i] for i in row], axis=1)
659 |         q = tf.stack([feas[i] for i in col], axis=1)
660 |         pq = p * q
661 |         inp = tf.concat([p,q,pq], axis=2)  # batch, pair, 3*dim
662 |         logit = self.pin(inp)
663 |         self.build_loss(logit)
664 |
665 |     def pin(self, inp):
666 |         batch, pair, dim = inp.shape.as_list()
667 |         with tf.variable_scope('product_network'):
668 |             inp = tf.transpose(inp, [1,0,2])
669 |             x = tf.layers.dense(inp, 20, activation=None, name='fc1')
670 |             x = tf.layers.batch_normalization(x, name='bn1')
671 |             x = tf.nn.relu(x)
672 |             x = tf.layers.dense(x, 1, activation=None, name='fc2')
673 |             x = tf.layers.batch_normalization(x, name='bn2')
674 |             x = tf.transpose(x, [1,0,2])
675 |             sub_out = tf.reshape(x, [-1, pair * dim])
676 |
677 |         with tf.variable_scope('network'):
678 |             new_inp = tf.concat(self.feas+[sub_out], axis=1)
679 |             x = tf.layers.dense(sub_out, 400, activation=tf.nn.relu, name='fc1')
680 |             x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc2')
681 |             x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc3')
682 |             x = tf.layers.dense(x, 1, activation=None, name='fc4')
683 |             return x
684 |
685 | class Model_ONN(Model):
686 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
687 |         super(Model_ONN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
688 |
689 |         dim = 5
690 |         self.item_embedding_var = tf.get_variable("item_embedding_var_onn", [n_mid, dim * 3], trainable=True)
691 |         self.item_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_batch_ph)
692 |         self.item_his_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_his_batch_ph)
693 |         self.item_his_emb_sum = tf.reduce_mean(self.item_his_emb, axis=1)
694 |
695 |         self.cate_embedding_var = tf.get_variable("cate_embedding_var_onn", [n_cate, dim * 3], trainable=True)
696 |         self.cate_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_batch_ph)
697 |         self.cate_his_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_his_batch_ph)
698 |         self.cate_his_emb_sum = tf.reduce_mean(self.cate_his_emb, axis=1)
699 |
700 |         fea_list = [self.item_emb, self.cate_emb, self.item_his_emb_sum, self.cate_his_emb_sum]
701 |         onn = ProductLayer(fea_list, dim, False)
702 |
703 |         inp = tf.concat([self.uid_batch_embedded, self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1), onn], 1)
704 |         logit = self.build_fcn_net(inp, use_dice=False)
705 |         self.build_loss(logit)
706 |
707 | class Model_WideDeep(Model):
708 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False):
709 |         super(Model_WideDeep, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
710 |                                              ATTENTION_SIZE,
711 |                                              use_negsampling)
712 |
713 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
714 |         # Fully connected layer
715 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
716 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
717 |         dnn1 = prelu(dnn1, 'p1')
718 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
719 |         dnn2 = prelu(dnn2, 'p2')
720 |         dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3')
721 |         d_layer_wide = tf.concat([tf.concat([self.item_eb,self.item_his_eb_sum], axis=-1),
722 |                                   self.item_eb * self.item_his_eb_sum], axis=-1)
723 |         d_layer_wide = tf.layers.dense(d_layer_wide, 2, activation=None, name='f_fm')
724 |         self.y_hat = tf.nn.softmax(dnn3 + d_layer_wide)
725 |
726 |         with tf.name_scope('Metrics'):
727 |             # Cross-entropy loss and optimizer initialization
728 |             self.loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
729 |             tf.summary.scalar('loss', self.loss)
730 |             self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
731 |
732 |             # Accuracy metric
733 |             self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
734 |             tf.summary.scalar('accuracy', self.accuracy)
735 |         self.merged = tf.summary.merge_all()
736 |
737 | class Model_DNN(Model):
738 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True, use_coaction=False, use_cartes=False):
739 |         #EMBEDDING_DIM = 4
740 |         super(Model_DNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
741 |                                         ATTENTION_SIZE,
742 |                                         use_negsampling, use_softmax=use_softmax, use_coaction=use_coaction, use_cartes=use_cartes)
743 |
744 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum]+self.cross, 1)
745 |         logit = self.build_fcn_net(inp, use_dice=False)
746 |         self.build_loss(logit)
747 |
748 |
749 | class Model_DIN(Model):
750 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
751 |         super(Model_DIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
752 |                                         ATTENTION_SIZE,
753 |                                         use_negsampling, use_softmax=use_softmax)
754 |
755 |         # Attention layer
756 |         with tf.name_scope('Attention_layer'):
757 |             attention_output = din_attention(self.item_eb, self.item_his_eb, ATTENTION_SIZE, self.mask)
758 |             att_fea = tf.reduce_sum(attention_output, 1)
759 |             tf.summary.histogram('att_fea', att_fea)
760 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, att_fea], -1)
761 |         # Fully connected layer
762 |         logit = self.build_fcn_net(inp, use_dice=True)
763 |         self.build_loss(logit)
764 |
765 |
766 | class Model_DIEN(Model):
767 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=True, use_coaction=False):
768 |         super(Model_DIEN, self).__init__(n_uid, n_mid, n_cate, n_carte,
769 |                                          EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
770 |                                          use_negsampling, use_coaction=use_coaction)
771 |
772 |         # RNN layer(-s)
773 |         with tf.name_scope('rnn_1'):
774 |             rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), inputs=self.item_his_eb,
775 |                                          sequence_length=self.seq_len_ph, dtype=tf.float32,
776 |                                          scope="gru1")
777 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
778 |
779 |         aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
780 |                                          self.noclk_item_his_eb[:, 1:, :],
781 |                                          self.mask[:, 1:], stag="gru")
782 |         self.aux_loss = aux_loss_1
783 |
784 |         # Attention layer
785 |         with tf.name_scope('Attention_layer_1'):
786 |             att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, ATTENTION_SIZE, self.mask,
787 |                                                     softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True)
788 |             tf.summary.histogram('alpha_outputs', alphas)
789 |
790 |         with tf.name_scope('rnn_2'):
791 |             rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
792 |                                                      att_scores = tf.expand_dims(alphas, -1),
793 |                                                      sequence_length=self.seq_len_ph, dtype=tf.float32,
794 |                                                      scope="gru2")
795 |             tf.summary.histogram('GRU2_Final_State', final_state2)
796 |
797 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, final_state2]+self.cross, 1)
798 |         prop = self.build_fcn_net(inp, use_dice=True)
799 |         self.build_loss(prop)
800 |
--------------------------------------------------------------------------------
/script/model_avazu.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | import tensorflow as tf
3 | from utils import *
4 | from tensorflow.python.ops.rnn_cell import GRUCell
5 | import mimn as mimn
6 | import rum as rum
7 | from rnn import dynamic_rnn
8 | # import mann_simple_cell as mann_cell
9 | import random
10 |
11 | ### Exp config ###
12 |
13 | feature_num = [
14 |     264,7,7,4842,7912,26,9136,580,36,
15 |     7338655,8303,5,4,2885,8,9,474,4,69,172,62
16 | ]
17 | # id starts with 1
18 | id_offset = [0] + [sum(feature_num[:i]) for i in range(1, len(feature_num))]
19 |
20 | emb_as_weight = True #False #True
21 | use_new_seq_emb = True #False # True
22 | #edge_type = "item"
23 | edge_type = "3-9"
24 | use_cartes = ["item-his_item"]
25 | use_cartes = ["cate-his_cate"]
26 | use_cartes = [
27 |     "3-9", "3-10", "4-9", "4-10", "6-9", "6-10", "7-9", "7-10",
28 |     "16-9", "16-10", "19-9", "19-10", "13-16-19", "13-16-19-9", "13-16-19-10",
29 |     "16-3", "16-6", "19-3", "19-6", "13-16-19-3", "13-16-19-6"
30 | ]
31 | use_cartes = []
32 |
33 | WEIGHT_EMB_NUM = 1
34 | orders = 5
35 | CALC_MODE = "poly_x_x4"
36 | weight_emb_w, weight_emb_b = [], []
37 | alpha = 1
38 | if CALC_MODE in ["seq_sum", "seq", "emb"]:
39 |     weight_emb_w = [[4, 3], [3,4]]
40 |     #weight_emb_w = [[16, 3], [3,4]]
41 |     #weight_emb_w = [[16, 3], [3,4], [4,5],[5,5]]
42 |     weight_emb_b = [3, 0]
43 |     #weight_emb_b = [3, 4, 5, 0]
44 |     WEIGHT_EMB_DIM = sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b)
45 | elif CALC_MODE.startswith("poly"):
46 |     WEIGHT_EMB_DIM = 16
47 |     if "vec" in CALC_MODE:
48 |         WEIGHT_EMB_DIM = int(WEIGHT_EMB_DIM ** 0.5)
49 |     elif "wx_ind" in CALC_MODE:
50 |         WEIGHT_EMB_DIM *= 2
51 |     elif "x_ind" in CALC_MODE:
52 |         WEIGHT_EMB_DIM *= orders
53 |     elif "x4" in CALC_MODE:
54 |         alpha = 4
55 |         WEIGHT_EMB_DIM *= alpha**2
56 |
57 | keep_fake_carte_seq = False # True
58 | carte_with_gru = True #False
59 |
60 | carte_num_dict = {
61 |     "3-6": 8315+1,
62 |
63 |     "4-7": 4547+1,
64 |     "3-9": 2102068+1,
65 |     "3-10": 161045+1,
66 |     "4-9": 2073680+1,
67 |     "4-10": 146645+1,
68 |     "6-9": 1851115+1,
69 |     "6-10": 93771+1,
70 |     "7-9": 1765776+1,
71 |     "7-10": 23738+1,
72 |     "16-9": 2135855+1,
73 |     "16-10": 128321+1,
74 |     "19-9": 1637771+1,
75 |     "19-10": 57099+1,
76 |     "13-16-19": 16905+1,
77 |     "13-16-19-9": 2579867+1,
78 |     "13-16-19-10": 447410+1,
79 |     "16-3": 33287+1,
80 |     "16-6": 25011+1,
81 |     "19-3": 24748+1,
82 |     "19-6": 22125+1,
83 |     "13-16-19-3": 142791+1,
84 |     "13-16-19-6": 86211+1,
85 | }
86 | if use_cartes:
87 |     n_cid = sum([carte_num_dict[c] for c in use_cartes]) - (len(use_cartes) - 1)
88 | #n_cid = 59201 #6689210 #8586832 #6689210 #6630010
89 |
90 | def eb_as_weight(ad, his_items, dim, mode="seq"):
91 |     ad = tf.reshape(ad, [-1, WEIGHT_EMB_DIM])
92 |     weight, bias = [], []
93 |     idx = 0
94 |     for w, b in zip(weight_emb_w, weight_emb_b):
95 |         weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]]))
96 |         idx += w[0] * w[1]
97 |         if b == 0:
98 | bias.append(None) 99 | else: 100 | bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b])) 101 | idx += b 102 | 103 | if mode == "seq_sum": 104 | his_items_sum = tf.reduce_sum(his_items, 1) 105 | his_items_sum = tf.reshape(his_items_sum, [-1, 1, dim]) 106 | out_seq = tf.nn.selu(tf.matmul(his_items_sum, w_1) + b) 107 | out_seq = tf.matmul(out_seq, w_2) 108 | out = tf.reduce_sum(out_seq, 1) 109 | elif mode == "seq": 110 | his_items_ = tf.unstack(his_items, axis=1) 111 | out_seq = [] 112 | for item in his_items_: 113 | item = tf.reshape(item, [-1, 1, dim]) 114 | #out.append(tf.nn.selu(tf.matmul(item, w) + b)) 115 | h = item 116 | for w, b in zip(weight, bias): 117 | h = tf.matmul(h, w) 118 | if b is not None: 119 | h = tf.nn.selu(h + b) 120 | out_seq.append(h) 121 | #h = tf.nn.selu(tf.matmul(item, w_1) + b) 122 | #out_seq.append(tf.matmul(h, w_2)) 123 | out_seq = tf.concat(out_seq, 1) 124 | out = tf.reduce_sum(out_seq, 1) 125 | elif mode == "emb": 126 | inp = his_items 127 | h = tf.reshape(inp, [-1, 1, dim]) 128 | for w, b in zip(weight, bias): 129 | h = tf.matmul(h, w) 130 | if b is not None: 131 | h = tf.nn.selu(h + b) 132 | out = h 133 | out = tf.reduce_sum(out, 1) 134 | elif mode == "poly": 135 | h = tf.reshape(his_items, [-1, 1, dim]) 136 | w = tf.reshape(ad, [-1, dim, dim]) 137 | ww = [w**(i+1) for i in range(orders)] 138 | for i in range(orders): 139 | h = tf.matmul(h, ww[i]) 140 | #if i < 2: 141 | h = tf.nn.tanh(h) 142 | out = h 143 | out = tf.reduce_sum(out, 1) 144 | elif mode == "poly_w": 145 | h = tf.reshape(his_items, [-1, 1, dim]) 146 | w = tf.reshape(ad, [-1, dim, dim]) 147 | ww = [w**(i+1) for i in range(orders)] 148 | out = [] 149 | for i in range(orders): 150 | out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) 151 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 152 | elif mode == "poly_x": 153 | h = tf.reshape(his_items, [-1, 1, dim]) 154 | w = tf.reshape(ad, [-1, dim, dim]) 155 | hh = [h**(i+1) for i in range(orders)] 156 | out = [] 157 | for i in range(orders): 158 | #out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 159 | out.append(tf.matmul(hh[i], w)) 160 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 161 | elif mode == "poly_x_x4": 162 | h = tf.reshape(his_items, [-1, 1, dim * alpha]) 163 | w = tf.reshape(ad, [-1, dim*alpha, dim*alpha]) 164 | hh = [h**(i+1) for i in range(orders)] 165 | out = [] 166 | for i in range(orders): 167 | out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 168 | #out.append(tf.matmul(hh[i], w)) 169 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 170 | elif mode == "poly_x_ind": 171 | h = tf.reshape(his_items, [-1, 1, dim]) 172 | ww = tf.split(ad, num_or_size_splits=orders, axis=1) 173 | ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] 174 | hh = [h**(i+1) for i in range(orders)] 175 | out = [] 176 | for i in range(orders): 177 | out.append(tf.nn.tanh(tf.matmul(hh[i], ww[i]))) 178 | #out.append(tf.matmul(hh[i], ww[i])) 179 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 180 | elif mode == "poly_wx": 181 | h = tf.reshape(his_items, [-1, 1, dim]) 182 | w = tf.reshape(ad, [-1, dim, dim]) 183 | ww = [w**(i+1) for i in range(orders)] 184 | hh = [h**(i+1) for i in range(orders)] 185 | out = [] 186 | for i in range(orders): 187 | out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 188 | out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) 189 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 190 | elif mode == "poly_wx_ind": 191 | h = tf.reshape(his_items, [-1, 1, dim]) 192 | ww = tf.split(ad, num_or_size_splits=2, axis=1) 193 | ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] 194 
| ww1 = [ww[1]**(i+1) for i in range(orders)]
195 |         hh = [h**(i+1) for i in range(orders)]
196 |         out = []
197 |         for i in range(orders):
198 |             out.append(tf.nn.tanh(tf.matmul(hh[i], ww[0])))
199 |             out.append(tf.nn.tanh(tf.matmul(h, ww1[i])))
200 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
201 |     elif mode == "poly_x_vec":
202 |         h = tf.reshape(his_items, [-1, 1, dim])
203 |         w = tf.reshape(ad, [-1, 1, dim])
204 |         hh = [h**(i+1) for i in range(orders)]
205 |         out = []
206 |         for i in range(orders):
207 |             out.append(tf.nn.tanh(hh[i] * w))
208 |             #out.append(hh[i] * w)
209 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
210 |         #out = tf.reduce_sum(tf.concat(out, axis=1), 1)  # duplicate of the line above; reducing a second time would fail
211 |     elif mode == "poly_pure":
212 |         h = tf.reshape(his_items, [-1, 1, dim])
213 |         w = tf.reshape(ad, [-1, dim, dim])
214 |         ww = [w**(i+1) for i in range(orders)]
215 |         hh = [h**(i+1) for i in range(orders)]
216 |         out = []
217 |         for i in range(orders):
218 |             for j in range(orders):
219 |                 out.append(tf.nn.tanh(tf.matmul(hh[i], ww[j])))
220 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
221 | 
222 |     #out = tf.nn.selu(out)
223 |     if keep_fake_carte_seq and mode == "seq":
224 |         return out, out_seq
225 |     return out, None
226 | 
227 | def FM(feas):
228 |     feas = tf.stack(feas, axis=1)
229 |     square_of_sum = tf.reduce_sum(feas, axis=1) ** 2
230 |     sum_of_square = tf.reduce_sum(feas ** 2, axis=1)
231 |     return 0.5 * (square_of_sum - sum_of_square)  # pairwise interactions: sum_{i<j} v_i*v_j = 0.5*((sum_i v_i)^2 - sum_i v_i^2)
232 | 
233 | class Model(object):
234 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN, use_negsample=False, Flag="DNN"):
235 |         self.model_flag = Flag
236 |         self.reg = False
237 |         self.use_negsample = use_negsample
238 |         with tf.name_scope('Inputs'):
239 |             self.user_batch_ph = tf.placeholder(tf.int32, [None, None], name='user_batch_ph')
240 |             self.ad_batch_ph = tf.placeholder(tf.int32, [None, None], name='ad_batch_ph')
241 |             self.scene_batch_ph = tf.placeholder(tf.int32, [None, None], name='scene_batch_ph')
242 |             self.time_batch_ph = tf.placeholder(tf.int32, [None, ], name='time_batch_ph')
243 |             self.clk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='clk_seq_batch_ph')
244 |             self.carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='carte_batch_ph')
245 |             #self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None], name='noclk_seq_batch_ph')
246 |             '''
247 |             self.item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_carte_batch_ph')
248 |             self.cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_carte_batch_ph')
249 |             self.item_cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_cate_carte_batch_ph')
250 |             self.cate_item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_item_carte_batch_ph')
251 |             '''
252 |             self.clk_mask = tf.placeholder(tf.float32, [None, None], name='clk_mask_batch_ph')
253 |             self.target_ph = tf.placeholder(tf.float32, [None, 2], name='target_ph')
254 |             self.lr = tf.placeholder(tf.float64, [])
255 | 
256 |         # Embedding layer
257 |         with tf.name_scope('Embedding_layer'):
258 | 
259 |             ad_ph = tf.split(self.ad_batch_ph, num_or_size_splits=10, axis=1)
260 |             scene_ph = tf.split(self.scene_batch_ph, num_or_size_splits=6, axis=1)
261 |             user_ph = tf.split(self.user_batch_ph, num_or_size_splits=4, axis=1)
262 |             feature_ph = [self.time_batch_ph] + ad_ph[:2] + scene_ph + user_ph + ad_ph[2:]
263 | 
264 |             self.embedding_vars = []
265 |             features = []
266 |             for i, num in enumerate(feature_num):
267 |                 self.embedding_vars.append(tf.get_variable("embedding_var_fea{}".format(i), [num, EMBEDDING_DIM], trainable=True))
268 |                 features.append(tf.nn.embedding_lookup(self.embedding_vars[i], feature_ph[i] - id_offset[i]))  # ids are globally offset per field, so subtract id_offset[i] before the lookup
269 | 
270 |             self.user_batch_embedded = tf.concat(features[9:13], axis=1)
271 |             self.ad_batch_embedded = tf.concat(features[1:3]+features[13:], axis=1)
272 |             self.scene_batch_embedded = tf.concat(features[3:9], axis=1)
273 |             self.time_batch_embedded = features[0]
274 |             self.clk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.clk_seq_batch_ph)
275 | 
276 |             if use_cartes:
277 |                 self.carte_embeddings_var = []
278 |                 self.carte_batch_embedded = []
279 |                 for i, c in enumerate(use_cartes):
280 |                     self.carte_embeddings_var.append(tf.get_variable("carte_embedding_var_{}".format(c), [carte_num_dict[c], EMBEDDING_DIM], trainable=True))
281 |                     self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embeddings_var[i], self.carte_batch_ph[:, i]))
282 | 
283 |             ### fake carte ###
284 |             if emb_as_weight:
285 |                 '''
286 |                 TODO: support multi-group cartesian feature, e.g., 13-16-19
287 |                 '''
288 |                 idx_w, idx_x = map(int, edge_type.split('-'))
289 | 
290 |                 self.weight_embeddings_var = tf.get_variable("weight_embedding_var", [feature_num[idx_w] + 1, WEIGHT_EMB_NUM * WEIGHT_EMB_DIM], trainable=True)
291 |                 self.weight_batch_embedded = tf.nn.embedding_lookup(self.weight_embeddings_var, feature_ph[idx_w])
292 |                 if use_new_seq_emb:
293 |                     self.seq_embeddings_var = tf.get_variable("seq_embedding_var", [feature_num[idx_x], EMBEDDING_DIM * alpha], trainable=True)
294 |                     self.seq_his_batch_embedded = tf.nn.embedding_lookup(self.seq_embeddings_var, feature_ph[idx_x])
295 | 
296 |         with tf.name_scope('init_operation'):
297 |             for i, num in enumerate(feature_num):
298 |                 embedding_placeholder = tf.placeholder(tf.float32,[num, EMBEDDING_DIM], name="emb_ph_{}".format(i))
299 |                 self.embedding_vars[i].assign(embedding_placeholder)  # NOTE: this assign op is created but never stored or run
300 | 
301 |             if use_cartes:
302 |                 self.carte_embedding_placeholder = []
303 |                 self.carte_embedding_init = []
304 |                 for i, c in enumerate(use_cartes):
305 |                     self.carte_embedding_placeholder.append(tf.placeholder(tf.float32,[carte_num_dict[c], EMBEDDING_DIM], name="cid_emb_ph"))
306 |                     self.carte_embedding_init.append(self.carte_embeddings_var[i].assign(self.carte_embedding_placeholder[i]))
307 | 
308 |         if self.use_negsample:
309 |             self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_seq_batch_ph')
310 |             self.noclk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.noclk_seq_batch_ph)
311 |             self.noclk_mask = tf.placeholder(tf.float32, [None, None], name='noclk_mask_batch_ph')
312 |             #self.mid_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_his_batch_ph')
313 |             #self.cate_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_cate_his_batch_ph')
314 | 
315 |             #self.neg_item_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_neg_batch_ph)
316 |             #self.neg_cate_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.cate_neg_batch_ph)
317 |             #self.neg_his_eb = tf.concat([self.neg_item_his_eb,self.neg_cate_his_eb], axis=2) * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1))
318 |             self.noclk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.noclk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.noclk_mask,(BATCH_SIZE, SEQ_LEN, 1))
319 | 
320 |         self.user_eb = tf.reshape(self.user_batch_embedded, [-1, EMBEDDING_DIM * 4]) # [batch, 4, dim] -> [batch, 
4*dim] 321 | self.ad_eb = tf.reshape(self.ad_batch_embedded, [-1, EMBEDDING_DIM * 10]) 322 | self.scene_eb = tf.reshape(self.scene_batch_embedded, [-1, EMBEDDING_DIM * 6]) 323 | self.time_eb = self.time_batch_embedded 324 | 325 | self.clk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.clk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.clk_mask, (BATCH_SIZE, SEQ_LEN, 1)) 326 | self.clk_seq_eb_sum = tf.reduce_sum(self.clk_seq_eb, 1) 327 | 328 | 329 | self.carte_embs = [] 330 | if use_cartes: 331 | self.carte_embs += self.carte_batch_embedded 332 | 333 | if emb_as_weight: 334 | if use_new_seq_emb: 335 | seq_his_batch = self.seq_his_batch_embedded 336 | else: 337 | seq_his_batch = features[int(edge_type.split('-')[1])] 338 | tmp_sum, tmp_seq = [], [] 339 | if CALC_MODE.startswith("seq"): 340 | shape = (BATCH_SIZE, SEQ_LEN, EMBEDDING_DIM) 341 | else: 342 | shape = (BATCH_SIZE, EMBEDDING_DIM * alpha) 343 | for i in range(WEIGHT_EMB_NUM): 344 | fake_carte_sum, fake_carte_seq = eb_as_weight(self.weight_batch_embedded[:, i * WEIGHT_EMB_DIM: (i+1) * WEIGHT_EMB_DIM], tf.reshape(seq_his_batch, shape), EMBEDDING_DIM, mode=CALC_MODE) 345 | tmp_sum.append(fake_carte_sum) 346 | tmp_seq.append(fake_carte_seq) 347 | self.fake_carte_sum = tf.concat(tmp_sum, axis=1) 348 | if keep_fake_carte_seq: 349 | self.fake_carte_seq = tmp_seq 350 | 351 | 352 | def build_fcn_net(self, inp, use_dice = False): 353 | bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1') 354 | dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1') 355 | if use_dice: 356 | dnn1 = dice(dnn1, name='dice_1') 357 | else: 358 | dnn1 = prelu(dnn1, scope='prelu_1') 359 | 360 | dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2') 361 | if use_dice: 362 | dnn2 = dice(dnn2, name='dice_2') 363 | else: 364 | dnn2 = prelu(dnn2, scope='prelu_2') 365 | 366 | dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3') 367 | self.y_hat = tf.nn.softmax(dnn3) + 0.00000001 368 | 369 | with tf.name_scope('Metrics'): 370 | # Cross-entropy loss and optimizer initialization 371 | ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph) 372 | self.loss = ctr_loss 373 | if self.use_negsample: 374 | self.loss += self.aux_loss 375 | if self.reg: 376 | self.loss += self.reg_loss 377 | 378 | tf.summary.scalar('loss', self.loss) 379 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss) 380 | # Accuracy metric 381 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32)) 382 | tf.summary.scalar('accuracy', self.accuracy) 383 | 384 | self.merged = tf.summary.merge_all() 385 | 386 | def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask = None, stag = None): 387 | #mask = tf.cast(mask, tf.float32) 388 | if noclk_mask is None: 389 | noclk_mask = clk_mask 390 | click_input_ = tf.concat([h_states, click_seq], -1) 391 | noclick_input_ = tf.concat([h_states, noclick_seq], -1) 392 | click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0] 393 | noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0] 394 | 395 | click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * clk_mask 396 | noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * noclk_mask 397 | 398 | loss_ = tf.reduce_mean(click_loss_ + noclick_loss_) 399 | return loss_ 400 | 401 | def auxiliary_net(self, in_, stag='auxiliary_net'): 402 | bn1 = 
tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE) 403 | dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE) 404 | dnn1 = tf.nn.sigmoid(dnn1) 405 | dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE) 406 | dnn2 = tf.nn.sigmoid(dnn2) 407 | dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE) 408 | y_hat = tf.nn.softmax(dnn3) + 0.000001 409 | return y_hat 410 | 411 | def init_uid_weight(self, sess, uid_weight): 412 | sess.run(self.uid_embedding_init,feed_dict={self.uid_embedding_placeholder: uid_weight}) 413 | 414 | def init_mid_weight(self, sess, mid_weight): 415 | sess.run([self.mid_embedding_init],feed_dict={self.mid_embedding_placeholder: mid_weight}) 416 | 417 | def save_mid_embedding_weight(self, sess): 418 | embedding = sess.run(self.mid_embeddings_var) 419 | return embedding 420 | 421 | def save_uid_embedding_weight(self, sess): 422 | embedding = sess.run(self.uid_bp_memory) 423 | return embedding 424 | 425 | def train(self, sess, inps): 426 | input_dict = { 427 | self.user_batch_ph: inps[0], 428 | self.ad_batch_ph: inps[1], 429 | self.scene_batch_ph: inps[2], 430 | self.time_batch_ph: inps[3], 431 | self.clk_seq_batch_ph: inps[4], 432 | self.clk_mask: inps[6], 433 | self.target_ph: inps[-2], 434 | self.lr: inps[-1], 435 | } 436 | if use_cartes: 437 | input_dict[self.carte_batch_ph] = inps[-3] 438 | if "item-his_item" in use_cartes: 439 | input_dict[self.item_carte_batch_ph] = inps[10] 440 | if "cate-his_cate" in use_cartes: 441 | input_dict[self.cate_carte_batch_ph] = inps[11] 442 | if "item-his_cate" in use_cartes: 443 | input_dict[self.item_cate_carte_batch_ph] = inps[12] 444 | if "cate-his_item" in use_cartes: 445 | input_dict[self.cate_item_carte_batch_ph] = inps[13] 446 | 447 | if self.use_negsample: 448 | input_dict[self.noclk_seq_batch_ph] = inps[5] 449 | input_dict[self.noclk_mask] = inps[7] 450 | loss, aux_loss, accuracy, _ = sess.run([self.loss, self.aux_loss, self.accuracy, self.optimizer], feed_dict=input_dict) 451 | else: 452 | loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict=input_dict) 453 | aux_loss = 0 454 | return loss, accuracy, aux_loss 455 | 456 | def calculate(self, sess, inps): 457 | input_dict = { 458 | self.user_batch_ph: inps[0], 459 | self.ad_batch_ph: inps[1], 460 | self.scene_batch_ph: inps[2], 461 | self.time_batch_ph: inps[3], 462 | self.clk_seq_batch_ph: inps[4], 463 | self.clk_mask: inps[6], 464 | self.target_ph: inps[-1], 465 | } 466 | if use_cartes: 467 | input_dict[self.carte_batch_ph] = inps[-2] 468 | 469 | if "item-his_item" in use_cartes: 470 | input_dict[self.item_carte_batch_ph] = inps[9] 471 | if "cate-his_cate" in use_cartes: 472 | input_dict[self.cate_carte_batch_ph] = inps[10] 473 | if "item-his_cate" in use_cartes: 474 | input_dict[self.item_cate_carte_batch_ph] = inps[11] 475 | if "cate-his_item" in use_cartes: 476 | input_dict[self.cate_item_carte_batch_ph] = inps[12] 477 | 478 | if self.use_negsample: 479 | input_dict[self.noclk_seq_batch_ph] = inps[5] 480 | input_dict[self.noclk_mask] = inps[7] 481 | probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict=input_dict) 482 | else: 483 | probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict=input_dict) 484 | aux_loss = 0 485 | return probs, loss, accuracy, aux_loss 486 | 487 | def save(self, sess, path): 488 | saver = tf.train.Saver() 
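
The `auxiliary_loss`/`auxiliary_net` pair defined above is DIEN-style auxiliary supervision: each hidden state is paired with the item actually clicked at the next step (label 1) and with a sampled non-clicked item (label 0), both are scored by a shared MLP, and the masked binary cross-entropies are averaged. A compact sketch of the same idea (TF 1.x; the real `auxiliary_net` above additionally batch-normalizes its input and emits two softmax logits, and the sizes below are illustrative):

import tensorflow as tf

B, T, D = 32, 20, 8                           # illustrative batch / sequence / feature sizes
h = tf.placeholder(tf.float32, [B, T, D])     # GRU states h_1 .. h_T
pos = tf.placeholder(tf.float32, [B, T, D])   # items clicked at the next step
neg = tf.placeholder(tf.float32, [B, T, D])   # sampled non-clicked items
mask = tf.placeholder(tf.float32, [B, T])     # 1 for real steps, 0 for padding

def score(x):                                 # shared discriminator MLP
    with tf.variable_scope('aux', reuse=tf.AUTO_REUSE):
        x = tf.layers.dense(x, 50, activation=tf.nn.sigmoid, name='f1')
        return tf.layers.dense(x, 1, activation=tf.nn.sigmoid, name='f2')[..., 0]

p_pos = score(tf.concat([h, pos], -1))        # P(click | state, item)
p_neg = score(tf.concat([h, neg], -1))
aux_loss = tf.reduce_mean(
    (-tf.log(p_pos + 1e-8) - tf.log(1.0 - p_neg + 1e-8)) * mask)
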
489 |         saver.save(sess, save_path=path)
490 | 
491 |     def restore(self, sess, path):
492 |         saver = tf.train.Saver()
493 |         saver.restore(sess, save_path=path)
494 |         print('model restored from %s' % path)
495 | 
496 | class Model_DNN(Model):
497 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
498 |         super(Model_DNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
499 |                                         BATCH_SIZE, SEQ_LEN, Flag="DNN")
500 | 
501 |         #inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
502 |         if emb_as_weight:
503 |             self.carte_embs.append(self.fake_carte_sum)
504 |         inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb] + self.carte_embs, 1)
505 |         self.build_fcn_net(inp, use_dice=False)
506 | 
507 | 
508 | class Model_FFM(Model):
509 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
510 |         super(Model_FFM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
511 |                                         BATCH_SIZE, SEQ_LEN, Flag="FFM")
512 | 
513 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)  # NOTE: item_eb / item_his_eb_sum are not built by this file's Model
514 |         self.build_fcn_net(inp, use_dice=False)
515 | 
516 | 
517 | 
518 | class Model_PNN(Model):
519 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
520 |         super(Model_PNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
521 |                                         BATCH_SIZE, SEQ_LEN, Flag="PNN")
522 | 
523 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum], 1)
524 |         self.build_fcn_net(inp, use_dice=False)
525 | 
526 | 
527 | class Model_GRU4REC(Model):
528 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
529 |         super(Model_GRU4REC, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
530 |                                             BATCH_SIZE, SEQ_LEN, Flag="GRU4REC")
531 |         with tf.name_scope('rnn_1'):
532 |             self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
533 |             rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
534 |                                                     sequence_length=self.sequence_length, dtype=tf.float32,
535 |                                                     scope="gru1")
536 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
537 | 
538 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1], 1)
539 |         self.build_fcn_net(inp, use_dice=False)
540 | 
541 | 
542 | class Model_DIN(Model):
543 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
544 |         super(Model_DIN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
545 |                                         BATCH_SIZE, SEQ_LEN, Flag="DIN")
546 |         with tf.name_scope('Attention_layer'):
547 |             attention_output = din_attention(self.item_eb, self.item_his_eb, HIDDEN_SIZE, self.mask)
548 |             att_fea = tf.reduce_sum(attention_output, 1)
549 |             tf.summary.histogram('att_fea', att_fea)
550 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, att_fea], -1)
551 |         self.build_fcn_net(inp, use_dice=False)
552 | 
553 | 
554 | class Model_ARNN(Model):
555 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
556 |         super(Model_ARNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
557 |                                          BATCH_SIZE, SEQ_LEN, Flag="ARNN")
558 |         with tf.name_scope('rnn_1'):
559 |             self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
560 |             rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
561 |                                                     sequence_length=self.sequence_length, dtype=tf.float32,
562 |                                                     scope="gru1")
563 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
564 |         # Attention layer
565 |         with tf.name_scope('Attention_layer_1'):
566 |             att_gru = 
din_attention(self.item_eb, rnn_outputs, HIDDEN_SIZE, self.mask) 567 | att_gru = tf.reduce_sum(att_gru, 1) 568 | 569 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1, att_gru], -1) 570 | self.build_fcn_net(inp, use_dice=False) 571 | 572 | class Model_RUM(Model): 573 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, mask_flag=True): 574 | super(Model_RUM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 575 | BATCH_SIZE, SEQ_LEN, Flag="RUM") 576 | 577 | def clear_mask_state(state, begin_state, mask, t): 578 | state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"] 579 | state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"] 580 | return state 581 | 582 | cell = rum.RUMCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1, 583 | reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE) 584 | 585 | state = cell.zero_state(BATCH_SIZE, tf.float32) 586 | begin_state = state 587 | for t in range(SEQ_LEN): 588 | output, state = cell(self.item_his_eb[:, t, :], state) 589 | if mask_flag: 590 | state = clear_mask_state(state, begin_state, self.mask, t) 591 | 592 | final_state = output 593 | before_memory = state['M'] 594 | rum_att_hist = din_attention(self.item_eb, before_memory, HIDDEN_SIZE, None) 595 | 596 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state, tf.squeeze(rum_att_hist)], 1) 597 | 598 | self.build_fcn_net(inp, use_dice=False) 599 | 600 | class Model_DIEN(Model): 601 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=400, use_negsample=False, use_mi_cons=False): 602 | super(Model_DIEN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 603 | BATCH_SIZE, SEQ_LEN, use_negsample, Flag="DIEN") 604 | 605 | with tf.name_scope('rnn_1'): 606 | self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE) 607 | rnn_outputs, _ = dynamic_rnn(GRUCell(10*EMBEDDING_DIM), inputs=self.clk_seq_eb, 608 | sequence_length=self.sequence_length, dtype=tf.float32, 609 | scope="gru1") 610 | tf.summary.histogram('GRU_outputs', rnn_outputs) 611 | 612 | if use_negsample: 613 | if use_mi_cons: 614 | #aux_loss_1 = self.info_NCE(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.mask[:, 1:]) 615 | #aux_loss_1 = self.info_NCE_aux(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.neg_his_eb[:, 1:, :], self.mask[:, 1:]) 616 | aux_loss_1 = self.mi_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :], 617 | self.noclk_seq_eb[:, 1:, :], self.mask[:, 1:], stag = "mi_0") 618 | else: 619 | aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :], 620 | self.noclk_seq_eb[:, 1:, :], self.clk_mask[:, 1:], self.noclk_mask[:, 1:], stag = "bigru_0") 621 | self.aux_loss = aux_loss_1 622 | 623 | # Attention layer 624 | with tf.name_scope('Attention_layer_1'): 625 | att_outputs, alphas = din_attention(self.ad_eb, rnn_outputs, HIDDEN_SIZE, mask=self.clk_mask, mode="LIST", return_alphas=True) 626 | tf.summary.histogram('alpha_outputs', alphas) 627 | 628 | with tf.name_scope('rnn_2'): 629 | rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs, 630 | att_scores = tf.expand_dims(alphas, -1), 631 | sequence_length=self.sequence_length, dtype=tf.float32, 
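
The cell being wired up in this `rnn_2` call, `VecAttGRUCell`, is DIEN's AUGRU: the attention score `a_t` supplied through `att_scores` rescales the update gate, so low-attention steps leave the hidden state almost untouched while high-attention steps update it normally. One step of that recurrence, sketched standalone (illustrative; bias terms omitted, and this is not the cell's actual implementation):

import tensorflow as tf

def augru_step(h_prev, x, a_t, w_gates, w_cand):
    """One AUGRU update; a_t in [0, 1] is this step's attention score ([batch, 1])."""
    hx = tf.concat([h_prev, x], axis=-1)
    z, r = tf.split(tf.sigmoid(tf.matmul(hx, w_gates)), 2, axis=-1)   # update / reset gates
    h_cand = tf.tanh(tf.matmul(tf.concat([r * h_prev, x], axis=-1), w_cand))
    z = a_t * z                        # AUGRU: attention rescales the update gate
    return (1.0 - z) * h_prev + z * h_cand
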
632 | scope="gru2") 633 | tf.summary.histogram('GRU2_Final_State', final_state2) 634 | 635 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum], 1) 636 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum], 1) 637 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1) 638 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_cate_carte_eb_sum], 1) 639 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1) 640 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum, self.cate_carte_eb_sum], 1) 641 | 642 | 643 | #if attention 644 | 645 | if emb_as_weight: 646 | if keep_fake_carte_seq: 647 | if carte_with_gru: 648 | with tf.name_scope('rnn_3'): 649 | self.fake_carte_seq, _ = dynamic_rnn(GRUCell(EMBEDDING_DIM), inputs=self.fake_carte_seq, 650 | sequence_length=self.sequence_length, dtype=tf.float32, 651 | scope="gru3") 652 | 653 | with tf.name_scope('Attention_layer_2'): 654 | carte_att_outputs, _ = din_attention(self.mid_batch_embedded, self.fake_carte_seq, HIDDEN_SIZE, mask=self.mask, stag="carte", mode="SUM", return_alphas=True) 655 | self.carte_embs.append(tf.reduce_sum(carte_att_outputs, 1)) 656 | #self.carte_embs.append(self.fake_carte_sum) 657 | else: 658 | self.carte_embs.append(self.fake_carte_sum) 659 | inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb, final_state2, self.clk_seq_eb_sum, self.ad_eb*self.clk_seq_eb_sum] + self.carte_embs, 1) 660 | self.build_fcn_net(inp, use_dice=False) 661 | 662 | def neg_sample(self, neg_his_emb, K=10, mode="random"): 663 | shape = tf.shape(neg_his_emb) 664 | batch, seq, dim = shape[0], shape[1], shape[2] 665 | 666 | if mode == "random": 667 | neg = tf.expand_dims(neg_his_emb, 1) #[batch, 1, seq, dim] 668 | neg = tf.tile(neg, [1,seq, 1,1]) #[batch, seq, seq, dim] 669 | # index = tf.random_uniform((batch, seq, K), minval=0, maxval=seq, dtype=tf.int32) 670 | # neg = tf.batch_gather(neg, index) #[batch, seq, K, dim] 671 | neg = neg[:, :, :K, :] 672 | return neg 673 | elif mode == "aux": 674 | neg = tf.expand_dims(neg_his_emb, 1) 675 | return neg 676 | 677 | def mi_loss_(self, h_states, click_seq, noclick_seq, mask = None, stag = None): 678 | #mask = tf.cast(mask, tf.float32) 679 | ''' 680 | h = self.mlp(h_states, stag = stag) 681 | pos = self.mlp(click_seq, stag = stag) 682 | neg = self.mlp(noclick_seq, stag = stag) 683 | 684 | scores_pos = tf.matmul(h, pos) 685 | scores_neg = tf.matmul(h, neg) 686 | joint = tf.linalg.diag_part(score_pos) 687 | ''' 688 | pos = tf.concat([h_states, click_seq], axis=2) 689 | f_pos = self.mlp(pos) # [batch, seq, 1] 690 | 691 | K = 99 692 | neg = self.neg_sample(noclick_seq, K) 693 | h_states_tiled = tf.tile(tf.expand_dims(h_states, 2), [1,1,K,1]) # [batch, seq, K, dim] 694 | total = tf.concat([h_states_tiled, neg], axis=3) 695 | f_neg = self.mlp(total) #[batch, seq, K, 1] 696 | f_neg = tf.reduce_sum(f_neg, axis=2) 697 | f_total = f_pos + f_neg 698 | 699 | loss_ = tf.reshape(tf.log(f_pos / f_total), [-1, tf.shape(click_seq)[1]]) * mask 700 | loss_ = - tf.reduce_mean(loss_) 701 | 702 | return loss_ 703 | 704 | def mi_loss(self, h_states, click_seq, noclick_seq, mask, stag='NCE'): 705 | exp = 
'random_1' 706 | if exp == 'random_1': 707 | shape = tf.shape(h_states) 708 | batch, len_seq, dim = shape[0], shape[1], shape[2] 709 | Wk_ct = [] 710 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 711 | x = tf.unstack(x, axis=1) 712 | neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc') 713 | neg = tf.unstack(neg, axis=1) 714 | c_t = tf.unstack(h_states, axis=1) 715 | with tf.name_scope(stag): 716 | for i in range(len(c_t)): 717 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 718 | #nce = 0 719 | nce = [] 720 | for i in range(len(c_t)): 721 | s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 722 | s_n = tf.reduce_sum(neg[i] * Wk_ct[i], axis=1, keep_dims=True) 723 | score = tf.concat([s_p, s_n], axis=1) 724 | score = tf.nn.log_softmax(tf.exp(score), dim=1) 725 | score = tf.reshape(score[:, 0], [-1]) 726 | nce.append(score) 727 | nce = tf.stack(nce, axis=1) * mask 728 | nce = tf.reduce_sum(nce) 729 | nce /= -1.0 * tf.cast(batch*len_seq, tf.float32) 730 | return nce 731 | elif exp == 'random_all': 732 | shape = tf.shape(h_states) 733 | batch, len_seq, dim = shape[0], shape[1], shape[2] 734 | Wk_ct = [] 735 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 736 | x = tf.unstack(x, axis=1) 737 | neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc') 738 | neg = tf.unstack(neg, axis=1) 739 | c_t = tf.unstack(h_states, axis=1) 740 | with tf.name_scope(stag): 741 | for i in range(len(c_t)): 742 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 743 | nce = [] 744 | for i in range(len(c_t)): 745 | s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 746 | s_n = [] 747 | for j in range(len(neg)): 748 | s_n.append(tf.reduce_sum(neg[j] * Wk_ct[i], axis=1, keep_dims=True)) 749 | score = tf.concat([s_p] + s_n, axis=1) 750 | score = tf.nn.log_softmax(tf.exp(score), dim=1) 751 | score = tf.reshape(score[:, 0], [-1]) 752 | nce.append(score) 753 | nce = tf.stack(nce, axis=1) * mask 754 | nce = tf.reduce_sum(nce) 755 | nce /= -1.0 * tf.cast(batch*len_seq, tf.float32) 756 | return nce 757 | 758 | elif exp == 'batch_1': 759 | shape = tf.shape(click_seq) 760 | batch, len_seq, dim = shape[0], shape[1], shape[2] 761 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 762 | x = tf.unstack(x, axis=1) 763 | c_t = tf.unstack(h_states, axis=1) 764 | # different W for every step 765 | rand_idx = 12 766 | Wk_ct = [] 767 | with tf.name_scope(stag): 768 | for i in range(len(c_t)): 769 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 770 | nce = [] 771 | for i in range(len(c_t)): 772 | x_i = tf.tile(x[i], [2,1]) 773 | s_p = tf.reduce_sum(x_i[0:128, :] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 774 | s_n = tf.reduce_sum(x_i[rand_idx:rand_idx+128] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 775 | score = tf.concat([s_p, s_n], axis=1) 776 | score = tf.nn.log_softmax(tf.exp(score), dim=1) # softmax over batch 777 | score = tf.reshape(score[:, 0], [-1]) 778 | nce.append(score) 779 | nce =tf.stack(nce, axis=1) * mask 780 | nce = tf.reduce_sum(nce) 781 | nce /= -1.0*tf.cast(batch*len_seq, tf.float32) 782 | return nce 783 | 784 | elif exp == 'batch_all': 785 | shape = tf.shape(click_seq) 786 | batch, len_seq, dim = shape[0], shape[1], shape[2] 787 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 788 | x = tf.unstack(x, axis=1) 789 | 
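
`mi_loss` is a CPC/InfoNCE-style mutual-information objective: each hidden state `c_t` is projected by a step-specific matrix `W_t`, the projection is scored against the encoded positive (the next click) and encoded negatives, and the log-softmax probability assigned to the positive is averaged over unmasked steps. A minimal sketch of the textbook form, which applies `log_softmax` directly to the raw scores and shares one projection across steps (TF 1.x; sizes illustrative):

import tensorflow as tf

B, T = 32, 20                                   # illustrative sizes
c = tf.placeholder(tf.float32, [B, T, 64])      # states c_1 .. c_T
pos = tf.placeholder(tf.float32, [B, T, 256])   # encoded positives (next clicks)
neg = tf.placeholder(tf.float32, [B, T, 256])   # encoded negatives
mask = tf.placeholder(tf.float32, [B, T])

w_c = tf.layers.dense(c, 256, name='W')         # one shared projection (the code above learns one W per step)
s_pos = tf.reduce_sum(pos * w_c, -1, keep_dims=True)
s_neg = tf.reduce_sum(neg * w_c, -1, keep_dims=True)
log_p = tf.nn.log_softmax(tf.concat([s_pos, s_neg], -1), dim=-1)[..., 0]
nce = -tf.reduce_sum(log_p * mask) / float(B * T)
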
c_t = tf.unstack(h_states, axis=1) 790 | # different W for every step 791 | Wk_ct = [] 792 | with tf.name_scope(stag): 793 | for i in range(len(c_t)): 794 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 795 | nce = [] 796 | for i in range(len(c_t)): 797 | score = tf.exp(tf.matmul(x[i], tf.transpose(Wk_ct[i]))) 798 | score = tf.nn.log_softmax(score, dim=0) # softmax over batch 799 | nce.append(tf.linalg.diag_part(score)) 800 | #nce += tf.reduce_sum(tf.linalg.diag_part(score)) 801 | nce = tf.stack(nce, axis=1) * mask 802 | nce = tf.reduce_sum(nce) 803 | nce /= -1.0*tf.cast(batch*len_seq, tf.float32) 804 | return nce 805 | 806 | 807 | def mlp(self, in_, stag='mlp'): 808 | bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE) 809 | dnn1 = tf.layers.dense(bn1, 1024, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE) 810 | dnn2 = tf.layers.dense(dnn1, 512, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE) 811 | dnn3 = tf.layers.dense(dnn2, 256, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE) 812 | return dnn3 813 | ''' 814 | dnn4 = tf.layers.dense(dnn3, 1, activation=None, name='f4' + stag, reuse=tf.AUTO_REUSE) 815 | dnn4 = tf.nn.sigmoid(dnn4) 816 | return dnn4 817 | y_hat = tf.nn.softmax(dnn3) + 0.000001 818 | return y_hat 819 | ''' 820 | 821 | def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask=None, stag=None): 822 | if noclk_mask is None: 823 | noclk_mask = clk_mask 824 | # postive 825 | click_input = tf.concat([h_states, click_seq], -1) 826 | click_prop = self.auxiliary_net(click_input, stag = stag)[:, :, 0] 827 | click_loss = - tf.reshape(tf.log(click_prop), [-1, tf.shape(click_seq)[1]]) * clk_mask 828 | 829 | # negative 830 | exp = 'random_1' 831 | if exp =='random_1': 832 | return super(Model_DIEN, self).auxiliary_loss(h_states, click_seq, noclick_seq, clk_mask, noclk_mask, stag) 833 | elif exp == 'random_all': 834 | batch = 99 835 | noclick_seq_ = tf.tile(noclick_seq, [1,2,1]) # shape = [batch, 2 * seq, dim] for sliding window 836 | noclick_input = [] 837 | for i in range(99): 838 | noclick_input.append(tf.concat([h_states, noclick_seq_[:, i:i+batch, :]], axis=-1)) 839 | noclick_input = tf.concat(noclick_input, axis=0) 840 | mask = tf.tile(mask, [batch, 1]) 841 | elif exp == 'batch_1': 842 | batch = 128 843 | h_states = tf.unstack(h_states, axis=1) 844 | click_seq = tf.unstack(click_seq, axis=1) 845 | noclick_input = [] 846 | rand_idx = 12 847 | for i in range(len(click_seq)): 848 | h = h_states[i] # seq i of the batch, shape = [batch, dim] 849 | c = click_seq[i] 850 | c = tf.tile(c, [2, 1]) # sliding window 851 | noclick_input.append(tf.concat([h, c[rand_idx:rand_idx+batch,:]], axis=1)) 852 | noclick_input = tf.stack(noclick_input, axis=1) 853 | elif exp == 'batch_all': 854 | batch = 128 855 | h_states = tf.unstack(h_states, axis=1) 856 | click_seq = tf.unstack(click_seq, axis=1) 857 | noclick_input = [] 858 | for i in range(len(click_seq)): 859 | h = h_states[i] # seq i of the batch, shape = [batch, dim] 860 | c = click_seq[i] 861 | c = tf.tile(c, [2, 1]) # sliding window 862 | neg = [] 863 | for i in range(1, batch): 864 | neg.append(tf.concat([h, c[i:i+batch,:]], axis=1)) 865 | noclick_input.append(tf.concat(neg, axis=0)) 866 | noclick_input = tf.stack(noclick_input, axis=1) 867 | mask = tf.tile(mask, [batch-1, 1]) 868 | 869 | noclick_prop = self.auxiliary_net(noclick_input, stag = stag)[:, :, 0] 870 | noclick_loss = - tf.reshape(tf.log(1.0 - noclick_prop), [-1, 
tf.shape(noclick_seq)[1]]) * mask 871 | loss_ = tf.reduce_mean(click_loss) + tf.reduce_mean(noclick_loss) 872 | return loss_ 873 | 874 | def aux_batch(self, h_states, click_seq, noclick_seq, mask = None, stag = None): 875 | #mask = tf.cast(mask, tf.float32) 876 | # batch = tf.shape(h_states)[0] 877 | batch = 128 878 | click_input_ = tf.concat([h_states, click_seq], -1) 879 | h_states_ = tf.unstack(h_states, axis=1) 880 | click_seq_ = tf.unstack(click_seq, axis=1) 881 | neg_input_total = [] 882 | for i in range(len(click_seq_)): 883 | h = h_states_[i] # seq i of the batch [batch, dim] 884 | c = click_seq_[i] 885 | c = tf.tile(c, [2, 1]) # sliding window 886 | neg = [] 887 | for i in range(1, batch): 888 | neg.append(tf.concat([h, c[i:i+batch,:]], axis=1)) 889 | neg_input_total.append(tf.concat(neg, axis=0)) 890 | noclick_input_ = tf.stack(neg_input_total, axis=1) 891 | #noclick_input_ = tf.concat([h_states, noclick_seq], -1) 892 | click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0] 893 | noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0] 894 | 895 | click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask 896 | mask = tf.tile(mask, [batch-1, 1]) 897 | noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask 898 | 899 | #loss_ = tf.reduce_mean(click_loss_ + noclick_loss_) 900 | loss_ = tf.reduce_mean(click_loss_) + tf.reduce_mean(noclick_loss_) 901 | return loss_ 902 | 903 | 904 | 905 | 906 | class Model_MIMN(Model): 907 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, Mem_Induction=0, Util_Reg=0, use_negsample=False, mask_flag=False): 908 | super(Model_MIMN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 909 | BATCH_SIZE, SEQ_LEN, use_negsample, Flag="MIMN") 910 | self.reg = Util_Reg 911 | 912 | def clear_mask_state(state, begin_state, begin_channel_rnn_state, mask, cell, t): 913 | state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"] 914 | state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"] 915 | state["key_M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["key_M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["key_M"] 916 | state["sum_aggre"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["sum_aggre"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["sum_aggre"] 917 | if Mem_Induction > 0: 918 | temp_channel_rnn_state = [] 919 | for i in range(MEMORY_SIZE): 920 | temp_channel_rnn_state.append(cell.channel_rnn_state[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_state[i]*(1- tf.expand_dims(mask[:,t], axis=1))) 921 | cell.channel_rnn_state = temp_channel_rnn_state 922 | temp_channel_rnn_output = [] 923 | for i in range(MEMORY_SIZE): 924 | temp_output = cell.channel_rnn_output[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_output[i]*(1- tf.expand_dims(self.mask[:,t], axis=1)) 925 | temp_channel_rnn_output.append(temp_output) 926 | cell.channel_rnn_output = temp_channel_rnn_output 927 | 928 | return state 929 | 930 | cell = mimn.MIMNCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1, 931 | reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE, 
mem_induction=Mem_Induction, util_reg=Util_Reg) 932 | 933 | state = cell.zero_state(BATCH_SIZE, tf.float32) 934 | if Mem_Induction > 0: 935 | begin_channel_rnn_output = cell.channel_rnn_output 936 | else: 937 | begin_channel_rnn_output = 0.0 938 | 939 | begin_state = state 940 | self.state_list = [state] 941 | self.mimn_o = [] 942 | for t in range(SEQ_LEN): 943 | output, state, temp_output_list = cell(self.item_his_eb[:, t, :], state) 944 | if mask_flag: 945 | state = clear_mask_state(state, begin_state, begin_channel_rnn_output, self.mask, cell, t) 946 | self.mimn_o.append(output) 947 | self.state_list.append(state) 948 | 949 | self.mimn_o = tf.stack(self.mimn_o, axis=1) 950 | self.state_list.append(state) 951 | mean_memory = tf.reduce_mean(state['sum_aggre'], axis=-2) 952 | 953 | before_aggre = state['w_aggre'] 954 | read_out, _, _ = cell(self.item_eb, state) 955 | 956 | if use_negsample: 957 | aux_loss_1 = self.auxiliary_loss(self.mimn_o[:, :-1, :], self.item_his_eb[:, 1:, :], 958 | self.neg_his_eb[:, 1:, :], self.mask[:, 1:], stag = "bigru_0") 959 | self.aux_loss = aux_loss_1 960 | 961 | if self.reg: 962 | self.reg_loss = cell.capacity_loss(before_aggre) 963 | else: 964 | self.reg_loss = tf.zeros(1) 965 | 966 | if Mem_Induction == 1: 967 | channel_memory_tensor = tf.concat(temp_output_list, 1) 968 | multi_channel_hist = din_attention(self.item_eb, channel_memory_tensor, HIDDEN_SIZE, None, stag='pal') 969 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, tf.squeeze(multi_channel_hist), mean_memory*self.item_eb], 1) 970 | else: 971 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, mean_memory*self.item_eb], 1) 972 | 973 | self.build_fcn_net(inp, use_dice=False) 974 | -------------------------------------------------------------------------------- /script/process_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import time 4 | 5 | def process_meta(file): 6 | fi = open(file, "r") 7 | fo = open("item-info", "w") 8 | for line in fi: 9 | obj = eval(line) 10 | cat = obj["categories"][0][-1] 11 | print>>fo, obj["asin"] + "\t" + cat 12 | 13 | def process_reviews(file): 14 | fi = open(file, "r") 15 | user_map = {} 16 | fo = open("reviews-info", "w") 17 | for line in fi: 18 | obj = eval(line) 19 | userID = obj["reviewerID"] 20 | itemID = obj["asin"] 21 | rating = obj["overall"] 22 | time = obj["unixReviewTime"] 23 | print>>fo, userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time) 24 | 25 | def manual_join(): 26 | f_rev = open("reviews-info", "r") 27 | user_map = {} 28 | item_list = [] 29 | for line in f_rev: 30 | line = line.strip() 31 | items = line.split("\t") 32 | #loctime = time.localtime(float(items[-1])) 33 | #items[-1] = time.strftime('%Y-%m-%d', loctime) 34 | if items[0] not in user_map: 35 | user_map[items[0]]= [] 36 | user_map[items[0]].append(("\t".join(items), float(items[-1]))) 37 | item_list.append(items[1]) 38 | f_meta = open("item-info", "r") 39 | meta_map = {} 40 | for line in f_meta: 41 | arr = line.strip().split("\t") 42 | if arr[0] not in meta_map: 43 | meta_map[arr[0]] = arr[1] 44 | arr = line.strip().split("\t") 45 | fo = open("jointed-new", "w") 46 | for key in user_map: 47 | sorted_user_bh = sorted(user_map[key], key=lambda x:x[1]) 48 | for line, t in sorted_user_bh: 49 | items = line.split("\t") 50 | asin = items[1] 51 | j = 0 52 | while True: 53 | asin_neg_index = random.randint(0, len(item_list) - 1) 54 | asin_neg = item_list[asin_neg_index] 55 | if 
asin_neg == asin: 56 | continue 57 | items[1] = asin_neg 58 | print>>fo, "0" + "\t" + "\t".join(items) + "\t" + meta_map[asin_neg] 59 | j += 1 60 | if j == 1: #negative sampling frequency 61 | break 62 | if asin in meta_map: 63 | print>>fo, "1" + "\t" + line + "\t" + meta_map[asin] 64 | else: 65 | print>>fo, "1" + "\t" + line + "\t" + "default_cat" 66 | 67 | 68 | def split_test(): 69 | fi = open("jointed-new", "r") 70 | fo = open("jointed-new-split-info", "w") 71 | user_count = {} 72 | for line in fi: 73 | line = line.strip() 74 | user = line.split("\t")[1] 75 | if user not in user_count: 76 | user_count[user] = 0 77 | user_count[user] += 1 78 | fi.seek(0) 79 | i = 0 80 | last_user = "A26ZDKC53OP6JD" 81 | for line in fi: 82 | line = line.strip() 83 | user = line.split("\t")[1] 84 | if user == last_user: 85 | if i < user_count[user] - 2: # 1 + negative samples 86 | print>> fo, "20180118" + "\t" + line 87 | else: 88 | print>>fo, "20190119" + "\t" + line 89 | else: 90 | last_user = user 91 | i = 0 92 | if i < user_count[user] - 2: 93 | print>> fo, "20180118" + "\t" + line 94 | else: 95 | print>>fo, "20190119" + "\t" + line 96 | i += 1 97 | 98 | process_meta(sys.argv[1]) 99 | process_reviews(sys.argv[2]) 100 | manual_join() 101 | split_test() 102 | -------------------------------------------------------------------------------- /script/rnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """RNN helpers for TensorFlow models. 17 | 18 | 19 | @@bidirectional_dynamic_rnn 20 | @@dynamic_rnn 21 | @@raw_rnn 22 | @@static_rnn 23 | @@static_state_saving_rnn 24 | @@static_bidirectional_rnn 25 | """ 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from tensorflow.python.framework import constant_op 31 | from tensorflow.python.framework import dtypes 32 | from tensorflow.python.framework import ops 33 | from tensorflow.python.framework import tensor_shape 34 | from tensorflow.python.ops import array_ops 35 | from tensorflow.python.ops import control_flow_ops 36 | from tensorflow.python.ops import math_ops 37 | from tensorflow.python.ops import rnn_cell_impl 38 | from tensorflow.python.ops import tensor_array_ops 39 | from tensorflow.python.ops import variable_scope as vs 40 | from tensorflow.python.util import nest 41 | 42 | 43 | # pylint: disable=protected-access 44 | _concat = rnn_cell_impl._concat 45 | _like_rnncell = rnn_cell_impl._like_rnncell 46 | # pylint: enable=protected-access 47 | 48 | 49 | def _transpose_batch_time(x): 50 | """Transpose the batch and time dimensions of a Tensor. 51 | 52 | Retains as much of the static shape information as possible. 53 | 54 | Args: 55 | x: A tensor of rank 2 or higher. 
56 | 57 | Returns: 58 | x transposed along the first two dimensions. 59 | 60 | Raises: 61 | ValueError: if `x` is rank 1 or lower. 62 | """ 63 | x_static_shape = x.get_shape() 64 | if x_static_shape.ndims is not None and x_static_shape.ndims < 2: 65 | raise ValueError( 66 | "Expected input tensor %s to have rank at least 2, but saw shape: %s" % 67 | (x, x_static_shape)) 68 | x_rank = array_ops.rank(x) 69 | x_t = array_ops.transpose( 70 | x, array_ops.concat( 71 | ([1, 0], math_ops.range(2, x_rank)), axis=0)) 72 | x_t.set_shape( 73 | tensor_shape.TensorShape([ 74 | x_static_shape[1].value, x_static_shape[0].value 75 | ]).concatenate(x_static_shape[2:])) 76 | return x_t 77 | 78 | 79 | def _best_effort_input_batch_size(flat_input): 80 | """Get static input batch size if available, with fallback to the dynamic one. 81 | 82 | Args: 83 | flat_input: An iterable of time major input Tensors of shape [max_time, 84 | batch_size, ...]. All inputs should have compatible batch sizes. 85 | 86 | Returns: 87 | The batch size in Python integer if available, or a scalar Tensor otherwise. 88 | 89 | Raises: 90 | ValueError: if there is any input with an invalid shape. 91 | """ 92 | for input_ in flat_input: 93 | shape = input_.shape 94 | if shape.ndims is None: 95 | continue 96 | if shape.ndims < 2: 97 | raise ValueError( 98 | "Expected input tensor %s to have rank at least 2" % input_) 99 | batch_size = shape[1].value 100 | if batch_size is not None: 101 | return batch_size 102 | # Fallback to the dynamic batch size of the first input. 103 | return array_ops.shape(flat_input[0])[1] 104 | 105 | 106 | def _infer_state_dtype(explicit_dtype, state): 107 | """Infer the dtype of an RNN state. 108 | 109 | Args: 110 | explicit_dtype: explicitly declared dtype or None. 111 | state: RNN's hidden state. Must be a Tensor or a nested iterable containing 112 | Tensors. 113 | 114 | Returns: 115 | dtype: inferred dtype of hidden state. 116 | 117 | Raises: 118 | ValueError: if `state` has heterogeneous dtypes or is empty. 119 | """ 120 | if explicit_dtype is not None: 121 | return explicit_dtype 122 | elif nest.is_sequence(state): 123 | inferred_dtypes = [element.dtype for element in nest.flatten(state)] 124 | if not inferred_dtypes: 125 | raise ValueError("Unable to infer dtype from empty state.") 126 | all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) 127 | if not all_same: 128 | raise ValueError( 129 | "State has tensors of different inferred_dtypes. Unable to infer a " 130 | "single representative dtype.") 131 | return inferred_dtypes[0] 132 | else: 133 | return state.dtype 134 | 135 | 136 | # pylint: disable=unused-argument 137 | def _rnn_step( 138 | time, sequence_length, min_sequence_length, max_sequence_length, 139 | zero_output, state, call_cell, state_size, skip_conditionals=False): 140 | """Calculate one step of a dynamic RNN minibatch. 141 | 142 | Returns an (output, state) pair conditioned on the sequence_lengths. 143 | When skip_conditionals=False, the pseudocode is something like: 144 | 145 | if t >= max_sequence_length: 146 | return (zero_output, state) 147 | if t < min_sequence_length: 148 | return call_cell() 149 | 150 | # Selectively output zeros or output, old state or new state depending 151 | # on if we've finished calculating each row. 
152 | new_output, new_state = call_cell() 153 | final_output = np.vstack([ 154 | zero_output if time >= sequence_lengths[r] else new_output_r 155 | for r, new_output_r in enumerate(new_output) 156 | ]) 157 | final_state = np.vstack([ 158 | state[r] if time >= sequence_lengths[r] else new_state_r 159 | for r, new_state_r in enumerate(new_state) 160 | ]) 161 | return (final_output, final_state) 162 | 163 | Args: 164 | time: Python int, the current time step 165 | sequence_length: int32 `Tensor` vector of size [batch_size] 166 | min_sequence_length: int32 `Tensor` scalar, min of sequence_length 167 | max_sequence_length: int32 `Tensor` scalar, max of sequence_length 168 | zero_output: `Tensor` vector of shape [output_size] 169 | state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, 170 | or a list/tuple of such tensors. 171 | call_cell: lambda returning tuple of (new_output, new_state) where 172 | new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. 173 | new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. 174 | state_size: The `cell.state_size` associated with the state. 175 | skip_conditionals: Python bool, whether to skip using the conditional 176 | calculations. This is useful for `dynamic_rnn`, where the input tensor 177 | matches `max_sequence_length`, and using conditionals just slows 178 | everything down. 179 | 180 | Returns: 181 | A tuple of (`final_output`, `final_state`) as given by the pseudocode above: 182 | final_output is a `Tensor` matrix of shape [batch_size, output_size] 183 | final_state is either a single `Tensor` matrix, or a tuple of such 184 | matrices (matching length and shapes of input `state`). 185 | 186 | Raises: 187 | ValueError: If the cell returns a state tuple whose length does not match 188 | that returned by `state_size`. 189 | """ 190 | 191 | # Convert state to a list for ease of use 192 | flat_state = nest.flatten(state) 193 | flat_zero_output = nest.flatten(zero_output) 194 | 195 | def _copy_one_through(output, new_output): 196 | # If the state contains a scalar value we simply pass it through. 197 | if output.shape.ndims == 0: 198 | return new_output 199 | copy_cond = (time >= sequence_length) 200 | with ops.colocate_with(new_output): 201 | return array_ops.where(copy_cond, output, new_output) 202 | 203 | def _copy_some_through(flat_new_output, flat_new_state): 204 | # Use broadcasting select to determine which values should get 205 | # the previous state & zero output, and which values should get 206 | # a calculated state & output. 207 | flat_new_output = [ 208 | _copy_one_through(zero_output, new_output) 209 | for zero_output, new_output in zip(flat_zero_output, flat_new_output)] 210 | flat_new_state = [ 211 | _copy_one_through(state, new_state) 212 | for state, new_state in zip(flat_state, flat_new_state)] 213 | return flat_new_output + flat_new_state 214 | 215 | def _maybe_copy_some_through(): 216 | """Run RNN step. 
Pass through either no or some past state.""" 217 | new_output, new_state = call_cell() 218 | 219 | nest.assert_same_structure(state, new_state) 220 | 221 | flat_new_state = nest.flatten(new_state) 222 | flat_new_output = nest.flatten(new_output) 223 | return control_flow_ops.cond( 224 | # if t < min_seq_len: calculate and return everything 225 | time < min_sequence_length, lambda: flat_new_output + flat_new_state, 226 | # else copy some of it through 227 | lambda: _copy_some_through(flat_new_output, flat_new_state)) 228 | 229 | # TODO(ebrevdo): skipping these conditionals may cause a slowdown, 230 | # but benefits from removing cond() and its gradient. We should 231 | # profile with and without this switch here. 232 | if skip_conditionals: 233 | # Instead of using conditionals, perform the selective copy at all time 234 | # steps. This is faster when max_seq_len is equal to the number of unrolls 235 | # (which is typical for dynamic_rnn). 236 | new_output, new_state = call_cell() 237 | nest.assert_same_structure(state, new_state) 238 | new_state = nest.flatten(new_state) 239 | new_output = nest.flatten(new_output) 240 | final_output_and_state = _copy_some_through(new_output, new_state) 241 | else: 242 | empty_update = lambda: flat_zero_output + flat_state 243 | final_output_and_state = control_flow_ops.cond( 244 | # if t >= max_seq_len: copy all state through, output zeros 245 | time >= max_sequence_length, empty_update, 246 | # otherwise calculation is required: copy some or all of it through 247 | _maybe_copy_some_through) 248 | 249 | if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): 250 | raise ValueError("Internal error: state and output were not concatenated " 251 | "correctly.") 252 | final_output = final_output_and_state[:len(flat_zero_output)] 253 | final_state = final_output_and_state[len(flat_zero_output):] 254 | 255 | for output, flat_output in zip(final_output, flat_zero_output): 256 | output.set_shape(flat_output.get_shape()) 257 | for substate, flat_substate in zip(final_state, flat_state): 258 | substate.set_shape(flat_substate.get_shape()) 259 | 260 | final_output = nest.pack_sequence_as( 261 | structure=zero_output, flat_sequence=final_output) 262 | final_state = nest.pack_sequence_as( 263 | structure=state, flat_sequence=final_state) 264 | 265 | return final_output, final_state 266 | 267 | 268 | def _reverse_seq(input_seq, lengths): 269 | """Reverse a list of Tensors up to specified lengths. 270 | 271 | Args: 272 | input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) 273 | or nested tuples of tensors. 274 | lengths: A `Tensor` of dimension batch_size, containing lengths for each 275 | sequence in the batch. If "None" is specified, simply reverses 276 | the list. 
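
As a concrete example of the primitive this helper builds on, `tf.reverse_sequence` reverses only the first `lengths[i]` elements of each batch entry and leaves the padding in place:

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 0, 0]])
rev = tf.reverse_sequence(x, seq_lengths=[3, 2], seq_dim=1, batch_dim=0)
# rev evaluates to [[3, 2, 1, 0],
#                   [5, 4, 0, 0]]
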
277 | 278 | Returns: 279 | time-reversed sequence 280 | """ 281 | if lengths is None: 282 | return list(reversed(input_seq)) 283 | 284 | flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) 285 | 286 | flat_results = [[] for _ in range(len(input_seq))] 287 | for sequence in zip(*flat_input_seq): 288 | input_shape = tensor_shape.unknown_shape( 289 | ndims=sequence[0].get_shape().ndims) 290 | for input_ in sequence: 291 | input_shape.merge_with(input_.get_shape()) 292 | input_.set_shape(input_shape) 293 | 294 | # Join into (time, batch_size, depth) 295 | s_joined = array_ops.stack(sequence) 296 | 297 | # Reverse along dimension 0 298 | s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) 299 | # Split again into list 300 | result = array_ops.unstack(s_reversed) 301 | for r, flat_result in zip(result, flat_results): 302 | r.set_shape(input_shape) 303 | flat_result.append(r) 304 | 305 | results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) 306 | for input_, flat_result in zip(input_seq, flat_results)] 307 | return results 308 | 309 | 310 | def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, 311 | initial_state_fw=None, initial_state_bw=None, 312 | dtype=None, parallel_iterations=None, 313 | swap_memory=False, time_major=False, scope=None): 314 | """Creates a dynamic version of bidirectional recurrent neural network. 315 | 316 | Takes input and builds independent forward and backward RNNs. The input_size 317 | of forward and backward cell must match. The initial state for both directions 318 | is zero by default (but can be set optionally) and no intermediate states are 319 | ever returned -- the network is fully unrolled for the given (passed in) 320 | length(s) of the sequence(s) or completely unrolled if length(s) is not 321 | given. 322 | 323 | Args: 324 | cell_fw: An instance of RNNCell, to be used for forward direction. 325 | cell_bw: An instance of RNNCell, to be used for backward direction. 326 | inputs: The RNN inputs. 327 | If time_major == False (default), this must be a tensor of shape: 328 | `[batch_size, max_time, ...]`, or a nested tuple of such elements. 329 | If time_major == True, this must be a tensor of shape: 330 | `[max_time, batch_size, ...]`, or a nested tuple of such elements. 331 | sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, 332 | containing the actual lengths for each of the sequences in the batch. 333 | If not provided, all batch entries are assumed to be full sequences; and 334 | time reversal is applied from time `0` to `max_time` for each sequence. 335 | initial_state_fw: (optional) An initial state for the forward RNN. 336 | This must be a tensor of appropriate type and shape 337 | `[batch_size, cell_fw.state_size]`. 338 | If `cell_fw.state_size` is a tuple, this should be a tuple of 339 | tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. 340 | initial_state_bw: (optional) Same as for `initial_state_fw`, but using 341 | the corresponding properties of `cell_bw`. 342 | dtype: (optional) The data type for the initial states and expected output. 343 | Required if initial_states are not provided or RNN states have a 344 | heterogeneous dtype. 345 | parallel_iterations: (Default: 32). The number of iterations to run in 346 | parallel. Those operations which do not have any temporal dependency 347 | and can be run in parallel, will be. This parameter trades off 348 | time for space. 
Values >> 1 use more memory but take less time, 349 | while smaller values use less memory but computations take longer. 350 | swap_memory: Transparently swap the tensors produced in forward inference 351 | but needed for back prop from GPU to CPU. This allows training RNNs 352 | which would typically not fit on a single GPU, with very minimal (or no) 353 | performance penalty. 354 | time_major: The shape format of the `inputs` and `outputs` Tensors. 355 | If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. 356 | If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 357 | Using `time_major = True` is a bit more efficient because it avoids 358 | transposes at the beginning and end of the RNN calculation. However, 359 | most TensorFlow data is batch-major, so by default this function 360 | accepts input and emits output in batch-major form. 361 | scope: VariableScope for the created subgraph; defaults to 362 | "bidirectional_rnn" 363 | 364 | Returns: 365 | A tuple (outputs, output_states) where: 366 | outputs: A tuple (output_fw, output_bw) containing the forward and 367 | the backward rnn output `Tensor`. 368 | If time_major == False (default), 369 | output_fw will be a `Tensor` shaped: 370 | `[batch_size, max_time, cell_fw.output_size]` 371 | and output_bw will be a `Tensor` shaped: 372 | `[batch_size, max_time, cell_bw.output_size]`. 373 | If time_major == True, 374 | output_fw will be a `Tensor` shaped: 375 | `[max_time, batch_size, cell_fw.output_size]` 376 | and output_bw will be a `Tensor` shaped: 377 | `[max_time, batch_size, cell_bw.output_size]`. 378 | It returns a tuple instead of a single concatenated `Tensor`, unlike 379 | in the `bidirectional_rnn`. If the concatenated one is preferred, 380 | the forward and backward outputs can be concatenated as 381 | `tf.concat(outputs, 2)`. 382 | output_states: A tuple (output_state_fw, output_state_bw) containing 383 | the forward and the backward final states of bidirectional rnn. 384 | 385 | Raises: 386 | TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
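
  Example (an illustrative sketch; `num_units`, `batch_data` and `lens` are
  assumed placeholders, not part of this module):

  ```python
  cell_fw = tf.contrib.rnn.LSTMCell(num_units)
  cell_bw = tf.contrib.rnn.LSTMCell(num_units)
  (out_fw, out_bw), _ = bidirectional_dynamic_rnn(
      cell_fw, cell_bw, batch_data, sequence_length=lens, dtype=tf.float32)
  # Depth-concatenate both directions: [batch_size, max_time, 2 * num_units].
  outputs = tf.concat([out_fw, out_bw], 2)
  ```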
387 | """ 388 | 389 | if not _like_rnncell(cell_fw): 390 | raise TypeError("cell_fw must be an instance of RNNCell") 391 | if not _like_rnncell(cell_bw): 392 | raise TypeError("cell_bw must be an instance of RNNCell") 393 | 394 | with vs.variable_scope(scope or "bidirectional_rnn"): 395 | # Forward direction 396 | with vs.variable_scope("fw") as fw_scope: 397 | output_fw, output_state_fw = dynamic_rnn( 398 | cell=cell_fw, inputs=inputs, sequence_length=sequence_length, 399 | initial_state=initial_state_fw, dtype=dtype, 400 | parallel_iterations=parallel_iterations, swap_memory=swap_memory, 401 | time_major=time_major, scope=fw_scope) 402 | 403 | # Backward direction 404 | if not time_major: 405 | time_dim = 1 406 | batch_dim = 0 407 | else: 408 | time_dim = 0 409 | batch_dim = 1 410 | 411 | def _reverse(input_, seq_lengths, seq_dim, batch_dim): 412 | if seq_lengths is not None: 413 | return array_ops.reverse_sequence( 414 | input=input_, seq_lengths=seq_lengths, 415 | seq_dim=seq_dim, batch_dim=batch_dim) 416 | else: 417 | return array_ops.reverse(input_, axis=[seq_dim]) 418 | 419 | with vs.variable_scope("bw") as bw_scope: 420 | inputs_reverse = _reverse( 421 | inputs, seq_lengths=sequence_length, 422 | seq_dim=time_dim, batch_dim=batch_dim) 423 | tmp, output_state_bw = dynamic_rnn( 424 | cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, 425 | initial_state=initial_state_bw, dtype=dtype, 426 | parallel_iterations=parallel_iterations, swap_memory=swap_memory, 427 | time_major=time_major, scope=bw_scope) 428 | 429 | output_bw = _reverse( 430 | tmp, seq_lengths=sequence_length, 431 | seq_dim=time_dim, batch_dim=batch_dim) 432 | 433 | outputs = (output_fw, output_bw) 434 | output_states = (output_state_fw, output_state_bw) 435 | 436 | return (outputs, output_states) 437 | 438 | 439 | def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, 440 | dtype=None, parallel_iterations=None, swap_memory=False, 441 | time_major=False, scope=None): 442 | """Creates a recurrent neural network specified by RNNCell `cell`. 443 | 444 | Performs fully dynamic unrolling of `inputs`. 445 | 446 | Example: 447 | 448 | ```python 449 | # create a BasicRNNCell 450 | rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) 451 | 452 | # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] 453 | 454 | # defining initial state 455 | initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) 456 | 457 | # 'state' is a tensor of shape [batch_size, cell_state_size] 458 | outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, 459 | initial_state=initial_state, 460 | dtype=tf.float32) 461 | ``` 462 | 463 | ```python 464 | # create 2 LSTMCells 465 | rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] 466 | 467 | # create a RNN cell composed sequentially of a number of RNNCells 468 | multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) 469 | 470 | # 'outputs' is a tensor of shape [batch_size, max_time, 256] 471 | # 'state' is a N-tuple where N is the number of LSTMCells containing a 472 | # tf.contrib.rnn.LSTMStateTuple for each cell 473 | outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, 474 | inputs=data, 475 | dtype=tf.float32) 476 | ``` 477 | 478 | 479 | Args: 480 | cell: An instance of RNNCell. 481 | inputs: The RNN inputs. 482 | If `time_major == False` (default), this must be a `Tensor` of shape: 483 | `[batch_size, max_time, ...]`, or a nested tuple of such 484 | elements. 
485 | If `time_major == True`, this must be a `Tensor` of shape: 486 | `[max_time, batch_size, ...]`, or a nested tuple of such 487 | elements. 488 | This may also be a (possibly nested) tuple of Tensors satisfying 489 | this property. The first two dimensions must match across all the inputs, 490 | but otherwise the ranks and other shape components may differ. 491 | In this case, input to `cell` at each time-step will replicate the 492 | structure of these tuples, except for the time dimension (from which the 493 | time is taken). 494 | The input to `cell` at each time step will be a `Tensor` or (possibly 495 | nested) tuple of Tensors each with dimensions `[batch_size, ...]`. 496 | sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. 497 | Used to copy-through state and zero-out outputs when past a batch 498 | element's sequence length. So it's more for correctness than performance. 499 | initial_state: (optional) An initial state for the RNN. 500 | If `cell.state_size` is an integer, this must be 501 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 502 | If `cell.state_size` is a tuple, this should be a tuple of 503 | tensors having shapes `[batch_size, s] for s in cell.state_size`. 504 | dtype: (optional) The data type for the initial state and expected output. 505 | Required if initial_state is not provided or RNN state has a heterogeneous 506 | dtype. 507 | parallel_iterations: (Default: 32). The number of iterations to run in 508 | parallel. Those operations which do not have any temporal dependency 509 | and can be run in parallel, will be. This parameter trades off 510 | time for space. Values >> 1 use more memory but take less time, 511 | while smaller values use less memory but computations take longer. 512 | swap_memory: Transparently swap the tensors produced in forward inference 513 | but needed for back prop from GPU to CPU. This allows training RNNs 514 | which would typically not fit on a single GPU, with very minimal (or no) 515 | performance penalty. 516 | time_major: The shape format of the `inputs` and `outputs` Tensors. 517 | If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. 518 | If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 519 | Using `time_major = True` is a bit more efficient because it avoids 520 | transposes at the beginning and end of the RNN calculation. However, 521 | most TensorFlow data is batch-major, so by default this function 522 | accepts input and emits output in batch-major form. 523 | scope: VariableScope for the created subgraph; defaults to "rnn". 524 | 525 | Returns: 526 | A pair (outputs, state) where: 527 | 528 | outputs: The RNN output `Tensor`. 529 | 530 | If time_major == False (default), this will be a `Tensor` shaped: 531 | `[batch_size, max_time, cell.output_size]`. 532 | 533 | If time_major == True, this will be a `Tensor` shaped: 534 | `[max_time, batch_size, cell.output_size]`. 535 | 536 | Note, if `cell.output_size` is a (possibly nested) tuple of integers 537 | or `TensorShape` objects, then `outputs` will be a tuple having the 538 | same structure as `cell.output_size`, containing Tensors having shapes 539 | corresponding to the shape data in `cell.output_size`. 540 | 541 | state: The final state. If `cell.state_size` is an int, this 542 | will be shaped `[batch_size, cell.state_size]`. If it is a 543 | `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. 
544 | If it is a (possibly nested) tuple of ints or `TensorShape`, this will 545 | be a tuple having the corresponding shapes. If cells are `LSTMCells` 546 | `state` will be a tuple containing a `LSTMStateTuple` for each cell. 547 | 548 | Raises: 549 | TypeError: If `cell` is not an instance of RNNCell. 550 | ValueError: If inputs is None or an empty list. 551 | """ 552 | if not _like_rnncell(cell): 553 | raise TypeError("cell must be an instance of RNNCell") 554 | 555 | # By default, time_major==False and inputs are batch-major: shaped 556 | # [batch, time, depth] 557 | # For internal calculations, we transpose to [time, batch, depth] 558 | flat_input = nest.flatten(inputs) 559 | 560 | if not time_major: 561 | # (B,T,D) => (T,B,D) 562 | flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] 563 | flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) 564 | 565 | parallel_iterations = parallel_iterations or 32 566 | if sequence_length is not None: 567 | sequence_length = math_ops.to_int32(sequence_length) 568 | if sequence_length.get_shape().ndims not in (None, 1): 569 | raise ValueError( 570 | "sequence_length must be a vector of length batch_size, " 571 | "but saw shape: %s" % sequence_length.get_shape()) 572 | sequence_length = array_ops.identity( # Just to find it in the graph. 573 | sequence_length, name="sequence_length") 574 | 575 | # Create a new scope in which the caching device is either 576 | # determined by the parent scope, or is set to place the cached 577 | # Variable using the same placement as for the rest of the RNN. 578 | with vs.variable_scope(scope or "rnn") as varscope: 579 | if varscope.caching_device is None: 580 | varscope.set_caching_device(lambda op: op.device) 581 | batch_size = _best_effort_input_batch_size(flat_input) 582 | 583 | if initial_state is not None: 584 | state = initial_state 585 | else: 586 | if not dtype: 587 | raise ValueError("If there is no initial_state, you must give a dtype.") 588 | state = cell.zero_state(batch_size, dtype) 589 | 590 | def _assert_has_shape(x, shape): 591 | x_shape = array_ops.shape(x) 592 | packed_shape = array_ops.stack(shape) 593 | return control_flow_ops.Assert( 594 | math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), 595 | ["Expected shape for Tensor %s is " % x.name, 596 | packed_shape, " but saw shape: ", x_shape]) 597 | 598 | if sequence_length is not None: 599 | # Perform some shape validation 600 | with ops.control_dependencies( 601 | [_assert_has_shape(sequence_length, [batch_size])]): 602 | sequence_length = array_ops.identity( 603 | sequence_length, name="CheckSeqLen") 604 | 605 | inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) 606 | 607 | (outputs, final_state) = _dynamic_rnn_loop( 608 | cell, 609 | inputs, 610 | state, 611 | parallel_iterations=parallel_iterations, 612 | swap_memory=swap_memory, 613 | att_scores = att_scores, 614 | sequence_length=sequence_length, 615 | dtype=dtype) 616 | 617 | # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. 
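  # (the while_loop in _dynamic_rnn_loop fills a TensorArray indexed by time,
  # which is why results come back time-major)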
618 |   # If we are performing batch-major calculations, transpose output back
619 |   # to shape [batch, time, depth]
620 |   if not time_major:
621 |     # (T,B,D) => (B,T,D)
622 |     outputs = nest.map_structure(_transpose_batch_time, outputs)
623 | 
624 |   return (outputs, final_state)
625 | 
626 | 
627 | def _dynamic_rnn_loop(cell,
628 |                       inputs,
629 |                       initial_state,
630 |                       parallel_iterations,
631 |                       swap_memory,
632 |                       att_scores = None,
633 |                       sequence_length=None,
634 |                       dtype=None):
635 |   """Internal implementation of Dynamic RNN.
636 | 
637 |   Args:
638 |     cell: An instance of RNNCell.
639 |     inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
640 |       tuple of such elements.
641 |     initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
642 |       `cell.state_size` is a tuple, then this should be a tuple of
643 |       tensors having shapes `[batch_size, s] for s in cell.state_size`.
644 |     parallel_iterations: Positive Python int.
645 |     swap_memory: A Python boolean
646 |     sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
647 |     dtype: (optional) Expected dtype of output. If not specified, inferred from
648 |       initial_state.
649 | 
650 |   Returns:
651 |     Tuple `(final_outputs, final_state)`.
652 |     final_outputs:
653 |       A `Tensor` of shape `[time, batch_size, cell.output_size]`. If
654 |       `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
655 |       objects, then this returns a (possibly nested) tuple of Tensors matching
656 |       the corresponding shapes.
657 |     final_state:
658 |       A `Tensor`, or possibly nested tuple of Tensors, matching in length
659 |       and shapes to `initial_state`.
660 | 
661 |   Raises:
662 |     ValueError: If the input depth cannot be inferred via shape inference
663 |       from the inputs.
664 |   """
665 |   state = initial_state
666 |   assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
667 | 
668 |   state_size = cell.state_size
669 | 
670 |   flat_input = nest.flatten(inputs)
671 |   flat_output_size = nest.flatten(cell.output_size)
672 | 
673 |   # Construct an initial output
674 |   input_shape = array_ops.shape(flat_input[0])
675 |   time_steps = input_shape[0]
676 |   batch_size = _best_effort_input_batch_size(flat_input)
677 | 
678 |   inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
679 |                            for input_ in flat_input)
680 | 
681 |   const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
682 | 
683 |   for shape in inputs_got_shape:
684 |     if not shape[2:].is_fully_defined():
685 |       raise ValueError(
686 |           "Input size (depth of inputs) must be accessible via shape inference,"
687 |           " but saw value None.")
688 |     got_time_steps = shape[0].value
689 |     got_batch_size = shape[1].value
690 |     if const_time_steps != got_time_steps:
691 |       raise ValueError(
692 |           "Time steps is not the same for all the elements in the input in a "
693 |           "batch.")
694 |     if const_batch_size != got_batch_size:
695 |       raise ValueError(
696 |           "Batch_size is not the same for all the elements in the input.")
697 | 
698 |   # Prepare dynamic conditional copying of state & output
699 |   def _create_zero_arrays(size):
700 |     size = _concat(batch_size, size)
701 |     return array_ops.zeros(
702 |         array_ops.stack(size), _infer_state_dtype(dtype, state))
703 | 
704 |   flat_zero_output = tuple(_create_zero_arrays(output)
705 |                            for output in flat_output_size)
706 |   zero_output = nest.pack_sequence_as(structure=cell.output_size,
707 |                                       flat_sequence=flat_zero_output)
708 | 
709 |   if sequence_length is not None:
710 |     min_sequence_length = math_ops.reduce_min(sequence_length)
711 | max_sequence_length = math_ops.reduce_max(sequence_length) 712 | 713 | time = array_ops.constant(0, dtype=dtypes.int32, name="time") 714 | 715 | with ops.name_scope("dynamic_rnn") as scope: 716 | base_name = scope 717 | 718 | def _create_ta(name, dtype): 719 | return tensor_array_ops.TensorArray(dtype=dtype, 720 | size=time_steps, 721 | tensor_array_name=base_name + name) 722 | 723 | output_ta = tuple(_create_ta("output_%d" % i, 724 | _infer_state_dtype(dtype, state)) 725 | for i in range(len(flat_output_size))) 726 | input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) 727 | for i in range(len(flat_input))) 728 | 729 | input_ta = tuple(ta.unstack(input_) 730 | for ta, input_ in zip(input_ta, flat_input)) 731 | 732 | def _time_step(time, output_ta_t, state, att_scores=None): 733 | """Take a time step of the dynamic RNN. 734 | 735 | Args: 736 | time: int32 scalar Tensor. 737 | output_ta_t: List of `TensorArray`s that represent the output. 738 | state: nested tuple of vector tensors that represent the state. 739 | 740 | Returns: 741 | The tuple (time + 1, output_ta_t with updated flow, new_state). 742 | """ 743 | 744 | input_t = tuple(ta.read(time) for ta in input_ta) 745 | # Restore some shape information 746 | for input_, shape in zip(input_t, inputs_got_shape): 747 | input_.set_shape(shape[1:]) 748 | 749 | input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) 750 | if att_scores is not None: 751 | att_score = att_scores[:, time, :] 752 | call_cell = lambda: cell(input_t, state, att_score) 753 | else: 754 | call_cell = lambda: cell(input_t, state) 755 | 756 | if sequence_length is not None: 757 | (output, new_state) = _rnn_step( 758 | time=time, 759 | sequence_length=sequence_length, 760 | min_sequence_length=min_sequence_length, 761 | max_sequence_length=max_sequence_length, 762 | zero_output=zero_output, 763 | state=state, 764 | call_cell=call_cell, 765 | state_size=state_size, 766 | skip_conditionals=True) 767 | else: 768 | (output, new_state) = call_cell() 769 | 770 | # Pack state if using state tuples 771 | output = nest.flatten(output) 772 | 773 | output_ta_t = tuple( 774 | ta.write(time, out) for ta, out in zip(output_ta_t, output)) 775 | if att_scores is not None: 776 | return (time + 1, output_ta_t, new_state, att_scores) 777 | else: 778 | return (time + 1, output_ta_t, new_state) 779 | 780 | if att_scores is not None: 781 | _, output_final_ta, final_state, _ = control_flow_ops.while_loop( 782 | cond=lambda time, *_: time < time_steps, 783 | body=_time_step, 784 | loop_vars=(time, output_ta, state, att_scores), 785 | parallel_iterations=parallel_iterations, 786 | swap_memory=swap_memory) 787 | else: 788 | _, output_final_ta, final_state = control_flow_ops.while_loop( 789 | cond=lambda time, *_: time < time_steps, 790 | body=_time_step, 791 | loop_vars=(time, output_ta, state), 792 | parallel_iterations=parallel_iterations, 793 | swap_memory=swap_memory) 794 | 795 | # Unpack final output if not using output tuples. 
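  # Each output TensorArray is stacked back into one [time, batch, depth] Tensor.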
796 | final_outputs = tuple(ta.stack() for ta in output_final_ta) 797 | 798 | # Restore some shape information 799 | for output, output_size in zip(final_outputs, flat_output_size): 800 | shape = _concat( 801 | [const_time_steps, const_batch_size], output_size, static=True) 802 | output.set_shape(shape) 803 | 804 | final_outputs = nest.pack_sequence_as( 805 | structure=cell.output_size, flat_sequence=final_outputs) 806 | 807 | return (final_outputs, final_state) 808 | 809 | 810 | def raw_rnn(cell, loop_fn, 811 | parallel_iterations=None, swap_memory=False, scope=None): 812 | """Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`. 813 | 814 | **NOTE: This method is still in testing, and the API may change.** 815 | 816 | This function is a more primitive version of `dynamic_rnn` that provides 817 | more direct access to the inputs each iteration. It also provides more 818 | control over when to start and finish reading the sequence, and 819 | what to emit for the output. 820 | 821 | For example, it can be used to implement the dynamic decoder of a seq2seq 822 | model. 823 | 824 | Instead of working with `Tensor` objects, most operations work with 825 | `TensorArray` objects directly. 826 | 827 | The operation of `raw_rnn`, in pseudo-code, is basically the following: 828 | 829 | ```python 830 | time = tf.constant(0, dtype=tf.int32) 831 | (finished, next_input, initial_state, _, loop_state) = loop_fn( 832 | time=time, cell_output=None, cell_state=None, loop_state=None) 833 | emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) 834 | state = initial_state 835 | while not all(finished): 836 | (output, cell_state) = cell(next_input, state) 837 | (next_finished, next_input, next_state, emit, loop_state) = loop_fn( 838 | time=time + 1, cell_output=output, cell_state=cell_state, 839 | loop_state=loop_state) 840 | # Emit zeros and copy forward state for minibatch entries that are finished. 841 | state = tf.where(finished, state, next_state) 842 | emit = tf.where(finished, tf.zeros_like(emit), emit) 843 | emit_ta = emit_ta.write(time, emit) 844 | # If any new minibatch entries are marked as finished, mark these. 845 | finished = tf.logical_or(finished, next_finished) 846 | time += 1 847 | return (emit_ta, state, loop_state) 848 | ``` 849 | 850 | with the additional properties that output and state may be (possibly nested) 851 | tuples, as determined by `cell.output_size` and `cell.state_size`, and 852 | as a result the final `state` and `emit_ta` may themselves be tuples. 
853 | 854 | A simple implementation of `dynamic_rnn` via `raw_rnn` looks like this: 855 | 856 | ```python 857 | inputs = tf.placeholder(shape=(max_time, batch_size, input_depth), 858 | dtype=tf.float32) 859 | sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32) 860 | inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time) 861 | inputs_ta = inputs_ta.unstack(inputs) 862 | 863 | cell = tf.contrib.rnn.LSTMCell(num_units) 864 | 865 | def loop_fn(time, cell_output, cell_state, loop_state): 866 | emit_output = cell_output # == None for time == 0 867 | if cell_output is None: # time == 0 868 | next_cell_state = cell.zero_state(batch_size, tf.float32) 869 | else: 870 | next_cell_state = cell_state 871 | elements_finished = (time >= sequence_length) 872 | finished = tf.reduce_all(elements_finished) 873 | next_input = tf.cond( 874 | finished, 875 | lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32), 876 | lambda: inputs_ta.read(time)) 877 | next_loop_state = None 878 | return (elements_finished, next_input, next_cell_state, 879 | emit_output, next_loop_state) 880 | 881 | outputs_ta, final_state, _ = raw_rnn(cell, loop_fn) 882 | outputs = outputs_ta.stack() 883 | ``` 884 | 885 | Args: 886 | cell: An instance of RNNCell. 887 | loop_fn: A callable that takes inputs 888 | `(time, cell_output, cell_state, loop_state)` 889 | and returns the tuple 890 | `(finished, next_input, next_cell_state, emit_output, next_loop_state)`. 891 | Here `time` is an int32 scalar `Tensor`, `cell_output` is a 892 | `Tensor` or (possibly nested) tuple of tensors as determined by 893 | `cell.output_size`, and `cell_state` is a `Tensor` 894 | or (possibly nested) tuple of tensors, as determined by the `loop_fn` 895 | on its first call (and should match `cell.state_size`). 896 | The outputs are: `finished`, a boolean `Tensor` of 897 | shape `[batch_size]`, `next_input`: the next input to feed to `cell`, 898 | `next_cell_state`: the next state to feed to `cell`, 899 | and `emit_output`: the output to store for this iteration. 900 | 901 | Note that `emit_output` should be a `Tensor` or (possibly nested) 902 | tuple of tensors with shapes and structure matching `cell.output_size` 903 | and `cell_output` above. The parameter `cell_state` and output 904 | `next_cell_state` may be either a single or (possibly nested) tuple 905 | of tensors. The parameter `loop_state` and 906 | output `next_loop_state` may be either a single or (possibly nested) tuple 907 | of `Tensor` and `TensorArray` objects. This last parameter 908 | may be ignored by `loop_fn` and the return value may be `None`. If it 909 | is not `None`, then the `loop_state` will be propagated through the RNN 910 | loop, for use purely by `loop_fn` to keep track of its own state. 911 | The `next_loop_state` parameter returned may be `None`. 912 | 913 | The first call to `loop_fn` will be `time = 0`, `cell_output = None`, 914 | `cell_state = None`, and `loop_state = None`. For this call: 915 | The `next_cell_state` value should be the value with which to initialize 916 | the cell's state. It may be a final state from a previous RNN or it 917 | may be the output of `cell.zero_state()`. It should be a 918 | (possibly nested) tuple structure of tensors. 919 | If `cell.state_size` is an integer, this must be 920 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 921 | If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of 922 | appropriate type and shape `[batch_size] + cell.state_size`. 
923 | If `cell.state_size` is a (possibly nested) tuple of ints or 924 | `TensorShape`, this will be a tuple having the corresponding shapes. 925 | The `emit_output` value may be either `None` or a (possibly nested) 926 | tuple structure of tensors, e.g., 927 | `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`. 928 | If this first `emit_output` return value is `None`, 929 | then the `emit_ta` result of `raw_rnn` will have the same structure and 930 | dtypes as `cell.output_size`. Otherwise `emit_ta` will have the same 931 | structure, shapes (prepended with a `batch_size` dimension), and dtypes 932 | as `emit_output`. The actual values returned for `emit_output` at this 933 | initializing call are ignored. Note, this emit structure must be 934 | consistent across all time steps. 935 | 936 | parallel_iterations: (Default: 32). The number of iterations to run in 937 | parallel. Those operations which do not have any temporal dependency 938 | and can be run in parallel, will be. This parameter trades off 939 | time for space. Values >> 1 use more memory but take less time, 940 | while smaller values use less memory but computations take longer. 941 | swap_memory: Transparently swap the tensors produced in forward inference 942 | but needed for back prop from GPU to CPU. This allows training RNNs 943 | which would typically not fit on a single GPU, with very minimal (or no) 944 | performance penalty. 945 | scope: VariableScope for the created subgraph; defaults to "rnn". 946 | 947 | Returns: 948 | A tuple `(emit_ta, final_state, final_loop_state)` where: 949 | 950 | `emit_ta`: The RNN output `TensorArray`. 951 | If `loop_fn` returns a (possibly nested) set of Tensors for 952 | `emit_output` during initialization, (inputs `time = 0`, 953 | `cell_output = None`, and `loop_state = None`), then `emit_ta` will 954 | have the same structure, dtypes, and shapes as `emit_output` instead. 955 | If `loop_fn` returns `emit_output = None` during this call, 956 | the structure of `cell.output_size` is used: 957 | If `cell.output_size` is a (possibly nested) tuple of integers 958 | or `TensorShape` objects, then `emit_ta` will be a tuple having the 959 | same structure as `cell.output_size`, containing TensorArrays whose 960 | elements' shapes correspond to the shape data in `cell.output_size`. 961 | 962 | `final_state`: The final cell state. If `cell.state_size` is an int, this 963 | will be shaped `[batch_size, cell.state_size]`. If it is a 964 | `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. 965 | If it is a (possibly nested) tuple of ints or `TensorShape`, this will 966 | be a tuple having the corresponding shapes. 967 | 968 | `final_loop_state`: The final loop state as returned by `loop_fn`. 969 | 970 | Raises: 971 | TypeError: If `cell` is not an instance of RNNCell, or `loop_fn` is not 972 | a `callable`. 973 | """ 974 | 975 | if not _like_rnncell(cell): 976 | raise TypeError("cell must be an instance of RNNCell") 977 | if not callable(loop_fn): 978 | raise TypeError("loop_fn must be a callable") 979 | 980 | parallel_iterations = parallel_iterations or 32 981 | 982 | # Create a new scope in which the caching device is either 983 | # determined by the parent scope, or is set to place the cached 984 | # Variable using the same placement as for the rest of the RNN. 
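  # (This mirrors the caching-device setup used by dynamic_rnn above.)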
985 | with vs.variable_scope(scope or "rnn") as varscope: 986 | if varscope.caching_device is None: 987 | varscope.set_caching_device(lambda op: op.device) 988 | 989 | time = constant_op.constant(0, dtype=dtypes.int32) 990 | (elements_finished, next_input, initial_state, emit_structure, 991 | init_loop_state) = loop_fn( 992 | time, None, None, None) # time, cell_output, cell_state, loop_state 993 | flat_input = nest.flatten(next_input) 994 | 995 | # Need a surrogate loop state for the while_loop if none is available. 996 | loop_state = (init_loop_state if init_loop_state is not None 997 | else constant_op.constant(0, dtype=dtypes.int32)) 998 | 999 | input_shape = [input_.get_shape() for input_ in flat_input] 1000 | static_batch_size = input_shape[0][0] 1001 | 1002 | for input_shape_i in input_shape: 1003 | # Static verification that batch sizes all match 1004 | static_batch_size.merge_with(input_shape_i[0]) 1005 | 1006 | batch_size = static_batch_size.value 1007 | if batch_size is None: 1008 | batch_size = array_ops.shape(flat_input[0])[0] 1009 | 1010 | nest.assert_same_structure(initial_state, cell.state_size) 1011 | state = initial_state 1012 | flat_state = nest.flatten(state) 1013 | flat_state = [ops.convert_to_tensor(s) for s in flat_state] 1014 | state = nest.pack_sequence_as(structure=state, 1015 | flat_sequence=flat_state) 1016 | 1017 | if emit_structure is not None: 1018 | flat_emit_structure = nest.flatten(emit_structure) 1019 | flat_emit_size = [emit.shape if emit.shape.is_fully_defined() else 1020 | array_ops.shape(emit) for emit in flat_emit_structure] 1021 | flat_emit_dtypes = [emit.dtype for emit in flat_emit_structure] 1022 | else: 1023 | emit_structure = cell.output_size 1024 | flat_emit_size = nest.flatten(emit_structure) 1025 | flat_emit_dtypes = [flat_state[0].dtype] * len(flat_emit_size) 1026 | 1027 | flat_emit_ta = [ 1028 | tensor_array_ops.TensorArray( 1029 | dtype=dtype_i, dynamic_size=True, size=0, name="rnn_output_%d" % i) 1030 | for i, dtype_i in enumerate(flat_emit_dtypes)] 1031 | emit_ta = nest.pack_sequence_as(structure=emit_structure, 1032 | flat_sequence=flat_emit_ta) 1033 | flat_zero_emit = [ 1034 | array_ops.zeros(_concat(batch_size, size_i), dtype_i) 1035 | for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)] 1036 | zero_emit = nest.pack_sequence_as(structure=emit_structure, 1037 | flat_sequence=flat_zero_emit) 1038 | 1039 | def condition(unused_time, elements_finished, *_): 1040 | return math_ops.logical_not(math_ops.reduce_all(elements_finished)) 1041 | 1042 | def body(time, elements_finished, current_input, 1043 | emit_ta, state, loop_state): 1044 | """Internal while loop body for raw_rnn. 1045 | 1046 | Args: 1047 | time: time scalar. 1048 | elements_finished: batch-size vector. 1049 | current_input: possibly nested tuple of input tensors. 1050 | emit_ta: possibly nested tuple of output TensorArrays. 1051 | state: possibly nested tuple of state tensors. 1052 | loop_state: possibly nested tuple of loop state tensors. 1053 | 1054 | Returns: 1055 | Tuple having the same size as Args but with updated values. 
1056 | """ 1057 | (next_output, cell_state) = cell(current_input, state) 1058 | 1059 | nest.assert_same_structure(state, cell_state) 1060 | nest.assert_same_structure(cell.output_size, next_output) 1061 | 1062 | next_time = time + 1 1063 | (next_finished, next_input, next_state, emit_output, 1064 | next_loop_state) = loop_fn( 1065 | next_time, next_output, cell_state, loop_state) 1066 | 1067 | nest.assert_same_structure(state, next_state) 1068 | nest.assert_same_structure(current_input, next_input) 1069 | nest.assert_same_structure(emit_ta, emit_output) 1070 | 1071 | # If loop_fn returns None for next_loop_state, just reuse the 1072 | # previous one. 1073 | loop_state = loop_state if next_loop_state is None else next_loop_state 1074 | 1075 | def _copy_some_through(current, candidate): 1076 | """Copy some tensors through via array_ops.where.""" 1077 | def copy_fn(cur_i, cand_i): 1078 | with ops.colocate_with(cand_i): 1079 | return array_ops.where(elements_finished, cur_i, cand_i) 1080 | return nest.map_structure(copy_fn, current, candidate) 1081 | 1082 | emit_output = _copy_some_through(zero_emit, emit_output) 1083 | next_state = _copy_some_through(state, next_state) 1084 | 1085 | emit_ta = nest.map_structure( 1086 | lambda ta, emit: ta.write(time, emit), emit_ta, emit_output) 1087 | 1088 | elements_finished = math_ops.logical_or(elements_finished, next_finished) 1089 | 1090 | return (next_time, elements_finished, next_input, 1091 | emit_ta, next_state, loop_state) 1092 | 1093 | returned = control_flow_ops.while_loop( 1094 | condition, body, loop_vars=[ 1095 | time, elements_finished, next_input, 1096 | emit_ta, state, loop_state], 1097 | parallel_iterations=parallel_iterations, 1098 | swap_memory=swap_memory) 1099 | 1100 | (emit_ta, final_state, final_loop_state) = returned[-3:] 1101 | 1102 | if init_loop_state is None: 1103 | final_loop_state = None 1104 | 1105 | return (emit_ta, final_state, final_loop_state) 1106 | 1107 | 1108 | def static_rnn(cell, 1109 | inputs, 1110 | initial_state=None, 1111 | dtype=None, 1112 | sequence_length=None, 1113 | scope=None): 1114 | """Creates a recurrent neural network specified by RNNCell `cell`. 1115 | 1116 | The simplest form of RNN network generated is: 1117 | 1118 | ```python 1119 | state = cell.zero_state(...) 1120 | outputs = [] 1121 | for input_ in inputs: 1122 | output, state = cell(input_, state) 1123 | outputs.append(output) 1124 | return (outputs, state) 1125 | ``` 1126 | However, a few other options are available: 1127 | 1128 | An initial state can be provided. 1129 | If the sequence_length vector is provided, dynamic calculation is performed. 1130 | This method of calculation does not compute the RNN steps past the maximum 1131 | sequence length of the minibatch (thus saving computational time), 1132 | and properly propagates the state at an example's sequence length 1133 | to the final state output. 1134 | 1135 | The dynamic calculation performed is, at time `t` for batch row `b`, 1136 | 1137 | ```python 1138 | (output, state)(b, t) = 1139 | (t >= sequence_length(b)) 1140 | ? (zeros(cell.output_size), states(b, sequence_length(b) - 1)) 1141 | : cell(input(b, t), state(b, t - 1)) 1142 | ``` 1143 | 1144 | Args: 1145 | cell: An instance of RNNCell. 1146 | inputs: A length T list of inputs, each a `Tensor` of shape 1147 | `[batch_size, input_size]`, or a nested tuple of such elements. 1148 | initial_state: (optional) An initial state for the RNN. 
1149 | If `cell.state_size` is an integer, this must be 1150 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 1151 | If `cell.state_size` is a tuple, this should be a tuple of 1152 | tensors having shapes `[batch_size, s] for s in cell.state_size`. 1153 | dtype: (optional) The data type for the initial state and expected output. 1154 | Required if initial_state is not provided or RNN state has a heterogeneous 1155 | dtype. 1156 | sequence_length: Specifies the length of each sequence in inputs. 1157 | An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`. 1158 | scope: VariableScope for the created subgraph; defaults to "rnn". 1159 | 1160 | Returns: 1161 | A pair (outputs, state) where: 1162 | 1163 | - outputs is a length T list of outputs (one for each input), or a nested 1164 | tuple of such elements. 1165 | - state is the final state 1166 | 1167 | Raises: 1168 | TypeError: If `cell` is not an instance of RNNCell. 1169 | ValueError: If `inputs` is `None` or an empty list, or if the input depth 1170 | (column size) cannot be inferred from inputs via shape inference. 1171 | """ 1172 | 1173 | if not _like_rnncell(cell): 1174 | raise TypeError("cell must be an instance of RNNCell") 1175 | if not nest.is_sequence(inputs): 1176 | raise TypeError("inputs must be a sequence") 1177 | if not inputs: 1178 | raise ValueError("inputs must not be empty") 1179 | 1180 | outputs = [] 1181 | # Create a new scope in which the caching device is either 1182 | # determined by the parent scope, or is set to place the cached 1183 | # Variable using the same placement as for the rest of the RNN. 1184 | with vs.variable_scope(scope or "rnn") as varscope: 1185 | if varscope.caching_device is None: 1186 | varscope.set_caching_device(lambda op: op.device) 1187 | 1188 | # Obtain the first sequence of the input 1189 | first_input = inputs 1190 | while nest.is_sequence(first_input): 1191 | first_input = first_input[0] 1192 | 1193 | # Temporarily avoid EmbeddingWrapper and seq2seq badness 1194 | # TODO(lukaszkaiser): remove EmbeddingWrapper 1195 | if first_input.get_shape().ndims != 1: 1196 | 1197 | input_shape = first_input.get_shape().with_rank_at_least(2) 1198 | fixed_batch_size = input_shape[0] 1199 | 1200 | flat_inputs = nest.flatten(inputs) 1201 | for flat_input in flat_inputs: 1202 | input_shape = flat_input.get_shape().with_rank_at_least(2) 1203 | batch_size, input_size = input_shape[0], input_shape[1:] 1204 | fixed_batch_size.merge_with(batch_size) 1205 | for i, size in enumerate(input_size): 1206 | if size.value is None: 1207 | raise ValueError( 1208 | "Input size (dimension %d of inputs) must be accessible via " 1209 | "shape inference, but saw value None." 
% i) 1210 | else: 1211 | fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0] 1212 | 1213 | if fixed_batch_size.value: 1214 | batch_size = fixed_batch_size.value 1215 | else: 1216 | batch_size = array_ops.shape(first_input)[0] 1217 | if initial_state is not None: 1218 | state = initial_state 1219 | else: 1220 | if not dtype: 1221 | raise ValueError("If no initial_state is provided, " 1222 | "dtype must be specified") 1223 | state = cell.zero_state(batch_size, dtype) 1224 | 1225 | if sequence_length is not None: # Prepare variables 1226 | sequence_length = ops.convert_to_tensor( 1227 | sequence_length, name="sequence_length") 1228 | if sequence_length.get_shape().ndims not in (None, 1): 1229 | raise ValueError( 1230 | "sequence_length must be a vector of length batch_size") 1231 | 1232 | def _create_zero_output(output_size): 1233 | # convert int to TensorShape if necessary 1234 | size = _concat(batch_size, output_size) 1235 | output = array_ops.zeros( 1236 | array_ops.stack(size), _infer_state_dtype(dtype, state)) 1237 | shape = _concat(fixed_batch_size.value, output_size, static=True) 1238 | output.set_shape(tensor_shape.TensorShape(shape)) 1239 | return output 1240 | 1241 | output_size = cell.output_size 1242 | flat_output_size = nest.flatten(output_size) 1243 | flat_zero_output = tuple( 1244 | _create_zero_output(size) for size in flat_output_size) 1245 | zero_output = nest.pack_sequence_as( 1246 | structure=output_size, flat_sequence=flat_zero_output) 1247 | 1248 | sequence_length = math_ops.to_int32(sequence_length) 1249 | min_sequence_length = math_ops.reduce_min(sequence_length) 1250 | max_sequence_length = math_ops.reduce_max(sequence_length) 1251 | 1252 | for time, input_ in enumerate(inputs): 1253 | if time > 0: 1254 | varscope.reuse_variables() 1255 | # pylint: disable=cell-var-from-loop 1256 | call_cell = lambda: cell(input_, state) 1257 | # pylint: enable=cell-var-from-loop 1258 | if sequence_length is not None: 1259 | (output, state) = _rnn_step( 1260 | time=time, 1261 | sequence_length=sequence_length, 1262 | min_sequence_length=min_sequence_length, 1263 | max_sequence_length=max_sequence_length, 1264 | zero_output=zero_output, 1265 | state=state, 1266 | call_cell=call_cell, 1267 | state_size=cell.state_size) 1268 | else: 1269 | (output, state) = call_cell() 1270 | 1271 | outputs.append(output) 1272 | 1273 | return (outputs, state) 1274 | 1275 | 1276 | def static_state_saving_rnn(cell, 1277 | inputs, 1278 | state_saver, 1279 | state_name, 1280 | sequence_length=None, 1281 | scope=None): 1282 | """RNN that accepts a state saver for time-truncated RNN calculation. 1283 | 1284 | Args: 1285 | cell: An instance of `RNNCell`. 1286 | inputs: A length T list of inputs, each a `Tensor` of shape 1287 | `[batch_size, input_size]`. 1288 | state_saver: A state saver object with methods `state` and `save_state`. 1289 | state_name: Python string or tuple of strings. The name to use with the 1290 | state_saver. If the cell returns tuples of states (i.e., 1291 | `cell.state_size` is a tuple) then `state_name` should be a tuple of 1292 | strings having the same length as `cell.state_size`. Otherwise it should 1293 | be a single string. 1294 | sequence_length: (optional) An int32/int64 vector size [batch_size]. 1295 | See the documentation for rnn() for more details about sequence_length. 1296 | scope: VariableScope for the created subgraph; defaults to "rnn". 
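
  Example (illustrative sketch; assumes a `state_saver` produced by an input
  pipeline such as `tf.train.batch_sequences_with_states`, and an LSTM cell
  whose `LSTMStateTuple` state maps onto two saved state names):

  ```python
  outputs, state = static_state_saving_rnn(
      cell, inputs, state_saver=state_saver,
      state_name=("lstm_state_c", "lstm_state_h"))
  ```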
1297 | 1298 | Returns: 1299 | A pair (outputs, state) where: 1300 | outputs is a length T list of outputs (one for each input) 1301 | states is the final state 1302 | 1303 | Raises: 1304 | TypeError: If `cell` is not an instance of RNNCell. 1305 | ValueError: If `inputs` is `None` or an empty list, or if the arity and 1306 | type of `state_name` does not match that of `cell.state_size`. 1307 | """ 1308 | state_size = cell.state_size 1309 | state_is_tuple = nest.is_sequence(state_size) 1310 | state_name_tuple = nest.is_sequence(state_name) 1311 | 1312 | if state_is_tuple != state_name_tuple: 1313 | raise ValueError("state_name should be the same type as cell.state_size. " 1314 | "state_name: %s, cell.state_size: %s" % (str(state_name), 1315 | str(state_size))) 1316 | 1317 | if state_is_tuple: 1318 | state_name_flat = nest.flatten(state_name) 1319 | state_size_flat = nest.flatten(state_size) 1320 | 1321 | if len(state_name_flat) != len(state_size_flat): 1322 | raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d" % 1323 | (len(state_name_flat), len(state_size_flat))) 1324 | 1325 | initial_state = nest.pack_sequence_as( 1326 | structure=state_size, 1327 | flat_sequence=[state_saver.state(s) for s in state_name_flat]) 1328 | else: 1329 | initial_state = state_saver.state(state_name) 1330 | 1331 | (outputs, state) = static_rnn( 1332 | cell, 1333 | inputs, 1334 | initial_state=initial_state, 1335 | sequence_length=sequence_length, 1336 | scope=scope) 1337 | 1338 | if state_is_tuple: 1339 | flat_state = nest.flatten(state) 1340 | state_name = nest.flatten(state_name) 1341 | save_state = [ 1342 | state_saver.save_state(name, substate) 1343 | for name, substate in zip(state_name, flat_state) 1344 | ] 1345 | else: 1346 | save_state = [state_saver.save_state(state_name, state)] 1347 | 1348 | with ops.control_dependencies(save_state): 1349 | last_output = outputs[-1] 1350 | flat_last_output = nest.flatten(last_output) 1351 | flat_last_output = [ 1352 | array_ops.identity(output) for output in flat_last_output 1353 | ] 1354 | outputs[-1] = nest.pack_sequence_as( 1355 | structure=last_output, flat_sequence=flat_last_output) 1356 | 1357 | return (outputs, state) 1358 | 1359 | 1360 | def static_bidirectional_rnn(cell_fw, 1361 | cell_bw, 1362 | inputs, 1363 | initial_state_fw=None, 1364 | initial_state_bw=None, 1365 | dtype=None, 1366 | sequence_length=None, 1367 | scope=None): 1368 | """Creates a bidirectional recurrent neural network. 1369 | 1370 | Similar to the unidirectional case above (rnn) but takes input and builds 1371 | independent forward and backward RNNs with the final forward and backward 1372 | outputs depth-concatenated, such that the output will have the format 1373 | [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of 1374 | forward and backward cell must match. The initial state for both directions 1375 | is zero by default (but can be set optionally) and no intermediate states are 1376 | ever returned -- the network is fully unrolled for the given (passed in) 1377 | length(s) of the sequence(s) or completely unrolled if length(s) is not given. 1378 | 1379 | Args: 1380 | cell_fw: An instance of RNNCell, to be used for forward direction. 1381 | cell_bw: An instance of RNNCell, to be used for backward direction. 1382 | inputs: A length T list of inputs, each a tensor of shape 1383 | [batch_size, input_size], or a nested tuple of such elements. 1384 | initial_state_fw: (optional) An initial state for the forward RNN. 
1385 |     This must be a tensor of appropriate type and shape
1386 |     `[batch_size, cell_fw.state_size]`.
1387 |     If `cell_fw.state_size` is a tuple, this should be a tuple of
1388 |     tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
1389 |   initial_state_bw: (optional) Same as for `initial_state_fw`, but using
1390 |     the corresponding properties of `cell_bw`.
1391 |   dtype: (optional) The data type for the initial state. Required if
1392 |     either of the initial states are not provided.
1393 |   sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
1394 |     containing the actual lengths for each of the sequences.
1395 |   scope: VariableScope for the created subgraph; defaults to
1396 |     "bidirectional_rnn"
1397 | 
1398 |   Returns:
1399 |     A tuple (outputs, output_state_fw, output_state_bw) where:
1400 |       outputs is a length `T` list of outputs (one for each input), which
1401 |         are depth-concatenated forward and backward outputs.
1402 |       output_state_fw is the final state of the forward rnn.
1403 |       output_state_bw is the final state of the backward rnn.
1404 | 
1405 |   Raises:
1406 |     TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
1407 |     ValueError: If inputs is None or an empty list.
1408 |   """
1409 | 
1410 |   if not _like_rnncell(cell_fw):
1411 |     raise TypeError("cell_fw must be an instance of RNNCell")
1412 |   if not _like_rnncell(cell_bw):
1413 |     raise TypeError("cell_bw must be an instance of RNNCell")
1414 |   if not nest.is_sequence(inputs):
1415 |     raise TypeError("inputs must be a sequence")
1416 |   if not inputs:
1417 |     raise ValueError("inputs must not be empty")
1418 | 
1419 |   with vs.variable_scope(scope or "bidirectional_rnn"):
1420 |     # Forward direction
1421 |     with vs.variable_scope("fw") as fw_scope:
1422 |       output_fw, output_state_fw = static_rnn(
1423 |           cell_fw,
1424 |           inputs,
1425 |           initial_state_fw,
1426 |           dtype,
1427 |           sequence_length,
1428 |           scope=fw_scope)
1429 | 
1430 |     # Backward direction
1431 |     with vs.variable_scope("bw") as bw_scope:
1432 |       reversed_inputs = _reverse_seq(inputs, sequence_length)
1433 |       tmp, output_state_bw = static_rnn(
1434 |           cell_bw,
1435 |           reversed_inputs,
1436 |           initial_state_bw,
1437 |           dtype,
1438 |           sequence_length,
1439 |           scope=bw_scope)
1440 | 
1441 |   output_bw = _reverse_seq(tmp, sequence_length)
1442 |   # Concat each of the forward/backward outputs
1443 |   flat_output_fw = nest.flatten(output_fw)
1444 |   flat_output_bw = nest.flatten(output_bw)
1445 | 
1446 |   flat_outputs = tuple(
1447 |       array_ops.concat([fw, bw], 1)
1448 |       for fw, bw in zip(flat_output_fw, flat_output_bw))
1449 | 
1450 |   outputs = nest.pack_sequence_as(
1451 |       structure=output_fw, flat_sequence=flat_outputs)
1452 | 
1453 |   return (outputs, output_state_fw, output_state_bw)
1454 | 
--------------------------------------------------------------------------------
/script/shuffle.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import random
4 | 
5 | import tempfile
6 | from subprocess import call
7 | 
8 | 
9 | def main(file, temporary=False):
10 |     tf_os, tpath = tempfile.mkstemp(dir=os.path.expanduser('~/DIN-V2-CODE'))
11 |     tf = open(tpath, 'w')
12 | 
13 |     fd = open(file, "r")
14 |     for l in fd:
15 |         print >> tf, l.strip("\n")
16 |     tf.close()
17 | 
18 |     lines = open(tpath, 'r').readlines()
19 |     random.shuffle(lines)
20 |     if temporary:
21 |         path, filename = os.path.split(os.path.realpath(file))
22 |         fd = tempfile.TemporaryFile(prefix=filename + '.shuf', dir=path)
23 |     else:
24 |         fd = open(file + '.shuf', 'w')
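    # `fd` now points at the shuffled-output destination: a TemporaryFile when
    # `temporary` is set, otherwise `<file>.shuf`; the loop below fills it.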
25 | 
26 |     for l in lines:
27 |         s = l.strip("\n")
28 |         print >> fd, s
29 | 
30 |     if temporary:
31 |         fd.seek(0)
32 |     else:
33 |         fd.close()
34 | 
35 |     os.remove(tpath)
36 | 
37 |     return fd
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     main(sys.argv[1])
42 | 
43 | 
--------------------------------------------------------------------------------
/script/split_by_user.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | fi = open("local_test", "r")
4 | ftrain = open("local_train_splitByUser", "w")
5 | ftest = open("local_test_splitByUser", "w")
6 | 
7 | while True:
8 |     rand_int = random.randint(1, 10)
9 |     noclk_line = fi.readline().strip()
10 |     clk_line = fi.readline().strip()
11 |     if noclk_line == "" or clk_line == "":
12 |         break
13 |     if rand_int == 2:
14 |         print >> ftest, noclk_line
15 |         print >> ftest, clk_line
16 |     else:
17 |         print >> ftrain, noclk_line
18 |         print >> ftrain, clk_line
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/script/train.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from data_iterator import DataIterator
3 | import tensorflow as tf
4 | from model import *
5 | import time
6 | import random
7 | import sys
8 | from utils import *
9 | 
10 | EMBEDDING_DIM = 18
11 | HIDDEN_SIZE = 18 * 2
12 | ATTENTION_SIZE = 18 * 2
13 | best_auc = 0.0
14 | 
15 | def prepare_data(input, target, maxlen = None, return_neg = False):
16 |     # input: a list of samples; inp[3]/inp[4] are the behavior sequences,
17 |     lengths_x = [len(s[4]) for s in input]
18 |     seqs_mid = [inp[3] for inp in input]
19 |     seqs_cat = [inp[4] for inp in input]
20 |     noclk_seqs_mid = [inp[5] for inp in input]
21 |     noclk_seqs_cat = [inp[6] for inp in input]
22 |     seqs_item_carte = [inp[7][0] for inp in input]
23 |     seqs_cate_carte = [inp[7][1] for inp in input]
24 | 
25 |     if maxlen is not None:
26 |         new_seqs_mid = []
27 |         new_seqs_cat = []
28 |         new_noclk_seqs_mid = []
29 |         new_noclk_seqs_cat = []
30 |         new_lengths_x = []
31 |         new_seqs_item_carte = []
32 |         new_seqs_cate_carte = []
33 |         for l_x, inp in zip(lengths_x, input):
34 |             if l_x > maxlen:
35 |                 new_seqs_mid.append(inp[3][l_x - maxlen:])
36 |                 new_seqs_cat.append(inp[4][l_x - maxlen:])
37 |                 new_noclk_seqs_mid.append(inp[5][l_x - maxlen:])
38 |                 new_noclk_seqs_cat.append(inp[6][l_x - maxlen:])
39 |                 new_seqs_item_carte.append(inp[7][0][l_x - maxlen:])
40 |                 new_seqs_cate_carte.append(inp[7][1][l_x - maxlen:])
41 |                 new_lengths_x.append(maxlen)
42 |             else:
43 |                 new_seqs_mid.append(inp[3])
44 |                 new_seqs_cat.append(inp[4])
45 |                 new_noclk_seqs_mid.append(inp[5])
46 |                 new_noclk_seqs_cat.append(inp[6])
47 |                 new_seqs_item_carte.append(inp[7][0])
48 |                 new_seqs_cate_carte.append(inp[7][1])
49 |                 new_lengths_x.append(l_x)
50 |         lengths_x = new_lengths_x
51 |         seqs_mid = new_seqs_mid
52 |         seqs_cat = new_seqs_cat
53 |         noclk_seqs_mid = new_noclk_seqs_mid
54 |         noclk_seqs_cat = new_noclk_seqs_cat
55 |         seqs_item_carte = new_seqs_item_carte
56 |         seqs_cate_carte = new_seqs_cate_carte
57 | 
58 |     if len(lengths_x) < 1:
59 |         return None, None, None, None
60 | 
61 |     n_samples = len(seqs_mid)
62 |     maxlen_x = numpy.max(lengths_x)
63 |     neg_samples = len(noclk_seqs_mid[0][0])
64 | 
65 |     mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
66 |     cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
67 |     noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
68 |     noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
69 |     item_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
70 |     cate_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
71 |     mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32')
72 |     for idx, [s_x, s_y, no_sx, no_sy, i_c, c_c] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat, seqs_item_carte, seqs_cate_carte)):
73 |         mid_mask[idx, :lengths_x[idx]] = 1.
74 |         mid_his[idx, :lengths_x[idx]] = s_x
75 |         cat_his[idx, :lengths_x[idx]] = s_y
76 |         noclk_mid_his[idx, :lengths_x[idx], :] = no_sx
77 |         noclk_cat_his[idx, :lengths_x[idx], :] = no_sy
78 |         item_carte[idx, :lengths_x[idx]] = i_c
79 |         cate_carte[idx, :lengths_x[idx]] = c_c
80 | 
81 |     uids = numpy.array([inp[0] for inp in input])
82 |     mids = numpy.array([inp[1] for inp in input])
83 |     cats = numpy.array([inp[2] for inp in input])
84 | 
85 |     carte = numpy.stack([item_carte, cate_carte], axis=1)
86 | 
87 |     if return_neg:
88 |         return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his, carte
89 | 
90 |     else:
91 |         return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), carte
92 | 
93 | def eval(sess, test_data, model, model_path):
94 | 
95 |     loss_sum = 0.
96 |     accuracy_sum = 0.
97 |     aux_loss_sum = 0.
98 |     nums = 0
99 |     stored_arr = []
100 |     for src, tgt in test_data:
101 |         nums += 1
102 |         uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, return_neg=True)
103 |         prob, loss, acc, aux_loss = model.calculate(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte])
104 |         loss_sum += loss
105 |         aux_loss_sum += aux_loss
106 |         accuracy_sum += acc
107 |         prob_1 = prob[:, 0].tolist()
108 |         target_1 = target[:, 0].tolist()
109 |         for p, t in zip(prob_1, target_1):
110 |             stored_arr.append([p, t])
111 |     test_auc = calc_auc(stored_arr)
112 |     accuracy_sum = accuracy_sum / nums
113 |     loss_sum = loss_sum / nums
114 |     aux_loss_sum = aux_loss_sum / nums
115 |     global best_auc
116 |     if best_auc < test_auc:
117 |         best_auc = test_auc
118 |         #model.save(sess, model_path)
119 |     return test_auc, loss_sum, accuracy_sum, aux_loss_sum
120 | 
121 | def train(
122 |         train_file = "local_train_splitByUser",
123 |         test_file = "local_test_splitByUser",
124 |         uid_voc = "uid_voc.pkl",
125 |         mid_voc = "mid_voc.pkl",
126 |         cat_voc = "cat_voc.pkl",
127 |         batch_size = 128,
128 |         maxlen = 100,
129 |         test_iter = 8400,
130 |         save_iter = 8400,
131 |         model_type = 'DNN',
132 |         seed = 2,
133 | ):
134 |     model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)
135 |     best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
136 |     gpu_options = tf.GPUOptions(allow_growth=True)
137 |     with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
138 |         label_type = 1
139 |         train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False, label_type=label_type)
140 |         test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, label_type=label_type)
141 |         n_uid, n_mid, n_cat, n_carte = train_data.get_n()
142 |         if model_type == 'DNN':
143 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
144 |         elif model_type == 'Cartesion':
145 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False, use_cartes=True)
146 |         elif model_type == 'CAN+Cartesion':
147 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True, use_cartes=True)
148 |         elif model_type == 'CAN':
149 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
150 |         elif model_type == 'PNN':
151 |             model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
152 |         elif model_type == 'ONN':
153 |             model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
154 |         elif model_type == 'Wide':
155 |             model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
156 |         elif model_type == 'NCF':
157 |             model = Model_NCF(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
158 |         elif model_type == 'FM':
159 |             model = Model_FM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
160 |         elif model_type == 'FFM':
161 |             model = Model_FFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
162 |         elif model_type == 'DeepFM':
163 |             model = Model_DeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
164 |         elif model_type == 'DeepFFM':
165 |             model = Model_DeepFFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
166 |         elif model_type == 'xDeepFM':
167 |             model = Model_xDeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
168 |         # (Note: 'ONN' is matched by the earlier branch above, which
169 |         #  constructs Model_ONN with use_softmax=False.)
170 |         elif model_type == 'DIN':
171 |             model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
172 |         elif model_type == 'DIEN':
173 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
174 |         elif model_type == 'CAN+DIEN':
175 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
176 |         else:
177 |             print("Invalid model_type : %s" % model_type)
178 |             return
179 |         print("Model: ", model_type)
180 |         sess.run(tf.global_variables_initializer())
181 |         sess.run(tf.local_variables_initializer())
182 |         sys.stdout.flush()
183 | 
184 |         count()
185 |         start_time = time.time()
186 |         iter = 0
187 |         lr = 0.001
188 |         for itr in range(1):
189 |             loss_sum = 0.0
190 |             accuracy_sum = 0.
191 |             aux_loss_sum = 0.
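            # One pass over the training set: running averages are printed and
            # reset every 100 iterations, the model is evaluated every
            # `test_iter` iterations and checkpointed every `save_iter`
            # iterations, and the learning rate is halved after each pass.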
190 |             for src, tgt in train_data:
191 |                 uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen, return_neg=True)
192 |                 loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte])
193 |                 loss_sum += loss
194 |                 accuracy_sum += acc
195 |                 aux_loss_sum += aux_loss
196 |                 iter += 1
197 |                 sys.stdout.flush()
198 |                 if (iter % 100) == 0:
199 |                     print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100))
200 |                     loss_sum = 0.0
201 |                     accuracy_sum = 0.0
202 |                     aux_loss_sum = 0.0
203 |                 if (iter % test_iter) == 0:
204 |                     auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path)
205 |                     print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_))
206 |                     loss_sum = 0.0
207 |                     accuracy_sum = 0.0
208 |                     aux_loss_sum = 0.0
209 |                 if (iter % save_iter) == 0:
210 |                     print('save model iter: %d' % iter)
211 |                     model.save(sess, model_path + "--" + str(iter))
212 |             lr *= 0.5
213 | 
214 | def count_flops(graph):
215 |     flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation())
216 |     print('FLOPs: {}'.format(flops.total_float_ops))
217 | 
218 | def count():
219 |     total_parameters = 0
220 |     for variable in tf.trainable_variables():
221 |         # shape is an array of tf.Dimension
222 |         shape = variable.get_shape()
223 |         variable_parameters = 1
224 |         for dim in shape:
225 |             variable_parameters *= dim.value
226 |         total_parameters += variable_parameters
227 |     print("Parameter: ", total_parameters)
228 | 
229 | def test(
230 |         train_file = "local_train_splitByUser",
231 |         test_file = "local_test_splitByUser",
232 |         uid_voc = "uid_voc.pkl",
233 |         mid_voc = "mid_voc.pkl",
234 |         cat_voc = "cat_voc.pkl",
235 |         batch_size = 128,
236 |         maxlen = 100,
237 |         model_type = 'DNN',
238 |         seed = 2
239 | ):
240 | 
241 |     model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
242 |     gpu_options = tf.GPUOptions(allow_growth=True)
243 |     with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
244 |         train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
245 |         test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
246 |         n_uid, n_mid, n_cat, n_carte = train_data.get_n()  # get_n() returns four sizes here; pass n_carte through as in train()
247 |         if model_type == 'DNN':
248 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
249 |         elif model_type == 'PNN':
250 |             model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
251 |         elif model_type == 'Wide':
252 |             model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
253 |         elif model_type == 'DIN':
254 |             model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
255 |         elif model_type == 'DIN-V2-gru-att-gru':
256 |             model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
257 |         elif model_type == 'DIN-V2-gru-gru-att':
258 |             model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
259 |         elif model_type == 'DIN-V2-gru-qa-attGru':
260 |             model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
261 |         elif model_type == 'DIN-V2-gru-vec-attGru':
262 |             model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
263 |         elif model_type == 'DIEN':
264 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
265 |         else:
266 |             print("Invalid model_type: %s" % model_type)
267 |             return
268 |         model.restore(sess, model_path)
269 |         print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
270 | 
271 | if __name__ == '__main__':
272 |     if len(sys.argv) == 4:
273 |         SEED = int(sys.argv[3])
274 |     else:
275 |         SEED = 3
276 |     tf.set_random_seed(SEED)
277 |     numpy.random.seed(SEED)
278 |     random.seed(SEED)
279 |     if sys.argv[1] == 'train':
280 |         train(model_type=sys.argv[2], seed=SEED)
281 |     elif sys.argv[1] == 'test':
282 |         test(model_type=sys.argv[2], seed=SEED)
283 |     else:
284 |         print('do nothing...')
285 | 
286 | 
287 | 
--------------------------------------------------------------------------------
/script/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import *
3 | from tensorflow.python.ops.rnn_cell_impl import _Linear
4 | #from tensorflow import keras
5 | from tensorflow.python.ops import math_ops
6 | from tensorflow.python.ops import init_ops
7 | from tensorflow.python.ops import array_ops
8 | from tensorflow.python.ops import variable_scope as vs
9 | #from keras import backend as K
10 | 
11 | class QAAttGRUCell(RNNCell):
12 |     """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
13 |     Args:
14 |         num_units: int, The number of units in the GRU cell.
15 |         activation: Nonlinearity to use. Default: `tanh`.
16 |         reuse: (optional) Python boolean describing whether to reuse variables
17 |             in an existing scope. If not `True`, and the existing scope already has
18 |             the given variables, an error is raised.
19 |         kernel_initializer: (optional) The initializer to use for the weight and
20 |             projection matrices.
21 |         bias_initializer: (optional) The initializer to use for the bias.
22 |     """
23 | 
24 |     def __init__(self,
25 |                  num_units,
26 |                  activation=None,
27 |                  reuse=None,
28 |                  kernel_initializer=None,
29 |                  bias_initializer=None):
30 |         super(QAAttGRUCell, self).__init__(_reuse=reuse)
31 |         self._num_units = num_units
32 |         self._activation = activation or math_ops.tanh
33 |         self._kernel_initializer = kernel_initializer
34 |         self._bias_initializer = bias_initializer
35 |         self._gate_linear = None
36 |         self._candidate_linear = None
37 | 
38 |     @property
39 |     def state_size(self):
40 |         return self._num_units
41 | 
42 |     @property
43 |     def output_size(self):
44 |         return self._num_units
45 | 
46 |     def __call__(self, inputs, state, att_score):
47 |         return self.call(inputs, state, att_score)
48 | 
49 |     def call(self, inputs, state, att_score=None):
50 |         """Gated recurrent unit (GRU) with nunits cells."""
51 |         if self._gate_linear is None:
52 |             bias_ones = self._bias_initializer
53 |             if self._bias_initializer is None:
54 |                 bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
55 |             with vs.variable_scope("gates"):  # Reset gate and update gate.
56 |                 self._gate_linear = _Linear(
57 |                     [inputs, state],
58 |                     2 * self._num_units,
59 |                     True,
60 |                     bias_initializer=bias_ones,
61 |                     kernel_initializer=self._kernel_initializer)
62 | 
63 |         value = math_ops.sigmoid(self._gate_linear([inputs, state]))
64 |         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
65 | 
66 |         r_state = r * state
67 |         if self._candidate_linear is None:
68 |             with vs.variable_scope("candidate"):
69 |                 self._candidate_linear = _Linear(
70 |                     [inputs, r_state],
71 |                     self._num_units,
72 |                     True,
73 |                     bias_initializer=self._bias_initializer,
74 |                     kernel_initializer=self._kernel_initializer)
75 |         c = self._activation(self._candidate_linear([inputs, r_state]))
76 |         new_h = (1. - att_score) * state + att_score * c
77 |         return new_h, new_h
78 | 
79 | class VecAttGRUCell(RNNCell):
80 |     """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
81 |     Args:
82 |         num_units: int, The number of units in the GRU cell.
83 |         activation: Nonlinearity to use. Default: `tanh`.
84 |         reuse: (optional) Python boolean describing whether to reuse variables
85 |             in an existing scope. If not `True`, and the existing scope already has
86 |             the given variables, an error is raised.
87 |         kernel_initializer: (optional) The initializer to use for the weight and
88 |             projection matrices.
89 |         bias_initializer: (optional) The initializer to use for the bias.
90 |     """
91 | 
92 |     def __init__(self,
93 |                  num_units,
94 |                  activation=None,
95 |                  reuse=None,
96 |                  kernel_initializer=None,
97 |                  bias_initializer=None):
98 |         super(VecAttGRUCell, self).__init__(_reuse=reuse)
99 |         self._num_units = num_units
100 |         self._activation = activation or math_ops.tanh
101 |         self._kernel_initializer = kernel_initializer
102 |         self._bias_initializer = bias_initializer
103 |         self._gate_linear = None
104 |         self._candidate_linear = None
105 | 
106 |     @property
107 |     def state_size(self):
108 |         return self._num_units
109 | 
110 |     @property
111 |     def output_size(self):
112 |         return self._num_units
113 |     def __call__(self, inputs, state, att_score):
114 |         return self.call(inputs, state, att_score)
115 |     def call(self, inputs, state, att_score=None):
116 |         """Gated recurrent unit (GRU) with nunits cells."""
117 |         if self._gate_linear is None:
118 |             bias_ones = self._bias_initializer
119 |             if self._bias_initializer is None:
120 |                 bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
121 |             with vs.variable_scope("gates"):  # Reset gate and update gate.
122 |                 self._gate_linear = _Linear(
123 |                     [inputs, state],
124 |                     2 * self._num_units,
125 |                     True,
126 |                     bias_initializer=bias_ones,
127 |                     kernel_initializer=self._kernel_initializer)
128 | 
129 |         value = math_ops.sigmoid(self._gate_linear([inputs, state]))
130 |         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
131 | 
132 |         r_state = r * state
133 |         if self._candidate_linear is None:
134 |             with vs.variable_scope("candidate"):
135 |                 self._candidate_linear = _Linear(
136 |                     [inputs, r_state],
137 |                     self._num_units,
138 |                     True,
139 |                     bias_initializer=self._bias_initializer,
140 |                     kernel_initializer=self._kernel_initializer)
141 |         c = self._activation(self._candidate_linear([inputs, r_state]))
142 |         u = (1.0 - att_score) * u
143 |         new_h = u * state + (1 - u) * c
144 |         return new_h, new_h
145 | 
146 | def prelu(_x, scope=''):
147 |     """parametric ReLU activation"""
148 |     with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
149 |         _alpha = tf.get_variable("prelu_"+scope, shape=_x.get_shape()[-1],
150 |                                  dtype=_x.dtype, initializer=tf.constant_initializer(0.1))
151 |         return tf.maximum(0.0, _x) + _alpha * tf.minimum(0.0, _x)
152 | 
153 | def calc_auc(raw_arr):
154 |     """Compute AUC from scored samples.
155 | 
156 |     Args:
157 |         raw_arr: list of [score, label] pairs, where label is 1. for positives and 0. for negatives.
158 | 
159 |     Returns:
160 |         float: area under the ROC curve, accumulated with the trapezoidal rule.
161 |     """
162 | 
163 |     arr = sorted(raw_arr, key=lambda d: d[0], reverse=True)
164 |     pos, neg = 0., 0.
165 |     for record in arr:
166 |         if record[1] == 1.:
167 |             pos += 1
168 |         else:
169 |             neg += 1
170 | 
171 |     fp, tp = 0., 0.
172 |     xy_arr = []
173 |     for record in arr:
174 |         if record[1] == 1.:
175 |             tp += 1
176 |         else:
177 |             fp += 1
178 |         xy_arr.append([fp/neg, tp/pos])
179 | 
180 |     auc = 0.
181 |     prev_x = 0.
182 |     prev_y = 0.
183 |     for x, y in xy_arr:
184 |         if x != prev_x:
185 |             auc += ((x - prev_x) * (y + prev_y) / 2.)
186 |             prev_x = x
187 |         prev_y = y  # update every step so vertical ROC segments are handled correctly
188 | 
189 |     return auc
190 | 
191 | def attention(query, facts, attention_size, mask, stag='null', mode='LIST', softmax_stag=1, time_major=False, return_alphas=False):
192 |     if isinstance(facts, tuple):
193 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
194 |         facts = tf.concat(facts, 2)
195 | 
196 |     if time_major:
197 |         # (T,B,D) => (B,T,D)
198 |         facts = tf.transpose(facts, [1, 0, 2])
199 | 
200 |     mask = tf.equal(mask, tf.ones_like(mask))
201 |     hidden_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
202 |     input_size = query.get_shape().as_list()[-1]
203 | 
204 |     # Trainable parameters
205 |     w1 = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
206 |     w2 = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.1))
207 |     b = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
208 |     v = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
209 | 
210 |     with tf.name_scope('v'):
211 |         # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
212 |         # the shape of `tmp` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
213 |         tmp1 = tf.tensordot(facts, w1, axes=1)
214 |         tmp2 = tf.tensordot(query, w2, axes=1)
215 |         tmp2 = tf.reshape(tmp2, [-1, 1, tf.shape(tmp2)[-1]])
216 |         tmp = tf.tanh((tmp1 + tmp2) + b)
217 | 
218 |     # For each of the timestamps its vector of size A from `tmp` is reduced with `v` vector
219 |     v_dot_tmp = tf.tensordot(tmp, v, axes=1, name='v_dot_tmp')  # (B,T) shape
220 |     key_masks = mask  # [B, T]
221 |     # key_masks = tf.expand_dims(mask, 1) # [B, 1, T]
222 |     paddings = tf.ones_like(v_dot_tmp) * (-2 ** 32 + 1)
223 |     v_dot_tmp = tf.where(key_masks, v_dot_tmp, paddings)  # [B, T]
224 |     alphas = tf.nn.softmax(v_dot_tmp, name='alphas')  # (B,T) shape
225 | 
226 |     # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
227 |     #output = tf.reduce_sum(facts * tf.expand_dims(alphas, -1), 1)
228 |     output = facts * tf.expand_dims(alphas, -1)
229 |     output = tf.reshape(output, tf.shape(facts))
230 |     # output = output / (facts.get_shape().as_list()[-1] ** 0.5)
231 |     if not return_alphas:
232 |         return output
233 |     else:
234 |         return output, alphas
235 | 
236 | def din_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
237 |     if isinstance(facts, tuple):
238 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
239 |         facts = tf.concat(facts, 2)
240 |         print("query size mismatch")
241 |         query = tf.concat(values=[
242 |             query,
243 |             query,
244 |         ], axis=1)
245 | 
246 |     if time_major:
247 |         # (T,B,D) => (B,T,D)
248 |         facts = tf.transpose(facts, [1, 0, 2])
249 |     mask = tf.equal(mask, tf.ones_like(mask))
250 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
251 |     querry_size = query.get_shape().as_list()[-1]
252 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
253 |     queries = tf.reshape(queries, tf.shape(facts))
254 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
255 |     d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
256 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
257 |     d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
258 |     d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
259 |     scores = d_layer_3_all
260 |     # Mask
261 |     # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1]) # [B, T]
262 |     key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
263 |     paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
264 |     scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]
265 | 
266 |     # Scale
267 |     # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)
268 | 
269 |     # Activation
270 |     if softmax_stag:
271 |         scores = tf.nn.softmax(scores)  # [B, 1, T]
272 | 
273 |     # Weighted sum
274 |     if mode == 'SUM':
275 |         output = tf.matmul(scores, facts)  # [B, 1, H]
276 |         # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
277 |     else:
278 |         scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
279 |         output = facts * tf.expand_dims(scores, -1)
280 |         output = tf.reshape(output, tf.shape(facts))
281 |     return output
282 | 
283 | def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
284 |     if isinstance(facts, tuple):
285 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
286 |         facts = tf.concat(facts, 2)
287 |     if len(facts.get_shape().as_list()) == 2:
288 |         facts = tf.expand_dims(facts, 1)
289 | 
290 |     if time_major:
291 |         # (T,B,D) => (B,T,D)
292 |         facts = tf.transpose(facts, [1, 0, 2])
293 |     # Trainable parameters
294 |     mask = tf.equal(mask, tf.ones_like(mask))
295 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
296 |     querry_size = query.get_shape().as_list()[-1]
297 |     query = tf.layers.dense(query, facts_size, activation=None, name='f1' + stag)
298 |     query = prelu(query)
299 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
300 |     queries = tf.reshape(queries, tf.shape(facts))
301 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
302 |     d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
303 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
304 |     d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
305 |     d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
306 |     scores = d_layer_3_all
307 |     # Mask
308 |     # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1]) # [B, T]
309 |     key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
310 |     paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
311 |     if not forCnn:
312 |         scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]
313 | 
314 |     # Scale
315 |     # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)
316 | 
317 |     # Activation
318 |     if softmax_stag:
319 |         scores = tf.nn.softmax(scores)  # [B, 1, T]
320 | 
321 |     # Weighted sum
322 |     if mode == 'SUM':
323 |         output = tf.matmul(scores, facts)  # [B, 1, H]
324 |         # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
325 |     else:
326 |         scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
327 |         output = facts * tf.expand_dims(scores, -1)
328 |         output = tf.reshape(output, tf.shape(facts))
329 |     if return_alphas:
330 |         return output, scores
331 |     return output
332 | 
333 | def self_attention(facts, ATTENTION_SIZE, mask, stag='null'):
334 |     if len(facts.get_shape().as_list()) == 2:
335 |         facts = tf.expand_dims(facts, 1)
336 | 
337 |     def cond(batch, output, i):
338 |         return tf.less(i, tf.shape(batch)[1])
339 | 
340 |     def body(batch, output, i):
341 |         self_attention_tmp = din_fcn_attention(batch[:, i, :], batch[:, 0:i+1, :],
342 |                                                ATTENTION_SIZE, mask[:, 0:i+1], softmax_stag=1, stag=stag,
343 |                                                mode='LIST')
344 |         self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
345 |         output = output.write(i, self_attention_tmp)
346 |         return batch, output, i + 1
347 | 
348 |     output_ta = tf.TensorArray(dtype=tf.float32,
349 |                                size=0,
350 |                                dynamic_size=True,
351 |                                element_shape=(facts[:, 0, :].get_shape()))
352 |     _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
353 |     self_attention = output_op.stack()
354 |     self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
355 |     return self_attention
356 | 
357 | def self_all_attention(facts, ATTENTION_SIZE, mask, stag='null'):
358 |     if len(facts.get_shape().as_list()) == 2:
359 |         facts = tf.expand_dims(facts, 1)
360 | 
361 |     def cond(batch, output, i):
362 |         return tf.less(i, tf.shape(batch)[1])
363 | 
364 |     def body(batch, output, i):
365 |         self_attention_tmp = din_fcn_attention(batch[:, i, :], batch,
366 |                                                ATTENTION_SIZE, mask, softmax_stag=1, stag=stag,
367 |                                                mode='LIST')
368 |         self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
369 |         output = output.write(i, self_attention_tmp)
370 |         return batch, output, i + 1
371 | 
372 |     output_ta = tf.TensorArray(dtype=tf.float32,
373 |                                size=0,
374 |                                dynamic_size=True,
375 |                                element_shape=(facts[:, 0, :].get_shape()))
376 |     _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
377 |     self_attention = output_op.stack()
378 |     self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
379 |     return self_attention
380 | 
381 | def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
382 |     if isinstance(facts, tuple):
383 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
384 |         facts = tf.concat(facts, 2)
385 | 
386 |     if time_major:
387 |         # (T,B,D) => (B,T,D)
388 |         facts = tf.transpose(facts, [1, 0, 2])
389 |     # Trainable parameters
390 |     mask = tf.equal(mask, tf.ones_like(mask))
391 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
392 |     querry_size = query.get_shape().as_list()[-1]
393 |     query = tf.layers.dense(query, facts_size, activation=None, name='f1_trans_shine' + stag)
394 |     query = prelu(query)
395 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
396 |     queries = tf.reshape(queries, tf.shape(facts))
397 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
398 |     d_layer_1_all = tf.layers.dense(din_all, facts_size, activation=tf.nn.sigmoid, name='f1_shine_att' + stag)
399 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, facts_size, activation=tf.nn.sigmoid, name='f2_shine_att' + stag)
400 |     d_layer_2_all = tf.reshape(d_layer_2_all, tf.shape(facts))
401 |     output = d_layer_2_all
402 |     return output
403 | 
--------------------------------------------------------------------------------
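A quick sanity check for calc_auc in script/utils.py (a standalone sketch, not shipped with the repo; it assumes script/ is on PYTHONPATH so that `from utils import calc_auc` resolves). The expected value 0.75 is hand-computed: of the four positive/negative score pairs, three are ordered correctly.

# toy [score, label] pairs: positives score 0.9 and 0.7, negatives score 0.8 and 0.6
from utils import calc_auc

pairs = [[0.9, 1.], [0.8, 0.], [0.7, 1.], [0.6, 0.]]
# pairwise ordering: 0.9 > 0.8 (ok), 0.9 > 0.6 (ok), 0.7 < 0.8 (wrong), 0.7 > 0.6 (ok) -> AUC = 3/4
assert abs(calc_auc(pairs) - 0.75) < 1e-8
print(calc_auc(pairs))  # 0.75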