├── .gitignore
├── README.md
├── prepare_data.sh
└── script
├── Dice.py
├── calc_ckpt.py
├── data_iterator.py
├── generate_voc.py
├── generate_voc.py.bk
├── local_aggretor.py
├── model.py
├── model_avazu.py
├── process_data.py
├── rnn.py
├── shuffle.py
├── split_by_user.py
├── train.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Co-Action Network
2 |
3 | Implementation of the paper "CAN: Revisiting Feature Co-Action for Click Through Rate Prediction".
4 |
5 | paper: [arxiv (to be released)]()
6 |
7 | ## Installation
8 | dependencies:
9 |
10 | tensorflow: 1.4.1
11 |
12 | python: 2.7
13 |
14 | Higher versions of TensorFlow and Python 3 will be supported soon!
15 |
16 | ## Getting Started
17 | training:
18 |
19 |     CUDA_VISIBLE_DEVICES=0 python script/train.py train {model}
20 |
21 | model: CAN, Cartesion, PNN, etc. (see train.py for the full list)
22 |
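For example, to run the full pipeline on the Amazon Books data with the CAN model from the list above:

    sh prepare_data.sh
    CUDA_VISIBLE_DEVICES=0 python script/train.py train CAN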
23 | ## Citation
24 | ## Contact
25 | ## License
26 |
--------------------------------------------------------------------------------
/prepare_data.sh:
--------------------------------------------------------------------------------
1 | export PATH="$HOME/anaconda4/bin:$PATH"
2 | wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Books.json.gz
3 | wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Books.json.gz
4 | gunzip reviews_Books.json.gz
5 | gunzip meta_Books.json.gz
6 | python script/process_data.py meta_Books.json reviews_Books.json
7 | python script/local_aggretor.py
8 | python script/split_by_user.py
9 | python script/generate_voc.py
10 |
--------------------------------------------------------------------------------
/script/Dice.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 | def dice(_x, axis=-1, epsilon=0.000000001, name=''):
4 |     with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
5 |         alphas = tf.get_variable('alpha'+name, _x.get_shape()[-1],
6 |                                  initializer=tf.constant_initializer(0.0),
7 |                                  dtype=tf.float32)
8 |         input_shape = list(_x.get_shape())
9 |
10 |         reduction_axes = list(range(len(input_shape)))
11 |         del reduction_axes[axis]
12 |         broadcast_shape = [1] * len(input_shape)
13 |         broadcast_shape[axis] = input_shape[axis]
14 |
15 |     # case: train mode (uses stats of the current batch)
16 |     mean = tf.reduce_mean(_x, axis=reduction_axes)
17 |     broadcast_mean = tf.reshape(mean, broadcast_shape)
18 |     std = tf.reduce_mean(tf.square(_x - broadcast_mean) + epsilon, axis=reduction_axes)
19 |     std = tf.sqrt(std)
20 |     broadcast_std = tf.reshape(std, broadcast_shape)
21 |     x_normed = (_x - broadcast_mean) / (broadcast_std + epsilon)
22 |     # x_normed = tf.layers.batch_normalization(_x, center=False, scale=False)
23 |     x_p = tf.sigmoid(x_normed)
24 |
25 |
26 |     return alphas * (1.0 - x_p) * _x + x_p * _x
27 |
28 | def parametric_relu(_x):
29 |     alphas = tf.get_variable('alpha', _x.get_shape()[-1],
30 |                              initializer=tf.constant_initializer(0.0),
31 |                              dtype=tf.float32)
32 |     pos = tf.nn.relu(_x)
33 |     neg = alphas * (_x - abs(_x)) * 0.5
34 |
35 |     return pos + neg
36 |
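# Usage sketch (hypothetical, for illustration; the non-dice branch of model.py's
# build_fcn_net uses its own prelu from utils). Both activations create their 'alpha'
# variables in the current variable scope:
#
#     x = tf.layers.dense(inp, 200, activation=None, name='f1')
#     x = dice(x, name='dice_1')      # or: x = parametric_relu(x)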
--------------------------------------------------------------------------------
/script/calc_ckpt.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | ckpt = tf.train.get_checkpoint_state("./ckpt_path/").model_checkpoint_path
3 | saver = tf.train.import_meta_graph(ckpt+'.meta')
4 | variables = tf.trainable_variables()
5 | total_parameters = 0
6 | for variable in variables:
7 |     shape = variable.get_shape()
8 |     variable_parameters = 1
9 |     for dim in shape:
10 |         # print(dim)
11 |         variable_parameters *= dim.value
12 |     # print(variable_parameters)
13 |     total_parameters += variable_parameters
14 | print(total_parameters)
15 |
--------------------------------------------------------------------------------
/script/data_iterator.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import json
3 | import cPickle as pkl
4 | import random
5 |
6 | import gzip
7 |
8 | import shuffle
9 |
10 | def unicode_to_utf8(d):
11 |     return dict((key.encode("UTF-8"), value) for (key,value) in d.items())
12 | def dict_unicode_to_utf8(d):
13 |     return dict(((key[0].encode("UTF-8"), key[1].encode("UTF-8")), value) for (key,value) in d.items())
14 |
15 | def load_dict(filename):
16 |     try:
17 |         with open(filename, 'rb') as f:
18 |             return unicode_to_utf8(json.load(f))
19 |     except:
20 |         try:
21 |             with open(filename, 'rb') as f:
22 |                 return unicode_to_utf8(pkl.load(f))
23 |         except:
24 |             with open(filename, 'rb') as f:
25 |                 return dict_unicode_to_utf8(pkl.load(f))
26 |
27 |
28 | def fopen(filename, mode='r'):
29 |     if filename.endswith('.gz'):
30 |         return gzip.open(filename, mode)
31 |     return open(filename, mode)
32 |
33 |
34 | class DataIterator:
35 |
36 |     def __init__(self, source,
37 |                  uid_voc,
38 |                  mid_voc,
39 |                  cat_voc,
40 |                  batch_size=128,
41 |                  maxlen=100,
42 |                  skip_empty=False,
43 |                  shuffle_each_epoch=False,
44 |                  sort_by_length=True,
45 |                  max_batch_size=20,
46 |                  minlen=None,
47 |                  label_type=1):
48 |         if shuffle_each_epoch:
49 |             self.source_orig = source
50 |             self.source = shuffle.main(self.source_orig, temporary=True)
51 |         else:
52 |             self.source = fopen(source, 'r')
53 |         self.source_dicts = []
54 |         #for source_dict in [uid_voc, mid_voc, cat_voc, cat_voc, cat_voc]:# 'item_carte_voc.pkl', 'cate_carte_voc.pkl']:
55 |         for source_dict in [uid_voc, mid_voc, cat_voc, 'item_carte_voc.pkl', 'cate_carte_voc.pkl']:
56 |             self.source_dicts.append(load_dict(source_dict))
57 |
58 |         f_meta = open("item-info", "r")
59 |         meta_map = {}
60 |         for line in f_meta:
61 |             arr = line.strip().split("\t")
62 |             if arr[0] not in meta_map:
63 |                 meta_map[arr[0]] = arr[1]
64 |         self.meta_id_map = {}
65 |         for key in meta_map:
66 |             val = meta_map[key]
67 |             if key in self.source_dicts[1]:
68 |                 mid_idx = self.source_dicts[1][key]
69 |             else:
70 |                 mid_idx = 0
71 |             if val in self.source_dicts[2]:
72 |                 cat_idx = self.source_dicts[2][val]
73 |             else:
74 |                 cat_idx = 0
75 |             self.meta_id_map[mid_idx] = cat_idx
76 |
77 |         f_review = open("reviews-info", "r")
78 |         self.mid_list_for_random = []
79 |         for line in f_review:
80 |             arr = line.strip().split("\t")
81 |             tmp_idx = 0
82 |             if arr[1] in self.source_dicts[1]:
83 |                 tmp_idx = self.source_dicts[1][arr[1]]
84 |             self.mid_list_for_random.append(tmp_idx)
85 |
86 |         self.batch_size = batch_size
87 |         self.maxlen = maxlen
88 |         self.minlen = minlen
89 |         self.skip_empty = skip_empty
90 |
91 |         self.n_uid = len(self.source_dicts[0])
92 |         self.n_mid = len(self.source_dicts[1])
93 |         self.n_cat = len(self.source_dicts[2])
94 |         self.n_carte = [len(self.source_dicts[3]), len(self.source_dicts[4])]
95 |         print("n_uid=%d, n_mid=%d, n_cat=%d" % (self.n_uid, self.n_mid, self.n_cat))
96 |
97 |         self.shuffle = shuffle_each_epoch
98 |         self.sort_by_length = sort_by_length
99 |
100 |         self.source_buffer = []
101 |         self.k = batch_size * max_batch_size
102 |
103 |         self.end_of_data = False
104 |         self.label_type = label_type
105 |
106 |     def get_n(self):
107 |         return self.n_uid, self.n_mid, self.n_cat, self.n_carte
108 |
109 |     def __iter__(self):
110 |         return self
111 |
112 |     def reset(self):
113 |         if self.shuffle:
114 |             self.source= shuffle.main(self.source_orig, temporary=True)
115 |         else:
116 |             self.source.seek(0)
117 |
118 |     def next(self):
119 |         if self.end_of_data:
120 |             self.end_of_data = False
121 |             self.reset()
122 |             raise StopIteration
123 |
124 |         source = []
125 |         target = []
126 |
127 |         if len(self.source_buffer) == 0:
128 |             for k_ in xrange(self.k):
129 |                 ss = self.source.readline()
130 |                 if ss == "":
131 |                     break
132 |                 self.source_buffer.append(ss.strip("\n").split("\t"))
133 |
134 |             # sort by history behavior length
135 |             if self.sort_by_length:
136 |                 his_length = numpy.array([len(s[4].split("\x02")) for s in self.source_buffer])
137 |                 tidx = his_length.argsort()
138 |
139 |                 _sbuf = [self.source_buffer[i] for i in tidx]
140 |                 self.source_buffer = _sbuf
141 |             else:
142 |                 self.source_buffer.reverse()
143 |
144 |         if len(self.source_buffer) == 0:
145 |             self.end_of_data = False
146 |             self.reset()
147 |             raise StopIteration
148 |
149 |         try:
150 |
151 |             # actual work here
152 |             while True:
153 |
154 |                 # read from source file and map to word index
155 |                 try:
156 |                     ss = self.source_buffer.pop()
157 |                 except IndexError:
158 |                     break
159 |
160 |                 uid = self.source_dicts[0][ss[1]] if ss[1] in self.source_dicts[0] else 0
161 |                 mid = self.source_dicts[1][ss[2]] if ss[2] in self.source_dicts[1] else 0
162 |                 cat = self.source_dicts[2][ss[3]] if ss[3] in self.source_dicts[2] else 0
163 |
164 |                 tmp = []
165 |                 item_carte = []
166 |                 for fea in ss[4].split("\x02"):
167 |                     m = self.source_dicts[1][fea] if fea in self.source_dicts[1] else 0
168 |                     tmp.append(m)
169 |                     i_c = self.source_dicts[3][(ss[2], fea)] if (ss[2], fea) in self.source_dicts[3] else 0
170 |                     item_carte.append(i_c)
171 |                 mid_list = tmp
172 |
173 |                 tmp1 = []
174 |                 cate_carte = []
175 |                 for fea in ss[5].split("\x02"):
176 |                     c = self.source_dicts[2][fea] if fea in self.source_dicts[2] else 0
177 |                     tmp1.append(c)
178 |                     c_c = self.source_dicts[4][(ss[3], fea)] if (ss[3], fea) in self.source_dicts[4] else 0
179 |                     cate_carte.append(c_c)
180 |                 cat_list = tmp1
181 |
182 |                 # read from source file and map to word index
183 |
184 |                 if self.minlen != None:
185 |                     if len(mid_list) <= self.minlen:
186 |                         continue
187 |                 if self.skip_empty and (not mid_list):
188 |                     continue
189 |
190 |                 noclk_mid_list = []
191 |                 noclk_cat_list = []
192 |                 for pos_mid in mid_list:
193 |                     noclk_tmp_mid = []
194 |                     noclk_tmp_cat = []
195 |                     noclk_index = 0
196 |                     while True:
197 |                         noclk_mid_indx = random.randint(0, len(self.mid_list_for_random)-1)
198 |                         noclk_mid = self.mid_list_for_random[noclk_mid_indx]
199 |                         if noclk_mid == pos_mid:
200 |                             continue
201 |                         noclk_tmp_mid.append(noclk_mid)
202 |                         noclk_tmp_cat.append(self.meta_id_map[noclk_mid])
203 |                         noclk_index += 1
204 |                         if noclk_index >= 5:
205 |                             break
206 |                     noclk_mid_list.append(noclk_tmp_mid)
207 |                     noclk_cat_list.append(noclk_tmp_cat)
208 |                 carte_list = [item_carte, cate_carte]
209 |                 source.append([uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list, carte_list])
210 |                 if self.label_type == 1:
211 |                     target.append([float(ss[0])])
212 |                 else:
213 |                     target.append([float(ss[0]), 1-float(ss[0])])
214 |
215 |                 if len(source) >= self.batch_size or len(target) >= self.batch_size:
216 |                     break
217 |         except IOError:
218 |             self.end_of_data = True
219 |
220 |         # all sentence pairs in maxibatch filtered out because of length
221 |         if len(source) == 0 or len(target) == 0:
222 |             source, target = self.next()
223 |
224 |         return source, target
225 |
226 |
227 |
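# Usage sketch (hypothetical arguments; train.py wires up the real ones). Besides the
# explicit vocabularies, DataIterator also reads "item-info", "reviews-info",
# "item_carte_voc.pkl" and "cate_carte_voc.pkl" from the working directory. Each yielded
# source entry is [uid, mid, cat, mid_list, cat_list, noclk_mid_list, noclk_cat_list,
# carte_list]:
#
#     train_data = DataIterator("local_train_splitByUser", "uid_voc.pkl", "mid_voc.pkl",
#                               "cat_voc.pkl", batch_size=128, maxlen=100)
#     for source, target in train_data:
#         ...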
--------------------------------------------------------------------------------
/script/generate_voc.py:
--------------------------------------------------------------------------------
1 | import cPickle
2 |
3 | f_train = open("local_train_splitByUser", "r")
4 | uid_dict = {}
5 | mid_dict = {}
6 | cat_dict = {}
7 | item_carte_dict = {}
8 | cate_carte_dict = {}
9 |
10 | iddd = 0
11 | for line in f_train:
12 |     arr = line.strip("\n").split("\t")
13 |     clk = arr[0]
14 |     uid = arr[1]
15 |     mid = arr[2]
16 |     cat = arr[3]
17 |     mid_list = arr[4]
18 |     cat_list = arr[5]
19 |     if uid not in uid_dict:
20 |         uid_dict[uid] = 0
21 |     uid_dict[uid] += 1
22 |     if mid not in mid_dict:
23 |         mid_dict[mid] = 0
24 |     mid_dict[mid] += 1
25 |     if cat not in cat_dict:
26 |         cat_dict[cat] = 0
27 |     cat_dict[cat] += 1
28 |     if len(mid_list) == 0:
29 |         continue
30 |     for m in mid_list.split("\x02"):
31 |         if m not in mid_dict:
32 |             mid_dict[m] = 0
33 |         mid_dict[m] += 1
34 |         if (mid, m) not in item_carte_dict:
35 |             item_carte_dict[(mid, m)] = 0
36 |         item_carte_dict[(mid, m)] += 1
37 |     #print iddd
38 |     iddd+=1
39 |     for c in cat_list.split("\x02"):
40 |         if c not in cat_dict:
41 |             cat_dict[c] = 0
42 |         cat_dict[c] += 1
43 |         if (cat, c) not in cate_carte_dict:
44 |             cate_carte_dict[(cat, c)] = 0
45 |         cate_carte_dict[(cat, c)] += 1
46 |
47 | sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True)
48 | sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True)
49 | sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True)
50 | sorted_item_carte_dict = sorted(item_carte_dict.iteritems(), key=lambda x:x[1], reverse=True)
51 | sorted_cate_carte_dict = sorted(cate_carte_dict.iteritems(), key=lambda x:x[1], reverse=True)
52 |
53 | uid_voc = {}
54 | index = 0
55 | for key, value in sorted_uid_dict:
56 |     uid_voc[key] = index
57 |     index += 1
58 |
59 | mid_voc = {}
60 | mid_voc["default_mid"] = 0
61 | index = 1
62 | for key, value in sorted_mid_dict:
63 |     mid_voc[key] = index
64 |     index += 1
65 |
66 | cat_voc = {}
67 | cat_voc["default_cat"] = 0
68 | index = 1
69 | for key, value in sorted_cat_dict:
70 |     cat_voc[key] = index
71 |     index += 1
72 |
73 | item_carte_voc = {}
74 | item_carte_voc["default_item_carte"] = 0
75 | index = 1
76 | for key, value in sorted_item_carte_dict:
77 |     item_carte_voc[key] = index
78 |     index += 1
79 |
80 | cate_carte_voc = {}
81 | cate_carte_voc["default_cate_carte"] = 0
82 | index = 1
83 | for key, value in sorted_cate_carte_dict:
84 |     cate_carte_voc[key] = index
85 |     index += 1
86 |
87 | cPickle.dump(uid_voc, open("uid_voc.pkl", "w"))
88 | cPickle.dump(mid_voc, open("mid_voc.pkl", "w"))
89 | cPickle.dump(cat_voc, open("cat_voc.pkl", "w"))
90 | cPickle.dump(item_carte_voc, open("item_carte_voc.pkl", "w"))
91 | cPickle.dump(cate_carte_voc, open("cate_carte_voc.pkl", "w"))
92 |
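# Vocabulary layout produced above: ids are assigned by descending frequency, with index
# 0 reserved for the default/unknown entry, e.g.
#
#     mid_voc = {"default_mid": 0, "<most frequent item>": 1, "<next>": 2, ...}
#
# item_carte_voc is keyed by (target_item, history_item) pairs and cate_carte_voc by
# (target_category, history_category) pairs -- the Cartesian-product ("carte") features.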
--------------------------------------------------------------------------------
/script/generate_voc.py.bk:
--------------------------------------------------------------------------------
1 | import cPickle
2 |
3 | f_train = open("local_train_splitByUser", "r")
4 | uid_dict = {}
5 | mid_dict = {}
6 | cat_dict = {}
7 |
8 | iddd = 0
9 | for line in f_train:
10 |     arr = line.strip("\n").split("\t")
11 |     clk = arr[0]
12 |     uid = arr[1]
13 |     mid = arr[2]
14 |     cat = arr[3]
15 |     mid_list = arr[4]
16 |     cat_list = arr[5]
17 |     if uid not in uid_dict:
18 |         uid_dict[uid] = 0
19 |     uid_dict[uid] += 1
20 |     if mid not in mid_dict:
21 |         mid_dict[mid] = 0
22 |     mid_dict[mid] += 1
23 |     if cat not in cat_dict:
24 |         cat_dict[cat] = 0
25 |     cat_dict[cat] += 1
26 |     if len(mid_list) == 0:
27 |         continue
28 |     for m in mid_list.split("\x02"):
29 |         if m not in mid_dict:
30 |             mid_dict[m] = 0
31 |         mid_dict[m] += 1
32 |     #print iddd
33 |     iddd+=1
34 |     for c in cat_list.split("\x02"):
35 |         if c not in cat_dict:
36 |             cat_dict[c] = 0
37 |         cat_dict[c] += 1
38 |
39 | sorted_uid_dict = sorted(uid_dict.iteritems(), key=lambda x:x[1], reverse=True)
40 | sorted_mid_dict = sorted(mid_dict.iteritems(), key=lambda x:x[1], reverse=True)
41 | sorted_cat_dict = sorted(cat_dict.iteritems(), key=lambda x:x[1], reverse=True)
42 |
43 | uid_voc = {}
44 | index = 0
45 | for key, value in sorted_uid_dict:
46 |     uid_voc[key] = index
47 |     index += 1
48 |
49 | mid_voc = {}
50 | mid_voc["default_mid"] = 0
51 | index = 1
52 | for key, value in sorted_mid_dict:
53 |     mid_voc[key] = index
54 |     index += 1
55 |
56 | cat_voc = {}
57 | cat_voc["default_cat"] = 0
58 | index = 1
59 | for key, value in sorted_cat_dict:
60 |     cat_voc[key] = index
61 |     index += 1
62 |
63 | cPickle.dump(uid_voc, open("uid_voc.pkl", "w"))
64 | cPickle.dump(mid_voc, open("mid_voc.pkl", "w"))
65 | cPickle.dump(cat_voc, open("cat_voc.pkl", "w"))
66 |
--------------------------------------------------------------------------------
/script/local_aggretor.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import hashlib
3 | import random
4 |
5 | fin = open("jointed-new-split-info", "r")
6 | ftrain = open("local_train", "w")
7 | ftest = open("local_test", "w")
8 |
9 | last_user = "0"
10 | common_fea = ""
11 | line_idx = 0
12 | for line in fin:
13 |     items = line.strip().split("\t")
14 |     ds = items[0]
15 |     clk = int(items[1])
16 |     user = items[2]
17 |     movie_id = items[3]
18 |     dt = items[5]
19 |     cat1 = items[6]
20 |
21 |     if ds=="20180118":
22 |         fo = ftrain
23 |     else:
24 |         fo = ftest
25 |     if user != last_user:
26 |         movie_id_list = []
27 |         cate1_list = []
28 |         #print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + "" + "\t" + ""
29 |     else:
30 |         history_clk_num = len(movie_id_list)
31 |         cat_str = ""
32 |         mid_str = ""
33 |         for c1 in cate1_list:
34 |             cat_str += c1 + "\x02"
35 |         for mid in movie_id_list:
36 |             mid_str += mid + "\x02"
37 |         if len(cat_str) > 0: cat_str = cat_str[:-1]
38 |         if len(mid_str) > 0: mid_str = mid_str[:-1]
39 |         if history_clk_num >= 1:  # keep users with at least one prior click (8 is the average behavior length)
40 |             print >> fo, items[1] + "\t" + user + "\t" + movie_id + "\t" + cat1 +"\t" + mid_str + "\t" + cat_str
41 |     last_user = user
42 |     if clk:
43 |         movie_id_list.append(movie_id)
44 |         cate1_list.append(cat1)
45 |     line_idx += 1
46 |
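# Output format: each line written to local_train/local_test is tab-separated as
#
#     click \t user \t item \t category \t hist_item_list \t hist_cate_list
#
# with the two history lists joined by the "\x02" control character, matching the
# split("\x02") calls in generate_voc.py and data_iterator.py.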
--------------------------------------------------------------------------------
/script/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import GRUCell
3 | from tensorflow.python.ops.rnn_cell import LSTMCell
4 | from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
5 | #from tensorflow.python.ops.rnn import dynamic_rnn
6 | from rnn import dynamic_rnn
7 | from utils import *
8 | from Dice import dice
9 |
10 | #### CAN config #####
11 | weight_emb_w = [[16, 8], [8,4]]
12 | weight_emb_b = [0, 0]
13 | print(weight_emb_w, weight_emb_b)
14 | orders = 3
15 | order_indep = False # True
16 | WEIGHT_EMB_DIM = (sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b)) #* orders
17 | INDEP_NUM = 1
18 | if order_indep:
19 |     INDEP_NUM *= orders
20 | keep_fake_carte_seq = False  # referenced by gen_coaction below; model_avazu.py defines the same flag
21 | print("orders: ",orders)
22 | CALC_MODE = "can"
23 | device = '/gpu:0'
24 | #### CAN config #####
25 |
26 | def gen_coaction(ad, his_items, dim, mode="can", mask=None):
27 |     weight, bias = [], []
28 |     idx = 0
29 |     weight_orders = []
30 |     bias_orders = []
31 |     for i in range(orders):
32 |         for w, b in zip(weight_emb_w, weight_emb_b):
33 |             weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]]))
34 |             idx += w[0] * w[1]
35 |             if b == 0:
36 |                 bias.append(None)
37 |             else:
38 |                 bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b]))
39 |                 idx += b
40 |         weight_orders.append(weight)
41 |         bias_orders.append(bias)
42 |         if not order_indep:
43 |             break
44 |
45 |     if mode == "can":
46 |         out_seq = []
47 |         hh = []
48 |         for i in range(orders):
49 |             hh.append(his_items**(i+1))
50 |         #hh = [sum(hh)]
51 |         for i, h in enumerate(hh):
52 |             if order_indep:
53 |                 weight, bias = weight_orders[i], bias_orders[i]
54 |             else:
55 |                 weight, bias = weight_orders[0], bias_orders[0]
56 |             for j, (w, b) in enumerate(zip(weight, bias)):
57 |                 h = tf.matmul(h, w)
58 |                 if b is not None:
59 |                     h = h + b
60 |                 if j != len(weight)-1:
61 |                     h = tf.nn.tanh(h)
62 |             out_seq.append(h)
63 |         out_seq = tf.concat(out_seq, 2)
64 |     if mask is not None:
65 |         mask = tf.expand_dims(mask, axis=-1)
66 |         out_seq = out_seq * mask
67 |     out = tf.reduce_sum(out_seq, 1)
68 |     if keep_fake_carte_seq and mode=="emb":
69 |         return out, out_seq
70 |     return out, None
71 |
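# Worked example of the co-action parameter layout (values from the config above): with
# weight_emb_w = [[16, 8], [8, 4]] and weight_emb_b = [0, 0], each target item embeds
# into WEIGHT_EMB_DIM = 16*8 + 8*4 = 160 numbers, which gen_coaction slices into
# W1 [16, 8] and W2 [8, 4] -- a tiny per-item MLP applied to every history-item
# embedding, with tanh between layers. With orders = 3, the history input is fed in as
# h, h**2 and h**3, and the per-order outputs are concatenated, masked, and sum-pooled
# over the sequence.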
72 | class Model(object):
73 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling = False, use_softmax=True, use_coaction=False, use_cartes=False):
74 |         with tf.name_scope('Inputs'):
75 |             self.mid_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='mid_his_batch_ph')
76 |             self.cate_his_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_his_batch_ph')
77 |             self.uid_batch_ph = tf.placeholder(tf.int32, [None, ], name='uid_batch_ph')
78 |             self.mid_batch_ph = tf.placeholder(tf.int32, [None, ], name='mid_batch_ph')
79 |             self.cate_batch_ph = tf.placeholder(tf.int32, [None, ], name='cate_batch_ph')
80 |             self.mask = tf.placeholder(tf.float32, [None, None], name='mask')
81 |             self.seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
82 |             self.target_ph = tf.placeholder(tf.float32, [None, None], name='target_ph')
83 |             self.carte_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='carte_ph')
84 |             self.lr = tf.placeholder(tf.float64, [])
85 |             self.use_negsampling =use_negsampling
86 |             self.use_softmax = False  # use_softmax is deliberately overridden: the sigmoid branch of build_loss is always used
87 |             self.use_coaction = use_coaction
88 |             self.use_cartes = use_cartes
89 |             print("args:")
90 |             print("negsampling: ", self.use_negsampling)
91 |             print("softmax: ", self.use_softmax)
92 |             print("co-action: ", self.use_coaction)
93 |             print("carte: ", self.use_cartes)
94 |             if use_negsampling:
95 |                 self.noclk_mid_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_mid_batch_ph') #generate 3 item IDs from negative sampling.
96 |                 self.noclk_cate_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_cate_batch_ph')
97 |
98 |         # Embedding layer
99 |         with tf.name_scope('Embedding_layer'):
100 |             self.uid_embeddings_var = tf.get_variable("uid_embedding_var", [n_uid, EMBEDDING_DIM])
101 |             tf.summary.histogram('uid_embeddings_var', self.uid_embeddings_var)
102 |             self.uid_batch_embedded = tf.nn.embedding_lookup(self.uid_embeddings_var, self.uid_batch_ph)
103 |
104 |             self.mid_embeddings_var = tf.get_variable("mid_embedding_var", [n_mid, EMBEDDING_DIM])
105 |             tf.summary.histogram('mid_embeddings_var', self.mid_embeddings_var)
106 |             self.mid_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_batch_ph)
107 |             self.mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_his_batch_ph)
108 |             if self.use_negsampling:
109 |                 self.noclk_mid_his_batch_embedded = tf.nn.embedding_lookup(self.mid_embeddings_var, self.noclk_mid_batch_ph)
110 |
111 |             self.cate_embeddings_var = tf.get_variable("cate_embedding_var", [n_cate, EMBEDDING_DIM])
112 |             tf.summary.histogram('cate_embeddings_var', self.cate_embeddings_var)
113 |             self.cate_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_batch_ph)
114 |             self.cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.cate_his_batch_ph)
115 |             if self.use_negsampling:
116 |                 self.noclk_cate_his_batch_embedded = tf.nn.embedding_lookup(self.cate_embeddings_var, self.noclk_cate_batch_ph)
117 |
118 |         if self.use_cartes:
119 |             self.carte_embedding_vars = []
120 |             self.carte_batch_embedded = []
121 |             with tf.device(device):
122 |                 for i, num in enumerate(n_carte):
123 |                     print("carte num:", num)
124 |                     self.carte_embedding_vars.append(tf.get_variable("carte_embedding_var_{}".format(i), [num, EMBEDDING_DIM], trainable=True))
125 |                     self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embedding_vars[i], self.carte_batch_ph[:,i,:]))
126 |
127 |         ### co-action ###
128 |         if self.use_coaction:
129 |             ph_dict = {
130 |                 "item": [self.mid_batch_ph, self.mid_his_batch_ph, self.mid_his_batch_embedded],
131 |                 "cate": [self.cate_batch_ph, self.cate_his_batch_ph, self.cate_his_batch_embedded]
132 |             }
133 |             self.mlp_batch_embedded = []
134 |             with tf.device(device):
135 |                 self.item_mlp_embeddings_var = tf.get_variable("item_mlp_embedding_var", [n_mid, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True)
136 |                 self.cate_mlp_embeddings_var = tf.get_variable("cate_mlp_embedding_var", [n_cate, INDEP_NUM * WEIGHT_EMB_DIM], trainable=True)
137 |
138 |                 self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.item_mlp_embeddings_var, ph_dict['item'][0]))
139 |                 self.mlp_batch_embedded.append(tf.nn.embedding_lookup(self.cate_mlp_embeddings_var, ph_dict['cate'][0]))
140 |
141 |                 self.input_batch_embedded = []
142 |                 self.item_input_embeddings_var = tf.get_variable("item_input_embedding_var", [n_mid, weight_emb_w[0][0] * INDEP_NUM], trainable=True)
143 |                 self.cate_input_embeddings_var = tf.get_variable("cate_input_embedding_var", [n_cate, weight_emb_w[0][0] * INDEP_NUM], trainable=True)
144 |                 self.input_batch_embedded.append(tf.nn.embedding_lookup(self.item_input_embeddings_var, ph_dict['item'][1]))
145 |                 self.input_batch_embedded.append(tf.nn.embedding_lookup(self.cate_input_embeddings_var, ph_dict['cate'][1]))
146 |
147 |         self.item_eb = tf.concat([self.mid_batch_embedded, self.cate_batch_embedded], 1)
148 |         self.item_his_eb = tf.concat([self.mid_his_batch_embedded, self.cate_his_batch_embedded], 2)
149 |         self.item_his_eb_sum = tf.reduce_sum(self.item_his_eb, 1)
150 |         if self.use_negsampling:
151 |             self.noclk_item_his_eb = tf.concat(
152 |                 [self.noclk_mid_his_batch_embedded[:, :, 0, :], self.noclk_cate_his_batch_embedded[:, :, 0, :]], -1)  # 0 means only the first negative item ID is used; several are fed in via noclk_mid_batch_ph.
153 |             self.noclk_item_his_eb = tf.reshape(self.noclk_item_his_eb,
154 |                 [-1, tf.shape(self.noclk_mid_his_batch_embedded)[1], 2*EMBEDDING_DIM])  # cate embedding (18) concatenated with item embedding (18).
155 |
156 |             self.noclk_his_eb = tf.concat([self.noclk_mid_his_batch_embedded, self.noclk_cate_his_batch_embedded], -1)
157 |             self.noclk_his_eb_sum_1 = tf.reduce_sum(self.noclk_his_eb, 2)
158 |             self.noclk_his_eb_sum = tf.reduce_sum(self.noclk_his_eb_sum_1, 1)
159 |
160 |         self.cross = []
161 |         if self.use_cartes:
162 |             if self.mask is not None:
163 |                 mask = tf.expand_dims(self.mask, axis=-1)
164 |             for i,emb in enumerate(self.carte_batch_embedded):
165 |                 emb = emb * mask
166 |                 carte_eb_sum = tf.reduce_sum(emb, 1)
167 |                 self.cross.append(carte_eb_sum)
168 |
169 |         if self.use_coaction:
170 |             input_batch = self.input_batch_embedded
171 |             tmp_sum, tmp_seq = [], []
172 |             if INDEP_NUM == 2:
173 |                 for i, mlp_batch in enumerate(self.mlp_batch_embedded):
174 |                     for j, input_batch in enumerate(self.input_batch_embedded):
175 |                         coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, WEIGHT_EMB_DIM * j: WEIGHT_EMB_DIM * (j+1)], input_batch[:, :, weight_emb_w[0][0] * i: weight_emb_w[0][0] * (i+1)], EMBEDDING_DIM, mode=CALC_MODE,mask=self.mask)
176 |                         tmp_sum.append(coaction_sum)
177 |                         tmp_seq.append(coaction_seq)
178 |             else:
179 |                 for i, (mlp_batch, input_batch) in enumerate(zip(self.mlp_batch_embedded, self.input_batch_embedded)):
180 |                     coaction_sum, coaction_seq = gen_coaction(mlp_batch[:, : INDEP_NUM * WEIGHT_EMB_DIM], input_batch[:, :, : weight_emb_w[0][0]], EMBEDDING_DIM, mode=CALC_MODE, mask=self.mask)
181 |                     tmp_sum.append(coaction_sum)
182 |                     tmp_seq.append(coaction_seq)
183 |
184 |             self.coaction_sum = tf.concat(tmp_sum, axis=1)
185 |             self.cross.append(self.coaction_sum)
186 |
187 |     def build_fcn_net(self, inp, use_dice = False):
188 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
189 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
190 |         if use_dice:
191 |             dnn1 = dice(dnn1, name='dice_1')
192 |         else:
193 |             dnn1 = prelu(dnn1, 'prelu1')
194 |
195 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
196 |         if use_dice:
197 |             dnn2 = dice(dnn2, name='dice_2')
198 |         else:
199 |             dnn2 = prelu(dnn2, 'prelu2')
200 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3')
201 |         return dnn3
202 |
203 |     def build_loss(self, inp, L2=False):
204 |
205 |         with tf.name_scope('Metrics'):
206 |             # Cross-entropy loss and optimizer initialization
207 |             if self.use_softmax:
208 |                 self.y_hat = tf.nn.softmax(inp) + 0.00000001
209 |                 ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
210 |             else:
211 |                 self.y_hat = tf.nn.sigmoid(inp)
212 |                 ctr_loss = - tf.reduce_mean(tf.concat([tf.log(self.y_hat + 0.00000001) * self.target_ph, tf.log(1 - self.y_hat + 0.00000001) * (1-self.target_ph)], axis=1))
213 |             self.loss = ctr_loss
214 |             if self.use_negsampling:
215 |                 self.loss += self.aux_loss
216 |             if L2:
217 |                 self.loss += self.l2_loss
218 |
219 |             tf.summary.scalar('loss', self.loss)
220 |             self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
221 |
222 |             # Accuracy metric
223 |             if self.use_softmax:
224 |                 self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
225 |             else:
226 |                 self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
227 |             tf.summary.scalar('accuracy', self.accuracy)
228 |
229 |
230 |     def auxiliary_loss(self, h_states, click_seq, noclick_seq, mask, stag = None):
231 |         mask = tf.cast(mask, tf.float32)
232 |         click_input_ = tf.concat([h_states, click_seq], -1)
233 |         noclick_input_ = tf.concat([h_states, noclick_seq], -1)
234 |         click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0]
235 |         noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0]
236 |         click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask
237 |         noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask
238 |         loss_ = tf.reduce_mean(click_loss_ + noclick_loss_)
239 |         return loss_
240 |
241 |     def auxiliary_net(self, in_, stag='auxiliary_net'):
242 |         bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE)
243 |         dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE)
244 |         dnn1 = tf.nn.sigmoid(dnn1)
245 |         dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE)
246 |         dnn2 = tf.nn.sigmoid(dnn2)
247 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE)
248 |         if self.use_softmax:
249 |             y_hat = tf.nn.softmax(dnn3) + 0.00000001
250 |         else:
251 |             y_hat = tf.nn.sigmoid(dnn3) + 0.00000001
252 |         return y_hat
253 |
254 |
255 |     def train(self, sess, inps):
256 |         if self.use_negsampling:
257 |             loss, accuracy, aux_loss, _ = sess.run([self.loss, self.accuracy, self.aux_loss, self.optimizer], feed_dict={
258 |                 self.uid_batch_ph: inps[0],
259 |                 self.mid_batch_ph: inps[1],
260 |                 self.cate_batch_ph: inps[2],
261 |                 self.mid_his_batch_ph: inps[3],
262 |                 self.cate_his_batch_ph: inps[4],
263 |                 self.mask: inps[5],
264 |                 self.target_ph: inps[6],
265 |                 self.seq_len_ph: inps[7],
266 |                 self.lr: inps[8],
267 |                 self.noclk_mid_batch_ph: inps[9],
268 |                 self.noclk_cate_batch_ph: inps[10],
269 |                 self.carte_batch_ph: inps[11]
270 |             })
271 |             return loss, accuracy, aux_loss
272 |         else:
273 |             loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict={
274 |                 self.uid_batch_ph: inps[0],
275 |                 self.mid_batch_ph: inps[1],
276 |                 self.cate_batch_ph: inps[2],
277 |                 self.mid_his_batch_ph: inps[3],
278 |                 self.cate_his_batch_ph: inps[4],
279 |                 self.mask: inps[5],
280 |                 self.target_ph: inps[6],
281 |                 self.seq_len_ph: inps[7],
282 |                 self.lr: inps[8],
283 |                 self.carte_batch_ph: inps[11]
284 |             })
285 |             return loss, accuracy, 0
286 |
287 |     def calculate(self, sess, inps):
288 |         if self.use_negsampling:
289 |             probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict={
290 |                 self.uid_batch_ph: inps[0],
291 |                 self.mid_batch_ph: inps[1],
292 |                 self.cate_batch_ph: inps[2],
293 |                 self.mid_his_batch_ph: inps[3],
294 |                 self.cate_his_batch_ph: inps[4],
295 |                 self.mask: inps[5],
296 |                 self.target_ph: inps[6],
297 |                 self.seq_len_ph: inps[7],
298 |                 self.noclk_mid_batch_ph: inps[8],
299 |                 self.noclk_cate_batch_ph: inps[9],
300 |                 self.carte_batch_ph: inps[10]
301 |             })
302 |             return probs, loss, accuracy, aux_loss
303 |         else:
304 |             probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict={
305 |                 self.uid_batch_ph: inps[0],
306 |                 self.mid_batch_ph: inps[1],
307 |                 self.cate_batch_ph: inps[2],
308 |                 self.mid_his_batch_ph: inps[3],
309 |                 self.cate_his_batch_ph: inps[4],
310 |                 self.mask: inps[5],
311 |                 self.target_ph: inps[6],
312 |                 self.seq_len_ph: inps[7],
313 |                 self.carte_batch_ph: inps[10]
314 |             })
315 |             return probs, loss, accuracy, 0
316 |
317 |     def save(self, sess, path):
318 |         saver = tf.train.Saver()
319 |         saver.save(sess, save_path=path)
320 |
321 |     def restore(self, sess, path):
322 |         saver = tf.train.Saver()
323 |         saver.restore(sess, save_path=path)
324 |         print('model restored from %s' % path)
325 |
326 | class Model_NCF(Model):
327 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
328 |         super(Model_NCF, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
329 |                                         ATTENTION_SIZE,
330 |                                         use_negsampling, use_softmax)
331 |         with tf.name_scope('ncf_embedding'):
332 |             self.ncf_item_embedding_var = tf.get_variable("ncf_item_embedding_var", [n_mid, EMBEDDING_DIM], trainable=True)
333 |             self.ncf_cate_embedding_var = tf.get_variable("ncf_cate_embedding_var", [n_cate, EMBEDDING_DIM], trainable=True)
334 |
335 |             ncf_item_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_batch_ph)
336 |             ncf_item_his_emb = tf.nn.embedding_lookup(self.ncf_item_embedding_var, self.mid_his_batch_ph)
337 |             ncf_cate_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_batch_ph)
338 |             ncf_cate_his_emb = tf.nn.embedding_lookup(self.ncf_cate_embedding_var, self.cate_his_batch_ph)
339 |
340 |             ncf_item_his_sum = tf.reduce_mean(ncf_item_his_emb, axis=1)
341 |             ncf_cate_his_sum = tf.reduce_mean(ncf_cate_his_emb, axis=1)
342 |             mf = tf.concat([ncf_item_emb * ncf_item_his_sum, ncf_cate_emb * ncf_cate_his_sum], axis=1)
343 |
344 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
345 |         logit = self.build_fcn_net(inp, mf, use_dice=False)
346 |         self.build_loss(logit)
347 |
348 |     def build_fcn_net(self, inp, mf, use_dice = False):
349 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
350 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
351 |         if use_dice:
352 |             dnn1 = dice(dnn1, name='dice_1')
353 |         else:
354 |             dnn1 = prelu(dnn1, scope='prelu_1')
355 |
356 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
357 |         if use_dice:
358 |             dnn2 = dice(dnn2, name='dice_2')
359 |         else:
360 |             dnn2 = prelu(dnn2, scope='prelu_2')
361 |
362 |         dnn2 = tf.concat([dnn2, mf], axis=1)
363 |         dnn3 = tf.layers.dense(dnn2, 2 if self.use_softmax else 1, activation=None, name='f3')
364 |         return dnn3
365 |
366 | def ProductLayer(feas, DIM, share=True):
367 |     row, col = [], []
368 |     num = len(feas)
369 |     pair = num * (num-1) / 2
370 |     for i in range(num - 1):
371 |         for j in range(i+1, num):
372 |             row.append(i)
373 |             col.append(j)
374 |     if share:
375 |         p = tf.stack([feas[i] for i in row], axis=1)
376 |         q = tf.stack([feas[i] for i in col], axis=1)
377 |     else:
378 |         tmp = []
379 |         count = {}
380 |         for i in row:
381 |             if i not in count:
382 |                 count[i] = 0
383 |             else:
384 |                 count[i] += 1
385 |             k = count[i]
386 |             tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
387 |         p = tf.stack(tmp, axis=1)
388 |         tmp = []
389 |         for i in col:
390 |             if i not in count:
391 |                 count[i] = 0
392 |             else:
393 |                 count[i] += 1
394 |             k = count[i]
395 |             tmp.append(feas[i][:, k*DIM:(k+1)*DIM])
396 |         q = tf.stack(tmp, axis=1)
397 |
398 |     ipnn = p * q
399 |     ipnn = tf.reduce_sum(ipnn, axis=2, keep_dims=False)
400 |     p = tf.expand_dims(p, axis=1)
401 |     w = tf.get_variable("pnn_var", [DIM, pair, DIM], trainable=True)
402 |     opnn = tf.reduce_sum((tf.multiply((tf.transpose(tf.reduce_sum(tf.multiply(p, w), axis=-1), [0, 2, 1])), q)), axis=-1)
403 |     pnn = tf.concat([ipnn, opnn], axis=1)
404 |     return pnn
405 |
406 | class Model_PNN(Model):
407 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
408 |         super(Model_PNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
409 |
410 |         fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1)]
411 |         pnn = ProductLayer(fea_list, EMBEDDING_DIM)
412 |         inp = tf.concat([self.uid_batch_embedded[:, :18], self.item_eb[:, :36], self.item_his_eb_sum[:, :36], pnn], 1)
413 |         logit = self.build_fcn_net(inp, use_dice=False)
414 |         self.build_loss(logit)
415 |
416 | def FMLayer(feas, output_dim=1):
417 |     feas = tf.stack(feas, axis=1)
418 |     square_of_sum = tf.reduce_sum(feas, axis=1, keep_dims=True) ** 2
419 |     sum_of_square = tf.reduce_sum(feas ** 2, axis=1, keep_dims=True)
420 |     fm_term = 0.5 * tf.reduce_sum(square_of_sum - sum_of_square, axis=2, keep_dims=False)
421 |     if output_dim==2:
422 |         fm_term = tf.concat([fm_term, tf.zeros_like(fm_term)], axis=1)
423 |     return fm_term
424 |
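# Note: FMLayer uses the standard factorization-machine identity: the pairwise
# interaction term sum_{i<j} <v_i, v_j> equals 0.5 * ((sum_i v_i)^2 - sum_i v_i^2),
# reduced over the embedding axis, which costs O(n*k) instead of enumerating all pairs.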
tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 452 | w_cate_var = tf.get_variable("w_cate_var", [n_mid, 1], trainable=True) 453 | wx = [] 454 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 455 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 456 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 457 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 458 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 459 | 460 | wx = tf.concat(wx, axis=1) 461 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 462 | 463 | with tf.name_scope('FFM_embedding'): 464 | 465 | FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True) 466 | FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True) 467 | item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph) 468 | item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_his_batch_ph) 469 | item_his_sum = tf.reduce_sum(item_his_emb, axis=1) 470 | 471 | cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph) 472 | cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph) 473 | cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1) 474 | 475 | fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum] 476 | feas = tf.stack(fea_list, axis=1) 477 | num = len(fea_list) 478 | rows, cols = [], [] 479 | for i in range(num-1): 480 | for j in range(i+1, num): 481 | rows.append([i, j-1]) 482 | cols.append([j, i]) 483 | p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2]) 484 | q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2]) 485 | ffm_term = tf.reduce_sum(p * q, axis=2) 486 | ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True) 487 | logit = lr_term + ffm_term 488 | self.build_loss(logit) 489 | 490 | 491 | class Model_DeepFFM(Model): 492 | def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False): 493 | super(Model_DeepFFM, self).__init__(n_uid, n_mid, n_cate, n_carte,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax) 494 | 495 | w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 496 | w_cate_var = tf.get_variable("w_cate_var", [n_mid, 1], trainable=True) 497 | wx = [] 498 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 499 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 500 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 501 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 502 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 503 | 504 | wx = tf.concat(wx, axis=1) 505 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 506 | 507 | with tf.name_scope('FFM_embedding'): 508 | 509 | FFM_item_embedding_var = tf.get_variable("FFM_item_embedding_var", [n_mid, 3, EMBEDDING_DIM], trainable=True) 510 | FFM_cate_embedding_var = tf.get_variable("FFM_cate_embedding_var", [n_cate, 3, EMBEDDING_DIM], trainable=True) 511 | item_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, self.mid_batch_ph) 512 | item_his_emb = tf.nn.embedding_lookup(FFM_item_embedding_var, 
self.mid_his_batch_ph) 513 | item_his_sum = tf.reduce_sum(item_his_emb, axis=1) 514 | 515 | cate_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_batch_ph) 516 | cate_his_emb = tf.nn.embedding_lookup(FFM_cate_embedding_var, self.cate_his_batch_ph) 517 | cate_his_sum = tf.reduce_sum(cate_his_emb, axis=1) 518 | 519 | fea_list = [item_emb, item_his_sum, cate_emb, cate_his_sum] 520 | feas = tf.stack(fea_list, axis=1) 521 | num = len(fea_list) 522 | rows, cols = [], [] 523 | for i in range(num-1): 524 | for j in range(i+1, num): 525 | rows.append([i, j-1]) 526 | cols.append([j, i]) 527 | p = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), rows), [1,0,2]) 528 | q = tf.transpose(tf.gather_nd(tf.transpose(feas, [1,2,0,3]), cols), [1,0,2]) 529 | ffm_term = tf.reduce_sum(p * q, axis=2) 530 | ffm_term = tf.reduce_sum(ffm_term, axis=1, keep_dims=True) 531 | 532 | inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) 533 | dnn_term = self.build_fcn_net(inp, use_dice=False) 534 | 535 | logit = dnn_term + lr_term + ffm_term 536 | self.build_loss(logit) 537 | 538 | class Model_DeepFM(Model): 539 | def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False): 540 | super(Model_DeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte,EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax) 541 | w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True) 542 | w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True) 543 | wx = [] 544 | wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph)) 545 | wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph)) 546 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1)) 547 | wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1)) 548 | b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True) 549 | 550 | wx = tf.concat(wx, axis=1) 551 | lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b 552 | 553 | inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1) 554 | logit = self.build_fcn_net(inp, use_dice=False) 555 | 556 | fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)] 557 | fm_term = FMLayer(fea_list) 558 | logit = tf.layers.dense(tf.concat([logit, fm_term, lr_term], axis=1), 1, activation=None, name='fm_fc') 559 | #self.l2_loss = 0.01 * tf.add_n([tf.nn.l2_loss(v) for v in [wx, self.item_eb, self.item_his_eb_sum]]) 560 | self.build_loss(logit, L2=False) 561 | 562 | def ExtremeFMLayer(feas, dim, output_dim=1): 563 | num = len(feas) 564 | feas = tf.stack(feas, axis=1) # batch, field_num, emb_dim 565 | hidden_nn_layers = [] 566 | field_nums = [num] 567 | final_len = 0 568 | hidden_nn_layers.append(feas) 569 | final_result = [] 570 | cross_layers = [256, 256, 256] 571 | 572 | split_tensor0 = tf.split(hidden_nn_layers[0], dim * [1], 2) 573 | 574 | with tf.variable_scope("xfm", initializer=tf.contrib.layers.xavier_initializer(uniform=True)) as scope: 575 | for idx, layer_size in enumerate(cross_layers): 576 | split_tensor = tf.split(hidden_nn_layers[-1], dim * [1], 2) 577 | dot_result_m = tf.matmul(split_tensor0, split_tensor, transpose_b=True) 578 | dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, field_nums[0] * 
578 |             dot_result_o = tf.reshape(dot_result_m, shape=[dim, -1, field_nums[0] * field_nums[-1]])
579 |             dot_result = tf.transpose(dot_result_o, perm=[1, 0, 2])
580 |
581 |             filters = tf.get_variable(name="f_" + str(idx),
582 |                                       shape=[1, field_nums[-1] * field_nums[0], layer_size],
583 |                                       dtype=tf.float32)
584 |
585 |             curr_out = tf.nn.conv1d(dot_result, filters=filters, stride=1, padding='VALID')
586 |             curr_out = tf.transpose(curr_out, perm=[0, 2, 1])
587 |
588 |             if idx != len(cross_layers) - 1:
589 |                 next_hidden, direct_connect = tf.split(curr_out, 2 * [int(layer_size / 2)], 1)
590 |                 final_len += int(layer_size / 2)
591 |             else:
592 |                 direct_connect = curr_out
593 |                 next_hidden = 0
594 |                 final_len += layer_size
595 |             field_nums.append(int(layer_size / 2))
596 |
597 |             final_result.append(direct_connect)
598 |             hidden_nn_layers.append(next_hidden)
599 |
600 |
601 |         result = tf.concat(final_result, axis=1)
602 |         result = tf.reduce_sum(result, -1)
603 |
604 |         w_nn_output = tf.get_variable(name='w_nn_output',
605 |                                       shape=[final_len, 1],
606 |                                       dtype=tf.float32)
607 |         b_nn_output = tf.get_variable(name='b_nn_output',
608 |                                       shape=[1],
609 |                                       dtype=tf.float32,
610 |                                       initializer=tf.zeros_initializer())
611 |         xfm_term = tf.matmul(result, w_nn_output) + b_nn_output
612 |
613 |     if output_dim==2:
614 |         xfm_term = tf.concat([xfm_term, tf.zeros_like(xfm_term)], axis=1)
615 |     return xfm_term
616 |
617 | class Model_xDeepFM(Model):
618 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
619 |         super(Model_xDeepFM, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
620 |
621 |         w_item_var = tf.get_variable("w_item_var", [n_mid, 1], trainable=True)
622 |         w_cate_var = tf.get_variable("w_cate_var", [n_cate, 1], trainable=True)
623 |         wx = []
624 |         wx.append(tf.nn.embedding_lookup(w_item_var, self.mid_batch_ph))
625 |         wx.append(tf.nn.embedding_lookup(w_cate_var, self.cate_batch_ph))
626 |         wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_item_var, self.mid_his_batch_ph), axis=1))
627 |         wx.append(tf.reduce_sum(tf.nn.embedding_lookup(w_cate_var, self.cate_his_batch_ph), axis=1))
628 |         b = tf.get_variable("b_var", [1], initializer=tf.zeros_initializer(), trainable=True)
629 |
630 |         wx = tf.concat(wx, axis=1)
631 |         lr_term = tf.reduce_sum(wx, axis=1, keep_dims=True) + b
632 |
633 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
634 |         mlp_term = self.build_fcn_net(inp, use_dice=False)
635 |
636 |         fea_list = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded, axis=1), tf.reduce_sum(self.cate_his_batch_embedded, axis=1)]
637 |         fm_term = ExtremeFMLayer(fea_list, EMBEDDING_DIM)
638 |         self.build_loss(mlp_term + fm_term)
639 |
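# Reading aid for the CIN in ExtremeFMLayer above: each layer takes outer products
# between the current feature maps and the layer-0 maps along the embedding axis,
# compresses them with a 1-D convolution, and then splits the resulting maps in half --
# one half feeds the next layer, the other half is wired straight to the output
# (direct_connect); the last layer is connected to the output in full.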
640 | class Model_PIN(Model):
641 |     def __init__(self,n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
642 |         super(Model_PIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
643 |                                         HIDDEN_SIZE)  # HIDDEN_SIZE stands in for ATTENTION_SIZE, which the base class does not use
644 |
645 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
646 |         logit = self.build_fcn_net(inp, use_dice=False)
647 |
648 |         feas = [self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_sum(self.mid_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1), tf.reduce_sum(self.cate_his_batch_embedded * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1)), axis=1)]
649 |
650 |         self.feas = feas
651 |         row, col = [], []
652 |         num = len(feas)
653 |         for i in range(num - 1):
654 |             for j in range(i+1, num):
655 |                 row.append(i)
656 |                 col.append(j)
657 |         pairs = len(row)
658 |         p = tf.stack([feas[i] for i in row], axis=1)
659 |         q = tf.stack([feas[i] for i in col], axis=1)
660 |         pq = p * q
661 |         inp = tf.concat([p,q,pq], axis=2)  # batch, pair, 3*dim
662 |         logit = self.pin(inp)
663 |         self.build_loss(logit)
664 |
665 |     def pin(self, inp):
666 |         batch, pair, dim = inp.shape.as_list()
667 |         with tf.variable_scope('product_network'):
668 |             inp = tf.transpose(inp, [1,0,2])
669 |             x = tf.layers.dense(inp, 20, activation=None, name='fc1')
670 |             x = tf.layers.batch_normalization(x, name='bn1')
671 |             x = tf.nn.relu(x)
672 |             x = tf.layers.dense(x, 1, activation=None, name='fc2')
673 |             x = tf.layers.batch_normalization(x, name='bn2')
674 |             x = tf.transpose(x, [1,0,2])
675 |             sub_out = tf.reshape(x, [-1, pair * dim])
676 |
677 |         with tf.variable_scope('network'):
678 |             new_inp = tf.concat(self.feas+[sub_out], axis=1)
679 |             x = tf.layers.dense(sub_out, 400, activation=tf.nn.relu, name='fc1')
680 |             x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc2')
681 |             x = tf.layers.dense(x, 400, activation=tf.nn.relu, name='fc3')
682 |             x = tf.layers.dense(x, 1, activation=None, name='fc4')
683 |             return x
684 |
685 | class Model_ONN(Model):
686 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=False):
687 |         super(Model_ONN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling, use_softmax=use_softmax)
688 |
689 |         dim = 5
690 |         self.item_embedding_var = tf.get_variable("item_embedding_var_onn", [n_mid, dim * 3], trainable=True)
691 |         self.item_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_batch_ph)
692 |         self.item_his_emb = tf.nn.embedding_lookup(self.item_embedding_var, self.mid_his_batch_ph)
693 |         self.item_his_emb_sum = tf.reduce_mean(self.item_his_emb, axis=1)
694 |
695 |         self.cate_embedding_var = tf.get_variable("cate_embedding_var_onn", [n_cate, dim * 3], trainable=True)
696 |         self.cate_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_batch_ph)
697 |         self.cate_his_emb = tf.nn.embedding_lookup(self.cate_embedding_var, self.cate_his_batch_ph)
698 |         self.cate_his_emb_sum = tf.reduce_mean(self.cate_his_emb, axis=1)
699 |
700 |         fea_list = [self.item_emb, self.cate_emb, self.item_his_emb_sum, self.cate_his_emb_sum]
701 |         onn = ProductLayer(fea_list, dim, False)
702 |
703 |         inp = tf.concat([self.uid_batch_embedded, self.mid_batch_embedded, self.cate_batch_embedded, tf.reduce_mean(self.mid_his_batch_embedded, axis=1), tf.reduce_mean(self.cate_his_batch_embedded, axis=1), onn], 1)
704 |         logit = self.build_fcn_net(inp, use_dice=False)
705 |         self.build_loss(logit)
706 |
707 | class Model_WideDeep(Model):
708 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False):
709 |         super(Model_WideDeep, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
710 |                                              ATTENTION_SIZE,
711 |                                              use_negsampling)
712 |
713 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum], 1)
714 |         # Fully connected layer
715 |         bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1')
716 |         dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1')
717 |         dnn1 = prelu(dnn1, 'p1')
718 |         dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2')
719 |         dnn2 = prelu(dnn2, 'p2')
720 |         dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3')
721 |         d_layer_wide = tf.concat([tf.concat([self.item_eb,self.item_his_eb_sum], axis=-1),
722 |                                   self.item_eb * self.item_his_eb_sum], axis=-1)
723 |         d_layer_wide = tf.layers.dense(d_layer_wide, 2, activation=None, name='f_fm')
724 |         self.y_hat = tf.nn.softmax(dnn3 + d_layer_wide)
725 |
726 |         with tf.name_scope('Metrics'):
727 |             # Cross-entropy loss and optimizer initialization
728 |             self.loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph)
729 |             tf.summary.scalar('loss', self.loss)
730 |             self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
731 |
732 |             # Accuracy metric
733 |             self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32))
734 |             tf.summary.scalar('accuracy', self.accuracy)
735 |         self.merged = tf.summary.merge_all()
736 |
737 | class Model_DNN(Model):
738 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True, use_coaction=False, use_cartes=False):
739 |         #EMBEDDING_DIM = 4
740 |         super(Model_DNN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
741 |                                         ATTENTION_SIZE,
742 |                                         use_negsampling, use_softmax=use_softmax, use_coaction=use_coaction, use_cartes=use_cartes)
743 |
744 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum]+self.cross, 1)
745 |         logit = self.build_fcn_net(inp, use_dice=False)
746 |         self.build_loss(logit)
747 |
748 |
749 | class Model_DIN(Model):
750 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=False, use_softmax=True):
751 |         super(Model_DIN, self).__init__(n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE,
752 |                                         ATTENTION_SIZE,
753 |                                         use_negsampling, use_softmax=use_softmax)
754 |
755 |         # Attention layer
756 |         with tf.name_scope('Attention_layer'):
757 |             attention_output = din_attention(self.item_eb, self.item_his_eb, ATTENTION_SIZE, self.mask)
758 |             att_fea = tf.reduce_sum(attention_output, 1)
759 |             tf.summary.histogram('att_fea', att_fea)
760 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, att_fea], -1)
761 |         # Fully connected layer
762 |         logit = self.build_fcn_net(inp, use_dice=True)
763 |         self.build_loss(logit)
764 |
765 |
766 | class Model_DIEN(Model):
767 |     def __init__(self, n_uid, n_mid, n_cate, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_negsampling=True, use_coaction=False):
768 |         super(Model_DIEN, self).__init__(n_uid, n_mid, n_cate, n_carte,
769 |                                          EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE,
770 |                                          use_negsampling, use_coaction=use_coaction)
771 |
772 |         # RNN layer(-s)
773 |         with tf.name_scope('rnn_1'):
774 |             rnn_outputs, _ = dynamic_rnn(GRUCell(HIDDEN_SIZE), inputs=self.item_his_eb,
775 |                                          sequence_length=self.seq_len_ph, dtype=tf.float32,
776 |                                          scope="gru1")
777 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
778 |
779 |         aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :],
780 |                                          self.noclk_item_his_eb[:, 1:, :],
781 |                                          self.mask[:, 1:], stag="gru")
782 |         self.aux_loss = aux_loss_1
783 |
784 |         # Attention layer
785 |         with tf.name_scope('Attention_layer_1'):
786 |             att_outputs, alphas = din_fcn_attention(self.item_eb, rnn_outputs, ATTENTION_SIZE, self.mask,
787 |                                                     softmax_stag=1, stag='1_1', mode='LIST', return_alphas=True)
788 |             tf.summary.histogram('alpha_outputs', alphas)
789 |
790 |         with tf.name_scope('rnn_2'):
791 |             rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs,
792 |                                                      att_scores = tf.expand_dims(alphas, -1),
793 |                                                      sequence_length=self.seq_len_ph, dtype=tf.float32,
794 |                                                      scope="gru2")
795 |             tf.summary.histogram('GRU2_Final_State', final_state2)
796 |
797 |         inp = tf.concat([self.uid_batch_embedded, self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, final_state2]+self.cross, 1)
798 |         prop = self.build_fcn_net(inp, use_dice=True)
799 |         self.build_loss(prop)
800 |
--------------------------------------------------------------------------------
/script/model_avazu.py:
--------------------------------------------------------------------------------
1 | #coding:utf-8
2 | import tensorflow as tf
3 | from utils import *
4 | from tensorflow.python.ops.rnn_cell import GRUCell
5 | import mimn as mimn
6 | import rum as rum
7 | from rnn import dynamic_rnn
8 | # import mann_simple_cell as mann_cell
9 | import random
10 |
11 | ### Exp config ###
12 |
13 | feature_num = [
14 |     264,7,7,4842,7912,26,9136,580,36,
15 |     7338655,8303,5,4,2885,8,9,474,4,69,172,62
16 | ]
17 | # id starts with 1
18 | id_offset = [0] + [sum(feature_num[:i]) for i in range(1, len(feature_num))]
19 |
20 | emb_as_weight = True #False #True
21 | use_new_seq_emb = True #False # True
22 | #edge_type = "item"
23 | edge_type = "3-9"
24 | use_cartes = ["item-his_item"]
25 | use_cartes = ["cate-his_cate"]
26 | use_cartes = [
27 |     "3-9", "3-10", "4-9", "4-10", "6-9", "6-10", "7-9", "7-10",
28 |     "16-9", "16-10", "19-9", "19-10", "13-16-19", "13-16-19-9", "13-16-19-10",
29 |     "16-3", "16-6", "19-3", "19-6", "13-16-19-3", "13-16-19-6"
30 | ]
31 | use_cartes = []
32 |
33 | WEIGHT_EMB_NUM = 1
34 | orders = 5
35 | CALC_MODE = "poly_x_x4"
36 | weight_emb_w, weight_emb_b = [], []
37 | alpha = 1
38 | if CALC_MODE in ["seq_sum", "seq", "emb"]:
39 |     weight_emb_w = [[4, 3], [3,4]]
40 |     #weight_emb_w = [[16, 3], [3,4]]
41 |     #weight_emb_w = [[16, 3], [3,4], [4,5],[5,5]]
42 |     weight_emb_b = [3, 0]
43 |     #weight_emb_b = [3, 4, 5, 0]
44 |     WEIGHT_EMB_DIM = sum([w[0]*w[1] for w in weight_emb_w]) + sum(weight_emb_b)
45 | elif CALC_MODE.startswith("poly"):
46 |     WEIGHT_EMB_DIM = 16
47 |     if "vec" in CALC_MODE:
48 |         WEIGHT_EMB_DIM = int(WEIGHT_EMB_DIM ** 0.5)
49 |     elif "wx_ind" in CALC_MODE:
50 |         WEIGHT_EMB_DIM *= 2
51 |     elif "x_ind" in CALC_MODE:
52 |         WEIGHT_EMB_DIM *= orders
53 |     elif "x4" in CALC_MODE:
54 |         alpha = 4
55 |         WEIGHT_EMB_DIM *= alpha**2
56 |
57 | keep_fake_carte_seq = False # True
58 | carte_with_gru = True #False
59 |
60 | carte_num_dict = {
61 |     "3-6": 8315+1,
62 |
63 |     "4-7": 4547+1,
64 |     "3-9": 2102068+1,
65 |     "3-10": 161045+1,
66 |     "4-9": 2073680+1,
67 |     "4-10": 146645+1,
68 |     "6-9": 1851115+1,
69 |     "6-10": 93771+1,
70 |     "7-9": 1765776+1,
71 |     "7-10": 23738+1,
72 |     "16-9": 2135855+1,
73 |     "16-10": 128321+1,
74 |     "19-9": 1637771+1,
75 |     "19-10": 57099+1,
76 |     "13-16-19": 16905+1,
77 |     "13-16-19-9": 2579867+1,
78 |     "13-16-19-10": 447410+1,
79 |     "16-3": 33287+1,
80 |     "16-6": 25011+1,
81 |     "19-3": 24748+1,
82 |     "19-6": 22125+1,
83 |     "13-16-19-3": 142791+1,
84 |     "13-16-19-6": 86211+1,
85 | }
86 | if use_cartes:
87 |     n_cid = sum([carte_num_dict[c] for c in use_cartes]) - (len(use_cartes) - 1)
88 | #n_cid = 59201 #6689210 #8586832 #6689210 #6630010
89 |
90 | def eb_as_weight(ad, his_items, dim, mode="seq"):
91 |     ad = tf.reshape(ad, [-1, WEIGHT_EMB_DIM])
92 |     weight, bias = [], []
93 |     idx = 0
94 |     for w, b in zip(weight_emb_w, weight_emb_b):
95 |         weight.append(tf.reshape(ad[:, idx:idx+w[0]*w[1]], [-1, w[0], w[1]]))
96 |         idx += w[0] * w[1]
97 |         if b == 0:
98 | bias.append(None) 99 | else: 100 | bias.append(tf.reshape(ad[:, idx:idx+b], [-1, 1, b])) 101 | idx += b 102 | 103 | if mode == "seq_sum": 104 | his_items_sum = tf.reduce_sum(his_items, 1) 105 | his_items_sum = tf.reshape(his_items_sum, [-1, 1, dim]) 106 | out_seq = tf.nn.selu(tf.matmul(his_items_sum, w_1) + b) 107 | out_seq = tf.matmul(out_seq, w_2) 108 | out = tf.reduce_sum(out_seq, 1) 109 | elif mode == "seq": 110 | his_items_ = tf.unstack(his_items, axis=1) 111 | out_seq = [] 112 | for item in his_items_: 113 | item = tf.reshape(item, [-1, 1, dim]) 114 | #out.append(tf.nn.selu(tf.matmul(item, w) + b)) 115 | h = item 116 | for w, b in zip(weight, bias): 117 | h = tf.matmul(h, w) 118 | if b is not None: 119 | h = tf.nn.selu(h + b) 120 | out_seq.append(h) 121 | #h = tf.nn.selu(tf.matmul(item, w_1) + b) 122 | #out_seq.append(tf.matmul(h, w_2)) 123 | out_seq = tf.concat(out_seq, 1) 124 | out = tf.reduce_sum(out_seq, 1) 125 | elif mode == "emb": 126 | inp = his_items 127 | h = tf.reshape(inp, [-1, 1, dim]) 128 | for w, b in zip(weight, bias): 129 | h = tf.matmul(h, w) 130 | if b is not None: 131 | h = tf.nn.selu(h + b) 132 | out = h 133 | out = tf.reduce_sum(out, 1) 134 | elif mode == "poly": 135 | h = tf.reshape(his_items, [-1, 1, dim]) 136 | w = tf.reshape(ad, [-1, dim, dim]) 137 | ww = [w**(i+1) for i in range(orders)] 138 | for i in range(orders): 139 | h = tf.matmul(h, ww[i]) 140 | #if i < 2: 141 | h = tf.nn.tanh(h) 142 | out = h 143 | out = tf.reduce_sum(out, 1) 144 | elif mode == "poly_w": 145 | h = tf.reshape(his_items, [-1, 1, dim]) 146 | w = tf.reshape(ad, [-1, dim, dim]) 147 | ww = [w**(i+1) for i in range(orders)] 148 | out = [] 149 | for i in range(orders): 150 | out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) 151 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 152 | elif mode == "poly_x": 153 | h = tf.reshape(his_items, [-1, 1, dim]) 154 | w = tf.reshape(ad, [-1, dim, dim]) 155 | hh = [h**(i+1) for i in range(orders)] 156 | out = [] 157 | for i in range(orders): 158 | #out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 159 | out.append(tf.matmul(hh[i], w)) 160 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 161 | elif mode == "poly_x_x4": 162 | h = tf.reshape(his_items, [-1, 1, dim * alpha]) 163 | w = tf.reshape(ad, [-1, dim*alpha, dim*alpha]) 164 | hh = [h**(i+1) for i in range(orders)] 165 | out = [] 166 | for i in range(orders): 167 | out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 168 | #out.append(tf.matmul(hh[i], w)) 169 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 170 | elif mode == "poly_x_ind": 171 | h = tf.reshape(his_items, [-1, 1, dim]) 172 | ww = tf.split(ad, num_or_size_splits=orders, axis=1) 173 | ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] 174 | hh = [h**(i+1) for i in range(orders)] 175 | out = [] 176 | for i in range(orders): 177 | out.append(tf.nn.tanh(tf.matmul(hh[i], ww[i]))) 178 | #out.append(tf.matmul(hh[i], ww[i])) 179 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 180 | elif mode == "poly_wx": 181 | h = tf.reshape(his_items, [-1, 1, dim]) 182 | w = tf.reshape(ad, [-1, dim, dim]) 183 | ww = [w**(i+1) for i in range(orders)] 184 | hh = [h**(i+1) for i in range(orders)] 185 | out = [] 186 | for i in range(orders): 187 | out.append(tf.nn.tanh(tf.matmul(hh[i], w))) 188 | out.append(tf.nn.tanh(tf.matmul(h, ww[i]))) 189 | out = tf.reduce_sum(tf.concat(out, axis=1), 1) 190 | elif mode == "poly_wx_ind": 191 | h = tf.reshape(his_items, [-1, 1, dim]) 192 | ww = tf.split(ad, num_or_size_splits=2, axis=1) 193 | ww = [tf.reshape(w, [-1, dim, dim]) for w in ww] 194 
| ww1 = [ww[1]**(i+1) for i in range(orders)]
195 |         hh = [h**(i+1) for i in range(orders)]
196 |         out = []
197 |         for i in range(orders):
198 |             out.append(tf.nn.tanh(tf.matmul(hh[i], ww[0])))
199 |             out.append(tf.nn.tanh(tf.matmul(h, ww1[i])))
200 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
201 |     elif mode == "poly_x_vec":
202 |         h = tf.reshape(his_items, [-1, 1, dim])
203 |         w = tf.reshape(ad, [-1, 1, dim])
204 |         hh = [h**(i+1) for i in range(orders)]
205 |         out = []
206 |         for i in range(orders):
207 |             out.append(tf.nn.tanh(hh[i] * w))
208 |             #out.append(hh[i] * w)
209 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
210 |         #out = tf.reduce_sum(tf.concat(out, axis=1), 1)  # duplicate of the line above; reducing a second time would fail
211 |     elif mode == "poly_pure":
212 |         h = tf.reshape(his_items, [-1, 1, dim])
213 |         w = tf.reshape(ad, [-1, dim, dim])
214 |         ww = [w**(i+1) for i in range(orders)]
215 |         hh = [h**(i+1) for i in range(orders)]
216 |         out = []
217 |         for i in range(orders):
218 |             for j in range(orders):
219 |                 out.append(tf.nn.tanh(tf.matmul(hh[i], ww[j])))
220 |         out = tf.reduce_sum(tf.concat(out, axis=1), 1)
221 | 
222 |     #out = tf.nn.selu(out)
223 |     if keep_fake_carte_seq and mode == "seq":
224 |         return out, out_seq
225 |     return out, None
226 | 
227 | def FM(feas):
228 |     feas = tf.stack(feas, axis=1)
229 |     square_of_sum = tf.reduce_sum(feas, axis=1) ** 2
230 |     sum_of_square = tf.reduce_sum(feas ** 2, axis=1)
231 |     return 0.5 * (square_of_sum - sum_of_square)  # pairwise interactions: sum_{i<j} v_i*v_j = 0.5*((sum_i v_i)^2 - sum_i v_i^2)
232 | 
233 | class Model(object):
234 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN, use_negsample=False, Flag="DNN"):
235 |         self.model_flag = Flag
236 |         self.reg = False
237 |         self.use_negsample = use_negsample
238 |         with tf.name_scope('Inputs'):
239 |             self.user_batch_ph = tf.placeholder(tf.int32, [None, None], name='user_batch_ph')
240 |             self.ad_batch_ph = tf.placeholder(tf.int32, [None, None], name='ad_batch_ph')
241 |             self.scene_batch_ph = tf.placeholder(tf.int32, [None, None], name='scene_batch_ph')
242 |             self.time_batch_ph = tf.placeholder(tf.int32, [None, ], name='time_batch_ph')
243 |             self.clk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='clk_seq_batch_ph')
244 |             self.carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='carte_batch_ph')
245 |             #self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None], name='noclk_seq_batch_ph')
246 |             '''
247 |             self.item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_carte_batch_ph')
248 |             self.cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_carte_batch_ph')
249 |             self.item_cate_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='item_cate_carte_batch_ph')
250 |             self.cate_item_carte_batch_ph = tf.placeholder(tf.int32, [None, None], name='cate_item_carte_batch_ph')
251 |             '''
252 |             self.clk_mask = tf.placeholder(tf.float32, [None, None], name='clk_mask_batch_ph')
253 |             self.target_ph = tf.placeholder(tf.float32, [None, 2], name='target_ph')
254 |             self.lr = tf.placeholder(tf.float64, [])
255 | 
256 |         # Embedding layer
257 |         with tf.name_scope('Embedding_layer'):
258 | 
259 |             ad_ph = tf.split(self.ad_batch_ph, num_or_size_splits=10, axis=1)
260 |             scene_ph = tf.split(self.scene_batch_ph, num_or_size_splits=6, axis=1)
261 |             user_ph = tf.split(self.user_batch_ph, num_or_size_splits=4, axis=1)
262 |             feature_ph = [self.time_batch_ph] + ad_ph[:2] + scene_ph + user_ph + ad_ph[2:]
263 | 
264 |             self.embedding_vars = []
265 |             features = []
266 |             for i, num in enumerate(feature_num):
267 |                 self.embedding_vars.append(tf.get_variable("embedding_var_fea{}".format(i), [num, EMBEDDING_DIM], trainable=True))
268 |                 features.append(tf.nn.embedding_lookup(self.embedding_vars[i], feature_ph[i] - id_offset[i]))  # ids are globally offset per field, so subtract id_offset[i] before the lookup
269 | 
270 |             self.user_batch_embedded = tf.concat(features[9:13], axis=1)
271 |             self.ad_batch_embedded = tf.concat(features[1:3]+features[13:], axis=1)
272 |             self.scene_batch_embedded = tf.concat(features[3:9], axis=1)
273 |             self.time_batch_embedded = features[0]
274 |             self.clk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.clk_seq_batch_ph)
275 | 
276 |             if use_cartes:
277 |                 self.carte_embeddings_var = []
278 |                 self.carte_batch_embedded = []
279 |                 for i, c in enumerate(use_cartes):
280 |                     self.carte_embeddings_var.append(tf.get_variable("carte_embedding_var_{}".format(c), [carte_num_dict[c], EMBEDDING_DIM], trainable=True))
281 |                     self.carte_batch_embedded.append(tf.nn.embedding_lookup(self.carte_embeddings_var[i], self.carte_batch_ph[:, i]))
282 | 
283 |             ### fake carte ###
284 |             if emb_as_weight:
285 |                 '''
286 |                 TODO: support multi-group cartesian feature, e.g., 13-16-19
287 |                 '''
288 |                 idx_w, idx_x = map(int, edge_type.split('-'))
289 | 
290 |                 self.weight_embeddings_var = tf.get_variable("weight_embedding_var", [feature_num[idx_w] + 1, WEIGHT_EMB_NUM * WEIGHT_EMB_DIM], trainable=True)
291 |                 self.weight_batch_embedded = tf.nn.embedding_lookup(self.weight_embeddings_var, feature_ph[idx_w])
292 |                 if use_new_seq_emb:
293 |                     self.seq_embeddings_var = tf.get_variable("seq_embedding_var", [feature_num[idx_x], EMBEDDING_DIM * alpha], trainable=True)
294 |                     self.seq_his_batch_embedded = tf.nn.embedding_lookup(self.seq_embeddings_var, feature_ph[idx_x])
295 | 
296 |         with tf.name_scope('init_operation'):
297 |             for i, num in enumerate(feature_num):
298 |                 embedding_placeholder = tf.placeholder(tf.float32,[num, EMBEDDING_DIM], name="emb_ph_{}".format(i))
299 |                 self.embedding_vars[i].assign(embedding_placeholder)  # NOTE: this assign op is created but never stored or run
300 | 
301 |             if use_cartes:
302 |                 self.carte_embedding_placeholder = []
303 |                 self.carte_embedding_init = []
304 |                 for i, c in enumerate(use_cartes):
305 |                     self.carte_embedding_placeholder.append(tf.placeholder(tf.float32,[carte_num_dict[c], EMBEDDING_DIM], name="cid_emb_ph"))
306 |                     self.carte_embedding_init.append(self.carte_embeddings_var[i].assign(self.carte_embedding_placeholder[i]))
307 | 
308 |         if self.use_negsample:
309 |             self.noclk_seq_batch_ph = tf.placeholder(tf.int32, [None, None, None], name='noclk_seq_batch_ph')
310 |             self.noclk_seq_batch_embedded = tf.nn.embedding_lookup(self.embedding_vars[0], self.noclk_seq_batch_ph)
311 |             self.noclk_mask = tf.placeholder(tf.float32, [None, None], name='noclk_mask_batch_ph')
312 |             #self.mid_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_his_batch_ph')
313 |             #self.cate_neg_batch_ph = tf.placeholder(tf.int32, [None, None], name='neg_cate_his_batch_ph')
314 | 
315 |             #self.neg_item_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.mid_neg_batch_ph)
316 |             #self.neg_cate_his_eb = tf.nn.embedding_lookup(self.mid_embeddings_var, self.cate_neg_batch_ph)
317 |             #self.neg_his_eb = tf.concat([self.neg_item_his_eb,self.neg_cate_his_eb], axis=2) * tf.reshape(self.mask,(BATCH_SIZE, SEQ_LEN, 1))
318 |             self.noclk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.noclk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.noclk_mask,(BATCH_SIZE, SEQ_LEN, 1))
319 | 
320 |         self.user_eb = tf.reshape(self.user_batch_embedded, [-1, EMBEDDING_DIM * 4]) # [batch, 4, dim] -> [batch, 
4*dim] 321 | self.ad_eb = tf.reshape(self.ad_batch_embedded, [-1, EMBEDDING_DIM * 10]) 322 | self.scene_eb = tf.reshape(self.scene_batch_embedded, [-1, EMBEDDING_DIM * 6]) 323 | self.time_eb = self.time_batch_embedded 324 | 325 | self.clk_seq_eb = tf.concat(tf.unstack(tf.reshape(self.clk_seq_batch_embedded,(BATCH_SIZE, 10, SEQ_LEN, EMBEDDING_DIM)), axis=1), axis=-1) * tf.reshape(self.clk_mask, (BATCH_SIZE, SEQ_LEN, 1)) 326 | self.clk_seq_eb_sum = tf.reduce_sum(self.clk_seq_eb, 1) 327 | 328 | 329 | self.carte_embs = [] 330 | if use_cartes: 331 | self.carte_embs += self.carte_batch_embedded 332 | 333 | if emb_as_weight: 334 | if use_new_seq_emb: 335 | seq_his_batch = self.seq_his_batch_embedded 336 | else: 337 | seq_his_batch = features[int(edge_type.split('-')[1])] 338 | tmp_sum, tmp_seq = [], [] 339 | if CALC_MODE.startswith("seq"): 340 | shape = (BATCH_SIZE, SEQ_LEN, EMBEDDING_DIM) 341 | else: 342 | shape = (BATCH_SIZE, EMBEDDING_DIM * alpha) 343 | for i in range(WEIGHT_EMB_NUM): 344 | fake_carte_sum, fake_carte_seq = eb_as_weight(self.weight_batch_embedded[:, i * WEIGHT_EMB_DIM: (i+1) * WEIGHT_EMB_DIM], tf.reshape(seq_his_batch, shape), EMBEDDING_DIM, mode=CALC_MODE) 345 | tmp_sum.append(fake_carte_sum) 346 | tmp_seq.append(fake_carte_seq) 347 | self.fake_carte_sum = tf.concat(tmp_sum, axis=1) 348 | if keep_fake_carte_seq: 349 | self.fake_carte_seq = tmp_seq 350 | 351 | 352 | def build_fcn_net(self, inp, use_dice = False): 353 | bn1 = tf.layers.batch_normalization(inputs=inp, name='bn1') 354 | dnn1 = tf.layers.dense(bn1, 200, activation=None, name='f1') 355 | if use_dice: 356 | dnn1 = dice(dnn1, name='dice_1') 357 | else: 358 | dnn1 = prelu(dnn1, scope='prelu_1') 359 | 360 | dnn2 = tf.layers.dense(dnn1, 80, activation=None, name='f2') 361 | if use_dice: 362 | dnn2 = dice(dnn2, name='dice_2') 363 | else: 364 | dnn2 = prelu(dnn2, scope='prelu_2') 365 | 366 | dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3') 367 | self.y_hat = tf.nn.softmax(dnn3) + 0.00000001 368 | 369 | with tf.name_scope('Metrics'): 370 | # Cross-entropy loss and optimizer initialization 371 | ctr_loss = - tf.reduce_mean(tf.log(self.y_hat) * self.target_ph) 372 | self.loss = ctr_loss 373 | if self.use_negsample: 374 | self.loss += self.aux_loss 375 | if self.reg: 376 | self.loss += self.reg_loss 377 | 378 | tf.summary.scalar('loss', self.loss) 379 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss) 380 | # Accuracy metric 381 | self.accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(self.y_hat), self.target_ph), tf.float32)) 382 | tf.summary.scalar('accuracy', self.accuracy) 383 | 384 | self.merged = tf.summary.merge_all() 385 | 386 | def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask = None, stag = None): 387 | #mask = tf.cast(mask, tf.float32) 388 | if noclk_mask is None: 389 | noclk_mask = clk_mask 390 | click_input_ = tf.concat([h_states, click_seq], -1) 391 | noclick_input_ = tf.concat([h_states, noclick_seq], -1) 392 | click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0] 393 | noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0] 394 | 395 | click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * clk_mask 396 | noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * noclk_mask 397 | 398 | loss_ = tf.reduce_mean(click_loss_ + noclick_loss_) 399 | return loss_ 400 | 401 | def auxiliary_net(self, in_, stag='auxiliary_net'): 402 | bn1 = 
tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE) 403 | dnn1 = tf.layers.dense(bn1, 100, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE) 404 | dnn1 = tf.nn.sigmoid(dnn1) 405 | dnn2 = tf.layers.dense(dnn1, 50, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE) 406 | dnn2 = tf.nn.sigmoid(dnn2) 407 | dnn3 = tf.layers.dense(dnn2, 2, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE) 408 | y_hat = tf.nn.softmax(dnn3) + 0.000001 409 | return y_hat 410 | 411 | def init_uid_weight(self, sess, uid_weight): 412 | sess.run(self.uid_embedding_init,feed_dict={self.uid_embedding_placeholder: uid_weight}) 413 | 414 | def init_mid_weight(self, sess, mid_weight): 415 | sess.run([self.mid_embedding_init],feed_dict={self.mid_embedding_placeholder: mid_weight}) 416 | 417 | def save_mid_embedding_weight(self, sess): 418 | embedding = sess.run(self.mid_embeddings_var) 419 | return embedding 420 | 421 | def save_uid_embedding_weight(self, sess): 422 | embedding = sess.run(self.uid_bp_memory) 423 | return embedding 424 | 425 | def train(self, sess, inps): 426 | input_dict = { 427 | self.user_batch_ph: inps[0], 428 | self.ad_batch_ph: inps[1], 429 | self.scene_batch_ph: inps[2], 430 | self.time_batch_ph: inps[3], 431 | self.clk_seq_batch_ph: inps[4], 432 | self.clk_mask: inps[6], 433 | self.target_ph: inps[-2], 434 | self.lr: inps[-1], 435 | } 436 | if use_cartes: 437 | input_dict[self.carte_batch_ph] = inps[-3] 438 | if "item-his_item" in use_cartes: 439 | input_dict[self.item_carte_batch_ph] = inps[10] 440 | if "cate-his_cate" in use_cartes: 441 | input_dict[self.cate_carte_batch_ph] = inps[11] 442 | if "item-his_cate" in use_cartes: 443 | input_dict[self.item_cate_carte_batch_ph] = inps[12] 444 | if "cate-his_item" in use_cartes: 445 | input_dict[self.cate_item_carte_batch_ph] = inps[13] 446 | 447 | if self.use_negsample: 448 | input_dict[self.noclk_seq_batch_ph] = inps[5] 449 | input_dict[self.noclk_mask] = inps[7] 450 | loss, aux_loss, accuracy, _ = sess.run([self.loss, self.aux_loss, self.accuracy, self.optimizer], feed_dict=input_dict) 451 | else: 452 | loss, accuracy, _ = sess.run([self.loss, self.accuracy, self.optimizer], feed_dict=input_dict) 453 | aux_loss = 0 454 | return loss, accuracy, aux_loss 455 | 456 | def calculate(self, sess, inps): 457 | input_dict = { 458 | self.user_batch_ph: inps[0], 459 | self.ad_batch_ph: inps[1], 460 | self.scene_batch_ph: inps[2], 461 | self.time_batch_ph: inps[3], 462 | self.clk_seq_batch_ph: inps[4], 463 | self.clk_mask: inps[6], 464 | self.target_ph: inps[-1], 465 | } 466 | if use_cartes: 467 | input_dict[self.carte_batch_ph] = inps[-2] 468 | 469 | if "item-his_item" in use_cartes: 470 | input_dict[self.item_carte_batch_ph] = inps[9] 471 | if "cate-his_cate" in use_cartes: 472 | input_dict[self.cate_carte_batch_ph] = inps[10] 473 | if "item-his_cate" in use_cartes: 474 | input_dict[self.item_cate_carte_batch_ph] = inps[11] 475 | if "cate-his_item" in use_cartes: 476 | input_dict[self.cate_item_carte_batch_ph] = inps[12] 477 | 478 | if self.use_negsample: 479 | input_dict[self.noclk_seq_batch_ph] = inps[5] 480 | input_dict[self.noclk_mask] = inps[7] 481 | probs, loss, accuracy, aux_loss = sess.run([self.y_hat, self.loss, self.accuracy, self.aux_loss], feed_dict=input_dict) 482 | else: 483 | probs, loss, accuracy = sess.run([self.y_hat, self.loss, self.accuracy], feed_dict=input_dict) 484 | aux_loss = 0 485 | return probs, loss, accuracy, aux_loss 486 | 487 | def save(self, sess, path): 488 | saver = tf.train.Saver() 
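
The `auxiliary_loss`/`auxiliary_net` pair defined above is DIEN-style auxiliary supervision: each hidden state is paired with the item actually clicked at the next step (label 1) and with a sampled non-clicked item (label 0), both are scored by a shared MLP, and the masked binary cross-entropies are averaged. A compact sketch of the same idea (TF 1.x; the real `auxiliary_net` above additionally batch-normalizes its input and emits two softmax logits, and the sizes below are illustrative):

import tensorflow as tf

B, T, D = 32, 20, 8                           # illustrative batch / sequence / feature sizes
h = tf.placeholder(tf.float32, [B, T, D])     # GRU states h_1 .. h_T
pos = tf.placeholder(tf.float32, [B, T, D])   # items clicked at the next step
neg = tf.placeholder(tf.float32, [B, T, D])   # sampled non-clicked items
mask = tf.placeholder(tf.float32, [B, T])     # 1 for real steps, 0 for padding

def score(x):                                 # shared discriminator MLP
    with tf.variable_scope('aux', reuse=tf.AUTO_REUSE):
        x = tf.layers.dense(x, 50, activation=tf.nn.sigmoid, name='f1')
        return tf.layers.dense(x, 1, activation=tf.nn.sigmoid, name='f2')[..., 0]

p_pos = score(tf.concat([h, pos], -1))        # P(click | state, item)
p_neg = score(tf.concat([h, neg], -1))
aux_loss = tf.reduce_mean(
    (-tf.log(p_pos + 1e-8) - tf.log(1.0 - p_neg + 1e-8)) * mask)
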
489 |         saver.save(sess, save_path=path)
490 | 
491 |     def restore(self, sess, path):
492 |         saver = tf.train.Saver()
493 |         saver.restore(sess, save_path=path)
494 |         print('model restored from %s' % path)
495 | 
496 | class Model_DNN(Model):
497 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
498 |         super(Model_DNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
499 |                                         BATCH_SIZE, SEQ_LEN, Flag="DNN")
500 | 
501 |         #inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)
502 |         if emb_as_weight:
503 |             self.carte_embs.append(self.fake_carte_sum)
504 |         inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb] + self.carte_embs, 1)
505 |         self.build_fcn_net(inp, use_dice=False)
506 | 
507 | 
508 | class Model_FFM(Model):
509 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
510 |         super(Model_FFM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
511 |                                         BATCH_SIZE, SEQ_LEN, Flag="FFM")
512 | 
513 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum], 1)  # NOTE: item_eb / item_his_eb_sum are not built by this file's Model
514 |         self.build_fcn_net(inp, use_dice=False)
515 | 
516 | 
517 | 
518 | class Model_PNN(Model):
519 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
520 |         super(Model_PNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
521 |                                         BATCH_SIZE, SEQ_LEN, Flag="PNN")
522 | 
523 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum], 1)
524 |         self.build_fcn_net(inp, use_dice=False)
525 | 
526 | 
527 | class Model_GRU4REC(Model):
528 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
529 |         super(Model_GRU4REC, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
530 |                                             BATCH_SIZE, SEQ_LEN, Flag="GRU4REC")
531 |         with tf.name_scope('rnn_1'):
532 |             self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
533 |             rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
534 |                                                     sequence_length=self.sequence_length, dtype=tf.float32,
535 |                                                     scope="gru1")
536 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
537 | 
538 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1], 1)
539 |         self.build_fcn_net(inp, use_dice=False)
540 | 
541 | 
542 | class Model_DIN(Model):
543 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
544 |         super(Model_DIN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
545 |                                         BATCH_SIZE, SEQ_LEN, Flag="DIN")
546 |         with tf.name_scope('Attention_layer'):
547 |             attention_output = din_attention(self.item_eb, self.item_his_eb, HIDDEN_SIZE, self.mask)
548 |             att_fea = tf.reduce_sum(attention_output, 1)
549 |             tf.summary.histogram('att_fea', att_fea)
550 |         inp = tf.concat([self.item_eb, self.item_his_eb_sum, att_fea], -1)
551 |         self.build_fcn_net(inp, use_dice=False)
552 | 
553 | 
554 | class Model_ARNN(Model):
555 |     def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=256):
556 |         super(Model_ARNN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE,
557 |                                          BATCH_SIZE, SEQ_LEN, Flag="ARNN")
558 |         with tf.name_scope('rnn_1'):
559 |             self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE)
560 |             rnn_outputs, final_state1 = dynamic_rnn(GRUCell(2*EMBEDDING_DIM), inputs=self.item_his_eb,
561 |                                                     sequence_length=self.sequence_length, dtype=tf.float32,
562 |                                                     scope="gru1")
563 |             tf.summary.histogram('GRU_outputs', rnn_outputs)
564 |         # Attention layer
565 |         with tf.name_scope('Attention_layer_1'):
566 |             att_gru = 
din_attention(self.item_eb, rnn_outputs, HIDDEN_SIZE, self.mask) 567 | att_gru = tf.reduce_sum(att_gru, 1) 568 | 569 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state1, att_gru], -1) 570 | self.build_fcn_net(inp, use_dice=False) 571 | 572 | class Model_RUM(Model): 573 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, mask_flag=True): 574 | super(Model_RUM, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 575 | BATCH_SIZE, SEQ_LEN, Flag="RUM") 576 | 577 | def clear_mask_state(state, begin_state, mask, t): 578 | state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"] 579 | state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"] 580 | return state 581 | 582 | cell = rum.RUMCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1, 583 | reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE) 584 | 585 | state = cell.zero_state(BATCH_SIZE, tf.float32) 586 | begin_state = state 587 | for t in range(SEQ_LEN): 588 | output, state = cell(self.item_his_eb[:, t, :], state) 589 | if mask_flag: 590 | state = clear_mask_state(state, begin_state, self.mask, t) 591 | 592 | final_state = output 593 | before_memory = state['M'] 594 | rum_att_hist = din_attention(self.item_eb, before_memory, HIDDEN_SIZE, None) 595 | 596 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, final_state, tf.squeeze(rum_att_hist)], 1) 597 | 598 | self.build_fcn_net(inp, use_dice=False) 599 | 600 | class Model_DIEN(Model): 601 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, SEQ_LEN=400, use_negsample=False, use_mi_cons=False): 602 | super(Model_DIEN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 603 | BATCH_SIZE, SEQ_LEN, use_negsample, Flag="DIEN") 604 | 605 | with tf.name_scope('rnn_1'): 606 | self.sequence_length = tf.Variable([SEQ_LEN] * BATCH_SIZE) 607 | rnn_outputs, _ = dynamic_rnn(GRUCell(10*EMBEDDING_DIM), inputs=self.clk_seq_eb, 608 | sequence_length=self.sequence_length, dtype=tf.float32, 609 | scope="gru1") 610 | tf.summary.histogram('GRU_outputs', rnn_outputs) 611 | 612 | if use_negsample: 613 | if use_mi_cons: 614 | #aux_loss_1 = self.info_NCE(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.mask[:, 1:]) 615 | #aux_loss_1 = self.info_NCE_aux(rnn_outputs[:, :-1, :], self.item_his_eb[:, 1:, :], self.neg_his_eb[:, 1:, :], self.mask[:, 1:]) 616 | aux_loss_1 = self.mi_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :], 617 | self.noclk_seq_eb[:, 1:, :], self.mask[:, 1:], stag = "mi_0") 618 | else: 619 | aux_loss_1 = self.auxiliary_loss(rnn_outputs[:, :-1, :], self.clk_seq_eb[:, 1:, :], 620 | self.noclk_seq_eb[:, 1:, :], self.clk_mask[:, 1:], self.noclk_mask[:, 1:], stag = "bigru_0") 621 | self.aux_loss = aux_loss_1 622 | 623 | # Attention layer 624 | with tf.name_scope('Attention_layer_1'): 625 | att_outputs, alphas = din_attention(self.ad_eb, rnn_outputs, HIDDEN_SIZE, mask=self.clk_mask, mode="LIST", return_alphas=True) 626 | tf.summary.histogram('alpha_outputs', alphas) 627 | 628 | with tf.name_scope('rnn_2'): 629 | rnn_outputs2, final_state2 = dynamic_rnn(VecAttGRUCell(HIDDEN_SIZE), inputs=rnn_outputs, 630 | att_scores = tf.expand_dims(alphas, -1), 631 | sequence_length=self.sequence_length, dtype=tf.float32, 
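
The cell being wired up in this `rnn_2` call, `VecAttGRUCell`, is DIEN's AUGRU: the attention score `a_t` supplied through `att_scores` rescales the update gate, so low-attention steps leave the hidden state almost untouched while high-attention steps update it normally. One step of that recurrence, sketched standalone (illustrative; bias terms omitted, and this is not the cell's actual implementation):

import tensorflow as tf

def augru_step(h_prev, x, a_t, w_gates, w_cand):
    """One AUGRU update; a_t in [0, 1] is this step's attention score ([batch, 1])."""
    hx = tf.concat([h_prev, x], axis=-1)
    z, r = tf.split(tf.sigmoid(tf.matmul(hx, w_gates)), 2, axis=-1)   # update / reset gates
    h_cand = tf.tanh(tf.matmul(tf.concat([r * h_prev, x], axis=-1), w_cand))
    z = a_t * z                        # AUGRU: attention rescales the update gate
    return (1.0 - z) * h_prev + z * h_cand
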
632 | scope="gru2") 633 | tf.summary.histogram('GRU2_Final_State', final_state2) 634 | 635 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum], 1) 636 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum], 1) 637 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1) 638 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_cate_carte_eb_sum], 1) 639 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.cate_carte_eb_sum], 1) 640 | #inp = tf.concat([self.item_eb, final_state2, self.item_his_eb_sum, self.item_eb*self.item_his_eb_sum, self.item_carte_eb_sum, self.cate_carte_eb_sum], 1) 641 | 642 | 643 | #if attention 644 | 645 | if emb_as_weight: 646 | if keep_fake_carte_seq: 647 | if carte_with_gru: 648 | with tf.name_scope('rnn_3'): 649 | self.fake_carte_seq, _ = dynamic_rnn(GRUCell(EMBEDDING_DIM), inputs=self.fake_carte_seq, 650 | sequence_length=self.sequence_length, dtype=tf.float32, 651 | scope="gru3") 652 | 653 | with tf.name_scope('Attention_layer_2'): 654 | carte_att_outputs, _ = din_attention(self.mid_batch_embedded, self.fake_carte_seq, HIDDEN_SIZE, mask=self.mask, stag="carte", mode="SUM", return_alphas=True) 655 | self.carte_embs.append(tf.reduce_sum(carte_att_outputs, 1)) 656 | #self.carte_embs.append(self.fake_carte_sum) 657 | else: 658 | self.carte_embs.append(self.fake_carte_sum) 659 | inp = tf.concat([self.user_eb, self.ad_eb, self.scene_eb, self.time_eb, final_state2, self.clk_seq_eb_sum, self.ad_eb*self.clk_seq_eb_sum] + self.carte_embs, 1) 660 | self.build_fcn_net(inp, use_dice=False) 661 | 662 | def neg_sample(self, neg_his_emb, K=10, mode="random"): 663 | shape = tf.shape(neg_his_emb) 664 | batch, seq, dim = shape[0], shape[1], shape[2] 665 | 666 | if mode == "random": 667 | neg = tf.expand_dims(neg_his_emb, 1) #[batch, 1, seq, dim] 668 | neg = tf.tile(neg, [1,seq, 1,1]) #[batch, seq, seq, dim] 669 | # index = tf.random_uniform((batch, seq, K), minval=0, maxval=seq, dtype=tf.int32) 670 | # neg = tf.batch_gather(neg, index) #[batch, seq, K, dim] 671 | neg = neg[:, :, :K, :] 672 | return neg 673 | elif mode == "aux": 674 | neg = tf.expand_dims(neg_his_emb, 1) 675 | return neg 676 | 677 | def mi_loss_(self, h_states, click_seq, noclick_seq, mask = None, stag = None): 678 | #mask = tf.cast(mask, tf.float32) 679 | ''' 680 | h = self.mlp(h_states, stag = stag) 681 | pos = self.mlp(click_seq, stag = stag) 682 | neg = self.mlp(noclick_seq, stag = stag) 683 | 684 | scores_pos = tf.matmul(h, pos) 685 | scores_neg = tf.matmul(h, neg) 686 | joint = tf.linalg.diag_part(score_pos) 687 | ''' 688 | pos = tf.concat([h_states, click_seq], axis=2) 689 | f_pos = self.mlp(pos) # [batch, seq, 1] 690 | 691 | K = 99 692 | neg = self.neg_sample(noclick_seq, K) 693 | h_states_tiled = tf.tile(tf.expand_dims(h_states, 2), [1,1,K,1]) # [batch, seq, K, dim] 694 | total = tf.concat([h_states_tiled, neg], axis=3) 695 | f_neg = self.mlp(total) #[batch, seq, K, 1] 696 | f_neg = tf.reduce_sum(f_neg, axis=2) 697 | f_total = f_pos + f_neg 698 | 699 | loss_ = tf.reshape(tf.log(f_pos / f_total), [-1, tf.shape(click_seq)[1]]) * mask 700 | loss_ = - tf.reduce_mean(loss_) 701 | 702 | return loss_ 703 | 704 | def mi_loss(self, h_states, click_seq, noclick_seq, mask, stag='NCE'): 705 | exp = 
'random_1' 706 | if exp == 'random_1': 707 | shape = tf.shape(h_states) 708 | batch, len_seq, dim = shape[0], shape[1], shape[2] 709 | Wk_ct = [] 710 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 711 | x = tf.unstack(x, axis=1) 712 | neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc') 713 | neg = tf.unstack(neg, axis=1) 714 | c_t = tf.unstack(h_states, axis=1) 715 | with tf.name_scope(stag): 716 | for i in range(len(c_t)): 717 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 718 | #nce = 0 719 | nce = [] 720 | for i in range(len(c_t)): 721 | s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 722 | s_n = tf.reduce_sum(neg[i] * Wk_ct[i], axis=1, keep_dims=True) 723 | score = tf.concat([s_p, s_n], axis=1) 724 | score = tf.nn.log_softmax(tf.exp(score), dim=1) 725 | score = tf.reshape(score[:, 0], [-1]) 726 | nce.append(score) 727 | nce = tf.stack(nce, axis=1) * mask 728 | nce = tf.reduce_sum(nce) 729 | nce /= -1.0 * tf.cast(batch*len_seq, tf.float32) 730 | return nce 731 | elif exp == 'random_all': 732 | shape = tf.shape(h_states) 733 | batch, len_seq, dim = shape[0], shape[1], shape[2] 734 | Wk_ct = [] 735 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 736 | x = tf.unstack(x, axis=1) 737 | neg = tf.layers.dense(noclick_seq, 256, activation=None, name='neg_enc') 738 | neg = tf.unstack(neg, axis=1) 739 | c_t = tf.unstack(h_states, axis=1) 740 | with tf.name_scope(stag): 741 | for i in range(len(c_t)): 742 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 743 | nce = [] 744 | for i in range(len(c_t)): 745 | s_p = tf.reduce_sum(x[i] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 746 | s_n = [] 747 | for j in range(len(neg)): 748 | s_n.append(tf.reduce_sum(neg[j] * Wk_ct[i], axis=1, keep_dims=True)) 749 | score = tf.concat([s_p] + s_n, axis=1) 750 | score = tf.nn.log_softmax(tf.exp(score), dim=1) 751 | score = tf.reshape(score[:, 0], [-1]) 752 | nce.append(score) 753 | nce = tf.stack(nce, axis=1) * mask 754 | nce = tf.reduce_sum(nce) 755 | nce /= -1.0 * tf.cast(batch*len_seq, tf.float32) 756 | return nce 757 | 758 | elif exp == 'batch_1': 759 | shape = tf.shape(click_seq) 760 | batch, len_seq, dim = shape[0], shape[1], shape[2] 761 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 762 | x = tf.unstack(x, axis=1) 763 | c_t = tf.unstack(h_states, axis=1) 764 | # different W for every step 765 | rand_idx = 12 766 | Wk_ct = [] 767 | with tf.name_scope(stag): 768 | for i in range(len(c_t)): 769 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 770 | nce = [] 771 | for i in range(len(c_t)): 772 | x_i = tf.tile(x[i], [2,1]) 773 | s_p = tf.reduce_sum(x_i[0:128, :] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 774 | s_n = tf.reduce_sum(x_i[rand_idx:rand_idx+128] * Wk_ct[i], axis=1, keep_dims=True) # shape=[batch,1] 775 | score = tf.concat([s_p, s_n], axis=1) 776 | score = tf.nn.log_softmax(tf.exp(score), dim=1) # softmax over batch 777 | score = tf.reshape(score[:, 0], [-1]) 778 | nce.append(score) 779 | nce =tf.stack(nce, axis=1) * mask 780 | nce = tf.reduce_sum(nce) 781 | nce /= -1.0*tf.cast(batch*len_seq, tf.float32) 782 | return nce 783 | 784 | elif exp == 'batch_all': 785 | shape = tf.shape(click_seq) 786 | batch, len_seq, dim = shape[0], shape[1], shape[2] 787 | x = tf.layers.dense(click_seq, 256, activation=None, name='pos_enc') 788 | x = tf.unstack(x, axis=1) 789 | 
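
`mi_loss` is a CPC/InfoNCE-style mutual-information objective: each hidden state `c_t` is projected by a step-specific matrix `W_t`, the projection is scored against the encoded positive (the next click) and encoded negatives, and the log-softmax probability assigned to the positive is averaged over unmasked steps. A minimal sketch of the textbook form, which applies `log_softmax` directly to the raw scores and shares one projection across steps (TF 1.x; sizes illustrative):

import tensorflow as tf

B, T = 32, 20                                   # illustrative sizes
c = tf.placeholder(tf.float32, [B, T, 64])      # states c_1 .. c_T
pos = tf.placeholder(tf.float32, [B, T, 256])   # encoded positives (next clicks)
neg = tf.placeholder(tf.float32, [B, T, 256])   # encoded negatives
mask = tf.placeholder(tf.float32, [B, T])

w_c = tf.layers.dense(c, 256, name='W')         # one shared projection (the code above learns one W per step)
s_pos = tf.reduce_sum(pos * w_c, -1, keep_dims=True)
s_neg = tf.reduce_sum(neg * w_c, -1, keep_dims=True)
log_p = tf.nn.log_softmax(tf.concat([s_pos, s_neg], -1), dim=-1)[..., 0]
nce = -tf.reduce_sum(log_p * mask) / float(B * T)
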
c_t = tf.unstack(h_states, axis=1) 790 | # different W for every step 791 | Wk_ct = [] 792 | with tf.name_scope(stag): 793 | for i in range(len(c_t)): 794 | Wk_ct.append(tf.layers.dense(c_t[i], 256, activation=None, name='W{}'.format(i))) 795 | nce = [] 796 | for i in range(len(c_t)): 797 | score = tf.exp(tf.matmul(x[i], tf.transpose(Wk_ct[i]))) 798 | score = tf.nn.log_softmax(score, dim=0) # softmax over batch 799 | nce.append(tf.linalg.diag_part(score)) 800 | #nce += tf.reduce_sum(tf.linalg.diag_part(score)) 801 | nce = tf.stack(nce, axis=1) * mask 802 | nce = tf.reduce_sum(nce) 803 | nce /= -1.0*tf.cast(batch*len_seq, tf.float32) 804 | return nce 805 | 806 | 807 | def mlp(self, in_, stag='mlp'): 808 | bn1 = tf.layers.batch_normalization(inputs=in_, name='bn1' + stag, reuse=tf.AUTO_REUSE) 809 | dnn1 = tf.layers.dense(bn1, 1024, activation=None, name='f1' + stag, reuse=tf.AUTO_REUSE) 810 | dnn2 = tf.layers.dense(dnn1, 512, activation=None, name='f2' + stag, reuse=tf.AUTO_REUSE) 811 | dnn3 = tf.layers.dense(dnn2, 256, activation=None, name='f3' + stag, reuse=tf.AUTO_REUSE) 812 | return dnn3 813 | ''' 814 | dnn4 = tf.layers.dense(dnn3, 1, activation=None, name='f4' + stag, reuse=tf.AUTO_REUSE) 815 | dnn4 = tf.nn.sigmoid(dnn4) 816 | return dnn4 817 | y_hat = tf.nn.softmax(dnn3) + 0.000001 818 | return y_hat 819 | ''' 820 | 821 | def auxiliary_loss(self, h_states, click_seq, noclick_seq, clk_mask=None, noclk_mask=None, stag=None): 822 | if noclk_mask is None: 823 | noclk_mask = clk_mask 824 | # postive 825 | click_input = tf.concat([h_states, click_seq], -1) 826 | click_prop = self.auxiliary_net(click_input, stag = stag)[:, :, 0] 827 | click_loss = - tf.reshape(tf.log(click_prop), [-1, tf.shape(click_seq)[1]]) * clk_mask 828 | 829 | # negative 830 | exp = 'random_1' 831 | if exp =='random_1': 832 | return super(Model_DIEN, self).auxiliary_loss(h_states, click_seq, noclick_seq, clk_mask, noclk_mask, stag) 833 | elif exp == 'random_all': 834 | batch = 99 835 | noclick_seq_ = tf.tile(noclick_seq, [1,2,1]) # shape = [batch, 2 * seq, dim] for sliding window 836 | noclick_input = [] 837 | for i in range(99): 838 | noclick_input.append(tf.concat([h_states, noclick_seq_[:, i:i+batch, :]], axis=-1)) 839 | noclick_input = tf.concat(noclick_input, axis=0) 840 | mask = tf.tile(mask, [batch, 1]) 841 | elif exp == 'batch_1': 842 | batch = 128 843 | h_states = tf.unstack(h_states, axis=1) 844 | click_seq = tf.unstack(click_seq, axis=1) 845 | noclick_input = [] 846 | rand_idx = 12 847 | for i in range(len(click_seq)): 848 | h = h_states[i] # seq i of the batch, shape = [batch, dim] 849 | c = click_seq[i] 850 | c = tf.tile(c, [2, 1]) # sliding window 851 | noclick_input.append(tf.concat([h, c[rand_idx:rand_idx+batch,:]], axis=1)) 852 | noclick_input = tf.stack(noclick_input, axis=1) 853 | elif exp == 'batch_all': 854 | batch = 128 855 | h_states = tf.unstack(h_states, axis=1) 856 | click_seq = tf.unstack(click_seq, axis=1) 857 | noclick_input = [] 858 | for i in range(len(click_seq)): 859 | h = h_states[i] # seq i of the batch, shape = [batch, dim] 860 | c = click_seq[i] 861 | c = tf.tile(c, [2, 1]) # sliding window 862 | neg = [] 863 | for i in range(1, batch): 864 | neg.append(tf.concat([h, c[i:i+batch,:]], axis=1)) 865 | noclick_input.append(tf.concat(neg, axis=0)) 866 | noclick_input = tf.stack(noclick_input, axis=1) 867 | mask = tf.tile(mask, [batch-1, 1]) 868 | 869 | noclick_prop = self.auxiliary_net(noclick_input, stag = stag)[:, :, 0] 870 | noclick_loss = - tf.reshape(tf.log(1.0 - noclick_prop), [-1, 
tf.shape(noclick_seq)[1]]) * mask 871 | loss_ = tf.reduce_mean(click_loss) + tf.reduce_mean(noclick_loss) 872 | return loss_ 873 | 874 | def aux_batch(self, h_states, click_seq, noclick_seq, mask = None, stag = None): 875 | #mask = tf.cast(mask, tf.float32) 876 | # batch = tf.shape(h_states)[0] 877 | batch = 128 878 | click_input_ = tf.concat([h_states, click_seq], -1) 879 | h_states_ = tf.unstack(h_states, axis=1) 880 | click_seq_ = tf.unstack(click_seq, axis=1) 881 | neg_input_total = [] 882 | for i in range(len(click_seq_)): 883 | h = h_states_[i] # seq i of the batch [batch, dim] 884 | c = click_seq_[i] 885 | c = tf.tile(c, [2, 1]) # sliding window 886 | neg = [] 887 | for i in range(1, batch): 888 | neg.append(tf.concat([h, c[i:i+batch,:]], axis=1)) 889 | neg_input_total.append(tf.concat(neg, axis=0)) 890 | noclick_input_ = tf.stack(neg_input_total, axis=1) 891 | #noclick_input_ = tf.concat([h_states, noclick_seq], -1) 892 | click_prop_ = self.auxiliary_net(click_input_, stag = stag)[:, :, 0] 893 | noclick_prop_ = self.auxiliary_net(noclick_input_, stag = stag)[:, :, 0] 894 | 895 | click_loss_ = - tf.reshape(tf.log(click_prop_), [-1, tf.shape(click_seq)[1]]) * mask 896 | mask = tf.tile(mask, [batch-1, 1]) 897 | noclick_loss_ = - tf.reshape(tf.log(1.0 - noclick_prop_), [-1, tf.shape(noclick_seq)[1]]) * mask 898 | 899 | #loss_ = tf.reduce_mean(click_loss_ + noclick_loss_) 900 | loss_ = tf.reduce_mean(click_loss_) + tf.reduce_mean(noclick_loss_) 901 | return loss_ 902 | 903 | 904 | 905 | 906 | class Model_MIMN(Model): 907 | def __init__(self, n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, BATCH_SIZE, MEMORY_SIZE, SEQ_LEN=400, Mem_Induction=0, Util_Reg=0, use_negsample=False, mask_flag=False): 908 | super(Model_MIMN, self).__init__(n_uid, n_mid, EMBEDDING_DIM, HIDDEN_SIZE, 909 | BATCH_SIZE, SEQ_LEN, use_negsample, Flag="MIMN") 910 | self.reg = Util_Reg 911 | 912 | def clear_mask_state(state, begin_state, begin_channel_rnn_state, mask, cell, t): 913 | state["controller_state"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1))) * begin_state["controller_state"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1)) * state["controller_state"] 914 | state["M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["M"] 915 | state["key_M"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["key_M"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["key_M"] 916 | state["sum_aggre"] = (1-tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1))) * begin_state["sum_aggre"] + tf.reshape(mask[:,t], (BATCH_SIZE, 1, 1)) * state["sum_aggre"] 917 | if Mem_Induction > 0: 918 | temp_channel_rnn_state = [] 919 | for i in range(MEMORY_SIZE): 920 | temp_channel_rnn_state.append(cell.channel_rnn_state[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_state[i]*(1- tf.expand_dims(mask[:,t], axis=1))) 921 | cell.channel_rnn_state = temp_channel_rnn_state 922 | temp_channel_rnn_output = [] 923 | for i in range(MEMORY_SIZE): 924 | temp_output = cell.channel_rnn_output[i] * tf.expand_dims(mask[:,t], axis=1) + begin_channel_rnn_output[i]*(1- tf.expand_dims(self.mask[:,t], axis=1)) 925 | temp_channel_rnn_output.append(temp_output) 926 | cell.channel_rnn_output = temp_channel_rnn_output 927 | 928 | return state 929 | 930 | cell = mimn.MIMNCell(controller_units=HIDDEN_SIZE, memory_size=MEMORY_SIZE, memory_vector_dim=2*EMBEDDING_DIM,read_head_num=1, write_head_num=1, 931 | reuse=False, output_dim=HIDDEN_SIZE, clip_value=20, batch_size=BATCH_SIZE, 
mem_induction=Mem_Induction, util_reg=Util_Reg) 932 | 933 | state = cell.zero_state(BATCH_SIZE, tf.float32) 934 | if Mem_Induction > 0: 935 | begin_channel_rnn_output = cell.channel_rnn_output 936 | else: 937 | begin_channel_rnn_output = 0.0 938 | 939 | begin_state = state 940 | self.state_list = [state] 941 | self.mimn_o = [] 942 | for t in range(SEQ_LEN): 943 | output, state, temp_output_list = cell(self.item_his_eb[:, t, :], state) 944 | if mask_flag: 945 | state = clear_mask_state(state, begin_state, begin_channel_rnn_output, self.mask, cell, t) 946 | self.mimn_o.append(output) 947 | self.state_list.append(state) 948 | 949 | self.mimn_o = tf.stack(self.mimn_o, axis=1) 950 | self.state_list.append(state) 951 | mean_memory = tf.reduce_mean(state['sum_aggre'], axis=-2) 952 | 953 | before_aggre = state['w_aggre'] 954 | read_out, _, _ = cell(self.item_eb, state) 955 | 956 | if use_negsample: 957 | aux_loss_1 = self.auxiliary_loss(self.mimn_o[:, :-1, :], self.item_his_eb[:, 1:, :], 958 | self.neg_his_eb[:, 1:, :], self.mask[:, 1:], stag = "bigru_0") 959 | self.aux_loss = aux_loss_1 960 | 961 | if self.reg: 962 | self.reg_loss = cell.capacity_loss(before_aggre) 963 | else: 964 | self.reg_loss = tf.zeros(1) 965 | 966 | if Mem_Induction == 1: 967 | channel_memory_tensor = tf.concat(temp_output_list, 1) 968 | multi_channel_hist = din_attention(self.item_eb, channel_memory_tensor, HIDDEN_SIZE, None, stag='pal') 969 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, tf.squeeze(multi_channel_hist), mean_memory*self.item_eb], 1) 970 | else: 971 | inp = tf.concat([self.item_eb, self.item_his_eb_sum, read_out, mean_memory*self.item_eb], 1) 972 | 973 | self.build_fcn_net(inp, use_dice=False) 974 | -------------------------------------------------------------------------------- /script/process_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import time 4 | 5 | def process_meta(file): 6 | fi = open(file, "r") 7 | fo = open("item-info", "w") 8 | for line in fi: 9 | obj = eval(line) 10 | cat = obj["categories"][0][-1] 11 | print>>fo, obj["asin"] + "\t" + cat 12 | 13 | def process_reviews(file): 14 | fi = open(file, "r") 15 | user_map = {} 16 | fo = open("reviews-info", "w") 17 | for line in fi: 18 | obj = eval(line) 19 | userID = obj["reviewerID"] 20 | itemID = obj["asin"] 21 | rating = obj["overall"] 22 | time = obj["unixReviewTime"] 23 | print>>fo, userID + "\t" + itemID + "\t" + str(rating) + "\t" + str(time) 24 | 25 | def manual_join(): 26 | f_rev = open("reviews-info", "r") 27 | user_map = {} 28 | item_list = [] 29 | for line in f_rev: 30 | line = line.strip() 31 | items = line.split("\t") 32 | #loctime = time.localtime(float(items[-1])) 33 | #items[-1] = time.strftime('%Y-%m-%d', loctime) 34 | if items[0] not in user_map: 35 | user_map[items[0]]= [] 36 | user_map[items[0]].append(("\t".join(items), float(items[-1]))) 37 | item_list.append(items[1]) 38 | f_meta = open("item-info", "r") 39 | meta_map = {} 40 | for line in f_meta: 41 | arr = line.strip().split("\t") 42 | if arr[0] not in meta_map: 43 | meta_map[arr[0]] = arr[1] 44 | arr = line.strip().split("\t") 45 | fo = open("jointed-new", "w") 46 | for key in user_map: 47 | sorted_user_bh = sorted(user_map[key], key=lambda x:x[1]) 48 | for line, t in sorted_user_bh: 49 | items = line.split("\t") 50 | asin = items[1] 51 | j = 0 52 | while True: 53 | asin_neg_index = random.randint(0, len(item_list) - 1) 54 | asin_neg = item_list[asin_neg_index] 55 | if 
asin_neg == asin: 56 | continue 57 | items[1] = asin_neg 58 | print>>fo, "0" + "\t" + "\t".join(items) + "\t" + meta_map[asin_neg] 59 | j += 1 60 | if j == 1: #negative sampling frequency 61 | break 62 | if asin in meta_map: 63 | print>>fo, "1" + "\t" + line + "\t" + meta_map[asin] 64 | else: 65 | print>>fo, "1" + "\t" + line + "\t" + "default_cat" 66 | 67 | 68 | def split_test(): 69 | fi = open("jointed-new", "r") 70 | fo = open("jointed-new-split-info", "w") 71 | user_count = {} 72 | for line in fi: 73 | line = line.strip() 74 | user = line.split("\t")[1] 75 | if user not in user_count: 76 | user_count[user] = 0 77 | user_count[user] += 1 78 | fi.seek(0) 79 | i = 0 80 | last_user = "A26ZDKC53OP6JD" 81 | for line in fi: 82 | line = line.strip() 83 | user = line.split("\t")[1] 84 | if user == last_user: 85 | if i < user_count[user] - 2: # 1 + negative samples 86 | print>> fo, "20180118" + "\t" + line 87 | else: 88 | print>>fo, "20190119" + "\t" + line 89 | else: 90 | last_user = user 91 | i = 0 92 | if i < user_count[user] - 2: 93 | print>> fo, "20180118" + "\t" + line 94 | else: 95 | print>>fo, "20190119" + "\t" + line 96 | i += 1 97 | 98 | process_meta(sys.argv[1]) 99 | process_reviews(sys.argv[2]) 100 | manual_join() 101 | split_test() 102 | -------------------------------------------------------------------------------- /script/rnn.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """RNN helpers for TensorFlow models. 17 | 18 | 19 | @@bidirectional_dynamic_rnn 20 | @@dynamic_rnn 21 | @@raw_rnn 22 | @@static_rnn 23 | @@static_state_saving_rnn 24 | @@static_bidirectional_rnn 25 | """ 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from tensorflow.python.framework import constant_op 31 | from tensorflow.python.framework import dtypes 32 | from tensorflow.python.framework import ops 33 | from tensorflow.python.framework import tensor_shape 34 | from tensorflow.python.ops import array_ops 35 | from tensorflow.python.ops import control_flow_ops 36 | from tensorflow.python.ops import math_ops 37 | from tensorflow.python.ops import rnn_cell_impl 38 | from tensorflow.python.ops import tensor_array_ops 39 | from tensorflow.python.ops import variable_scope as vs 40 | from tensorflow.python.util import nest 41 | 42 | 43 | # pylint: disable=protected-access 44 | _concat = rnn_cell_impl._concat 45 | _like_rnncell = rnn_cell_impl._like_rnncell 46 | # pylint: enable=protected-access 47 | 48 | 49 | def _transpose_batch_time(x): 50 | """Transpose the batch and time dimensions of a Tensor. 51 | 52 | Retains as much of the static shape information as possible. 53 | 54 | Args: 55 | x: A tensor of rank 2 or higher. 
56 | 57 | Returns: 58 | x transposed along the first two dimensions. 59 | 60 | Raises: 61 | ValueError: if `x` is rank 1 or lower. 62 | """ 63 | x_static_shape = x.get_shape() 64 | if x_static_shape.ndims is not None and x_static_shape.ndims < 2: 65 | raise ValueError( 66 | "Expected input tensor %s to have rank at least 2, but saw shape: %s" % 67 | (x, x_static_shape)) 68 | x_rank = array_ops.rank(x) 69 | x_t = array_ops.transpose( 70 | x, array_ops.concat( 71 | ([1, 0], math_ops.range(2, x_rank)), axis=0)) 72 | x_t.set_shape( 73 | tensor_shape.TensorShape([ 74 | x_static_shape[1].value, x_static_shape[0].value 75 | ]).concatenate(x_static_shape[2:])) 76 | return x_t 77 | 78 | 79 | def _best_effort_input_batch_size(flat_input): 80 | """Get static input batch size if available, with fallback to the dynamic one. 81 | 82 | Args: 83 | flat_input: An iterable of time major input Tensors of shape [max_time, 84 | batch_size, ...]. All inputs should have compatible batch sizes. 85 | 86 | Returns: 87 | The batch size in Python integer if available, or a scalar Tensor otherwise. 88 | 89 | Raises: 90 | ValueError: if there is any input with an invalid shape. 91 | """ 92 | for input_ in flat_input: 93 | shape = input_.shape 94 | if shape.ndims is None: 95 | continue 96 | if shape.ndims < 2: 97 | raise ValueError( 98 | "Expected input tensor %s to have rank at least 2" % input_) 99 | batch_size = shape[1].value 100 | if batch_size is not None: 101 | return batch_size 102 | # Fallback to the dynamic batch size of the first input. 103 | return array_ops.shape(flat_input[0])[1] 104 | 105 | 106 | def _infer_state_dtype(explicit_dtype, state): 107 | """Infer the dtype of an RNN state. 108 | 109 | Args: 110 | explicit_dtype: explicitly declared dtype or None. 111 | state: RNN's hidden state. Must be a Tensor or a nested iterable containing 112 | Tensors. 113 | 114 | Returns: 115 | dtype: inferred dtype of hidden state. 116 | 117 | Raises: 118 | ValueError: if `state` has heterogeneous dtypes or is empty. 119 | """ 120 | if explicit_dtype is not None: 121 | return explicit_dtype 122 | elif nest.is_sequence(state): 123 | inferred_dtypes = [element.dtype for element in nest.flatten(state)] 124 | if not inferred_dtypes: 125 | raise ValueError("Unable to infer dtype from empty state.") 126 | all_same = all([x == inferred_dtypes[0] for x in inferred_dtypes]) 127 | if not all_same: 128 | raise ValueError( 129 | "State has tensors of different inferred_dtypes. Unable to infer a " 130 | "single representative dtype.") 131 | return inferred_dtypes[0] 132 | else: 133 | return state.dtype 134 | 135 | 136 | # pylint: disable=unused-argument 137 | def _rnn_step( 138 | time, sequence_length, min_sequence_length, max_sequence_length, 139 | zero_output, state, call_cell, state_size, skip_conditionals=False): 140 | """Calculate one step of a dynamic RNN minibatch. 141 | 142 | Returns an (output, state) pair conditioned on the sequence_lengths. 143 | When skip_conditionals=False, the pseudocode is something like: 144 | 145 | if t >= max_sequence_length: 146 | return (zero_output, state) 147 | if t < min_sequence_length: 148 | return call_cell() 149 | 150 | # Selectively output zeros or output, old state or new state depending 151 | # on if we've finished calculating each row. 
152 | new_output, new_state = call_cell() 153 | final_output = np.vstack([ 154 | zero_output if time >= sequence_lengths[r] else new_output_r 155 | for r, new_output_r in enumerate(new_output) 156 | ]) 157 | final_state = np.vstack([ 158 | state[r] if time >= sequence_lengths[r] else new_state_r 159 | for r, new_state_r in enumerate(new_state) 160 | ]) 161 | return (final_output, final_state) 162 | 163 | Args: 164 | time: Python int, the current time step 165 | sequence_length: int32 `Tensor` vector of size [batch_size] 166 | min_sequence_length: int32 `Tensor` scalar, min of sequence_length 167 | max_sequence_length: int32 `Tensor` scalar, max of sequence_length 168 | zero_output: `Tensor` vector of shape [output_size] 169 | state: Either a single `Tensor` matrix of shape `[batch_size, state_size]`, 170 | or a list/tuple of such tensors. 171 | call_cell: lambda returning tuple of (new_output, new_state) where 172 | new_output is a `Tensor` matrix of shape `[batch_size, output_size]`. 173 | new_state is a `Tensor` matrix of shape `[batch_size, state_size]`. 174 | state_size: The `cell.state_size` associated with the state. 175 | skip_conditionals: Python bool, whether to skip using the conditional 176 | calculations. This is useful for `dynamic_rnn`, where the input tensor 177 | matches `max_sequence_length`, and using conditionals just slows 178 | everything down. 179 | 180 | Returns: 181 | A tuple of (`final_output`, `final_state`) as given by the pseudocode above: 182 | final_output is a `Tensor` matrix of shape [batch_size, output_size] 183 | final_state is either a single `Tensor` matrix, or a tuple of such 184 | matrices (matching length and shapes of input `state`). 185 | 186 | Raises: 187 | ValueError: If the cell returns a state tuple whose length does not match 188 | that returned by `state_size`. 189 | """ 190 | 191 | # Convert state to a list for ease of use 192 | flat_state = nest.flatten(state) 193 | flat_zero_output = nest.flatten(zero_output) 194 | 195 | def _copy_one_through(output, new_output): 196 | # If the state contains a scalar value we simply pass it through. 197 | if output.shape.ndims == 0: 198 | return new_output 199 | copy_cond = (time >= sequence_length) 200 | with ops.colocate_with(new_output): 201 | return array_ops.where(copy_cond, output, new_output) 202 | 203 | def _copy_some_through(flat_new_output, flat_new_state): 204 | # Use broadcasting select to determine which values should get 205 | # the previous state & zero output, and which values should get 206 | # a calculated state & output. 207 | flat_new_output = [ 208 | _copy_one_through(zero_output, new_output) 209 | for zero_output, new_output in zip(flat_zero_output, flat_new_output)] 210 | flat_new_state = [ 211 | _copy_one_through(state, new_state) 212 | for state, new_state in zip(flat_state, flat_new_state)] 213 | return flat_new_output + flat_new_state 214 | 215 | def _maybe_copy_some_through(): 216 | """Run RNN step. 
Pass through either no or some past state.""" 217 | new_output, new_state = call_cell() 218 | 219 | nest.assert_same_structure(state, new_state) 220 | 221 | flat_new_state = nest.flatten(new_state) 222 | flat_new_output = nest.flatten(new_output) 223 | return control_flow_ops.cond( 224 | # if t < min_seq_len: calculate and return everything 225 | time < min_sequence_length, lambda: flat_new_output + flat_new_state, 226 | # else copy some of it through 227 | lambda: _copy_some_through(flat_new_output, flat_new_state)) 228 | 229 | # TODO(ebrevdo): skipping these conditionals may cause a slowdown, 230 | # but benefits from removing cond() and its gradient. We should 231 | # profile with and without this switch here. 232 | if skip_conditionals: 233 | # Instead of using conditionals, perform the selective copy at all time 234 | # steps. This is faster when max_seq_len is equal to the number of unrolls 235 | # (which is typical for dynamic_rnn). 236 | new_output, new_state = call_cell() 237 | nest.assert_same_structure(state, new_state) 238 | new_state = nest.flatten(new_state) 239 | new_output = nest.flatten(new_output) 240 | final_output_and_state = _copy_some_through(new_output, new_state) 241 | else: 242 | empty_update = lambda: flat_zero_output + flat_state 243 | final_output_and_state = control_flow_ops.cond( 244 | # if t >= max_seq_len: copy all state through, output zeros 245 | time >= max_sequence_length, empty_update, 246 | # otherwise calculation is required: copy some or all of it through 247 | _maybe_copy_some_through) 248 | 249 | if len(final_output_and_state) != len(flat_zero_output) + len(flat_state): 250 | raise ValueError("Internal error: state and output were not concatenated " 251 | "correctly.") 252 | final_output = final_output_and_state[:len(flat_zero_output)] 253 | final_state = final_output_and_state[len(flat_zero_output):] 254 | 255 | for output, flat_output in zip(final_output, flat_zero_output): 256 | output.set_shape(flat_output.get_shape()) 257 | for substate, flat_substate in zip(final_state, flat_state): 258 | substate.set_shape(flat_substate.get_shape()) 259 | 260 | final_output = nest.pack_sequence_as( 261 | structure=zero_output, flat_sequence=final_output) 262 | final_state = nest.pack_sequence_as( 263 | structure=state, flat_sequence=final_state) 264 | 265 | return final_output, final_state 266 | 267 | 268 | def _reverse_seq(input_seq, lengths): 269 | """Reverse a list of Tensors up to specified lengths. 270 | 271 | Args: 272 | input_seq: Sequence of seq_len tensors of dimension (batch_size, n_features) 273 | or nested tuples of tensors. 274 | lengths: A `Tensor` of dimension batch_size, containing lengths for each 275 | sequence in the batch. If "None" is specified, simply reverses 276 | the list. 
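
As a concrete example of the primitive this helper builds on, `tf.reverse_sequence` reverses only the first `lengths[i]` elements of each batch entry and leaves the padding in place:

import tensorflow as tf

x = tf.constant([[1, 2, 3, 0],
                 [4, 5, 0, 0]])
rev = tf.reverse_sequence(x, seq_lengths=[3, 2], seq_dim=1, batch_dim=0)
# rev evaluates to [[3, 2, 1, 0],
#                   [5, 4, 0, 0]]
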
277 | 278 | Returns: 279 | time-reversed sequence 280 | """ 281 | if lengths is None: 282 | return list(reversed(input_seq)) 283 | 284 | flat_input_seq = tuple(nest.flatten(input_) for input_ in input_seq) 285 | 286 | flat_results = [[] for _ in range(len(input_seq))] 287 | for sequence in zip(*flat_input_seq): 288 | input_shape = tensor_shape.unknown_shape( 289 | ndims=sequence[0].get_shape().ndims) 290 | for input_ in sequence: 291 | input_shape.merge_with(input_.get_shape()) 292 | input_.set_shape(input_shape) 293 | 294 | # Join into (time, batch_size, depth) 295 | s_joined = array_ops.stack(sequence) 296 | 297 | # Reverse along dimension 0 298 | s_reversed = array_ops.reverse_sequence(s_joined, lengths, 0, 1) 299 | # Split again into list 300 | result = array_ops.unstack(s_reversed) 301 | for r, flat_result in zip(result, flat_results): 302 | r.set_shape(input_shape) 303 | flat_result.append(r) 304 | 305 | results = [nest.pack_sequence_as(structure=input_, flat_sequence=flat_result) 306 | for input_, flat_result in zip(input_seq, flat_results)] 307 | return results 308 | 309 | 310 | def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, 311 | initial_state_fw=None, initial_state_bw=None, 312 | dtype=None, parallel_iterations=None, 313 | swap_memory=False, time_major=False, scope=None): 314 | """Creates a dynamic version of bidirectional recurrent neural network. 315 | 316 | Takes input and builds independent forward and backward RNNs. The input_size 317 | of forward and backward cell must match. The initial state for both directions 318 | is zero by default (but can be set optionally) and no intermediate states are 319 | ever returned -- the network is fully unrolled for the given (passed in) 320 | length(s) of the sequence(s) or completely unrolled if length(s) is not 321 | given. 322 | 323 | Args: 324 | cell_fw: An instance of RNNCell, to be used for forward direction. 325 | cell_bw: An instance of RNNCell, to be used for backward direction. 326 | inputs: The RNN inputs. 327 | If time_major == False (default), this must be a tensor of shape: 328 | `[batch_size, max_time, ...]`, or a nested tuple of such elements. 329 | If time_major == True, this must be a tensor of shape: 330 | `[max_time, batch_size, ...]`, or a nested tuple of such elements. 331 | sequence_length: (optional) An int32/int64 vector, size `[batch_size]`, 332 | containing the actual lengths for each of the sequences in the batch. 333 | If not provided, all batch entries are assumed to be full sequences; and 334 | time reversal is applied from time `0` to `max_time` for each sequence. 335 | initial_state_fw: (optional) An initial state for the forward RNN. 336 | This must be a tensor of appropriate type and shape 337 | `[batch_size, cell_fw.state_size]`. 338 | If `cell_fw.state_size` is a tuple, this should be a tuple of 339 | tensors having shapes `[batch_size, s] for s in cell_fw.state_size`. 340 | initial_state_bw: (optional) Same as for `initial_state_fw`, but using 341 | the corresponding properties of `cell_bw`. 342 | dtype: (optional) The data type for the initial states and expected output. 343 | Required if initial_states are not provided or RNN states have a 344 | heterogeneous dtype. 345 | parallel_iterations: (Default: 32). The number of iterations to run in 346 | parallel. Those operations which do not have any temporal dependency 347 | and can be run in parallel, will be. This parameter trades off 348 | time for space. 
Values >> 1 use more memory but take less time, 349 | while smaller values use less memory but computations take longer. 350 | swap_memory: Transparently swap the tensors produced in forward inference 351 | but needed for back prop from GPU to CPU. This allows training RNNs 352 | which would typically not fit on a single GPU, with very minimal (or no) 353 | performance penalty. 354 | time_major: The shape format of the `inputs` and `outputs` Tensors. 355 | If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. 356 | If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 357 | Using `time_major = True` is a bit more efficient because it avoids 358 | transposes at the beginning and end of the RNN calculation. However, 359 | most TensorFlow data is batch-major, so by default this function 360 | accepts input and emits output in batch-major form. 361 | scope: VariableScope for the created subgraph; defaults to 362 | "bidirectional_rnn" 363 | 364 | Returns: 365 | A tuple (outputs, output_states) where: 366 | outputs: A tuple (output_fw, output_bw) containing the forward and 367 | the backward rnn output `Tensor`. 368 | If time_major == False (default), 369 | output_fw will be a `Tensor` shaped: 370 | `[batch_size, max_time, cell_fw.output_size]` 371 | and output_bw will be a `Tensor` shaped: 372 | `[batch_size, max_time, cell_bw.output_size]`. 373 | If time_major == True, 374 | output_fw will be a `Tensor` shaped: 375 | `[max_time, batch_size, cell_fw.output_size]` 376 | and output_bw will be a `Tensor` shaped: 377 | `[max_time, batch_size, cell_bw.output_size]`. 378 | It returns a tuple instead of a single concatenated `Tensor`, unlike 379 | in the `bidirectional_rnn`. If the concatenated one is preferred, 380 | the forward and backward outputs can be concatenated as 381 | `tf.concat(outputs, 2)`. 382 | output_states: A tuple (output_state_fw, output_state_bw) containing 383 | the forward and the backward final states of bidirectional rnn. 384 | 385 | Raises: 386 | TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. 
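
  Example (an illustrative sketch; `num_units`, `batch_data` and `lens` are
  assumed placeholders, not part of this module):

  ```python
  cell_fw = tf.contrib.rnn.LSTMCell(num_units)
  cell_bw = tf.contrib.rnn.LSTMCell(num_units)
  (out_fw, out_bw), _ = bidirectional_dynamic_rnn(
      cell_fw, cell_bw, batch_data, sequence_length=lens, dtype=tf.float32)
  # Depth-concatenate both directions: [batch_size, max_time, 2 * num_units].
  outputs = tf.concat([out_fw, out_bw], 2)
  ```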
387 | """ 388 | 389 | if not _like_rnncell(cell_fw): 390 | raise TypeError("cell_fw must be an instance of RNNCell") 391 | if not _like_rnncell(cell_bw): 392 | raise TypeError("cell_bw must be an instance of RNNCell") 393 | 394 | with vs.variable_scope(scope or "bidirectional_rnn"): 395 | # Forward direction 396 | with vs.variable_scope("fw") as fw_scope: 397 | output_fw, output_state_fw = dynamic_rnn( 398 | cell=cell_fw, inputs=inputs, sequence_length=sequence_length, 399 | initial_state=initial_state_fw, dtype=dtype, 400 | parallel_iterations=parallel_iterations, swap_memory=swap_memory, 401 | time_major=time_major, scope=fw_scope) 402 | 403 | # Backward direction 404 | if not time_major: 405 | time_dim = 1 406 | batch_dim = 0 407 | else: 408 | time_dim = 0 409 | batch_dim = 1 410 | 411 | def _reverse(input_, seq_lengths, seq_dim, batch_dim): 412 | if seq_lengths is not None: 413 | return array_ops.reverse_sequence( 414 | input=input_, seq_lengths=seq_lengths, 415 | seq_dim=seq_dim, batch_dim=batch_dim) 416 | else: 417 | return array_ops.reverse(input_, axis=[seq_dim]) 418 | 419 | with vs.variable_scope("bw") as bw_scope: 420 | inputs_reverse = _reverse( 421 | inputs, seq_lengths=sequence_length, 422 | seq_dim=time_dim, batch_dim=batch_dim) 423 | tmp, output_state_bw = dynamic_rnn( 424 | cell=cell_bw, inputs=inputs_reverse, sequence_length=sequence_length, 425 | initial_state=initial_state_bw, dtype=dtype, 426 | parallel_iterations=parallel_iterations, swap_memory=swap_memory, 427 | time_major=time_major, scope=bw_scope) 428 | 429 | output_bw = _reverse( 430 | tmp, seq_lengths=sequence_length, 431 | seq_dim=time_dim, batch_dim=batch_dim) 432 | 433 | outputs = (output_fw, output_bw) 434 | output_states = (output_state_fw, output_state_bw) 435 | 436 | return (outputs, output_states) 437 | 438 | 439 | def dynamic_rnn(cell, inputs, att_scores=None, sequence_length=None, initial_state=None, 440 | dtype=None, parallel_iterations=None, swap_memory=False, 441 | time_major=False, scope=None): 442 | """Creates a recurrent neural network specified by RNNCell `cell`. 443 | 444 | Performs fully dynamic unrolling of `inputs`. 445 | 446 | Example: 447 | 448 | ```python 449 | # create a BasicRNNCell 450 | rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) 451 | 452 | # 'outputs' is a tensor of shape [batch_size, max_time, cell_state_size] 453 | 454 | # defining initial state 455 | initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) 456 | 457 | # 'state' is a tensor of shape [batch_size, cell_state_size] 458 | outputs, state = tf.nn.dynamic_rnn(rnn_cell, input_data, 459 | initial_state=initial_state, 460 | dtype=tf.float32) 461 | ``` 462 | 463 | ```python 464 | # create 2 LSTMCells 465 | rnn_layers = [tf.nn.rnn_cell.LSTMCell(size) for size in [128, 256]] 466 | 467 | # create a RNN cell composed sequentially of a number of RNNCells 468 | multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers) 469 | 470 | # 'outputs' is a tensor of shape [batch_size, max_time, 256] 471 | # 'state' is a N-tuple where N is the number of LSTMCells containing a 472 | # tf.contrib.rnn.LSTMStateTuple for each cell 473 | outputs, state = tf.nn.dynamic_rnn(cell=multi_rnn_cell, 474 | inputs=data, 475 | dtype=tf.float32) 476 | ``` 477 | 478 | 479 | Args: 480 | cell: An instance of RNNCell. 481 | inputs: The RNN inputs. 482 | If `time_major == False` (default), this must be a `Tensor` of shape: 483 | `[batch_size, max_time, ...]`, or a nested tuple of such 484 | elements. 
485 | If `time_major == True`, this must be a `Tensor` of shape: 486 | `[max_time, batch_size, ...]`, or a nested tuple of such 487 | elements. 488 | This may also be a (possibly nested) tuple of Tensors satisfying 489 | this property. The first two dimensions must match across all the inputs, 490 | but otherwise the ranks and other shape components may differ. 491 | In this case, input to `cell` at each time-step will replicate the 492 | structure of these tuples, except for the time dimension (from which the 493 | time is taken). 494 | The input to `cell` at each time step will be a `Tensor` or (possibly 495 | nested) tuple of Tensors each with dimensions `[batch_size, ...]`. 496 | sequence_length: (optional) An int32/int64 vector sized `[batch_size]`. 497 | Used to copy-through state and zero-out outputs when past a batch 498 | element's sequence length. So it's more for correctness than performance. 499 | initial_state: (optional) An initial state for the RNN. 500 | If `cell.state_size` is an integer, this must be 501 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 502 | If `cell.state_size` is a tuple, this should be a tuple of 503 | tensors having shapes `[batch_size, s] for s in cell.state_size`. 504 | dtype: (optional) The data type for the initial state and expected output. 505 | Required if initial_state is not provided or RNN state has a heterogeneous 506 | dtype. 507 | parallel_iterations: (Default: 32). The number of iterations to run in 508 | parallel. Those operations which do not have any temporal dependency 509 | and can be run in parallel, will be. This parameter trades off 510 | time for space. Values >> 1 use more memory but take less time, 511 | while smaller values use less memory but computations take longer. 512 | swap_memory: Transparently swap the tensors produced in forward inference 513 | but needed for back prop from GPU to CPU. This allows training RNNs 514 | which would typically not fit on a single GPU, with very minimal (or no) 515 | performance penalty. 516 | time_major: The shape format of the `inputs` and `outputs` Tensors. 517 | If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`. 518 | If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`. 519 | Using `time_major = True` is a bit more efficient because it avoids 520 | transposes at the beginning and end of the RNN calculation. However, 521 | most TensorFlow data is batch-major, so by default this function 522 | accepts input and emits output in batch-major form. 523 | scope: VariableScope for the created subgraph; defaults to "rnn". 524 | 525 | Returns: 526 | A pair (outputs, state) where: 527 | 528 | outputs: The RNN output `Tensor`. 529 | 530 | If time_major == False (default), this will be a `Tensor` shaped: 531 | `[batch_size, max_time, cell.output_size]`. 532 | 533 | If time_major == True, this will be a `Tensor` shaped: 534 | `[max_time, batch_size, cell.output_size]`. 535 | 536 | Note, if `cell.output_size` is a (possibly nested) tuple of integers 537 | or `TensorShape` objects, then `outputs` will be a tuple having the 538 | same structure as `cell.output_size`, containing Tensors having shapes 539 | corresponding to the shape data in `cell.output_size`. 540 | 541 | state: The final state. If `cell.state_size` is an int, this 542 | will be shaped `[batch_size, cell.state_size]`. If it is a 543 | `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. 
544 | If it is a (possibly nested) tuple of ints or `TensorShape`, this will 545 | be a tuple having the corresponding shapes. If cells are `LSTMCells` 546 | `state` will be a tuple containing a `LSTMStateTuple` for each cell. 547 | 548 | Raises: 549 | TypeError: If `cell` is not an instance of RNNCell. 550 | ValueError: If inputs is None or an empty list. 551 | """ 552 | if not _like_rnncell(cell): 553 | raise TypeError("cell must be an instance of RNNCell") 554 | 555 | # By default, time_major==False and inputs are batch-major: shaped 556 | # [batch, time, depth] 557 | # For internal calculations, we transpose to [time, batch, depth] 558 | flat_input = nest.flatten(inputs) 559 | 560 | if not time_major: 561 | # (B,T,D) => (T,B,D) 562 | flat_input = [ops.convert_to_tensor(input_) for input_ in flat_input] 563 | flat_input = tuple(_transpose_batch_time(input_) for input_ in flat_input) 564 | 565 | parallel_iterations = parallel_iterations or 32 566 | if sequence_length is not None: 567 | sequence_length = math_ops.to_int32(sequence_length) 568 | if sequence_length.get_shape().ndims not in (None, 1): 569 | raise ValueError( 570 | "sequence_length must be a vector of length batch_size, " 571 | "but saw shape: %s" % sequence_length.get_shape()) 572 | sequence_length = array_ops.identity( # Just to find it in the graph. 573 | sequence_length, name="sequence_length") 574 | 575 | # Create a new scope in which the caching device is either 576 | # determined by the parent scope, or is set to place the cached 577 | # Variable using the same placement as for the rest of the RNN. 578 | with vs.variable_scope(scope or "rnn") as varscope: 579 | if varscope.caching_device is None: 580 | varscope.set_caching_device(lambda op: op.device) 581 | batch_size = _best_effort_input_batch_size(flat_input) 582 | 583 | if initial_state is not None: 584 | state = initial_state 585 | else: 586 | if not dtype: 587 | raise ValueError("If there is no initial_state, you must give a dtype.") 588 | state = cell.zero_state(batch_size, dtype) 589 | 590 | def _assert_has_shape(x, shape): 591 | x_shape = array_ops.shape(x) 592 | packed_shape = array_ops.stack(shape) 593 | return control_flow_ops.Assert( 594 | math_ops.reduce_all(math_ops.equal(x_shape, packed_shape)), 595 | ["Expected shape for Tensor %s is " % x.name, 596 | packed_shape, " but saw shape: ", x_shape]) 597 | 598 | if sequence_length is not None: 599 | # Perform some shape validation 600 | with ops.control_dependencies( 601 | [_assert_has_shape(sequence_length, [batch_size])]): 602 | sequence_length = array_ops.identity( 603 | sequence_length, name="CheckSeqLen") 604 | 605 | inputs = nest.pack_sequence_as(structure=inputs, flat_sequence=flat_input) 606 | 607 | (outputs, final_state) = _dynamic_rnn_loop( 608 | cell, 609 | inputs, 610 | state, 611 | parallel_iterations=parallel_iterations, 612 | swap_memory=swap_memory, 613 | att_scores = att_scores, 614 | sequence_length=sequence_length, 615 | dtype=dtype) 616 | 617 | # Outputs of _dynamic_rnn_loop are always shaped [time, batch, depth]. 
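  # (the while_loop in _dynamic_rnn_loop fills a TensorArray indexed by time,
  # which is why results come back time-major)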
618 |   # If we are performing batch-major calculations, transpose output back
619 |   # to shape [batch, time, depth]
620 |   if not time_major:
621 |     # (T,B,D) => (B,T,D)
622 |     outputs = nest.map_structure(_transpose_batch_time, outputs)
623 | 
624 |   return (outputs, final_state)
625 | 
626 | 
627 | def _dynamic_rnn_loop(cell,
628 |                       inputs,
629 |                       initial_state,
630 |                       parallel_iterations,
631 |                       swap_memory,
632 |                       att_scores = None,
633 |                       sequence_length=None,
634 |                       dtype=None):
635 |   """Internal implementation of Dynamic RNN.
636 | 
637 |   Args:
638 |     cell: An instance of RNNCell.
639 |     inputs: A `Tensor` of shape [time, batch_size, input_size], or a nested
640 |       tuple of such elements.
641 |     initial_state: A `Tensor` of shape `[batch_size, state_size]`, or if
642 |       `cell.state_size` is a tuple, then this should be a tuple of
643 |       tensors having shapes `[batch_size, s] for s in cell.state_size`.
644 |     parallel_iterations: Positive Python int.
645 |     swap_memory: A Python boolean
646 |     sequence_length: (optional) An `int32` `Tensor` of shape [batch_size].
647 |     dtype: (optional) Expected dtype of output. If not specified, inferred from
648 |       initial_state.
649 | 
650 |   Returns:
651 |     Tuple `(final_outputs, final_state)`.
652 |     final_outputs:
653 |       A `Tensor` of shape `[time, batch_size, cell.output_size]`. If
654 |       `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
655 |       objects, then this returns a (possibly nested) tuple of Tensors matching
656 |       the corresponding shapes.
657 |     final_state:
658 |       A `Tensor`, or possibly nested tuple of Tensors, matching in length
659 |       and shapes to `initial_state`.
660 | 
661 |   Raises:
662 |     ValueError: If the input depth cannot be inferred via shape inference
663 |       from the inputs.
664 |   """
665 |   state = initial_state
666 |   assert isinstance(parallel_iterations, int), "parallel_iterations must be int"
667 | 
668 |   state_size = cell.state_size
669 | 
670 |   flat_input = nest.flatten(inputs)
671 |   flat_output_size = nest.flatten(cell.output_size)
672 | 
673 |   # Construct an initial output
674 |   input_shape = array_ops.shape(flat_input[0])
675 |   time_steps = input_shape[0]
676 |   batch_size = _best_effort_input_batch_size(flat_input)
677 | 
678 |   inputs_got_shape = tuple(input_.get_shape().with_rank_at_least(3)
679 |                            for input_ in flat_input)
680 | 
681 |   const_time_steps, const_batch_size = inputs_got_shape[0].as_list()[:2]
682 | 
683 |   for shape in inputs_got_shape:
684 |     if not shape[2:].is_fully_defined():
685 |       raise ValueError(
686 |           "Input size (depth of inputs) must be accessible via shape inference,"
687 |           " but saw value None.")
688 |     got_time_steps = shape[0].value
689 |     got_batch_size = shape[1].value
690 |     if const_time_steps != got_time_steps:
691 |       raise ValueError(
692 |           "Time steps is not the same for all the elements in the input in a "
693 |           "batch.")
694 |     if const_batch_size != got_batch_size:
695 |       raise ValueError(
696 |           "Batch_size is not the same for all the elements in the input.")
697 | 
698 |   # Prepare dynamic conditional copying of state & output
699 |   def _create_zero_arrays(size):
700 |     size = _concat(batch_size, size)
701 |     return array_ops.zeros(
702 |         array_ops.stack(size), _infer_state_dtype(dtype, state))
703 | 
704 |   flat_zero_output = tuple(_create_zero_arrays(output)
705 |                            for output in flat_output_size)
706 |   zero_output = nest.pack_sequence_as(structure=cell.output_size,
707 |                                       flat_sequence=flat_zero_output)
708 | 
709 |   if sequence_length is not None:
710 |     min_sequence_length = math_ops.reduce_min(sequence_length)
711 | max_sequence_length = math_ops.reduce_max(sequence_length) 712 | 713 | time = array_ops.constant(0, dtype=dtypes.int32, name="time") 714 | 715 | with ops.name_scope("dynamic_rnn") as scope: 716 | base_name = scope 717 | 718 | def _create_ta(name, dtype): 719 | return tensor_array_ops.TensorArray(dtype=dtype, 720 | size=time_steps, 721 | tensor_array_name=base_name + name) 722 | 723 | output_ta = tuple(_create_ta("output_%d" % i, 724 | _infer_state_dtype(dtype, state)) 725 | for i in range(len(flat_output_size))) 726 | input_ta = tuple(_create_ta("input_%d" % i, flat_input[i].dtype) 727 | for i in range(len(flat_input))) 728 | 729 | input_ta = tuple(ta.unstack(input_) 730 | for ta, input_ in zip(input_ta, flat_input)) 731 | 732 | def _time_step(time, output_ta_t, state, att_scores=None): 733 | """Take a time step of the dynamic RNN. 734 | 735 | Args: 736 | time: int32 scalar Tensor. 737 | output_ta_t: List of `TensorArray`s that represent the output. 738 | state: nested tuple of vector tensors that represent the state. 739 | 740 | Returns: 741 | The tuple (time + 1, output_ta_t with updated flow, new_state). 742 | """ 743 | 744 | input_t = tuple(ta.read(time) for ta in input_ta) 745 | # Restore some shape information 746 | for input_, shape in zip(input_t, inputs_got_shape): 747 | input_.set_shape(shape[1:]) 748 | 749 | input_t = nest.pack_sequence_as(structure=inputs, flat_sequence=input_t) 750 | if att_scores is not None: 751 | att_score = att_scores[:, time, :] 752 | call_cell = lambda: cell(input_t, state, att_score) 753 | else: 754 | call_cell = lambda: cell(input_t, state) 755 | 756 | if sequence_length is not None: 757 | (output, new_state) = _rnn_step( 758 | time=time, 759 | sequence_length=sequence_length, 760 | min_sequence_length=min_sequence_length, 761 | max_sequence_length=max_sequence_length, 762 | zero_output=zero_output, 763 | state=state, 764 | call_cell=call_cell, 765 | state_size=state_size, 766 | skip_conditionals=True) 767 | else: 768 | (output, new_state) = call_cell() 769 | 770 | # Pack state if using state tuples 771 | output = nest.flatten(output) 772 | 773 | output_ta_t = tuple( 774 | ta.write(time, out) for ta, out in zip(output_ta_t, output)) 775 | if att_scores is not None: 776 | return (time + 1, output_ta_t, new_state, att_scores) 777 | else: 778 | return (time + 1, output_ta_t, new_state) 779 | 780 | if att_scores is not None: 781 | _, output_final_ta, final_state, _ = control_flow_ops.while_loop( 782 | cond=lambda time, *_: time < time_steps, 783 | body=_time_step, 784 | loop_vars=(time, output_ta, state, att_scores), 785 | parallel_iterations=parallel_iterations, 786 | swap_memory=swap_memory) 787 | else: 788 | _, output_final_ta, final_state = control_flow_ops.while_loop( 789 | cond=lambda time, *_: time < time_steps, 790 | body=_time_step, 791 | loop_vars=(time, output_ta, state), 792 | parallel_iterations=parallel_iterations, 793 | swap_memory=swap_memory) 794 | 795 | # Unpack final output if not using output tuples. 
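  # Each output TensorArray is stacked back into one [time, batch, depth] Tensor.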
796 | final_outputs = tuple(ta.stack() for ta in output_final_ta) 797 | 798 | # Restore some shape information 799 | for output, output_size in zip(final_outputs, flat_output_size): 800 | shape = _concat( 801 | [const_time_steps, const_batch_size], output_size, static=True) 802 | output.set_shape(shape) 803 | 804 | final_outputs = nest.pack_sequence_as( 805 | structure=cell.output_size, flat_sequence=final_outputs) 806 | 807 | return (final_outputs, final_state) 808 | 809 | 810 | def raw_rnn(cell, loop_fn, 811 | parallel_iterations=None, swap_memory=False, scope=None): 812 | """Creates an `RNN` specified by RNNCell `cell` and loop function `loop_fn`. 813 | 814 | **NOTE: This method is still in testing, and the API may change.** 815 | 816 | This function is a more primitive version of `dynamic_rnn` that provides 817 | more direct access to the inputs each iteration. It also provides more 818 | control over when to start and finish reading the sequence, and 819 | what to emit for the output. 820 | 821 | For example, it can be used to implement the dynamic decoder of a seq2seq 822 | model. 823 | 824 | Instead of working with `Tensor` objects, most operations work with 825 | `TensorArray` objects directly. 826 | 827 | The operation of `raw_rnn`, in pseudo-code, is basically the following: 828 | 829 | ```python 830 | time = tf.constant(0, dtype=tf.int32) 831 | (finished, next_input, initial_state, _, loop_state) = loop_fn( 832 | time=time, cell_output=None, cell_state=None, loop_state=None) 833 | emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype) 834 | state = initial_state 835 | while not all(finished): 836 | (output, cell_state) = cell(next_input, state) 837 | (next_finished, next_input, next_state, emit, loop_state) = loop_fn( 838 | time=time + 1, cell_output=output, cell_state=cell_state, 839 | loop_state=loop_state) 840 | # Emit zeros and copy forward state for minibatch entries that are finished. 841 | state = tf.where(finished, state, next_state) 842 | emit = tf.where(finished, tf.zeros_like(emit), emit) 843 | emit_ta = emit_ta.write(time, emit) 844 | # If any new minibatch entries are marked as finished, mark these. 845 | finished = tf.logical_or(finished, next_finished) 846 | time += 1 847 | return (emit_ta, state, loop_state) 848 | ``` 849 | 850 | with the additional properties that output and state may be (possibly nested) 851 | tuples, as determined by `cell.output_size` and `cell.state_size`, and 852 | as a result the final `state` and `emit_ta` may themselves be tuples. 
853 | 854 | A simple implementation of `dynamic_rnn` via `raw_rnn` looks like this: 855 | 856 | ```python 857 | inputs = tf.placeholder(shape=(max_time, batch_size, input_depth), 858 | dtype=tf.float32) 859 | sequence_length = tf.placeholder(shape=(batch_size,), dtype=tf.int32) 860 | inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time) 861 | inputs_ta = inputs_ta.unstack(inputs) 862 | 863 | cell = tf.contrib.rnn.LSTMCell(num_units) 864 | 865 | def loop_fn(time, cell_output, cell_state, loop_state): 866 | emit_output = cell_output # == None for time == 0 867 | if cell_output is None: # time == 0 868 | next_cell_state = cell.zero_state(batch_size, tf.float32) 869 | else: 870 | next_cell_state = cell_state 871 | elements_finished = (time >= sequence_length) 872 | finished = tf.reduce_all(elements_finished) 873 | next_input = tf.cond( 874 | finished, 875 | lambda: tf.zeros([batch_size, input_depth], dtype=tf.float32), 876 | lambda: inputs_ta.read(time)) 877 | next_loop_state = None 878 | return (elements_finished, next_input, next_cell_state, 879 | emit_output, next_loop_state) 880 | 881 | outputs_ta, final_state, _ = raw_rnn(cell, loop_fn) 882 | outputs = outputs_ta.stack() 883 | ``` 884 | 885 | Args: 886 | cell: An instance of RNNCell. 887 | loop_fn: A callable that takes inputs 888 | `(time, cell_output, cell_state, loop_state)` 889 | and returns the tuple 890 | `(finished, next_input, next_cell_state, emit_output, next_loop_state)`. 891 | Here `time` is an int32 scalar `Tensor`, `cell_output` is a 892 | `Tensor` or (possibly nested) tuple of tensors as determined by 893 | `cell.output_size`, and `cell_state` is a `Tensor` 894 | or (possibly nested) tuple of tensors, as determined by the `loop_fn` 895 | on its first call (and should match `cell.state_size`). 896 | The outputs are: `finished`, a boolean `Tensor` of 897 | shape `[batch_size]`, `next_input`: the next input to feed to `cell`, 898 | `next_cell_state`: the next state to feed to `cell`, 899 | and `emit_output`: the output to store for this iteration. 900 | 901 | Note that `emit_output` should be a `Tensor` or (possibly nested) 902 | tuple of tensors with shapes and structure matching `cell.output_size` 903 | and `cell_output` above. The parameter `cell_state` and output 904 | `next_cell_state` may be either a single or (possibly nested) tuple 905 | of tensors. The parameter `loop_state` and 906 | output `next_loop_state` may be either a single or (possibly nested) tuple 907 | of `Tensor` and `TensorArray` objects. This last parameter 908 | may be ignored by `loop_fn` and the return value may be `None`. If it 909 | is not `None`, then the `loop_state` will be propagated through the RNN 910 | loop, for use purely by `loop_fn` to keep track of its own state. 911 | The `next_loop_state` parameter returned may be `None`. 912 | 913 | The first call to `loop_fn` will be `time = 0`, `cell_output = None`, 914 | `cell_state = None`, and `loop_state = None`. For this call: 915 | The `next_cell_state` value should be the value with which to initialize 916 | the cell's state. It may be a final state from a previous RNN or it 917 | may be the output of `cell.zero_state()`. It should be a 918 | (possibly nested) tuple structure of tensors. 919 | If `cell.state_size` is an integer, this must be 920 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 921 | If `cell.state_size` is a `TensorShape`, this must be a `Tensor` of 922 | appropriate type and shape `[batch_size] + cell.state_size`. 
923 | If `cell.state_size` is a (possibly nested) tuple of ints or 924 | `TensorShape`, this will be a tuple having the corresponding shapes. 925 | The `emit_output` value may be either `None` or a (possibly nested) 926 | tuple structure of tensors, e.g., 927 | `(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`. 928 | If this first `emit_output` return value is `None`, 929 | then the `emit_ta` result of `raw_rnn` will have the same structure and 930 | dtypes as `cell.output_size`. Otherwise `emit_ta` will have the same 931 | structure, shapes (prepended with a `batch_size` dimension), and dtypes 932 | as `emit_output`. The actual values returned for `emit_output` at this 933 | initializing call are ignored. Note, this emit structure must be 934 | consistent across all time steps. 935 | 936 | parallel_iterations: (Default: 32). The number of iterations to run in 937 | parallel. Those operations which do not have any temporal dependency 938 | and can be run in parallel, will be. This parameter trades off 939 | time for space. Values >> 1 use more memory but take less time, 940 | while smaller values use less memory but computations take longer. 941 | swap_memory: Transparently swap the tensors produced in forward inference 942 | but needed for back prop from GPU to CPU. This allows training RNNs 943 | which would typically not fit on a single GPU, with very minimal (or no) 944 | performance penalty. 945 | scope: VariableScope for the created subgraph; defaults to "rnn". 946 | 947 | Returns: 948 | A tuple `(emit_ta, final_state, final_loop_state)` where: 949 | 950 | `emit_ta`: The RNN output `TensorArray`. 951 | If `loop_fn` returns a (possibly nested) set of Tensors for 952 | `emit_output` during initialization, (inputs `time = 0`, 953 | `cell_output = None`, and `loop_state = None`), then `emit_ta` will 954 | have the same structure, dtypes, and shapes as `emit_output` instead. 955 | If `loop_fn` returns `emit_output = None` during this call, 956 | the structure of `cell.output_size` is used: 957 | If `cell.output_size` is a (possibly nested) tuple of integers 958 | or `TensorShape` objects, then `emit_ta` will be a tuple having the 959 | same structure as `cell.output_size`, containing TensorArrays whose 960 | elements' shapes correspond to the shape data in `cell.output_size`. 961 | 962 | `final_state`: The final cell state. If `cell.state_size` is an int, this 963 | will be shaped `[batch_size, cell.state_size]`. If it is a 964 | `TensorShape`, this will be shaped `[batch_size] + cell.state_size`. 965 | If it is a (possibly nested) tuple of ints or `TensorShape`, this will 966 | be a tuple having the corresponding shapes. 967 | 968 | `final_loop_state`: The final loop state as returned by `loop_fn`. 969 | 970 | Raises: 971 | TypeError: If `cell` is not an instance of RNNCell, or `loop_fn` is not 972 | a `callable`. 973 | """ 974 | 975 | if not _like_rnncell(cell): 976 | raise TypeError("cell must be an instance of RNNCell") 977 | if not callable(loop_fn): 978 | raise TypeError("loop_fn must be a callable") 979 | 980 | parallel_iterations = parallel_iterations or 32 981 | 982 | # Create a new scope in which the caching device is either 983 | # determined by the parent scope, or is set to place the cached 984 | # Variable using the same placement as for the rest of the RNN. 
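  # (This mirrors the caching-device setup used by dynamic_rnn above.)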
985 | with vs.variable_scope(scope or "rnn") as varscope: 986 | if varscope.caching_device is None: 987 | varscope.set_caching_device(lambda op: op.device) 988 | 989 | time = constant_op.constant(0, dtype=dtypes.int32) 990 | (elements_finished, next_input, initial_state, emit_structure, 991 | init_loop_state) = loop_fn( 992 | time, None, None, None) # time, cell_output, cell_state, loop_state 993 | flat_input = nest.flatten(next_input) 994 | 995 | # Need a surrogate loop state for the while_loop if none is available. 996 | loop_state = (init_loop_state if init_loop_state is not None 997 | else constant_op.constant(0, dtype=dtypes.int32)) 998 | 999 | input_shape = [input_.get_shape() for input_ in flat_input] 1000 | static_batch_size = input_shape[0][0] 1001 | 1002 | for input_shape_i in input_shape: 1003 | # Static verification that batch sizes all match 1004 | static_batch_size.merge_with(input_shape_i[0]) 1005 | 1006 | batch_size = static_batch_size.value 1007 | if batch_size is None: 1008 | batch_size = array_ops.shape(flat_input[0])[0] 1009 | 1010 | nest.assert_same_structure(initial_state, cell.state_size) 1011 | state = initial_state 1012 | flat_state = nest.flatten(state) 1013 | flat_state = [ops.convert_to_tensor(s) for s in flat_state] 1014 | state = nest.pack_sequence_as(structure=state, 1015 | flat_sequence=flat_state) 1016 | 1017 | if emit_structure is not None: 1018 | flat_emit_structure = nest.flatten(emit_structure) 1019 | flat_emit_size = [emit.shape if emit.shape.is_fully_defined() else 1020 | array_ops.shape(emit) for emit in flat_emit_structure] 1021 | flat_emit_dtypes = [emit.dtype for emit in flat_emit_structure] 1022 | else: 1023 | emit_structure = cell.output_size 1024 | flat_emit_size = nest.flatten(emit_structure) 1025 | flat_emit_dtypes = [flat_state[0].dtype] * len(flat_emit_size) 1026 | 1027 | flat_emit_ta = [ 1028 | tensor_array_ops.TensorArray( 1029 | dtype=dtype_i, dynamic_size=True, size=0, name="rnn_output_%d" % i) 1030 | for i, dtype_i in enumerate(flat_emit_dtypes)] 1031 | emit_ta = nest.pack_sequence_as(structure=emit_structure, 1032 | flat_sequence=flat_emit_ta) 1033 | flat_zero_emit = [ 1034 | array_ops.zeros(_concat(batch_size, size_i), dtype_i) 1035 | for size_i, dtype_i in zip(flat_emit_size, flat_emit_dtypes)] 1036 | zero_emit = nest.pack_sequence_as(structure=emit_structure, 1037 | flat_sequence=flat_zero_emit) 1038 | 1039 | def condition(unused_time, elements_finished, *_): 1040 | return math_ops.logical_not(math_ops.reduce_all(elements_finished)) 1041 | 1042 | def body(time, elements_finished, current_input, 1043 | emit_ta, state, loop_state): 1044 | """Internal while loop body for raw_rnn. 1045 | 1046 | Args: 1047 | time: time scalar. 1048 | elements_finished: batch-size vector. 1049 | current_input: possibly nested tuple of input tensors. 1050 | emit_ta: possibly nested tuple of output TensorArrays. 1051 | state: possibly nested tuple of state tensors. 1052 | loop_state: possibly nested tuple of loop state tensors. 1053 | 1054 | Returns: 1055 | Tuple having the same size as Args but with updated values. 
1056 | """ 1057 | (next_output, cell_state) = cell(current_input, state) 1058 | 1059 | nest.assert_same_structure(state, cell_state) 1060 | nest.assert_same_structure(cell.output_size, next_output) 1061 | 1062 | next_time = time + 1 1063 | (next_finished, next_input, next_state, emit_output, 1064 | next_loop_state) = loop_fn( 1065 | next_time, next_output, cell_state, loop_state) 1066 | 1067 | nest.assert_same_structure(state, next_state) 1068 | nest.assert_same_structure(current_input, next_input) 1069 | nest.assert_same_structure(emit_ta, emit_output) 1070 | 1071 | # If loop_fn returns None for next_loop_state, just reuse the 1072 | # previous one. 1073 | loop_state = loop_state if next_loop_state is None else next_loop_state 1074 | 1075 | def _copy_some_through(current, candidate): 1076 | """Copy some tensors through via array_ops.where.""" 1077 | def copy_fn(cur_i, cand_i): 1078 | with ops.colocate_with(cand_i): 1079 | return array_ops.where(elements_finished, cur_i, cand_i) 1080 | return nest.map_structure(copy_fn, current, candidate) 1081 | 1082 | emit_output = _copy_some_through(zero_emit, emit_output) 1083 | next_state = _copy_some_through(state, next_state) 1084 | 1085 | emit_ta = nest.map_structure( 1086 | lambda ta, emit: ta.write(time, emit), emit_ta, emit_output) 1087 | 1088 | elements_finished = math_ops.logical_or(elements_finished, next_finished) 1089 | 1090 | return (next_time, elements_finished, next_input, 1091 | emit_ta, next_state, loop_state) 1092 | 1093 | returned = control_flow_ops.while_loop( 1094 | condition, body, loop_vars=[ 1095 | time, elements_finished, next_input, 1096 | emit_ta, state, loop_state], 1097 | parallel_iterations=parallel_iterations, 1098 | swap_memory=swap_memory) 1099 | 1100 | (emit_ta, final_state, final_loop_state) = returned[-3:] 1101 | 1102 | if init_loop_state is None: 1103 | final_loop_state = None 1104 | 1105 | return (emit_ta, final_state, final_loop_state) 1106 | 1107 | 1108 | def static_rnn(cell, 1109 | inputs, 1110 | initial_state=None, 1111 | dtype=None, 1112 | sequence_length=None, 1113 | scope=None): 1114 | """Creates a recurrent neural network specified by RNNCell `cell`. 1115 | 1116 | The simplest form of RNN network generated is: 1117 | 1118 | ```python 1119 | state = cell.zero_state(...) 1120 | outputs = [] 1121 | for input_ in inputs: 1122 | output, state = cell(input_, state) 1123 | outputs.append(output) 1124 | return (outputs, state) 1125 | ``` 1126 | However, a few other options are available: 1127 | 1128 | An initial state can be provided. 1129 | If the sequence_length vector is provided, dynamic calculation is performed. 1130 | This method of calculation does not compute the RNN steps past the maximum 1131 | sequence length of the minibatch (thus saving computational time), 1132 | and properly propagates the state at an example's sequence length 1133 | to the final state output. 1134 | 1135 | The dynamic calculation performed is, at time `t` for batch row `b`, 1136 | 1137 | ```python 1138 | (output, state)(b, t) = 1139 | (t >= sequence_length(b)) 1140 | ? (zeros(cell.output_size), states(b, sequence_length(b) - 1)) 1141 | : cell(input(b, t), state(b, t - 1)) 1142 | ``` 1143 | 1144 | Args: 1145 | cell: An instance of RNNCell. 1146 | inputs: A length T list of inputs, each a `Tensor` of shape 1147 | `[batch_size, input_size]`, or a nested tuple of such elements. 1148 | initial_state: (optional) An initial state for the RNN. 
1149 | If `cell.state_size` is an integer, this must be 1150 | a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`. 1151 | If `cell.state_size` is a tuple, this should be a tuple of 1152 | tensors having shapes `[batch_size, s] for s in cell.state_size`. 1153 | dtype: (optional) The data type for the initial state and expected output. 1154 | Required if initial_state is not provided or RNN state has a heterogeneous 1155 | dtype. 1156 | sequence_length: Specifies the length of each sequence in inputs. 1157 | An int32 or int64 vector (tensor) size `[batch_size]`, values in `[0, T)`. 1158 | scope: VariableScope for the created subgraph; defaults to "rnn". 1159 | 1160 | Returns: 1161 | A pair (outputs, state) where: 1162 | 1163 | - outputs is a length T list of outputs (one for each input), or a nested 1164 | tuple of such elements. 1165 | - state is the final state 1166 | 1167 | Raises: 1168 | TypeError: If `cell` is not an instance of RNNCell. 1169 | ValueError: If `inputs` is `None` or an empty list, or if the input depth 1170 | (column size) cannot be inferred from inputs via shape inference. 1171 | """ 1172 | 1173 | if not _like_rnncell(cell): 1174 | raise TypeError("cell must be an instance of RNNCell") 1175 | if not nest.is_sequence(inputs): 1176 | raise TypeError("inputs must be a sequence") 1177 | if not inputs: 1178 | raise ValueError("inputs must not be empty") 1179 | 1180 | outputs = [] 1181 | # Create a new scope in which the caching device is either 1182 | # determined by the parent scope, or is set to place the cached 1183 | # Variable using the same placement as for the rest of the RNN. 1184 | with vs.variable_scope(scope or "rnn") as varscope: 1185 | if varscope.caching_device is None: 1186 | varscope.set_caching_device(lambda op: op.device) 1187 | 1188 | # Obtain the first sequence of the input 1189 | first_input = inputs 1190 | while nest.is_sequence(first_input): 1191 | first_input = first_input[0] 1192 | 1193 | # Temporarily avoid EmbeddingWrapper and seq2seq badness 1194 | # TODO(lukaszkaiser): remove EmbeddingWrapper 1195 | if first_input.get_shape().ndims != 1: 1196 | 1197 | input_shape = first_input.get_shape().with_rank_at_least(2) 1198 | fixed_batch_size = input_shape[0] 1199 | 1200 | flat_inputs = nest.flatten(inputs) 1201 | for flat_input in flat_inputs: 1202 | input_shape = flat_input.get_shape().with_rank_at_least(2) 1203 | batch_size, input_size = input_shape[0], input_shape[1:] 1204 | fixed_batch_size.merge_with(batch_size) 1205 | for i, size in enumerate(input_size): 1206 | if size.value is None: 1207 | raise ValueError( 1208 | "Input size (dimension %d of inputs) must be accessible via " 1209 | "shape inference, but saw value None." 
% i) 1210 | else: 1211 | fixed_batch_size = first_input.get_shape().with_rank_at_least(1)[0] 1212 | 1213 | if fixed_batch_size.value: 1214 | batch_size = fixed_batch_size.value 1215 | else: 1216 | batch_size = array_ops.shape(first_input)[0] 1217 | if initial_state is not None: 1218 | state = initial_state 1219 | else: 1220 | if not dtype: 1221 | raise ValueError("If no initial_state is provided, " 1222 | "dtype must be specified") 1223 | state = cell.zero_state(batch_size, dtype) 1224 | 1225 | if sequence_length is not None: # Prepare variables 1226 | sequence_length = ops.convert_to_tensor( 1227 | sequence_length, name="sequence_length") 1228 | if sequence_length.get_shape().ndims not in (None, 1): 1229 | raise ValueError( 1230 | "sequence_length must be a vector of length batch_size") 1231 | 1232 | def _create_zero_output(output_size): 1233 | # convert int to TensorShape if necessary 1234 | size = _concat(batch_size, output_size) 1235 | output = array_ops.zeros( 1236 | array_ops.stack(size), _infer_state_dtype(dtype, state)) 1237 | shape = _concat(fixed_batch_size.value, output_size, static=True) 1238 | output.set_shape(tensor_shape.TensorShape(shape)) 1239 | return output 1240 | 1241 | output_size = cell.output_size 1242 | flat_output_size = nest.flatten(output_size) 1243 | flat_zero_output = tuple( 1244 | _create_zero_output(size) for size in flat_output_size) 1245 | zero_output = nest.pack_sequence_as( 1246 | structure=output_size, flat_sequence=flat_zero_output) 1247 | 1248 | sequence_length = math_ops.to_int32(sequence_length) 1249 | min_sequence_length = math_ops.reduce_min(sequence_length) 1250 | max_sequence_length = math_ops.reduce_max(sequence_length) 1251 | 1252 | for time, input_ in enumerate(inputs): 1253 | if time > 0: 1254 | varscope.reuse_variables() 1255 | # pylint: disable=cell-var-from-loop 1256 | call_cell = lambda: cell(input_, state) 1257 | # pylint: enable=cell-var-from-loop 1258 | if sequence_length is not None: 1259 | (output, state) = _rnn_step( 1260 | time=time, 1261 | sequence_length=sequence_length, 1262 | min_sequence_length=min_sequence_length, 1263 | max_sequence_length=max_sequence_length, 1264 | zero_output=zero_output, 1265 | state=state, 1266 | call_cell=call_cell, 1267 | state_size=cell.state_size) 1268 | else: 1269 | (output, state) = call_cell() 1270 | 1271 | outputs.append(output) 1272 | 1273 | return (outputs, state) 1274 | 1275 | 1276 | def static_state_saving_rnn(cell, 1277 | inputs, 1278 | state_saver, 1279 | state_name, 1280 | sequence_length=None, 1281 | scope=None): 1282 | """RNN that accepts a state saver for time-truncated RNN calculation. 1283 | 1284 | Args: 1285 | cell: An instance of `RNNCell`. 1286 | inputs: A length T list of inputs, each a `Tensor` of shape 1287 | `[batch_size, input_size]`. 1288 | state_saver: A state saver object with methods `state` and `save_state`. 1289 | state_name: Python string or tuple of strings. The name to use with the 1290 | state_saver. If the cell returns tuples of states (i.e., 1291 | `cell.state_size` is a tuple) then `state_name` should be a tuple of 1292 | strings having the same length as `cell.state_size`. Otherwise it should 1293 | be a single string. 1294 | sequence_length: (optional) An int32/int64 vector size [batch_size]. 1295 | See the documentation for rnn() for more details about sequence_length. 1296 | scope: VariableScope for the created subgraph; defaults to "rnn". 
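
  Example (illustrative sketch; assumes a `state_saver` produced by an input
  pipeline such as `tf.train.batch_sequences_with_states`, and an LSTM cell
  whose `LSTMStateTuple` state maps onto two saved state names):

  ```python
  outputs, state = static_state_saving_rnn(
      cell, inputs, state_saver=state_saver,
      state_name=("lstm_state_c", "lstm_state_h"))
  ```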
1297 | 1298 | Returns: 1299 | A pair (outputs, state) where: 1300 | outputs is a length T list of outputs (one for each input) 1301 | states is the final state 1302 | 1303 | Raises: 1304 | TypeError: If `cell` is not an instance of RNNCell. 1305 | ValueError: If `inputs` is `None` or an empty list, or if the arity and 1306 | type of `state_name` does not match that of `cell.state_size`. 1307 | """ 1308 | state_size = cell.state_size 1309 | state_is_tuple = nest.is_sequence(state_size) 1310 | state_name_tuple = nest.is_sequence(state_name) 1311 | 1312 | if state_is_tuple != state_name_tuple: 1313 | raise ValueError("state_name should be the same type as cell.state_size. " 1314 | "state_name: %s, cell.state_size: %s" % (str(state_name), 1315 | str(state_size))) 1316 | 1317 | if state_is_tuple: 1318 | state_name_flat = nest.flatten(state_name) 1319 | state_size_flat = nest.flatten(state_size) 1320 | 1321 | if len(state_name_flat) != len(state_size_flat): 1322 | raise ValueError("#elems(state_name) != #elems(state_size): %d vs. %d" % 1323 | (len(state_name_flat), len(state_size_flat))) 1324 | 1325 | initial_state = nest.pack_sequence_as( 1326 | structure=state_size, 1327 | flat_sequence=[state_saver.state(s) for s in state_name_flat]) 1328 | else: 1329 | initial_state = state_saver.state(state_name) 1330 | 1331 | (outputs, state) = static_rnn( 1332 | cell, 1333 | inputs, 1334 | initial_state=initial_state, 1335 | sequence_length=sequence_length, 1336 | scope=scope) 1337 | 1338 | if state_is_tuple: 1339 | flat_state = nest.flatten(state) 1340 | state_name = nest.flatten(state_name) 1341 | save_state = [ 1342 | state_saver.save_state(name, substate) 1343 | for name, substate in zip(state_name, flat_state) 1344 | ] 1345 | else: 1346 | save_state = [state_saver.save_state(state_name, state)] 1347 | 1348 | with ops.control_dependencies(save_state): 1349 | last_output = outputs[-1] 1350 | flat_last_output = nest.flatten(last_output) 1351 | flat_last_output = [ 1352 | array_ops.identity(output) for output in flat_last_output 1353 | ] 1354 | outputs[-1] = nest.pack_sequence_as( 1355 | structure=last_output, flat_sequence=flat_last_output) 1356 | 1357 | return (outputs, state) 1358 | 1359 | 1360 | def static_bidirectional_rnn(cell_fw, 1361 | cell_bw, 1362 | inputs, 1363 | initial_state_fw=None, 1364 | initial_state_bw=None, 1365 | dtype=None, 1366 | sequence_length=None, 1367 | scope=None): 1368 | """Creates a bidirectional recurrent neural network. 1369 | 1370 | Similar to the unidirectional case above (rnn) but takes input and builds 1371 | independent forward and backward RNNs with the final forward and backward 1372 | outputs depth-concatenated, such that the output will have the format 1373 | [time][batch][cell_fw.output_size + cell_bw.output_size]. The input_size of 1374 | forward and backward cell must match. The initial state for both directions 1375 | is zero by default (but can be set optionally) and no intermediate states are 1376 | ever returned -- the network is fully unrolled for the given (passed in) 1377 | length(s) of the sequence(s) or completely unrolled if length(s) is not given. 1378 | 1379 | Args: 1380 | cell_fw: An instance of RNNCell, to be used for forward direction. 1381 | cell_bw: An instance of RNNCell, to be used for backward direction. 1382 | inputs: A length T list of inputs, each a tensor of shape 1383 | [batch_size, input_size], or a nested tuple of such elements. 1384 | initial_state_fw: (optional) An initial state for the forward RNN. 
1385 |     This must be a tensor of appropriate type and shape
1386 |     `[batch_size, cell_fw.state_size]`.
1387 |     If `cell_fw.state_size` is a tuple, this should be a tuple of
1388 |     tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
1389 |   initial_state_bw: (optional) Same as for `initial_state_fw`, but using
1390 |     the corresponding properties of `cell_bw`.
1391 |   dtype: (optional) The data type for the initial state. Required if
1392 |     either of the initial states are not provided.
1393 |   sequence_length: (optional) An int32/int64 vector, size `[batch_size]`,
1394 |     containing the actual lengths for each of the sequences.
1395 |   scope: VariableScope for the created subgraph; defaults to
1396 |     "bidirectional_rnn"
1397 | 
1398 |   Returns:
1399 |     A tuple (outputs, output_state_fw, output_state_bw) where:
1400 |       outputs is a length `T` list of outputs (one for each input), which
1401 |         are depth-concatenated forward and backward outputs.
1402 |       output_state_fw is the final state of the forward rnn.
1403 |       output_state_bw is the final state of the backward rnn.
1404 | 
1405 |   Raises:
1406 |     TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`.
1407 |     ValueError: If inputs is None or an empty list.
1408 |   """
1409 | 
1410 |   if not _like_rnncell(cell_fw):
1411 |     raise TypeError("cell_fw must be an instance of RNNCell")
1412 |   if not _like_rnncell(cell_bw):
1413 |     raise TypeError("cell_bw must be an instance of RNNCell")
1414 |   if not nest.is_sequence(inputs):
1415 |     raise TypeError("inputs must be a sequence")
1416 |   if not inputs:
1417 |     raise ValueError("inputs must not be empty")
1418 | 
1419 |   with vs.variable_scope(scope or "bidirectional_rnn"):
1420 |     # Forward direction
1421 |     with vs.variable_scope("fw") as fw_scope:
1422 |       output_fw, output_state_fw = static_rnn(
1423 |           cell_fw,
1424 |           inputs,
1425 |           initial_state_fw,
1426 |           dtype,
1427 |           sequence_length,
1428 |           scope=fw_scope)
1429 | 
1430 |     # Backward direction
1431 |     with vs.variable_scope("bw") as bw_scope:
1432 |       reversed_inputs = _reverse_seq(inputs, sequence_length)
1433 |       tmp, output_state_bw = static_rnn(
1434 |           cell_bw,
1435 |           reversed_inputs,
1436 |           initial_state_bw,
1437 |           dtype,
1438 |           sequence_length,
1439 |           scope=bw_scope)
1440 | 
1441 |   output_bw = _reverse_seq(tmp, sequence_length)
1442 |   # Concat each of the forward/backward outputs
1443 |   flat_output_fw = nest.flatten(output_fw)
1444 |   flat_output_bw = nest.flatten(output_bw)
1445 | 
1446 |   flat_outputs = tuple(
1447 |       array_ops.concat([fw, bw], 1)
1448 |       for fw, bw in zip(flat_output_fw, flat_output_bw))
1449 | 
1450 |   outputs = nest.pack_sequence_as(
1451 |       structure=output_fw, flat_sequence=flat_outputs)
1452 | 
1453 |   return (outputs, output_state_fw, output_state_bw)
1454 | 
--------------------------------------------------------------------------------
/script/shuffle.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import random
4 | 
5 | import tempfile
6 | from subprocess import call
7 | 
8 | 
9 | def main(file, temporary=False):
10 |     tf_os, tpath = tempfile.mkstemp(dir=os.path.expanduser('~/DIN-V2-CODE'))
11 |     tf = open(tpath, 'w')
12 | 
13 |     fd = open(file, "r")
14 |     for l in fd:
15 |         print >> tf, l.strip("\n")
16 |     tf.close()
17 | 
18 |     lines = open(tpath, 'r').readlines()
19 |     random.shuffle(lines)
20 |     if temporary:
21 |         path, filename = os.path.split(os.path.realpath(file))
22 |         fd = tempfile.TemporaryFile(prefix=filename + '.shuf', dir=path)
23 |     else:
24 |         fd = open(file + '.shuf', 'w')
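    # `fd` now points at the shuffled-output destination: a TemporaryFile when
    # `temporary` is set, otherwise `<file>.shuf`; the loop below fills it.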
25 | 
26 |     for l in lines:
27 |         s = l.strip("\n")
28 |         print >> fd, s
29 | 
30 |     if temporary:
31 |         fd.seek(0)
32 |     else:
33 |         fd.close()
34 | 
35 |     os.remove(tpath)
36 | 
37 |     return fd
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     main(sys.argv[1])
42 | 
43 | 
--------------------------------------------------------------------------------
/script/split_by_user.py:
--------------------------------------------------------------------------------
1 | import random
2 | 
3 | fi = open("local_test", "r")
4 | ftrain = open("local_train_splitByUser", "w")
5 | ftest = open("local_test_splitByUser", "w")
6 | 
7 | while True:
8 |     rand_int = random.randint(1, 10)
9 |     noclk_line = fi.readline().strip()
10 |     clk_line = fi.readline().strip()
11 |     if noclk_line == "" or clk_line == "":
12 |         break
13 |     if rand_int == 2:
14 |         print >> ftest, noclk_line
15 |         print >> ftest, clk_line
16 |     else:
17 |         print >> ftrain, noclk_line
18 |         print >> ftrain, clk_line
19 | 
20 | 
21 | 
--------------------------------------------------------------------------------
/script/train.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from data_iterator import DataIterator
3 | import tensorflow as tf
4 | from model import *
5 | import time
6 | import random
7 | import sys
8 | from utils import *
9 | 
10 | EMBEDDING_DIM = 18
11 | HIDDEN_SIZE = 18 * 2
12 | ATTENTION_SIZE = 18 * 2
13 | best_auc = 0.0
14 | 
15 | def prepare_data(input, target, maxlen = None, return_neg = False):
16 |     # input: a list of samples; inp[3]/inp[4] are the behavior sequences,
17 |     lengths_x = [len(s[4]) for s in input]
18 |     seqs_mid = [inp[3] for inp in input]
19 |     seqs_cat = [inp[4] for inp in input]
20 |     noclk_seqs_mid = [inp[5] for inp in input]
21 |     noclk_seqs_cat = [inp[6] for inp in input]
22 |     seqs_item_carte = [inp[7][0] for inp in input]
23 |     seqs_cate_carte = [inp[7][1] for inp in input]
24 | 
25 |     if maxlen is not None:
26 |         new_seqs_mid = []
27 |         new_seqs_cat = []
28 |         new_noclk_seqs_mid = []
29 |         new_noclk_seqs_cat = []
30 |         new_lengths_x = []
31 |         new_seqs_item_carte = []
32 |         new_seqs_cate_carte = []
33 |         for l_x, inp in zip(lengths_x, input):
34 |             if l_x > maxlen:
35 |                 new_seqs_mid.append(inp[3][l_x - maxlen:])
36 |                 new_seqs_cat.append(inp[4][l_x - maxlen:])
37 |                 new_noclk_seqs_mid.append(inp[5][l_x - maxlen:])
38 |                 new_noclk_seqs_cat.append(inp[6][l_x - maxlen:])
39 |                 new_seqs_item_carte.append(inp[7][0][l_x - maxlen:])
40 |                 new_seqs_cate_carte.append(inp[7][1][l_x - maxlen:])
41 |                 new_lengths_x.append(maxlen)
42 |             else:
43 |                 new_seqs_mid.append(inp[3])
44 |                 new_seqs_cat.append(inp[4])
45 |                 new_noclk_seqs_mid.append(inp[5])
46 |                 new_noclk_seqs_cat.append(inp[6])
47 |                 new_seqs_item_carte.append(inp[7][0])
48 |                 new_seqs_cate_carte.append(inp[7][1])
49 |                 new_lengths_x.append(l_x)
50 |         lengths_x = new_lengths_x
51 |         seqs_mid = new_seqs_mid
52 |         seqs_cat = new_seqs_cat
53 |         noclk_seqs_mid = new_noclk_seqs_mid
54 |         noclk_seqs_cat = new_noclk_seqs_cat
55 |         seqs_item_carte = new_seqs_item_carte
56 |         seqs_cate_carte = new_seqs_cate_carte
57 | 
58 |     if len(lengths_x) < 1:
59 |         return None, None, None, None
60 | 
61 |     n_samples = len(seqs_mid)
62 |     maxlen_x = numpy.max(lengths_x)
63 |     neg_samples = len(noclk_seqs_mid[0][0])
64 | 
65 |     mid_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
66 |     cat_his = numpy.zeros((n_samples, maxlen_x)).astype('int64')
67 |     noclk_mid_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
68 |     noclk_cat_his = numpy.zeros((n_samples, maxlen_x, neg_samples)).astype('int64')
69 |     item_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
70 |     cate_carte = numpy.zeros((n_samples, maxlen_x)).astype('int64')
71 |     mid_mask = numpy.zeros((n_samples, maxlen_x)).astype('float32')
72 |     for idx, [s_x, s_y, no_sx, no_sy, i_c, c_c] in enumerate(zip(seqs_mid, seqs_cat, noclk_seqs_mid, noclk_seqs_cat, seqs_item_carte, seqs_cate_carte)):
73 |         mid_mask[idx, :lengths_x[idx]] = 1.
74 |         mid_his[idx, :lengths_x[idx]] = s_x
75 |         cat_his[idx, :lengths_x[idx]] = s_y
76 |         noclk_mid_his[idx, :lengths_x[idx], :] = no_sx
77 |         noclk_cat_his[idx, :lengths_x[idx], :] = no_sy
78 |         item_carte[idx, :lengths_x[idx]] = i_c
79 |         cate_carte[idx, :lengths_x[idx]] = c_c
80 | 
81 |     uids = numpy.array([inp[0] for inp in input])
82 |     mids = numpy.array([inp[1] for inp in input])
83 |     cats = numpy.array([inp[2] for inp in input])
84 | 
85 |     carte = numpy.stack([item_carte, cate_carte], axis=1)
86 | 
87 |     if return_neg:
88 |         return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), noclk_mid_his, noclk_cat_his, carte
89 | 
90 |     else:
91 |         return uids, mids, cats, mid_his, cat_his, mid_mask, numpy.array(target), numpy.array(lengths_x), carte
92 | 
93 | def eval(sess, test_data, model, model_path):
94 | 
95 |     loss_sum = 0.
96 |     accuracy_sum = 0.
97 |     aux_loss_sum = 0.
98 |     nums = 0
99 |     stored_arr = []
100 |     for src, tgt in test_data:
101 |         nums += 1
102 |         uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, return_neg=True)
103 |         prob, loss, acc, aux_loss = model.calculate(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte])
104 |         loss_sum += loss
105 |         aux_loss_sum += aux_loss
106 |         accuracy_sum += acc
107 |         prob_1 = prob[:, 0].tolist()
108 |         target_1 = target[:, 0].tolist()
109 |         for p, t in zip(prob_1, target_1):
110 |             stored_arr.append([p, t])
111 |     test_auc = calc_auc(stored_arr)
112 |     accuracy_sum = accuracy_sum / nums
113 |     loss_sum = loss_sum / nums
114 |     aux_loss_sum = aux_loss_sum / nums
115 |     global best_auc
116 |     if best_auc < test_auc:
117 |         best_auc = test_auc
118 |         #model.save(sess, model_path)
119 |     return test_auc, loss_sum, accuracy_sum, aux_loss_sum
120 | 
121 | def train(
122 |         train_file = "local_train_splitByUser",
123 |         test_file = "local_test_splitByUser",
124 |         uid_voc = "uid_voc.pkl",
125 |         mid_voc = "mid_voc.pkl",
126 |         cat_voc = "cat_voc.pkl",
127 |         batch_size = 128,
128 |         maxlen = 100,
129 |         test_iter = 8400,
130 |         save_iter = 8400,
131 |         model_type = 'DNN',
132 |         seed = 2,
133 | ):
134 |     model_path = "dnn_save_path/ckpt_noshuff" + model_type + str(seed)
135 |     best_model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
136 |     gpu_options = tf.GPUOptions(allow_growth=True)
137 |     with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
138 |         label_type = 1
139 |         train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, shuffle_each_epoch=False, label_type=label_type)
140 |         test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen, label_type=label_type)
141 |         n_uid, n_mid, n_cat, n_carte = train_data.get_n()
142 |         if model_type == 'DNN':
143 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
144 |         elif model_type == 'Cartesion':
145 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False, use_cartes=True)
146 |         elif model_type == 'CAN+Cartesion':
147 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True, use_cartes=True)
148 |         elif model_type == 'CAN':
149 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
150 |         elif model_type == 'PNN':
151 |             model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
152 |         elif model_type == 'ONN':
153 |             model = Model_ONN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
154 |         elif model_type == 'Wide':
155 |             model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
156 |         elif model_type == 'NCF':
157 |             model = Model_NCF(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
158 |         elif model_type == 'FM':
159 |             model = Model_FM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
160 |         elif model_type == 'FFM':
161 |             model = Model_FFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
162 |         elif model_type == 'DeepFM':
163 |             model = Model_DeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
164 |         elif model_type == 'DeepFFM':
165 |             model = Model_DeepFFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
166 |         elif model_type == 'xDeepFM':
167 |             model = Model_xDeepFM(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_softmax=False)
168 |         # (Note: 'ONN' is matched by the earlier branch above, which
169 |         #  constructs Model_ONN with use_softmax=False.)
170 |         elif model_type == 'DIN':
171 |             model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
172 |         elif model_type == 'DIEN':
173 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
174 |         elif model_type == 'CAN+DIEN':
175 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE, use_coaction=True)
176 |         else:
177 |             print("Invalid model_type : %s" % model_type)
178 |             return
179 |         print("Model: ", model_type)
180 |         sess.run(tf.global_variables_initializer())
181 |         sess.run(tf.local_variables_initializer())
182 |         sys.stdout.flush()
183 | 
184 |         count()
185 |         start_time = time.time()
186 |         iter = 0
187 |         lr = 0.001
188 |         for itr in range(1):
189 |             loss_sum = 0.0
190 |             accuracy_sum = 0.
191 |             aux_loss_sum = 0.
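            # One pass over the training set: running averages are printed and
            # reset every 100 iterations, the model is evaluated every
            # `test_iter` iterations and checkpointed every `save_iter`
            # iterations, and the learning rate is halved after each pass.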
190 |             for src, tgt in train_data:
191 |                 uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, noclk_mids, noclk_cats, carte = prepare_data(src, tgt, maxlen, return_neg=True)
192 |                 loss, acc, aux_loss = model.train(sess, [uids, mids, cats, mid_his, cat_his, mid_mask, target, sl, lr, noclk_mids, noclk_cats, carte])
193 |                 loss_sum += loss
194 |                 accuracy_sum += acc
195 |                 aux_loss_sum += aux_loss
196 |                 iter += 1
197 |                 sys.stdout.flush()
198 |                 if (iter % 100) == 0:
199 |                     print('iter: %d ----> train_loss: %.4f ---- train_accuracy: %.4f ---- train_aux_loss: %.4f' % (iter, loss_sum / 100, accuracy_sum / 100, aux_loss_sum / 100))
200 |                     loss_sum = 0.0
201 |                     accuracy_sum = 0.0
202 |                     aux_loss_sum = 0.0
203 |                 if (iter % test_iter) == 0:
204 |                     auc_, loss_, acc_, aux_ = eval(sess, test_data, model, best_model_path)
205 |                     print('iter: %d --- test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % (iter, auc_, loss_, acc_, aux_))
206 |                     loss_sum = 0.0
207 |                     accuracy_sum = 0.0
208 |                     aux_loss_sum = 0.0
209 |                 if (iter % save_iter) == 0:
210 |                     print('save model iter: %d' % iter)
211 |                     model.save(sess, model_path + "--" + str(iter))
212 |             lr *= 0.5
213 | 
214 | def count_flops(graph):
215 |     flops = tf.profiler.profile(graph, options=tf.profiler.ProfileOptionBuilder.float_operation())
216 |     print('FLOPs: {}'.format(flops.total_float_ops))
217 | 
218 | def count():
219 |     total_parameters = 0
220 |     for variable in tf.trainable_variables():
221 |         # shape is an array of tf.Dimension
222 |         shape = variable.get_shape()
223 |         variable_parameters = 1
224 |         for dim in shape:
225 |             variable_parameters *= dim.value
226 |         total_parameters += variable_parameters
227 |     print("Parameter: ", total_parameters)
228 | 
229 | def test(
230 |         train_file = "local_train_splitByUser",
231 |         test_file = "local_test_splitByUser",
232 |         uid_voc = "uid_voc.pkl",
233 |         mid_voc = "mid_voc.pkl",
234 |         cat_voc = "cat_voc.pkl",
235 |         batch_size = 128,
236 |         maxlen = 100,
237 |         model_type = 'DNN',
238 |         seed = 2
239 | ):
240 | 
241 |     model_path = "dnn_best_model/ckpt_noshuff" + model_type + str(seed)
242 |     gpu_options = tf.GPUOptions(allow_growth=True)
243 |     with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
244 |         train_data = DataIterator(train_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
245 |         test_data = DataIterator(test_file, uid_voc, mid_voc, cat_voc, batch_size, maxlen)
246 |         n_uid, n_mid, n_cat, n_carte = train_data.get_n()  # get_n() returns four sizes here; pass n_carte through as in train()
247 |         if model_type == 'DNN':
248 |             model = Model_DNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
249 |         elif model_type == 'PNN':
250 |             model = Model_PNN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
251 |         elif model_type == 'Wide':
252 |             model = Model_WideDeep(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
253 |         elif model_type == 'DIN':
254 |             model = Model_DIN(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
255 |         elif model_type == 'DIN-V2-gru-att-gru':
256 |             model = Model_DIN_V2_Gru_att_Gru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
257 |         elif model_type == 'DIN-V2-gru-gru-att':
258 |             model = Model_DIN_V2_Gru_Gru_att(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
259 |         elif model_type == 'DIN-V2-gru-qa-attGru':
260 |             model = Model_DIN_V2_Gru_QA_attGru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
261 |         elif model_type == 'DIN-V2-gru-vec-attGru':
262 |             model = Model_DIN_V2_Gru_Vec_attGru(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
263 |         elif model_type == 'DIEN':
264 |             model = Model_DIN_V2_Gru_Vec_attGru_Neg(n_uid, n_mid, n_cat, n_carte, EMBEDDING_DIM, HIDDEN_SIZE, ATTENTION_SIZE)
265 |         else:
266 |             print("Invalid model_type: %s" % model_type)
267 |             return
268 |         model.restore(sess, model_path)
269 |         print('test_auc: %.4f ---- test_loss: %.4f ---- test_accuracy: %.4f ---- test_aux_loss: %.4f' % eval(sess, test_data, model, model_path))
270 | 
271 | if __name__ == '__main__':
272 |     if len(sys.argv) == 4:
273 |         SEED = int(sys.argv[3])
274 |     else:
275 |         SEED = 3
276 |     tf.set_random_seed(SEED)
277 |     numpy.random.seed(SEED)
278 |     random.seed(SEED)
279 |     if sys.argv[1] == 'train':
280 |         train(model_type=sys.argv[2], seed=SEED)
281 |     elif sys.argv[1] == 'test':
282 |         test(model_type=sys.argv[2], seed=SEED)
283 |     else:
284 |         print('do nothing...')
285 | 
286 | 
287 | 
--------------------------------------------------------------------------------
/script/utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.ops.rnn_cell import *
3 | from tensorflow.python.ops.rnn_cell_impl import _Linear
4 | #from tensorflow import keras
5 | from tensorflow.python.ops import math_ops
6 | from tensorflow.python.ops import init_ops
7 | from tensorflow.python.ops import array_ops
8 | from tensorflow.python.ops import variable_scope as vs
9 | #from keras import backend as K
10 | 
11 | class QAAttGRUCell(RNNCell):
12 |     """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
13 |     Args:
14 |         num_units: int, The number of units in the GRU cell.
15 |         activation: Nonlinearity to use. Default: `tanh`.
16 |         reuse: (optional) Python boolean describing whether to reuse variables
17 |             in an existing scope. If not `True`, and the existing scope already has
18 |             the given variables, an error is raised.
19 |         kernel_initializer: (optional) The initializer to use for the weight and
20 |             projection matrices.
21 |         bias_initializer: (optional) The initializer to use for the bias.
22 |     """
23 | 
24 |     def __init__(self,
25 |                  num_units,
26 |                  activation=None,
27 |                  reuse=None,
28 |                  kernel_initializer=None,
29 |                  bias_initializer=None):
30 |         super(QAAttGRUCell, self).__init__(_reuse=reuse)
31 |         self._num_units = num_units
32 |         self._activation = activation or math_ops.tanh
33 |         self._kernel_initializer = kernel_initializer
34 |         self._bias_initializer = bias_initializer
35 |         self._gate_linear = None
36 |         self._candidate_linear = None
37 | 
38 |     @property
39 |     def state_size(self):
40 |         return self._num_units
41 | 
42 |     @property
43 |     def output_size(self):
44 |         return self._num_units
45 | 
46 |     def __call__(self, inputs, state, att_score):
47 |         return self.call(inputs, state, att_score)
48 | 
49 |     def call(self, inputs, state, att_score=None):
50 |         """Gated recurrent unit (GRU) with nunits cells."""
51 |         if self._gate_linear is None:
52 |             bias_ones = self._bias_initializer
53 |             if self._bias_initializer is None:
54 |                 bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
55 |             with vs.variable_scope("gates"):  # Reset gate and update gate.
56 |                 self._gate_linear = _Linear(
57 |                     [inputs, state],
58 |                     2 * self._num_units,
59 |                     True,
60 |                     bias_initializer=bias_ones,
61 |                     kernel_initializer=self._kernel_initializer)
62 | 
63 |         value = math_ops.sigmoid(self._gate_linear([inputs, state]))
64 |         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
65 | 
66 |         r_state = r * state
67 |         if self._candidate_linear is None:
68 |             with vs.variable_scope("candidate"):
69 |                 self._candidate_linear = _Linear(
70 |                     [inputs, r_state],
71 |                     self._num_units,
72 |                     True,
73 |                     bias_initializer=self._bias_initializer,
74 |                     kernel_initializer=self._kernel_initializer)
75 |         c = self._activation(self._candidate_linear([inputs, r_state]))
76 |         new_h = (1. - att_score) * state + att_score * c
77 |         return new_h, new_h
78 | 
79 | class VecAttGRUCell(RNNCell):
80 |     """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).
81 |     Args:
82 |         num_units: int, The number of units in the GRU cell.
83 |         activation: Nonlinearity to use. Default: `tanh`.
84 |         reuse: (optional) Python boolean describing whether to reuse variables
85 |             in an existing scope. If not `True`, and the existing scope already has
86 |             the given variables, an error is raised.
87 |         kernel_initializer: (optional) The initializer to use for the weight and
88 |             projection matrices.
89 |         bias_initializer: (optional) The initializer to use for the bias.
90 |     """
91 | 
92 |     def __init__(self,
93 |                  num_units,
94 |                  activation=None,
95 |                  reuse=None,
96 |                  kernel_initializer=None,
97 |                  bias_initializer=None):
98 |         super(VecAttGRUCell, self).__init__(_reuse=reuse)
99 |         self._num_units = num_units
100 |         self._activation = activation or math_ops.tanh
101 |         self._kernel_initializer = kernel_initializer
102 |         self._bias_initializer = bias_initializer
103 |         self._gate_linear = None
104 |         self._candidate_linear = None
105 | 
106 |     @property
107 |     def state_size(self):
108 |         return self._num_units
109 | 
110 |     @property
111 |     def output_size(self):
112 |         return self._num_units
113 |     def __call__(self, inputs, state, att_score):
114 |         return self.call(inputs, state, att_score)
115 |     def call(self, inputs, state, att_score=None):
116 |         """Gated recurrent unit (GRU) with nunits cells."""
117 |         if self._gate_linear is None:
118 |             bias_ones = self._bias_initializer
119 |             if self._bias_initializer is None:
120 |                 bias_ones = init_ops.constant_initializer(1.0, dtype=inputs.dtype)
121 |             with vs.variable_scope("gates"):  # Reset gate and update gate.
122 |                 self._gate_linear = _Linear(
123 |                     [inputs, state],
124 |                     2 * self._num_units,
125 |                     True,
126 |                     bias_initializer=bias_ones,
127 |                     kernel_initializer=self._kernel_initializer)
128 | 
129 |         value = math_ops.sigmoid(self._gate_linear([inputs, state]))
130 |         r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1)
131 | 
132 |         r_state = r * state
133 |         if self._candidate_linear is None:
134 |             with vs.variable_scope("candidate"):
135 |                 self._candidate_linear = _Linear(
136 |                     [inputs, r_state],
137 |                     self._num_units,
138 |                     True,
139 |                     bias_initializer=self._bias_initializer,
140 |                     kernel_initializer=self._kernel_initializer)
141 |         c = self._activation(self._candidate_linear([inputs, r_state]))
142 |         u = (1.0 - att_score) * u
143 |         new_h = u * state + (1 - u) * c
144 |         return new_h, new_h
145 | 
146 | def prelu(_x, scope=''):
147 |     """parametric ReLU activation"""
148 |     with tf.variable_scope(name_or_scope=scope, default_name="prelu"):
149 |         _alpha = tf.get_variable("prelu_"+scope, shape=_x.get_shape()[-1],
150 |                                  dtype=_x.dtype, initializer=tf.constant_initializer(0.1))
151 |         return tf.maximum(0.0, _x) + _alpha * tf.minimum(0.0, _x)
152 | 
153 | def calc_auc(raw_arr):
154 |     """Compute AUC from scored samples.
155 | 
156 |     Args:
157 |         raw_arr: list of [score, label] pairs, where label is 1. for positives and 0. for negatives.
158 | 
159 |     Returns:
160 |         float: area under the ROC curve, accumulated with the trapezoidal rule.
161 |     """
162 | 
163 |     arr = sorted(raw_arr, key=lambda d: d[0], reverse=True)
164 |     pos, neg = 0., 0.
165 |     for record in arr:
166 |         if record[1] == 1.:
167 |             pos += 1
168 |         else:
169 |             neg += 1
170 | 
171 |     fp, tp = 0., 0.
172 |     xy_arr = []
173 |     for record in arr:
174 |         if record[1] == 1.:
175 |             tp += 1
176 |         else:
177 |             fp += 1
178 |         xy_arr.append([fp/neg, tp/pos])
179 | 
180 |     auc = 0.
181 |     prev_x = 0.
182 |     prev_y = 0.
183 |     for x, y in xy_arr:
184 |         if x != prev_x:
185 |             auc += ((x - prev_x) * (y + prev_y) / 2.)
186 |             prev_x = x
187 |         prev_y = y  # update every step so vertical ROC segments are handled correctly
188 | 
189 |     return auc
190 | 
191 | def attention(query, facts, attention_size, mask, stag='null', mode='LIST', softmax_stag=1, time_major=False, return_alphas=False):
192 |     if isinstance(facts, tuple):
193 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
194 |         facts = tf.concat(facts, 2)
195 | 
196 |     if time_major:
197 |         # (T,B,D) => (B,T,D)
198 |         facts = tf.transpose(facts, [1, 0, 2])
199 | 
200 |     mask = tf.equal(mask, tf.ones_like(mask))
201 |     hidden_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
202 |     input_size = query.get_shape().as_list()[-1]
203 | 
204 |     # Trainable parameters
205 |     w1 = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
206 |     w2 = tf.Variable(tf.random_normal([input_size, attention_size], stddev=0.1))
207 |     b = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
208 |     v = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
209 | 
210 |     with tf.name_scope('v'):
211 |         # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
212 |         # the shape of `tmp` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
213 |         tmp1 = tf.tensordot(facts, w1, axes=1)
214 |         tmp2 = tf.tensordot(query, w2, axes=1)
215 |         tmp2 = tf.reshape(tmp2, [-1, 1, tf.shape(tmp2)[-1]])
216 |         tmp = tf.tanh((tmp1 + tmp2) + b)
217 | 
218 |     # For each of the timestamps its vector of size A from `tmp` is reduced with `v` vector
219 |     v_dot_tmp = tf.tensordot(tmp, v, axes=1, name='v_dot_tmp')  # (B,T) shape
220 |     key_masks = mask  # [B, T]
221 |     # key_masks = tf.expand_dims(mask, 1) # [B, 1, T]
222 |     paddings = tf.ones_like(v_dot_tmp) * (-2 ** 32 + 1)
223 |     v_dot_tmp = tf.where(key_masks, v_dot_tmp, paddings)  # [B, T]
224 |     alphas = tf.nn.softmax(v_dot_tmp, name='alphas')  # (B,T) shape
225 | 
226 |     # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
227 |     #output = tf.reduce_sum(facts * tf.expand_dims(alphas, -1), 1)
228 |     output = facts * tf.expand_dims(alphas, -1)
229 |     output = tf.reshape(output, tf.shape(facts))
230 |     # output = output / (facts.get_shape().as_list()[-1] ** 0.5)
231 |     if not return_alphas:
232 |         return output
233 |     else:
234 |         return output, alphas
235 | 
236 | def din_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
237 |     if isinstance(facts, tuple):
238 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
239 |         facts = tf.concat(facts, 2)
240 |         print("query size mismatch")
241 |         query = tf.concat(values=[
242 |             query,
243 |             query,
244 |         ], axis=1)
245 | 
246 |     if time_major:
247 |         # (T,B,D) => (B,T,D)
248 |         facts = tf.transpose(facts, [1, 0, 2])
249 |     mask = tf.equal(mask, tf.ones_like(mask))
250 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
251 |     querry_size = query.get_shape().as_list()[-1]
252 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
253 |     queries = tf.reshape(queries, tf.shape(facts))
254 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
255 |     d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
256 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
257 |     d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
258 |     d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
259 |     scores = d_layer_3_all
260 |     # Mask
261 |     # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1]) # [B, T]
262 |     key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
263 |     paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
264 |     scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]
265 | 
266 |     # Scale
267 |     # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)
268 | 
269 |     # Activation
270 |     if softmax_stag:
271 |         scores = tf.nn.softmax(scores)  # [B, 1, T]
272 | 
273 |     # Weighted sum
274 |     if mode == 'SUM':
275 |         output = tf.matmul(scores, facts)  # [B, 1, H]
276 |         # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
277 |     else:
278 |         scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
279 |         output = facts * tf.expand_dims(scores, -1)
280 |         output = tf.reshape(output, tf.shape(facts))
281 |     return output
282 | 
283 | def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
284 |     if isinstance(facts, tuple):
285 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
286 |         facts = tf.concat(facts, 2)
287 |     if len(facts.get_shape().as_list()) == 2:
288 |         facts = tf.expand_dims(facts, 1)
289 | 
290 |     if time_major:
291 |         # (T,B,D) => (B,T,D)
292 |         facts = tf.transpose(facts, [1, 0, 2])
293 |     # Trainable parameters
294 |     mask = tf.equal(mask, tf.ones_like(mask))
295 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
296 |     querry_size = query.get_shape().as_list()[-1]
297 |     query = tf.layers.dense(query, facts_size, activation=None, name='f1' + stag)
298 |     query = prelu(query)
299 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
300 |     queries = tf.reshape(queries, tf.shape(facts))
301 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
302 |     d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name='f1_att' + stag)
303 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name='f2_att' + stag)
304 |     d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name='f3_att' + stag)
305 |     d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
306 |     scores = d_layer_3_all
307 |     # Mask
308 |     # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1]) # [B, T]
309 |     key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
310 |     paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
311 |     if not forCnn:
312 |         scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]
313 | 
314 |     # Scale
315 |     # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)
316 | 
317 |     # Activation
318 |     if softmax_stag:
319 |         scores = tf.nn.softmax(scores)  # [B, 1, T]
320 | 
321 |     # Weighted sum
322 |     if mode == 'SUM':
323 |         output = tf.matmul(scores, facts)  # [B, 1, H]
324 |         # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
325 |     else:
326 |         scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
327 |         output = facts * tf.expand_dims(scores, -1)
328 |         output = tf.reshape(output, tf.shape(facts))
329 |     if return_alphas:
330 |         return output, scores
331 |     return output
332 | 
333 | def self_attention(facts, ATTENTION_SIZE, mask, stag='null'):
334 |     if len(facts.get_shape().as_list()) == 2:
335 |         facts = tf.expand_dims(facts, 1)
336 | 
337 |     def cond(batch, output, i):
338 |         return tf.less(i, tf.shape(batch)[1])
339 | 
340 |     def body(batch, output, i):
341 |         self_attention_tmp = din_fcn_attention(batch[:, i, :], batch[:, 0:i+1, :],
342 |                                                ATTENTION_SIZE, mask[:, 0:i+1], softmax_stag=1, stag=stag,
343 |                                                mode='LIST')
344 |         self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
345 |         output = output.write(i, self_attention_tmp)
346 |         return batch, output, i + 1
347 | 
348 |     output_ta = tf.TensorArray(dtype=tf.float32,
349 |                                size=0,
350 |                                dynamic_size=True,
351 |                                element_shape=(facts[:, 0, :].get_shape()))
352 |     _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
353 |     self_attention = output_op.stack()
354 |     self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
355 |     return self_attention
356 | 
357 | def self_all_attention(facts, ATTENTION_SIZE, mask, stag='null'):
358 |     if len(facts.get_shape().as_list()) == 2:
359 |         facts = tf.expand_dims(facts, 1)
360 | 
361 |     def cond(batch, output, i):
362 |         return tf.less(i, tf.shape(batch)[1])
363 | 
364 |     def body(batch, output, i):
365 |         self_attention_tmp = din_fcn_attention(batch[:, i, :], batch,
366 |                                                ATTENTION_SIZE, mask, softmax_stag=1, stag=stag,
367 |                                                mode='LIST')
368 |         self_attention_tmp = tf.reduce_sum(self_attention_tmp, 1)
369 |         output = output.write(i, self_attention_tmp)
370 |         return batch, output, i + 1
371 | 
372 |     output_ta = tf.TensorArray(dtype=tf.float32,
373 |                                size=0,
374 |                                dynamic_size=True,
375 |                                element_shape=(facts[:, 0, :].get_shape()))
376 |     _, output_op, _ = tf.while_loop(cond, body, [facts, output_ta, 0])
377 |     self_attention = output_op.stack()
378 |     self_attention = tf.transpose(self_attention, perm=[1, 0, 2])
379 |     return self_attention
380 | 
381 | def din_fcn_shine(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False):
382 |     if isinstance(facts, tuple):
383 |         # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
384 |         facts = tf.concat(facts, 2)
385 | 
386 |     if time_major:
387 |         # (T,B,D) => (B,T,D)
388 |         facts = tf.transpose(facts, [1, 0, 2])
389 |     # Trainable parameters
390 |     mask = tf.equal(mask, tf.ones_like(mask))
391 |     facts_size = facts.get_shape().as_list()[-1]  # D value - hidden size of the RNN layer
392 |     querry_size = query.get_shape().as_list()[-1]
393 |     query = tf.layers.dense(query, facts_size, activation=None, name='f1_trans_shine' + stag)
394 |     query = prelu(query)
395 |     queries = tf.tile(query, [1, tf.shape(facts)[1]])
396 |     queries = tf.reshape(queries, tf.shape(facts))
397 |     din_all = tf.concat([queries, facts, queries-facts, queries*facts], axis=-1)
398 |     d_layer_1_all = tf.layers.dense(din_all, facts_size, activation=tf.nn.sigmoid, name='f1_shine_att' + stag)
399 |     d_layer_2_all = tf.layers.dense(d_layer_1_all, facts_size, activation=tf.nn.sigmoid, name='f2_shine_att' + stag)
400 |     d_layer_2_all = tf.reshape(d_layer_2_all, tf.shape(facts))
401 |     output = d_layer_2_all
402 |     return output
403 | 
--------------------------------------------------------------------------------
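A quick sanity check for calc_auc in script/utils.py (a standalone sketch, not shipped with the repo; it assumes script/ is on PYTHONPATH so that `from utils import calc_auc` resolves). The expected value 0.75 is hand-computed: of the four positive/negative score pairs, three are ordered correctly.

# toy [score, label] pairs: positives score 0.9 and 0.7, negatives score 0.8 and 0.6
from utils import calc_auc

pairs = [[0.9, 1.], [0.8, 0.], [0.7, 1.], [0.6, 0.]]
# pairwise ordering: 0.9 > 0.8 (ok), 0.9 > 0.6 (ok), 0.7 < 0.8 (wrong), 0.7 > 0.6 (ok) -> AUC = 3/4
assert abs(calc_auc(pairs) - 0.75) < 1e-8
print(calc_auc(pairs))  # 0.75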