└── -Topic-Enhanced-Memory-Networks--master
    ├── README.md
    ├── main_tlda.py
    ├── gen_data.py
    ├── basemodel.py
    ├── main_TEMN.py
    ├── process_TEMN_data.py
    ├── TLDA.py
    └── TEMN.py

/-Topic-Enhanced-Memory-Networks--master/README.md:
--------------------------------------------------------------------------------
# -Topic-Enhanced-Memory-Networks-
Paper title: Topic-Enhanced Memory Networks for Personalised Point-of-Interest Recommendation

Environment: Python 3.7, TensorFlow 1.12

Step 1. Run gen_data.py to generate random synthetic data for testing the model.

Step 2. Run main_tlda.py to learn the topic (activity-pattern) distributions for users.

Step 3. Run main_TEMN.py to train and evaluate the TEMN recommender.
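
For example, with the default file names used by the scripts, the whole pipeline is:

```bash
python gen_data.py    # writes sample_data and poi2_xy
python main_tlda.py   # writes tlda_model_file
python main_TEMN.py   # writes top_k_label and test_scores
```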
"\t" + json.dumps(neg_data) + "\n" 40 | f_o.write(r_line) 41 | f_o.close() 42 | 43 | for poi in range(word_num): 44 | cur_x = random.uniform(115,117) 45 | cur_y = random.uniform(39,41) 46 | r_line = str(poi) + "\t" + str(cur_x) + "\t" + str(cur_y) + "\n" 47 | f_poi.write(r_line) 48 | f_poi.close() -------------------------------------------------------------------------------- /-Topic-Enhanced-Memory-Networks--master/basemodel.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | """ 3 | xz331@cam.ac.uk 4 | abnerzxzhao@tencent.com 5 | """ 6 | import tensorflow as tf 7 | 8 | 9 | class basemodel(object): 10 | def __init__(self, args): 11 | self.args = args 12 | 13 | def _get_initializer(self): 14 | if self.args.init_method == 'tnormal': 15 | return tf.truncated_normal_initializer(stddev=self.args.stddev) 16 | elif self.args.init_method == 'uniform': 17 | return tf.random_uniform_initializer(-self.args.stddev, self.args.stddev) 18 | elif self.args.init_method == 'normal': 19 | return tf.random_normal_initializer(stddev=self.args.stddev) 20 | elif self.args.init_method == 'xavier_normal': 21 | return tf.contrib.layers.xavier_initializer(uniform=False) 22 | elif self.args.init_method == 'xavier_uniform': 23 | return tf.contrib.layers.xavier_initializer(uniform=True) 24 | elif self.args.init_method == 'he_normal': 25 | return tf.contrib.layers.variance_scaling_initializer( 26 | factor=2.0, mode='FAN_IN', uniform=False) 27 | elif self.args.init_method == 'he_uniform': 28 | return tf.contrib.layers.variance_scaling_initializer( 29 | factor=2.0, mode='FAN_IN', uniform=True) 30 | else: 31 | return tf.truncated_normal_initializer(stddev=self.args.stddev) 32 | 33 | def _set_opt(self): 34 | if self.args.opt == 'SGD': 35 | self.opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=self.learn_rate) 36 | elif self.args.opt == 'Adam': 37 | self.opt = tf.train.AdamOptimizer(learning_rate=self.learn_rate) 38 | elif self.args.opt == 'Adadelta': 39 | self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.learn_rate) 40 | elif self.args.opt == 'Adagrad': 41 | self.opt = tf.train.AdagradOptimizer(learning_rate=self.learn_rate, 42 | initial_accumulator_value=0.9) 43 | elif self.args.opt == 'RMS': 44 | self.opt = tf.train.RMSPropOptimizer(learning_rate=self.learn_rate, 45 | decay=0.9, epsilon=1e-6) 46 | elif self.args.opt == 'Moment': 47 | self.opt = tf.train.MomentumOptimizer(self.args.learn_rate, 0.9) 48 | -------------------------------------------------------------------------------- /-Topic-Enhanced-Memory-Networks--master/main_TEMN.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | """ 3 | xz331@cam.ac.uk 4 | abnerzxzhao@tencent.com 5 | """ 6 | 7 | import TEMN 8 | import random 9 | import tensorflow as tf 10 | import process_TEMN_data 11 | 12 | all_data, user_num, poi_num, topic_k, all_test_data = process_TEMN_data.process_fun("sample_data") 13 | random.shuffle(all_data) 14 | 15 | num_users = user_num 16 | num_items = poi_num 17 | topic_num = topic_k 18 | 19 | class args: 20 | std = 0.1 21 | num_mem = 10 22 | embedding_size = 50 23 | constraint = True 24 | rnn_type = set(['PAIR']) 25 | margin = 0.1 26 | topic_num = topic_num 27 | l2_reg = 0.00001 28 | opt = 'SGD' 29 | clip_norm = 2 30 | dropout = 0.7 31 | learn_rate = 0.01 32 | max_p_num = 100 33 | stddev = 0.1 34 | lamb_m = 0.1 35 | lamb_d = 0.1 36 | ratio1 = 0.1 37 | ratio2 = 0.1 38 | init_method = "normal" 39 | 40 | 41 | ar = 
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/gen_data.py:
--------------------------------------------------------------------------------
#encoding:utf-8
'''
xz331@cam.ac.uk
abnerzxzhao@tencent.com
'''

import json
import random

doc_num = 5000
word_num = 10
time_num = 7
train_k = 100
test_k = 3
neg_k = 10

f_o = open("sample_data", "w")
f_poi = open("poi2_xy", "w")

for i in range(doc_num):
    train_data = []
    test_data = []
    neg_data = []
    # each record is [positive POI id, time slot id, sampled negative POI id]
    for j in range(train_k):
        pos_id = abs(int(random.gauss(3, 10))) % word_num
        time_id = abs(int(random.gauss(3, 3))) % time_num
        neg_id = abs(int(random.gauss(7, 10))) % word_num
        cur_item = [pos_id, time_id, neg_id]
        train_data.append(cur_item)
    for k in range(test_k):
        pos_id = abs(int(random.gauss(3, 10))) % word_num
        time_id = abs(int(random.gauss(3, 3))) % time_num
        neg_id = abs(int(random.gauss(7, 10))) % word_num
        cur_item = [pos_id, time_id, neg_id]
        test_data.append(cur_item)
    for n in range(neg_k):
        neg_id = abs(int(random.gauss(7, 10))) % word_num
        neg_data.append(neg_id)
    r_line = str(i) + "\t" + json.dumps(train_data) + "\t" + json.dumps(test_data) + "\t" + json.dumps(neg_data) + "\n"
    f_o.write(r_line)
f_o.close()

# random 2-D coordinates for every POI (read back as x/y by process_TEMN_data.py)
for poi in range(word_num):
    cur_x = random.uniform(115, 117)
    cur_y = random.uniform(39, 41)
    r_line = str(poi) + "\t" + str(cur_x) + "\t" + str(cur_y) + "\n"
    f_poi.write(r_line)
f_poi.close()
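
# Quick format check (an added illustration, not required by the pipeline): every line
# of sample_data is "doc_id \t train_json \t test_json \t neg_json".
with open("sample_data") as f_check:
    first = f_check.readline().strip("\n").split("\t")
print("doc id:", first[0])
print("first training triple [poi, time, neg_poi]:", json.loads(first[1])[0])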
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/basemodel.py:
--------------------------------------------------------------------------------
# encoding:utf-8
"""
xz331@cam.ac.uk
abnerzxzhao@tencent.com
"""
import tensorflow as tf


class basemodel(object):
    def __init__(self, args):
        self.args = args

    def _get_initializer(self):
        # pick a variable initializer according to args.init_method
        if self.args.init_method == 'tnormal':
            return tf.truncated_normal_initializer(stddev=self.args.stddev)
        elif self.args.init_method == 'uniform':
            return tf.random_uniform_initializer(-self.args.stddev, self.args.stddev)
        elif self.args.init_method == 'normal':
            return tf.random_normal_initializer(stddev=self.args.stddev)
        elif self.args.init_method == 'xavier_normal':
            return tf.contrib.layers.xavier_initializer(uniform=False)
        elif self.args.init_method == 'xavier_uniform':
            return tf.contrib.layers.xavier_initializer(uniform=True)
        elif self.args.init_method == 'he_normal':
            return tf.contrib.layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_IN', uniform=False)
        elif self.args.init_method == 'he_uniform':
            return tf.contrib.layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_IN', uniform=True)
        else:
            return tf.truncated_normal_initializer(stddev=self.args.stddev)

    def _set_opt(self):
        # pick an optimizer according to args.opt; self.learn_rate is set by the subclass
        if self.args.opt == 'SGD':
            self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.learn_rate)
        elif self.args.opt == 'Adam':
            self.opt = tf.train.AdamOptimizer(learning_rate=self.learn_rate)
        elif self.args.opt == 'Adadelta':
            self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.learn_rate)
        elif self.args.opt == 'Adagrad':
            self.opt = tf.train.AdagradOptimizer(learning_rate=self.learn_rate,
                                                 initial_accumulator_value=0.9)
        elif self.args.opt == 'RMS':
            self.opt = tf.train.RMSPropOptimizer(learning_rate=self.learn_rate,
                                                 decay=0.9, epsilon=1e-6)
        elif self.args.opt == 'Moment':
            self.opt = tf.train.MomentumOptimizer(self.args.learn_rate, 0.9)
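
# Minimal usage sketch (illustrative only, not used elsewhere in the repo): basemodel
# only reads init_method / stddev / opt from the args object handed to it, e.g.
#
#     from types import SimpleNamespace
#     demo_args = SimpleNamespace(init_method='xavier_normal', stddev=0.1, opt='Adam')
#     initializer = basemodel(demo_args)._get_initializer()
#
# _set_opt additionally expects self.learn_rate to be defined by the subclass before
# it is called, as TEMN does in its __init__.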
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/main_TEMN.py:
--------------------------------------------------------------------------------
#encoding:utf-8
"""
xz331@cam.ac.uk
abnerzxzhao@tencent.com
"""

import TEMN
import random
import tensorflow as tf
import process_TEMN_data

all_data, user_num, poi_num, topic_k, all_test_data = process_TEMN_data.process_fun("sample_data")
random.shuffle(all_data)

num_users = user_num
num_items = poi_num
topic_num = topic_k


class args:
    std = 0.1
    num_mem = 10
    embedding_size = 50
    constraint = True
    rnn_type = set(['PAIR'])
    margin = 0.1
    topic_num = topic_num
    l2_reg = 0.00001
    opt = 'SGD'
    clip_norm = 2
    dropout = 0.7
    learn_rate = 0.01
    max_p_num = 100
    stddev = 0.1
    lamb_m = 0.1
    lamb_d = 0.1
    ratio1 = 0.1
    ratio2 = 0.1
    init_method = "normal"


ar = args()
user_num = num_users
item_num = num_items

model = TEMN.TEMN(user_num, item_num, ar)
global_step = tf.train.get_or_create_global_step()
init = tf.global_variables_initializer()

# Launch the graph.
sess = tf.Session()
sess.run(init)

print("build finish")

n_sample = len(all_data)
n_sample_test = len(all_test_data)
batch_size = 64
batch_num = int((n_sample + batch_size - 1) / batch_size)
batch_num_test = int((n_sample_test + batch_size - 1) / batch_size)

print("batch_num", batch_num)
print("batch_num_test", batch_num_test)

Iter = 10

for it in range(Iter):
    for i in range(batch_num):
        beg = i * batch_size
        end = min((i + 1) * batch_size, n_sample)
        cur_train_data = all_data[beg:end]
        feed_dict = model.get_list_feed_dict(cur_train_data)
        cost = sess.run([model.cost, model.dist_cost, model.mem_cost, model.topic_cost, model.train_op], feed_dict)
        if i % 50 == 0:
            print(it, i, cost[0])


# Test
print("start testing")


def get_topk(scores):
    # the positive POI is the first candidate; return its rank after sorting
    # all candidate scores in descending order
    f = scores[0]
    sort_scores = sorted(scores, reverse=True)
    cur_index = sort_scores.index(f)
    return cur_index


f_out = open("top_k_label", "w")
f_o = open("test_scores", "w")
ii_x = 0

for t_data in all_test_data:
    cur_test_data = t_data
    feed_dict_test = model.get_list_feed_dict(cur_test_data, "test")
    scores = sess.run(model.predict_op, feed_dict_test)
    cur_pos = get_topk(scores)
    f_out.write(str(cur_pos) + "\n")
    for j in range(len(cur_test_data)):
        cur_u = cur_test_data[j][0]
        cur_i = cur_test_data[j][1]
        cur_s = scores[j]
        r_line = str(cur_u) + "\t" + str(cur_i) + "\t" + str(cur_s)
        f_o.write(r_line + "\n")
    ii_x += 1
    if ii_x % 10 == 0:
        print(ii_x)

f_o.close()
f_out.close()
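
# Optional evaluation sketch (an added example, not part of the original script):
# every line of top_k_label is the rank of the held-out positive POI among its
# candidates, so a hit rate and the mean rank can be read off directly.
ranks = [int(line) for line in open("top_k_label")]
if ranks:
    print("Hit@5:", sum(1 for r in ranks if r < 5) / len(ranks))
    print("mean rank:", sum(ranks) / len(ranks))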
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/process_TEMN_data.py:
--------------------------------------------------------------------------------
# encoding:utf-8
"""
xz331@cam.ac.uk
abnerzxzhao@tencent.com
"""
import json
import math
import pickle


def get_dis(node1, node2):
    # Euclidean distance between two [x, y] points
    x = node1[0] - node2[0]
    y = node1[1] - node2[1]
    return math.sqrt(x * x + y * y)


def get_poi_xy():
    # read the POI coordinates written by gen_data.py
    poi2xy = {}
    f_i = open("poi2_xy")
    readlines = f_i.readlines()
    f_i.close()
    for line in readlines:
        new_line = line.strip().split("\t")
        poi = int(new_line[0])
        x = float(new_line[1])
        y = float(new_line[2])
        poi2xy[poi] = [x, y]
    return poi2xy


def process_fun(file_name):
    poi2xy = get_poi_xy()

    # theta (user-pattern), phi (pattern-POI) and vt (time-pattern) as pickled by TLDA.save_model_data
    document_topic, topic_poi, time_topic = pickle.load(open("tlda_model_file", "rb"))
    print("document_topic:", document_topic.shape)
    topic_k = document_topic.shape[1]
    f_i = open(file_name)
    readlines = f_i.readlines()
    f_i.close()
    ret = []
    user_num = 0
    poi_num = 0
    ret_test = []
    for line in readlines:
        new_line = line.strip().split("\t")
        cur_doc_id = int(new_line[0])
        cur_data = json.loads(new_line[1])
        test_data = json.loads(new_line[2])
        neg_datas = json.loads(new_line[3])
        all_i = set()
        cur_c = 0
        poi_x = 0
        poi_y = 0
        # centre of the POIs this user has visited
        for item in cur_data:
            if item[0] not in poi2xy:
                continue
            all_i.add(item[0])
            poi_x += poi2xy[item[0]][0]
            poi_y += poi2xy[item[0]][1]
            cur_c += 1
        if cur_c < 1:
            continue
        poi_x = poi_x / cur_c
        poi_y = poi_y / cur_c
        if cur_doc_id > user_num:
            user_num = cur_doc_id
        for item in cur_data:
            if item[0] > poi_num:
                poi_num = item[0]
            if item[2] > poi_num:
                poi_num = item[2]
            if item[0] not in poi2xy or item[2] not in poi2xy:
                continue
            cur_u = cur_doc_id
            cur_i = item[0]
            cur_x = poi2xy[cur_i][0]
            cur_y = poi2xy[cur_i][1]
            neg_i = item[2]
            neg_x = poi2xy[neg_i][0]
            neg_y = poi2xy[neg_i][1]
            opt_dis = get_dis([poi_x, poi_y], [cur_x, cur_y])
            neg_dis = get_dis([poi_x, poi_y], [neg_x, neg_y])
            cur_topic = document_topic[cur_u]
            cur_all_item = []
            for p in all_i:
                if p == cur_i:
                    continue
                cur_all_item.append(p)
            if len(cur_all_item) > 0:
                xx = cur_u, cur_i, opt_dis, cur_all_item, neg_i, cur_topic, neg_dis
                ret.append(xx)

        for item in test_data:
            cur_test_data = []
            if item[0] > poi_num:
                poi_num = item[0]
            if item[2] > poi_num:
                poi_num = item[2]
            if item[0] not in poi2xy or item[2] not in poi2xy:
                continue
            cur_u = cur_doc_id
            cur_i = item[0]
            cur_x = poi2xy[cur_i][0]
            cur_y = poi2xy[cur_i][1]
            opt_dis = get_dis([poi_x, poi_y], [cur_x, cur_y])
            cur_all_item = []
            for p in all_i:
                if p == cur_i:
                    continue
                cur_all_item.append(p)
            if len(cur_all_item) > 0:
                # the positive POI is the first candidate, followed by up to 10 negatives
                xx = cur_u, cur_i, opt_dis, cur_all_item
                cur_test_data.append(xx)
                for neg_i in neg_datas[0:10]:
                    if neg_i not in poi2xy:
                        continue
                    neg_x = poi2xy[neg_i][0]
                    neg_y = poi2xy[neg_i][1]
                    opt_dis = get_dis([poi_x, poi_y], [neg_x, neg_y])
                    xx = cur_u, neg_i, opt_dis, cur_all_item
                    cur_test_data.append(xx)
                ret_test.append(cur_test_data)

    return ret, user_num + 1, poi_num + 1, topic_k, ret_test


if __name__ == '__main__':
    train_data, user_num, poi_num, topic_k, test_data = process_fun("sample_data")
    print(len(train_data))
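    # Illustrative peek at the sample layout (an added check): each training tuple
    # lines up with TEMN.get_list_feed_dict, i.e.
    # (user, pos_poi, dist_to_pos, visited_pois, neg_poi, user_topic, dist_to_neg).
    if train_data:
        print(train_data[0])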
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/TLDA.py:
--------------------------------------------------------------------------------
# encoding:utf-8
"""
xz331@cam.ac.uk
abnerzxzhao@tencent.com
"""
'''
the demo of TLDA
input_data = [(doc_idx1,poi_idx1,time_idx1),(doc_idx2,poi_idx2,time_idx2),....]
Gibbs sampling
'''
import numpy as np
import random
import pickle


class tlda:
    """
    @input_data: training data, one list of (doc, poi, time) tuples per document
    @doc_index: index of each document
    @doc_len: length of each document
    @doc_num: number of documents
    @poi_num: number of POIs
    @time_num: number of time slots
    @topic_k: number of topics (patterns)
    @iter: number of Gibbs sampling iterations
    @alpha: hyperparameter of the pattern-user distribution
    @beta: hyperparameter of the venue-pattern distribution
    @gamma: hyperparameter of the pattern-time distribution
    """

    def __init__(self, input_data, doc_index, doc_len, doc_num, poi_num, time_num, topic_k, iter, alpha=0.1, beta=0.1,
                 gamma=10):
        self.input_data = input_data
        self.doc_len = doc_len
        self.doc_index = doc_index
        self.doc_num = doc_num
        self.poi_num = poi_num
        self.time_num = time_num
        self.topic_k = topic_k
        self.iter = iter
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta
        self.n_data = len(input_data)
        self.max_doc_len = max(self.doc_len)
        self._init_para()

    def _init_para(self):
        # count matrices for the Gibbs sampler
        self.document_topic = np.zeros((self.doc_num, self.topic_k))
        self.time_topic = np.zeros((self.time_num, self.topic_k))
        self.topic_word = np.zeros((self.topic_k, self.poi_num))
        self.pre_topic_select = np.zeros((self.doc_num, self.max_doc_len), dtype=int)
        self.nwsum = np.zeros(self.topic_k, dtype="int")
        # smoothed distributions derived from the counts
        self.theta = np.zeros((self.doc_num, self.topic_k))
        self.phi = np.zeros((self.topic_k, self.poi_num))
        self.vt = np.zeros((self.time_num, self.topic_k))

    def _gen_result(self):
        for i in range(len(self.doc_index)):
            cur_doc_len = self.doc_len[i]
            cur_doc_index = self.doc_index[i]
            self.theta[cur_doc_index] = (self.document_topic[cur_doc_index] + self.alpha) / (
                cur_doc_len + self.topic_k * self.alpha)
        for i in range(self.topic_k):
            self.phi[i] = (self.topic_word[i] + self.beta) / (self.nwsum[i] + self.poi_num * self.beta)
        for i in range(self.time_num):
            cur_time_len = sum(self.time_topic[i])
            self.vt[i] = (self.time_topic[i] + self.gamma) / (cur_time_len + self.topic_k * self.gamma)

    def Sample_topic(self, i, j, l, cur_len, pre_topic):
        # remove the current assignment from the counts
        self.document_topic[i, pre_topic] = self.document_topic[i, pre_topic] - 1
        self.time_topic[l, pre_topic] = self.time_topic[l, pre_topic] - 1
        self.topic_word[pre_topic, j] = self.topic_word[pre_topic, j] - 1
        self.nwsum[pre_topic] = self.nwsum[pre_topic] - 1
        cur_doc_topic = self.document_topic[i, :]
        cur_time_topic = self.time_topic[l, :]
        cur_topic_word = self.topic_word[:, j]
        Vbeta = self.poi_num * self.beta
        Kalpha = self.topic_k * self.alpha
        Kvt = self.topic_k * self.gamma
        cur_topic = -1
        cur_time_len = sum(self.time_topic[l])
        # p(z=k) is proportional to (n_kv+beta)/(n_k+V*beta) * (n_dk+alpha)/(len_d+K*alpha)
        #                           * (n_tk+gamma)/(n_t+K*gamma)
        cur_topic_prob = (cur_topic_word + self.beta) / (self.nwsum + Vbeta) * \
                         (cur_doc_topic + self.alpha) / (cur_len + Kalpha) * \
                         (cur_time_topic + self.gamma) / (cur_time_len + Kvt)
        all_sum_t = sum(cur_topic_prob)
        cur_topic_prob = cur_topic_prob / all_sum_t
        # draw the new topic from the cumulative distribution
        for k in range(1, self.topic_k):
            cur_topic_prob[k] += cur_topic_prob[k - 1]
        u = random.uniform(0, cur_topic_prob[self.topic_k - 1])
        for topic in range(self.topic_k):
            if cur_topic_prob[topic] > u:
                cur_topic = topic
                break
        # add the new assignment back into the counts
        self.document_topic[i, cur_topic] = self.document_topic[i, cur_topic] + 1
        self.time_topic[l, cur_topic] = self.time_topic[l, cur_topic] + 1
        self.topic_word[cur_topic, j] = self.topic_word[cur_topic, j] + 1
        self.nwsum[cur_topic] = self.nwsum[cur_topic] + 1
        return cur_topic

    def train_model(self):
        # random topic initialisation
        for i in range(len(self.input_data)):
            for j in range(self.doc_len[i]):
                cur_u = self.input_data[i][j][0]
                cur_p = self.input_data[i][j][1]
                cur_t = self.input_data[i][j][2]
                topic = random.randint(0, self.topic_k - 1)
                self.document_topic[cur_u, topic] += 1
                self.time_topic[cur_t, topic] += 1
                self.topic_word[topic, cur_p] += 1
                self.nwsum[topic] += 1
                self.pre_topic_select[cur_u, j] = topic
        self._gen_result()

        for it in range(self.iter):
            print("iter:", it)
            for i in range(len(self.input_data)):
                for j in range(self.doc_len[i]):
                    cur_u = self.input_data[i][j][0]
                    cur_p = self.input_data[i][j][1]
                    cur_t = self.input_data[i][j][2]
                    cur_doc_len = self.doc_len[i]
                    cur_pre_topic = self.pre_topic_select[cur_u][j]
                    cur_topic = self.Sample_topic(cur_u, cur_p, cur_t, cur_doc_len, cur_pre_topic)
                    self.pre_topic_select[cur_u, j] = cur_topic
            self._gen_result()

    def save_model_data(self, input_file):
        print("save_model...")
        f_o = open(input_file, "wb")
        pickle.dump([self.theta, self.phi, self.vt], f_o)
        f_o.close()


if __name__ == '__main__':
    # illustrative call sequence (see main_tlda.py for how these inputs are built):
    #     model = tlda(input_data, doc_index, doc_len, doc_num, poi_num, time_num, topic_k, iter)
    #     model.train_model()
    #     model.save_model_data("model_file")
    pass
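    # Toy smoke test (a hypothetical addition, not in the original repo): two documents,
    # three POIs, two time slots, two patterns, five Gibbs iterations.
    toy_data = [[(0, 0, 0), (0, 1, 1)], [(1, 2, 0), (1, 0, 1)]]
    toy_model = tlda(toy_data, [0, 1], [2, 2], 2, 3, 2, 2, 5)
    toy_model.train_model()
    print("toy theta:", toy_model.theta)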
--------------------------------------------------------------------------------

/-Topic-Enhanced-Memory-Networks--master/TEMN.py:
--------------------------------------------------------------------------------
# encoding:utf-8
"""
xz331@cam.ac.uk
abnerzxzhao@tencent.com
"""
from __future__ import division
import tensorflow as tf
from basemodel import basemodel


class TEMN(basemodel):
    """
    @num_users: number of users
    @num_items: number of POIs
    @lamb_m: the margin of the memory-network loss
    @lamb_d: the margin of the geographical (dis) loss
    @ratio1: the weight of the topic loss
    @ratio2: the weight of the geographical loss
    """

    def __init__(self, num_users, num_items, args):
        print('creating my TEMN!')
        self.num_users = num_users
        self.num_items = num_items
        self.graph = tf.Graph()
        self.args = args
        self.stddev = self.args.stddev
        self.learn_rate = self.args.learn_rate
        self.lamb_m = args.lamb_m
        self.lamb_d = args.lamb_d
        self.ratio1 = args.ratio1
        self.ratio2 = args.ratio2
        self.attention = None
        self.selected_memory = None
        self.num_mem = self.args.num_mem

        self.initializer = self._get_initializer()
        self._set_opt()
        self._create_model_inputs()
        self._build_list_network()

    def get_list_feed_dict(self, batch, mode='training'):
        def process_all_items(x_all_item):
            # pad / truncate the visited-POI list to max_p_num entries
            cur_all_ii = [0 for j in range(self.args.max_p_num)]
            for j in range(len(x_all_item)):
                if j < self.args.max_p_num:
                    cur_all_ii[j] = x_all_item[j]
            return cur_all_ii

        if mode == 'training':
            user_input = [x[0] for x in batch]
            item_input = [x[1] for x in batch]
            uindist = [x[2] for x in batch]
            all_items_data = [process_all_items(x[3]) for x in batch]
            ll = [len(x[3]) for x in batch]
            item_input_neg = [x[4] for x in batch]
            topic_input = [x[5] for x in batch]
            uindistneg = [x[6] for x in batch]
            feed_dict = {
                self.user_input: user_input,  # user id
                self.item_input: item_input,  # item id
                self.item_input_neg: item_input_neg,
                self.L: ll,  # number of POIs a user visited
                self.all_items: all_items_data,  # POI list a user has visited
                self.label: topic_input,  # user-topic distribution from TLDA
                self.DIST: uindist,  # distance from the user centre to the positive POI
                self.DIST_neg: uindistneg,  # distance from the user centre to the negative POI
                self.dropout: self.args.dropout
            }
        else:
            user_input = [x[0] for x in batch]
            item_input = [x[1] for x in batch]
            uindist = [x[2] for x in batch]
            all_items_data = [process_all_items(x[3]) for x in batch]
            ll = [len(x[3]) for x in batch]
            feed_dict = {
                self.user_input: user_input,
                self.item_input: item_input,
                self.L: ll,
                self.DIST: uindist,
                self.all_items: all_items_data,
                self.dropout: 1
            }
        feed_dict[self.learn_rate] = self.args.learn_rate
        return feed_dict

    def _create_model_inputs(self):
        self.user_input = tf.placeholder(tf.int32, shape=[None], name='user')
        self.item_input = tf.placeholder(tf.int32, shape=[None], name='item')
        self.item_input_neg = tf.placeholder(tf.int32, shape=[None], name='item_neg')
        self.input_type = tf.placeholder(tf.int32, shape=[None], name='type')
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        self.label = tf.placeholder(tf.float32, shape=[None, self.args.topic_num], name='labels')

        self.learn_rate = tf.placeholder(tf.float32, name='learn_rate')
        self.L = tf.placeholder(tf.float32, shape=[None], name='L')
        self.DIST = tf.placeholder(tf.float32, shape=[None], name='DIST')
        self.DIST_neg = tf.placeholder(tf.float32, shape=[None], name='DIST_neg')
        self.all_items = tf.placeholder(tf.int32, shape=[None, self.args.max_p_num], name="HISTORY")
        self.batch_size = tf.shape(self.item_input)[0]

    def _composition_layer(self, user_emb, item_emb, dist='L2', selected_memory=None):
        energy = item_emb - (user_emb + selected_memory)
        if 'L2' in dist:
            final_layer = -tf.sqrt(tf.reduce_sum(tf.square(energy), 1) + 1E-3)
        elif 'L1' in dist:
            final_layer = -tf.reduce_sum(tf.abs(energy), 1)
        else:
            raise Exception('Please specify distance metric')
        final_layer = tf.reshape(final_layer, [-1, 1])
        return final_layer
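
    # Note (added commentary): the composition layer implements the memory-augmented
    # metric score used throughout the model,
    #     s(u, i) = -|| q_i - (p_u + m_ui) ||_2 ,
    # where p_u is the mean embedding of the user's visited POIs, q_i is the candidate
    # POI embedding, and m_ui is the memory slot read by the key attention below.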

    def _get_prediction(self, user_emb, item_emb, memory_key):
        # unused helper kept for reference; mirrors the attention read in _build_list_network
        _key = tf.multiply(user_emb, item_emb)
        _key = tf.expand_dims(_key, 1)
        key_attention = tf.squeeze(tf.matmul(_key, memory_key))
        key_attention = tf.nn.softmax(key_attention)
        selected_memory = tf.matmul(key_attention, self.memories)
        final_layer = self._composition_layer(user_emb, item_emb, selected_memory=selected_memory)
        return final_layer

    def _build_list_network(self):
        stddev = self.stddev
        with tf.variable_scope('embedding_layer', initializer=self.initializer):
            with tf.device('/cpu:0'):
                self.user_item_key = tf.Variable(
                    tf.random_normal(
                        [self.args.embedding_size, self.num_mem],
                        stddev=stddev))
                self.memories = tf.Variable(
                    tf.random_normal(
                        [self.num_mem, self.args.embedding_size],
                        stddev=stddev))

                self.item_embeddings = tf.get_variable('item_emb', [self.num_items + 1, self.args.embedding_size],
                                                       initializer=self.initializer)
                self.all_items_emb = tf.nn.embedding_lookup(self.item_embeddings, self.all_items)
                self.item_emb = tf.nn.embedding_lookup(self.item_embeddings, self.item_input)
                self.item_emb_neg = tf.nn.embedding_lookup(self.item_embeddings, self.item_input_neg)
                self.dis_W = tf.get_variable("W", [self.num_users + 1, 1], initializer=self.initializer)
                self.dis_b = tf.get_variable("b", [self.num_users + 1, 1], initializer=self.initializer)
                self.dis_W_item = tf.get_variable("W_item", [self.num_items + 1, 1], initializer=self.initializer)

            if self.args.constraint:
                self.all_items_emb = tf.clip_by_norm(self.all_items_emb, 1.0, axes=1)
                self.item_emb = tf.clip_by_norm(self.item_emb, 1.0, axes=1)
                self.item_emb_neg = tf.clip_by_norm(self.item_emb_neg, 1.0, axes=1)

            # user embedding: mean of the embeddings of the POIs the user has visited
            self.all_items_emb = tf.transpose(self.all_items_emb, perm=[0, 2, 1])
            self.cur_mask = tf.sequence_mask(self.L, self.args.max_p_num)
            self.cur_mask = tf.expand_dims(self.cur_mask, -1)
            self.cur_mask = tf.transpose(self.cur_mask, perm=[0, 2, 1])
            kept_indices = tf.cast(self.cur_mask, dtype=tf.float32)
            self.all_items_emb = self.all_items_emb * kept_indices
            self.user_emb_sum = tf.reduce_sum(self.all_items_emb, 2)
            self.LL = tf.expand_dims(self.L, -1)
            self.user_emb = self.user_emb_sum / self.LL

            # note: this second lookup overrides the norm-clipped item_emb above
            self.item_emb = tf.nn.embedding_lookup(self.item_embeddings, self.item_input)

            # topic module: predict the TLDA user-topic distribution from the user embedding
            self.user_topic_W = tf.Variable(
                tf.random_normal([self.args.embedding_size, self.args.topic_num], stddev=stddev))
            self.user_topic_b = tf.Variable(tf.random_normal([self.args.topic_num], stddev=stddev))
            self.topic_out = tf.matmul(self.user_emb, self.user_topic_W) + self.user_topic_b
            self.predict_topic = tf.nn.softmax(self.topic_out)
            self.topic_cost = tf.reduce_sum(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.topic_out, labels=self.label))

            # memory module: key attention selects a memory read for each (user, item) pair
            self._key = tf.multiply(self.user_emb, self.item_emb)
            self.key_attention = tf.matmul(self._key, self.user_item_key)
            self.key_attention = tf.nn.softmax(self.key_attention)
            self.selected_memory = tf.matmul(self.key_attention, self.memories)
            final_layer = self._composition_layer(self.user_emb, self.item_emb,
                                                  selected_memory=self.selected_memory)
            final_layer_neg = self._composition_layer(self.user_emb, self.item_emb_neg,
                                                      selected_memory=self.selected_memory)
            self.predict_op = tf.squeeze(final_layer)
            self.mem_cost = tf.reduce_sum(tf.nn.relu((tf.squeeze(final_layer_neg - final_layer) + self.lamb_m)))

            # geographical module: pairwise hinge loss on user-specific distance scores
            self.dis_W_emb = tf.squeeze(tf.nn.embedding_lookup(self.dis_W, self.user_input))
            self.dis_b_emb = tf.squeeze(tf.nn.embedding_lookup(self.dis_b, self.user_input))
            self.dist_W_item_emb = tf.squeeze(tf.nn.embedding_lookup(self.dis_W_item, self.item_input))
            self.dist_W_item_emb_neg = tf.squeeze(tf.nn.embedding_lookup(self.dis_W_item, self.item_input_neg))
            self.Wis = (self.dis_W_emb * self.DIST + self.dis_b_emb + self.dist_W_item_emb * self.DIST)
            self.Wis_neg = (
                self.dis_W_emb * self.DIST_neg + self.dis_b_emb + self.dist_W_item_emb_neg * self.DIST_neg)
            self.dist_cost = tf.reduce_sum(tf.nn.relu((self.lamb_d - self.Wis + self.Wis_neg)))

            self.cost = self.mem_cost + self.topic_cost * self.ratio1 + self.dist_cost * self.ratio2
            if self.args.l2_reg > 0:
                train_vars = tf.trainable_variables()
                lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in train_vars if 'bias' not in v.name]) * self.args.l2_reg
                self.cost += lossL2
            if self.args.opt == 'SGD':
                self.opt = tf.train.GradientDescentOptimizer(learning_rate=self.learn_rate)
            elif self.args.opt == 'Adam':
                self.opt = tf.train.AdamOptimizer(learning_rate=self.learn_rate)
            elif self.args.opt == 'Adadelta':
                self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.learn_rate)
            elif self.args.opt == 'Adagrad':
                self.opt = tf.train.AdagradOptimizer(learning_rate=self.learn_rate, initial_accumulator_value=0.9)
            elif self.args.opt == 'RMS':
                self.opt = tf.train.RMSPropOptimizer(learning_rate=self.learn_rate, decay=0.9, epsilon=1e-6)
            elif self.args.opt == 'Moment':
                self.opt = tf.train.MomentumOptimizer(self.args.learn_rate, 0.9)
            # grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 1)
            gradients = self.opt.compute_gradients(self.cost)
            self.gradients = gradients

            def ClipIfNotNone(grad):
                if grad is None:
                    return grad
                grad = tf.clip_by_value(grad, -10, 10, name=None)
                return tf.clip_by_norm(grad, self.args.clip_norm)

            if self.args.clip_norm > 0:
                clipped_gradients = [(ClipIfNotNone(grad), var) for grad, var in gradients]
            else:
                clipped_gradients = [(grad, var) for grad, var in gradients]

            # grads, _ = tf.clip_by_value(tf.gradients(self.cost, tvars),-10,10)
            self.optimizer = self.opt.apply_gradients(clipped_gradients)
            self.train_op = self.optimizer
--------------------------------------------------------------------------------