├── README.md ├── bleu.py ├── load_data.py ├── module.py ├── rouge.py ├── run.py ├── tripadviosr.1.log ├── tripadvisor.1.test.txt ├── tripadvisor_run.sh └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # NETE (NEural TEmplate) 2 | 3 | ## Papers 4 | - Lei Li, Yongfeng Zhang, Li Chen. Generate Neural Template Explanations for Recommendation. CIKM'20. \[[Paper](https://lileipisces.github.io/files/CIKM20-NETE-paper.pdf)\] 5 | - Lei Li, Li Chen, Yongfeng Zhang. Towards Controllable Explanation Generation for Recommender Systems via Neural Template. WWW'20 Demo. \[[Paper](https://lileipisces.github.io/files/WWW20-NETE-paper.pdf)\] 6 | 7 | **A small unpretrained Transformer version is available at [PETER](https://github.com/lileipisces/PETER)!** 8 | 9 | **A large pretrained GPT-2 version is available at [PEPLER](https://github.com/lileipisces/PEPLER)!** 10 | 11 | **A small ecosystem for Recommender Systems-based Natural Language Generation is available at [NLG4RS](https://github.com/lileipisces/NLG4RS)!** 12 | 13 | ## Code dependencies 14 | - TensorFlow 1.4 15 | - Python 3.6 16 | 17 | ## Datasets to [download](https://drive.google.com/drive/folders/1yB-EFuApAOJ0RzTI0VfZ0pignytguU0_?usp=sharing) 18 | - Amazon Movies & TV 19 | - TripAdvisor Hong Kong 20 | - Yelp 2019 21 | 22 | For those who are interested in how to obtain (feature, opinion, template, sentiment) quadruples, please refer to [Sentires-Guide](https://github.com/lileipisces/Sentires-Guide). 23 | 24 | ## Citations 25 | ``` 26 | @inproceedings{CIKM20-NETE, 27 | title={Generate Neural Template Explanations for Recommendation}, 28 | author={Li, Lei and Zhang, Yongfeng and Chen, Li}, 29 | booktitle={CIKM}, 30 | year={2020} 31 | } 32 | @inproceedings{WWW20-NETE, 33 | title={Towards Controllable Explanation Generation for Recommender Systems via Neural Template}, 34 | author={Li, Lei and Chen, Li and Zhang, Yongfeng}, 35 | booktitle={WWW Demo}, 36 | year={2020} 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /bleu.py: -------------------------------------------------------------------------------- 1 | """ 2 | Borrowed from https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py 3 | 4 | Python implementation of BLEU and smooth-BLEU. 5 | 6 | This module provides a Python implementation of BLEU and smooth-BLEU. 7 | Smooth BLEU is computed following the method outlined in the paper: 8 | Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic 9 | evaluation metrics for machine translation. COLING 2004. 10 | """ 11 | 12 | import collections 13 | import math 14 | 15 | 16 | def _get_ngrams(segment, max_order): 17 | """Extracts all n-grams upto a given maximum order from an input segment. 18 | 19 | Args: 20 | segment: text segment from which n-grams will be extracted. 21 | max_order: maximum length in tokens of the n-grams returned by this 22 | methods. 23 | 24 | Returns: 25 | The Counter containing all n-grams upto max_order in segment 26 | with a count of how many times each n-gram occurred. 27 | """ 28 | ngram_counts = collections.Counter() 29 | for order in range(1, max_order + 1): 30 | for i in range(0, len(segment) - order + 1): 31 | ngram = tuple(segment[i:i+order]) 32 | ngram_counts[ngram] += 1 33 | return ngram_counts 34 | 35 | 36 | def compute_bleu(reference_corpus, translation_corpus, max_order=4, 37 | smooth=False): 38 | """Computes BLEU score of translated segments against one or more references. 
39 | 40 | Args: 41 | reference_corpus: list of lists of references for each translation. Each 42 | reference should be tokenized into a list of tokens. 43 | translation_corpus: list of translations to score. Each translation 44 | should be tokenized into a list of tokens. 45 | max_order: Maximum n-gram order to use when computing BLEU score. 46 | smooth: Whether or not to apply Lin et al. 2004 smoothing. 47 | 48 | Returns: 49 | 3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram 50 | precisions and brevity penalty. 51 | """ 52 | matches_by_order = [0] * max_order 53 | possible_matches_by_order = [0] * max_order 54 | reference_length = 0 55 | translation_length = 0 56 | for (references, translation) in zip(reference_corpus, 57 | translation_corpus): 58 | reference_length += min(len(r) for r in references) 59 | translation_length += len(translation) 60 | 61 | merged_ref_ngram_counts = collections.Counter() 62 | for reference in references: 63 | merged_ref_ngram_counts |= _get_ngrams(reference, max_order) 64 | translation_ngram_counts = _get_ngrams(translation, max_order) 65 | overlap = translation_ngram_counts & merged_ref_ngram_counts 66 | for ngram in overlap: 67 | matches_by_order[len(ngram)-1] += overlap[ngram] 68 | for order in range(1, max_order+1): 69 | possible_matches = len(translation) - order + 1 70 | if possible_matches > 0: 71 | possible_matches_by_order[order-1] += possible_matches 72 | 73 | precisions = [0] * max_order 74 | for i in range(0, max_order): 75 | if smooth: 76 | precisions[i] = ((matches_by_order[i] + 1.) / 77 | (possible_matches_by_order[i] + 1.)) 78 | else: 79 | if possible_matches_by_order[i] > 0: 80 | precisions[i] = (float(matches_by_order[i]) / 81 | possible_matches_by_order[i]) 82 | else: 83 | precisions[i] = 0.0 84 | 85 | if min(precisions) > 0: 86 | p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions) 87 | geo_mean = math.exp(p_log_sum) 88 | else: 89 | geo_mean = 0 90 | 91 | ratio = float(translation_length) / reference_length 92 | 93 | if ratio > 1.0: 94 | bp = 1. 95 | else: 96 | bp = math.exp(1 - 1. 
/ ratio) 97 | 98 | bleu = geo_mean * bp 99 | 100 | return (bleu, precisions, bp, ratio, translation_length, reference_length) 101 | -------------------------------------------------------------------------------- /load_data.py: -------------------------------------------------------------------------------- 1 | from sklearn.feature_extraction.text import CountVectorizer 2 | from utils import * 3 | 4 | 5 | def load_data(data_path, index_dir, max_word_num, seq_max_len, use_predicted_feature=False): 6 | # collect all users id and items id 7 | user_set = set() 8 | item_set = set() 9 | 10 | max_rating = 5 11 | min_rating = 1 12 | 13 | reviews = pickle.load(open(data_path, 'rb')) 14 | for review in reviews: 15 | user_set.add(review['user']) 16 | item_set.add(review['item']) 17 | rating = review['rating'] 18 | if max_rating < rating: 19 | max_rating = rating 20 | if min_rating > rating: 21 | min_rating = rating 22 | 23 | # convert id to array index 24 | user_list = list(user_set) 25 | item_list = list(item_set) 26 | user2index = {x: i for i, x in enumerate(user_list)} 27 | item2index = {x: i for i, x in enumerate(item_list)} 28 | 29 | with open(index_dir + 'train.index', 'r') as f: 30 | line = f.readline() 31 | indexes = [int(x) for x in line.split(' ')] 32 | doc_list = [] 33 | for idx in indexes: 34 | rev = reviews[idx] 35 | (fea, adj, tem, sco) = rev['template'] 36 | doc_list.append(tem) 37 | word2index, word_list = get_word2index(doc_list, max_word_num) 38 | 39 | def format_data(data_type): 40 | with open(index_dir + data_type + '.index', 'r') as f: 41 | line = f.readline() 42 | indexes = [int(x) for x in line.split(' ')] 43 | tuple_list = [] 44 | fea_set = set() 45 | for idx in indexes: 46 | rev = reviews[idx] 47 | u = user2index[rev['user']] 48 | i = item2index[rev['item']] 49 | r = rev['rating'] 50 | (fea, adj, tem, sco) = rev['template'] 51 | w_list = [word2index.get(w, word2index['']) for w in tem.split(' ')] 52 | w_list.append(word2index['']) 53 | if len(w_list) > seq_max_len: 54 | w_list = w_list[:seq_max_len] 55 | if use_predicted_feature != 0 and data_type == 'test': 56 | fea = rev['predicted'] 57 | fea_id = word2index.get(fea, word2index['']) 58 | fea_set.add(fea_id) 59 | 60 | if sco == 1: 61 | sco = 5 62 | tuple_list.append([u, i, r, fea_id, w_list, fea, tem, sco]) 63 | return tuple_list, fea_set 64 | 65 | train_tuple_list, fea_set_tr = format_data('train') 66 | validation_tuple_list, fea_set_va = format_data('validation') 67 | test_tuple_list, fea_set_te = format_data('test') 68 | user2items_test = {} 69 | for x in test_tuple_list: 70 | u = x[0] 71 | i = x[1] 72 | if u in user2items_test: 73 | user2items_test[u].add(i) 74 | else: 75 | user2items_test[u] = {i} 76 | 77 | feature_set = set() 78 | feature_set = feature_set | fea_set_tr 79 | feature_set = feature_set | fea_set_va 80 | feature_set = feature_set | fea_set_te 81 | 82 | return train_tuple_list, validation_tuple_list, test_tuple_list, max_rating, min_rating, user2index, item2index, word2index, user_list, item_list, word_list, feature_set, user2items_test 83 | 84 | 85 | def get_word2index(doc_list, max_word_num): 86 | def split_words_by_space(text): 87 | return text.split(' ') 88 | 89 | vectorizer = CountVectorizer(max_features=max_word_num, analyzer=split_words_by_space) 90 | vectorizer.fit(doc_list) 91 | word_list = vectorizer.get_feature_names() 92 | word_list.extend(['', '', '', '']) 93 | word2index = {w: i for i, w in enumerate(word_list)} 94 | 95 | return word2index, word_list 96 | 
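For readers tracing the data pipeline, the following is a minimal, self-contained sketch (not part of the repository) of what `get_word2index` and the id-conversion step inside `format_data` do on a toy corpus. The special-token strings `<unk>`, `<go>`, `<eos>` and `<pad>` are illustrative assumptions standing in for whatever tokens are appended to `word_list` above.

```
# Toy walk-through of vocabulary building and template-to-id conversion,
# mirroring get_word2index() and format_data() above. The token names
# '<unk>', '<go>', '<eos>', '<pad>' are assumptions made for illustration.
from sklearn.feature_extraction.text import CountVectorizer

docs = ['the room was very clean', 'the staff was friendly']

# split strictly on spaces, exactly like split_words_by_space()
vectorizer = CountVectorizer(max_features=20000, analyzer=lambda text: text.split(' '))
vectorizer.fit(docs)
word_list = list(vectorizer.get_feature_names())       # get_feature_names_out() on newer scikit-learn
word_list.extend(['<unk>', '<go>', '<eos>', '<pad>'])  # assumed special tokens
word2index = {w: i for i, w in enumerate(word_list)}

# convert one template sentence into ids, as in format_data()
template = 'the room was spotless'                     # 'spotless' is out of vocabulary
w_list = [word2index.get(w, word2index['<unk>']) for w in template.split(' ')]
w_list.append(word2index['<eos>'])                     # terminate the sequence
seq_max_len = 15
w_list = w_list[:seq_max_len]                          # truncate long sequences
print(w_list)
```

The special tokens simply occupy the last positions of the vocabulary, so any word outside the `max_features` most frequent terms falls back to the unknown-word index via `word2index.get`.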
-------------------------------------------------------------------------------- /module.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | import tensorflow as tf 3 | import numpy as np 4 | import random 5 | import math 6 | import heapq 7 | 8 | 9 | def decode_train(cell_w, cell_f, seq_max_len, initial_state, seq_embeddings, feature_emb, latent_dim, mapping_layer): 10 | seq_embed = tf.TensorArray(dtype=tf.float32, size=seq_max_len) 11 | seq_embed = seq_embed.unstack(seq_embeddings) 12 | 13 | def condition(step, new_state, new_output): # arguments returned from the body function, cannot be removed 14 | return step < seq_max_len 15 | 16 | def loop_body(step, state, output): 17 | inputs = seq_embed.read(step) 18 | output_w, state_w = cell_w(inputs=inputs, state=state) 19 | output_f, state_f = cell_f(inputs=feature_emb, state=state) 20 | gamma = fusion_unit(state_w, state_f, latent_dim) 21 | gamma = tf.clip_by_value(gamma, clip_value_min=0.0, clip_value_max=1.0) 22 | new_state = (1.0 - gamma) * state_w + gamma * state_f # (batch_size, hidden_size) 23 | logits = mapping_layer(new_state) # (batch_size, vocab_size) 24 | new_output = output.write(index=step, value=logits) 25 | return step + 1, new_state, new_output 26 | 27 | outputs = tf.TensorArray(dtype=tf.float32, size=seq_max_len) 28 | loop_init = [tf.constant(value=0, dtype=tf.int32), initial_state, outputs] 29 | 30 | _, _, last_out = tf.while_loop(cond=condition, body=loop_body, loop_vars=loop_init) # (seq_max_len, batch_size, vocab_size) 31 | final_out = tf.transpose(last_out.stack(), perm=[1, 0, 2]) # (batch_size, seq_max_len, vocab_size) 32 | 33 | return final_out 34 | 35 | 36 | def decode_infer(cell_w, cell_f, seq_max_len, initial_state, start_token, feature_emb, latent_dim, mapping_layer, word_embeddings): 37 | def condition(step, new_token, new_state, new_word_out): # arguments returned from the body function, cannot be removed 38 | return step < seq_max_len 39 | 40 | def loop_body(step, token, state, word_out): 41 | inputs = tf.nn.embedding_lookup(word_embeddings, token) # (batch_size, word_dim) 42 | output_w, state_w = cell_w(inputs=inputs, state=state) 43 | output_f, state_f = cell_f(inputs=feature_emb, state=state) 44 | gamma = fusion_unit(state_w, state_f, latent_dim) 45 | gamma = tf.clip_by_value(gamma, clip_value_min=0.0, clip_value_max=1.0) 46 | new_state = (1.0 - gamma) * state_w + gamma * state_f # (batch_size, hidden_size) 47 | logits = mapping_layer(new_state) # (batch_size, vocab_size) 48 | new_token = tf.argmax(logits, axis=1, output_type=tf.int32) # (batch_size,) 49 | new_word_out = word_out.write(index=step, value=new_token) 50 | return step + 1, new_token, new_state, new_word_out 51 | 52 | word_ids = tf.TensorArray(dtype=tf.int32, size=seq_max_len) 53 | loop_init = [tf.constant(value=0, dtype=tf.int32), start_token, initial_state, word_ids] 54 | 55 | _, _, _, word_ids_out = tf.while_loop(cond=condition, body=loop_body, loop_vars=loop_init) 56 | word_ids_out = tf.transpose(word_ids_out.stack(), perm=[1, 0]) # (batch_size, seq_max_len) 57 | 58 | return word_ids_out 59 | 60 | 61 | def fusion_unit(state_w, state_f, latent_dim): 62 | with tf.variable_scope('fusion_unit', reuse=tf.AUTO_REUSE): 63 | state_w_ = tf.layers.dense(inputs=state_w, units=latent_dim, activation=tf.nn.tanh, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), 64 | use_bias=False, name='state_w_') # (batch_size, latent_dim) 65 | state_f_ = tf.layers.dense(inputs=state_f, 
units=latent_dim, activation=tf.nn.tanh, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), 66 | use_bias=False, name='state_f_') # (batch_size, latent_dim) 67 | state_w_f = tf.concat(values=[state_w_, state_f_], axis=1) # (batch_size, hidden_size * 2) 68 | gamma = tf.layers.dense(inputs=state_w_f, units=1, activation=tf.nn.sigmoid, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), 69 | use_bias=False, name='gamma') # (batch_size, 1) 70 | return gamma 71 | 72 | 73 | class NETE_r: 74 | def __init__(self, train_tuple_list, user_num, item_num, rating_layer_num=4, latent_dim=200, learning_rate=0.0001, 75 | batch_size=128, reg_rate=0.0001): 76 | 77 | self.train_tuple_list = train_tuple_list 78 | self.batch_size = batch_size 79 | 80 | graph = tf.Graph() 81 | with graph.as_default(): 82 | # input 83 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') # (batch_size,) 84 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 85 | self.rating = tf.placeholder(dtype=tf.float32, shape=[None], name='rating') 86 | 87 | # embeddings 88 | user_embeddings = tf.get_variable('user_embeddings', shape=[user_num, latent_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 89 | item_embeddings = tf.get_variable('item_embeddings', shape=[item_num, latent_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 90 | 91 | # rating prediction 92 | user_feature = tf.nn.embedding_lookup(user_embeddings, self.user_id) # (batch_size, latent_dim) 93 | item_feature = tf.nn.embedding_lookup(item_embeddings, self.item_id) 94 | hidden = tf.concat(values=[user_feature, item_feature], axis=1) # (batch_size, latent_dim * 2) 95 | for k in range(rating_layer_num): 96 | hidden = tf.layers.dense(inputs=hidden, units=latent_dim * 2, activation=tf.nn.sigmoid, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), 97 | bias_initializer=tf.constant_initializer(0.0), name='layer-{}'.format(k)) # (batch_size, latent_dim * 2) 98 | prediction = tf.layers.dense(inputs=hidden, units=1, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), 99 | bias_initializer=tf.constant_initializer(0.0), name='prediction') # (batch_size, 1) 100 | self.predicted_rating = tf.reshape(prediction, shape=[-1]) # (batch_size,) 101 | rating_loss = tf.losses.mean_squared_error(self.rating, self.predicted_rating) 102 | 103 | regularization_cost = tf.reduce_sum([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 104 | 105 | # optimization 106 | self.total_loss = rating_loss + reg_rate * regularization_cost 107 | self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.total_loss) 108 | 109 | init = tf.global_variables_initializer() 110 | 111 | config = tf.ConfigProto() 112 | config.gpu_options.allow_growth = True 113 | self.sess = tf.Session(graph=graph, config=config) 114 | self.sess.run(init) 115 | 116 | def train_one_epoch(self): 117 | sample_num = len(self.train_tuple_list) 118 | index_list = list(range(sample_num)) 119 | random.shuffle(index_list) 120 | 121 | total_loss = 0 122 | 123 | step_num = int(math.ceil(sample_num / self.batch_size)) 124 | for step in range(step_num): 125 | start = step * self.batch_size 126 | offset = min(start + self.batch_size, sample_num) 127 | 128 | user = [] 129 | item = [] 130 | rating = [] 131 | for idx in index_list[start:offset]: 132 | x = self.train_tuple_list[idx] 133 | user.append(x[0]) 134 | item.append(x[1]) 135 | rating.append(x[2]) 136 | user = np.asarray(user, 
dtype=np.int32) 137 | item = np.asarray(item, dtype=np.int32) 138 | rating = np.asarray(rating, dtype=np.float32) 139 | 140 | feed_dict = {self.user_id: user, 141 | self.item_id: item, 142 | self.rating: rating} 143 | _, loss = self.sess.run([self.optimizer, self.total_loss], feed_dict=feed_dict) 144 | total_loss += loss * (offset - start) 145 | 146 | return total_loss / sample_num 147 | 148 | def validate(self, tuple_list): 149 | sample_num = len(tuple_list) 150 | 151 | total_loss = 0 152 | 153 | step_num = int(math.ceil(sample_num / self.batch_size)) 154 | for step in range(step_num): 155 | start = step * self.batch_size 156 | offset = min(start + self.batch_size, sample_num) 157 | 158 | user = [] 159 | item = [] 160 | rating = [] 161 | for x in tuple_list[start:offset]: 162 | user.append(x[0]) 163 | item.append(x[1]) 164 | rating.append(x[2]) 165 | user = np.asarray(user, dtype=np.int32) 166 | item = np.asarray(item, dtype=np.int32) 167 | rating = np.asarray(rating, dtype=np.float32) 168 | 169 | feed_dict = {self.user_id: user, 170 | self.item_id: item, 171 | self.rating: rating} 172 | loss = self.sess.run(self.total_loss, feed_dict=feed_dict) 173 | total_loss += loss * (offset - start) 174 | 175 | return total_loss / sample_num 176 | 177 | def get_prediction(self, tuple_list): 178 | sample_num = len(tuple_list) 179 | rating_prediction = [] 180 | 181 | step_num = int(math.ceil(sample_num / self.batch_size)) 182 | for step in range(step_num): 183 | start = step * self.batch_size 184 | offset = min(start + self.batch_size, sample_num) 185 | 186 | user = [] 187 | item = [] 188 | for x in tuple_list[start:offset]: 189 | user.append(x[0]) 190 | item.append(x[1]) 191 | user = np.asarray(user, dtype=np.int32) 192 | item = np.asarray(item, dtype=np.int32) 193 | 194 | feed_dict = {self.user_id: user, 195 | self.item_id: item} 196 | rating_p = self.sess.run(self.predicted_rating, feed_dict=feed_dict) 197 | rating_prediction.extend(rating_p) 198 | 199 | return np.asarray(rating_prediction, dtype=np.float32) 200 | 201 | def get_prediction_ranking(self, top_k, users_test, item_num): 202 | user2items_train = {} 203 | for x in self.train_tuple_list: 204 | u = x[0] 205 | i = x[1] 206 | if u in user2items_train: 207 | user2items_train[u].add(i) 208 | else: 209 | user2items_train[u] = {i} 210 | 211 | user2items_top = {} 212 | for u in users_test: 213 | items = set(list(range(item_num))) - user2items_train[u] 214 | tuple_list = [[u, i] for i in items] 215 | predicted = self.get_prediction(tuple_list) 216 | item2rating = {} 217 | for i, p in zip(items, predicted): 218 | rating = p 219 | if rating == 0: 220 | rating = random.random() 221 | item2rating[i] = rating 222 | top_list = heapq.nlargest(top_k, item2rating, key=item2rating.get) 223 | user2items_top[u] = top_list 224 | 225 | return user2items_top 226 | 227 | 228 | class NETE_t: 229 | def __init__(self, train_tuple_list, user_num, item_num, word2index, mean_r=3, sentiment_num=2, latent_dim=200, 230 | word_dim=200, rnn_size=256, learning_rate=0.0001, batch_size=128, seq_max_len=15): 231 | 232 | self.train_tuple_list = train_tuple_list 233 | self.word2index = word2index 234 | self.batch_size = batch_size 235 | self.seq_max_len = seq_max_len 236 | 237 | graph = tf.Graph() 238 | with graph.as_default(): 239 | # input 240 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') # (batch_size,) 241 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 242 | self.rating = tf.placeholder(dtype=tf.float32, 
shape=[None], name='rating') 243 | self.feature = tf.placeholder(dtype=tf.int32, shape=[None], name='feature') 244 | self.word_id_seq = tf.placeholder(dtype=tf.int32, shape=[None, None], name='word_id_seq') # (batch_size, batch_max_len) 245 | self.seq_len = tf.placeholder(dtype=tf.int32, shape=[None], name='seq_len') # (batch_size,) 246 | self.batch_max_len = tf.placeholder(dtype=tf.int32, shape=[], name='batch_max_len') 247 | self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob') 248 | 249 | # embeddings 250 | user_embeddings = tf.get_variable('user_embeddings', shape=[user_num, latent_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 251 | item_embeddings = tf.get_variable('item_embeddings', shape=[item_num, latent_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 252 | sentiment_embeddings = tf.get_variable('sentiment_embeddings', shape=[sentiment_num, latent_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 253 | word_embeddings = tf.get_variable('word_embeddings', shape=[len(self.word2index), word_dim], initializer=tf.random_uniform_initializer(-0.1, 0.1)) 254 | 255 | # text generation 256 | b_size = tf.shape(input=self.user_id)[0] 257 | start_token = tf.fill(dims=[b_size], value=self.word2index['']) # (batch_size,) 258 | ending = tf.strided_slice(input_=self.word_id_seq, begin=[0, 0], end=[b_size, -1], strides=[1, 1]) # remove the last column 259 | train_input = tf.concat(values=[tf.reshape(tensor=start_token, shape=[-1, 1]), ending], axis=1) # add to the head of each sample 260 | train_input_emb = tf.nn.embedding_lookup(params=word_embeddings, ids=train_input) # (batch_size, batch_max_len, word_dim) 261 | feature_emb = tf.nn.embedding_lookup(params=word_embeddings, ids=self.feature) # (batch_size, word_dim) 262 | 263 | # encoder 264 | user_feature = tf.nn.embedding_lookup(user_embeddings, self.user_id) # (batch_size, latent_dim) 265 | item_feature = tf.nn.embedding_lookup(item_embeddings, self.item_id) 266 | # sentiment feature 267 | one = tf.ones_like(self.rating, dtype=tf.int32) 268 | zero = tf.zeros_like(self.rating, dtype=tf.int32) 269 | sentiment_index = tf.where(self.rating < mean_r, x=zero, y=one) 270 | sentiment_feature = tf.nn.embedding_lookup(sentiment_embeddings, sentiment_index) 271 | encoder_input = tf.concat(values=[user_feature, item_feature, sentiment_feature], axis=1) # (batch_size, word_dim * 3) 272 | initial_state = tf.layers.dense(inputs=encoder_input, units=rnn_size, activation=tf.nn.tanh, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), bias_initializer=tf.constant_initializer(0.0)) # (batch_size, rnn_size) 273 | 274 | # decoder 275 | word_cell = tf.nn.rnn_cell.GRUCell(num_units=rnn_size, kernel_initializer=tf.orthogonal_initializer(), reuse=tf.AUTO_REUSE) # rnn_size: the dimension of h(t) 276 | word_decoder = tf.nn.rnn_cell.DropoutWrapper(cell=word_cell, input_keep_prob=self.dropout_keep_prob, output_keep_prob=self.dropout_keep_prob) 277 | feature_cell = tf.nn.rnn_cell.GRUCell(num_units=rnn_size, kernel_initializer=tf.orthogonal_initializer(), reuse=tf.AUTO_REUSE) # rnn_size: the dimension of h(t) 278 | feature_decoder = tf.nn.rnn_cell.DropoutWrapper(cell=feature_cell, input_keep_prob=self.dropout_keep_prob, output_keep_prob=self.dropout_keep_prob) 279 | output_layer = tf.layers.Dense(units=len(self.word2index), kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1), bias_initializer=tf.constant_initializer(0.0), name='output_layer') 280 | 281 | # decoding 282 
| seq_emb = tf.transpose(train_input_emb, perm=[1, 0, 2]) # (batch_max_len, batch_size, word_dim) 283 | train_logits = decode_train(word_decoder, feature_decoder, self.batch_max_len, initial_state, seq_emb, feature_emb, latent_dim, output_layer) # (batch_size, batch_max_len, vocab_size) 284 | self.argmax_tokens = decode_infer(word_decoder, feature_decoder, seq_max_len, initial_state, start_token, feature_emb, latent_dim, output_layer, word_embeddings) # (batch_size, seq_max_len) 285 | 286 | # text generation loss 287 | masks = tf.sequence_mask(lengths=self.seq_len, maxlen=self.batch_max_len, dtype=tf.float32) # only compute the loss of valid words, (batch_size, batch_max_len) 288 | text_loss = tf.contrib.seq2seq.sequence_loss(logits=train_logits, targets=self.word_id_seq, weights=masks) 289 | 290 | # optimization 291 | self.total_loss = text_loss 292 | self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.total_loss) 293 | 294 | init = tf.global_variables_initializer() 295 | 296 | config = tf.ConfigProto() 297 | config.gpu_options.allow_growth = True 298 | self.sess = tf.Session(graph=graph, config=config) 299 | self.sess.run(init) 300 | 301 | def train_one_epoch(self, dropout_keep=0.8): 302 | sample_num = len(self.train_tuple_list) 303 | index_list = list(range(sample_num)) 304 | random.shuffle(index_list) 305 | 306 | total_loss = 0 307 | 308 | step_num = int(math.ceil(sample_num / self.batch_size)) 309 | for step in range(step_num): 310 | start = step * self.batch_size 311 | offset = min(start + self.batch_size, sample_num) 312 | 313 | user = [] 314 | item = [] 315 | rating = [] 316 | feature = [] 317 | word_seq = [] 318 | for idx in index_list[start:offset]: 319 | x = self.train_tuple_list[idx] 320 | user.append(x[0]) 321 | item.append(x[1]) 322 | rating.append(x[7]) 323 | feature.append(x[3]) 324 | word_seq.append(x[4]) 325 | user = np.asarray(user, dtype=np.int32) 326 | item = np.asarray(item, dtype=np.int32) 327 | rating = np.asarray(rating, dtype=np.float32) 328 | feature = np.asarray(feature, dtype=np.int32) 329 | word_seq, seq_len = pad_sequence_4_generation(word_seq, self.word2index['']) 330 | 331 | feed_dict = {self.user_id: user, 332 | self.item_id: item, 333 | self.rating: rating, 334 | self.feature: feature, 335 | self.word_id_seq: word_seq, 336 | self.seq_len: seq_len, 337 | self.batch_max_len: max(seq_len), 338 | self.dropout_keep_prob: dropout_keep} 339 | _, loss = self.sess.run([self.optimizer, self.total_loss], feed_dict=feed_dict) 340 | total_loss += loss * (offset - start) 341 | 342 | return total_loss / sample_num 343 | 344 | def validate(self, tuple_list): 345 | sample_num = len(tuple_list) 346 | 347 | total_loss = 0 348 | 349 | step_num = int(math.ceil(sample_num / self.batch_size)) 350 | for step in range(step_num): 351 | start = step * self.batch_size 352 | offset = min(start + self.batch_size, sample_num) 353 | 354 | user = [] 355 | item = [] 356 | rating = [] 357 | feature = [] 358 | word_seq = [] 359 | for x in tuple_list[start:offset]: 360 | user.append(x[0]) 361 | item.append(x[1]) 362 | rating.append(x[7]) 363 | feature.append(x[3]) 364 | word_seq.append(x[4]) 365 | user = np.asarray(user, dtype=np.int32) 366 | item = np.asarray(item, dtype=np.int32) 367 | rating = np.asarray(rating, dtype=np.float32) 368 | feature = np.asarray(feature, dtype=np.int32) 369 | word_seq, seq_len = pad_sequence_4_generation(word_seq, self.word2index['']) 370 | 371 | feed_dict = {self.user_id: user, 372 | self.item_id: item, 373 | self.rating: rating, 374 | 
self.feature: feature, 375 | self.word_id_seq: word_seq, 376 | self.seq_len: seq_len, 377 | self.batch_max_len: max(seq_len), 378 | self.dropout_keep_prob: 1.0} 379 | loss = self.sess.run(self.total_loss, feed_dict=feed_dict) 380 | total_loss += loss * (offset - start) 381 | 382 | return total_loss / sample_num 383 | 384 | def get_prediction(self, tuple_list): 385 | sample_num = len(tuple_list) 386 | seq_prediction = [] 387 | 388 | step_num = int(math.ceil(sample_num / self.batch_size)) 389 | for step in range(step_num): 390 | start = step * self.batch_size 391 | offset = min(start + self.batch_size, sample_num) 392 | 393 | user = [] 394 | item = [] 395 | rating = [] 396 | feature = [] 397 | for x in tuple_list[start:offset]: 398 | user.append(x[0]) 399 | item.append(x[1]) 400 | rating.append(x[7]) 401 | feature.append(x[3]) 402 | user = np.asarray(user, dtype=np.int32) 403 | item = np.asarray(item, dtype=np.int32) 404 | rating = np.asarray(rating, dtype=np.float32) 405 | feature = np.asarray(feature, dtype=np.int32) 406 | 407 | feed_dict = {self.user_id: user, 408 | self.item_id: item, 409 | self.rating: rating, 410 | self.feature: feature, 411 | self.dropout_keep_prob: 1.0} 412 | predicted_ids = self.sess.run(self.argmax_tokens, feed_dict=feed_dict) 413 | if predicted_ids.shape[1] != self.seq_max_len: 414 | pad = np.full((offset - start, self.seq_max_len - predicted_ids.shape[1]), self.word2index['']) 415 | predicted_ids = np.concatenate([predicted_ids, pad], axis=1) 416 | seq_prediction.append(predicted_ids) 417 | 418 | return np.concatenate(seq_prediction, axis=0) 419 | -------------------------------------------------------------------------------- /rouge.py: -------------------------------------------------------------------------------- 1 | """ 2 | Borrowed from https://github.com/tensorflow/nmt/blob/master/nmt/scripts/rouge.py 3 | 4 | ROUGE metric implementation. 5 | 6 | Copy from tf_seq2seq/seq2seq/metrics/rouge.py. 7 | This is a modified and slightly extended verison of 8 | https://github.com/miso-belica/sumy/blob/dev/sumy/evaluation/rouge.py. 9 | """ 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | from __future__ import unicode_literals 15 | 16 | import itertools 17 | import numpy as np 18 | 19 | #pylint: disable=C0103 20 | 21 | 22 | def _get_ngrams(n, text): 23 | """Calcualtes n-grams. 24 | 25 | Args: 26 | n: which n-grams to calculate 27 | text: An array of tokens 28 | 29 | Returns: 30 | A set of n-grams 31 | """ 32 | ngram_set = set() 33 | text_length = len(text) 34 | max_index_ngram_start = text_length - n 35 | for i in range(max_index_ngram_start + 1): 36 | ngram_set.add(tuple(text[i:i + n])) 37 | return ngram_set 38 | 39 | 40 | def _split_into_words(sentences): 41 | """Splits multiple sentences into words and flattens the result""" 42 | return list(itertools.chain(*[_.split(" ") for _ in sentences])) 43 | 44 | 45 | def _get_word_ngrams(n, sentences): 46 | """Calculates word n-grams for multiple sentences. 47 | """ 48 | assert len(sentences) > 0 49 | assert n > 0 50 | 51 | words = _split_into_words(sentences) 52 | return _get_ngrams(n, words) 53 | 54 | 55 | def _len_lcs(x, y): 56 | """ 57 | Returns the length of the Longest Common Subsequence between sequences x 58 | and y. 
59 | Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence 60 | 61 | Args: 62 | x: sequence of words 63 | y: sequence of words 64 | 65 | Returns 66 | integer: Length of LCS between x and y 67 | """ 68 | table = _lcs(x, y) 69 | n, m = len(x), len(y) 70 | return table[n, m] 71 | 72 | 73 | def _lcs(x, y): 74 | """ 75 | Computes the length of the longest common subsequence (lcs) between two 76 | strings. The implementation below uses a DP programming algorithm and runs 77 | in O(nm) time where n = len(x) and m = len(y). 78 | Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence 79 | 80 | Args: 81 | x: collection of words 82 | y: collection of words 83 | 84 | Returns: 85 | Table of dictionary of coord and len lcs 86 | """ 87 | n, m = len(x), len(y) 88 | table = dict() 89 | for i in range(n + 1): 90 | for j in range(m + 1): 91 | if i == 0 or j == 0: 92 | table[i, j] = 0 93 | elif x[i - 1] == y[j - 1]: 94 | table[i, j] = table[i - 1, j - 1] + 1 95 | else: 96 | table[i, j] = max(table[i - 1, j], table[i, j - 1]) 97 | return table 98 | 99 | 100 | def _recon_lcs(x, y): 101 | """ 102 | Returns the Longest Subsequence between x and y. 103 | Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence 104 | 105 | Args: 106 | x: sequence of words 107 | y: sequence of words 108 | 109 | Returns: 110 | sequence: LCS of x and y 111 | """ 112 | i, j = len(x), len(y) 113 | table = _lcs(x, y) 114 | 115 | def _recon(i, j): 116 | """private recon calculation""" 117 | if i == 0 or j == 0: 118 | return [] 119 | elif x[i - 1] == y[j - 1]: 120 | return _recon(i - 1, j - 1) + [(x[i - 1], i)] 121 | elif table[i - 1, j] > table[i, j - 1]: 122 | return _recon(i - 1, j) 123 | else: 124 | return _recon(i, j - 1) 125 | 126 | recon_tuple = tuple(map(lambda x: x[0], _recon(i, j))) 127 | return recon_tuple 128 | 129 | 130 | def rouge_n(evaluated_sentences, reference_sentences, n=2): 131 | """ 132 | Computes ROUGE-N of two text collections of sentences. 133 | Sourece: http://research.microsoft.com/en-us/um/people/cyl/download/ 134 | papers/rouge-working-note-v1.3.1.pdf 135 | 136 | Args: 137 | evaluated_sentences: The sentences that have been picked by the summarizer 138 | reference_sentences: The sentences from the referene set 139 | n: Size of ngram. Defaults to 2. 140 | 141 | Returns: 142 | A tuple (f1, precision, recall) for ROUGE-N 143 | 144 | Raises: 145 | ValueError: raises exception if a param has len <= 0 146 | """ 147 | if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: 148 | raise ValueError("Collections must contain at least 1 sentence.") 149 | 150 | evaluated_ngrams = _get_word_ngrams(n, evaluated_sentences) 151 | reference_ngrams = _get_word_ngrams(n, reference_sentences) 152 | reference_count = len(reference_ngrams) 153 | evaluated_count = len(evaluated_ngrams) 154 | 155 | # Gets the overlapping ngrams between evaluated and reference 156 | overlapping_ngrams = evaluated_ngrams.intersection(reference_ngrams) 157 | overlapping_count = len(overlapping_ngrams) 158 | 159 | # Handle edge case. 
This isn't mathematically correct, but it's good enough 160 | if evaluated_count == 0: 161 | precision = 0.0 162 | else: 163 | precision = overlapping_count / evaluated_count 164 | 165 | if reference_count == 0: 166 | recall = 0.0 167 | else: 168 | recall = overlapping_count / reference_count 169 | 170 | f1_score = 2.0 * ((precision * recall) / (precision + recall + 1e-8)) 171 | 172 | # return overlapping_count / reference_count 173 | return f1_score, precision, recall 174 | 175 | 176 | def _f_p_r_lcs(llcs, m, n): 177 | """ 178 | Computes the LCS-based F-measure score 179 | Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ 180 | rouge-working-note-v1.3.1.pdf 181 | 182 | Args: 183 | llcs: Length of LCS 184 | m: number of words in reference summary 185 | n: number of words in candidate summary 186 | 187 | Returns: 188 | Float. LCS-based F-measure score 189 | """ 190 | r_lcs = llcs / m 191 | p_lcs = llcs / n 192 | beta = p_lcs / (r_lcs + 1e-12) 193 | num = (1 + (beta**2)) * r_lcs * p_lcs 194 | denom = r_lcs + ((beta**2) * p_lcs) 195 | f_lcs = num / (denom + 1e-12) 196 | return f_lcs, p_lcs, r_lcs 197 | 198 | 199 | def rouge_l_sentence_level(evaluated_sentences, reference_sentences): 200 | """ 201 | Computes ROUGE-L (sentence level) of two text collections of sentences. 202 | http://research.microsoft.com/en-us/um/people/cyl/download/papers/ 203 | rouge-working-note-v1.3.1.pdf 204 | 205 | Calculated according to: 206 | R_lcs = LCS(X,Y)/m 207 | P_lcs = LCS(X,Y)/n 208 | F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) 209 | 210 | where: 211 | X = reference summary 212 | Y = Candidate summary 213 | m = length of reference summary 214 | n = length of candidate summary 215 | 216 | Args: 217 | evaluated_sentences: The sentences that have been picked by the summarizer 218 | reference_sentences: The sentences from the referene set 219 | 220 | Returns: 221 | A float: F_lcs 222 | 223 | Raises: 224 | ValueError: raises exception if a param has len <= 0 225 | """ 226 | if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: 227 | raise ValueError("Collections must contain at least 1 sentence.") 228 | reference_words = _split_into_words(reference_sentences) 229 | evaluated_words = _split_into_words(evaluated_sentences) 230 | m = len(reference_words) 231 | n = len(evaluated_words) 232 | lcs = _len_lcs(evaluated_words, reference_words) 233 | return _f_p_r_lcs(lcs, m, n) 234 | 235 | 236 | def _union_lcs(evaluated_sentences, reference_sentence): 237 | """ 238 | Returns LCS_u(r_i, C) which is the LCS score of the union longest common 239 | subsequence between reference sentence ri and candidate summary C. For example 240 | if r_i= w1 w2 w3 w4 w5, and C contains two sentences: c1 = w1 w2 w6 w7 w8 and 241 | c2 = w1 w3 w8 w9 w5, then the longest common subsequence of r_i and c1 is 242 | "w1 w2" and the longest common subsequence of r_i and c2 is "w1 w3 w5". The 243 | union longest common subsequence of r_i, c1, and c2 is "w1 w2 w3 w5" and 244 | LCS_u(r_i, C) = 4/5. 
245 | 246 | Args: 247 | evaluated_sentences: The sentences that have been picked by the summarizer 248 | reference_sentence: One of the sentences in the reference summaries 249 | 250 | Returns: 251 | float: LCS_u(r_i, C) 252 | 253 | ValueError: 254 | Raises exception if a param has len <= 0 255 | """ 256 | if len(evaluated_sentences) <= 0: 257 | raise ValueError("Collections must contain at least 1 sentence.") 258 | 259 | lcs_union = set() 260 | reference_words = _split_into_words([reference_sentence]) 261 | combined_lcs_length = 0 262 | for eval_s in evaluated_sentences: 263 | evaluated_words = _split_into_words([eval_s]) 264 | lcs = set(_recon_lcs(reference_words, evaluated_words)) 265 | combined_lcs_length += len(lcs) 266 | lcs_union = lcs_union.union(lcs) 267 | 268 | union_lcs_count = len(lcs_union) 269 | union_lcs_value = union_lcs_count / combined_lcs_length 270 | return union_lcs_value 271 | 272 | 273 | def rouge_l_summary_level(evaluated_sentences, reference_sentences): 274 | """ 275 | Computes ROUGE-L (summary level) of two text collections of sentences. 276 | http://research.microsoft.com/en-us/um/people/cyl/download/papers/ 277 | rouge-working-note-v1.3.1.pdf 278 | 279 | Calculated according to: 280 | R_lcs = SUM(1, u)[LCS(r_i,C)]/m 281 | P_lcs = SUM(1, u)[LCS(r_i,C)]/n 282 | F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) 283 | 284 | where: 285 | SUM(i,u) = SUM from i through u 286 | u = number of sentences in reference summary 287 | C = Candidate summary made up of v sentences 288 | m = number of words in reference summary 289 | n = number of words in candidate summary 290 | 291 | Args: 292 | evaluated_sentences: The sentences that have been picked by the summarizer 293 | reference_sentence: One of the sentences in the reference summaries 294 | 295 | Returns: 296 | A float: F_lcs 297 | 298 | Raises: 299 | ValueError: raises exception if a param has len <= 0 300 | """ 301 | if len(evaluated_sentences) <= 0 or len(reference_sentences) <= 0: 302 | raise ValueError("Collections must contain at least 1 sentence.") 303 | 304 | # total number of words in reference sentences 305 | m = len(_split_into_words(reference_sentences)) 306 | 307 | # total number of words in evaluated sentences 308 | n = len(_split_into_words(evaluated_sentences)) 309 | 310 | union_lcs_sum_across_all_references = 0 311 | for ref_s in reference_sentences: 312 | union_lcs_sum_across_all_references += _union_lcs(evaluated_sentences, 313 | ref_s) 314 | return _f_p_r_lcs(union_lcs_sum_across_all_references, m, n) 315 | 316 | 317 | def rouge(hypotheses, references): 318 | """Calculates average rouge scores for a list of hypotheses and 319 | references""" 320 | 321 | # Filter out hyps that are of 0 length 322 | # hyps_and_refs = zip(hypotheses, references) 323 | # hyps_and_refs = [_ for _ in hyps_and_refs if len(_[0]) > 0] 324 | # hypotheses, references = zip(*hyps_and_refs) 325 | 326 | # Calculate ROUGE-1 F1, precision, recall scores 327 | rouge_1 = [ 328 | rouge_n([hyp], [ref], 1) for hyp, ref in zip(hypotheses, references) 329 | ] 330 | rouge_1_f, rouge_1_p, rouge_1_r = map(np.mean, zip(*rouge_1)) 331 | 332 | # Calculate ROUGE-2 F1, precision, recall scores 333 | rouge_2 = [ 334 | rouge_n([hyp], [ref], 2) for hyp, ref in zip(hypotheses, references) 335 | ] 336 | rouge_2_f, rouge_2_p, rouge_2_r = map(np.mean, zip(*rouge_2)) 337 | 338 | # Calculate ROUGE-L F1, precision, recall scores 339 | rouge_l = [ 340 | rouge_l_sentence_level([hyp], [ref]) 341 | for hyp, ref in zip(hypotheses, references) 342 | 
] 343 | rouge_l_f, rouge_l_p, rouge_l_r = map(np.mean, zip(*rouge_l)) 344 | 345 | return { 346 | "rouge_1/f_score": rouge_1_f, 347 | "rouge_1/r_score": rouge_1_r, 348 | "rouge_1/p_score": rouge_1_p, 349 | "rouge_2/f_score": rouge_2_f, 350 | "rouge_2/r_score": rouge_2_r, 351 | "rouge_2/p_score": rouge_2_p, 352 | "rouge_l/f_score": rouge_l_f, 353 | "rouge_l/r_score": rouge_l_r, 354 | "rouge_l/p_score": rouge_l_p, 355 | } 356 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from module import NETE_r, NETE_t 2 | from load_data import load_data 3 | from utils import * 4 | import argparse 5 | import sys 6 | 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('-gd', '--gpu_device', type=str, help='device(s) on GPU, default=0', default='0') 10 | parser.add_argument('-dp', '--data_path', type=str, help='path for loading pickle data', default=None) 11 | parser.add_argument('-dr', '--data_ratio', type=str, help='ratio of train:validation:test', default='8:1:1') 12 | parser.add_argument('-id', '--index_dir', type=str, help='create new indexes if the directory is empty, otherwise load indexes', default=None) 13 | 14 | parser.add_argument('-rn', '--rating_layer_num', type=int, help='rating prediction layer number, default=4', default=4) 15 | parser.add_argument('-ld', '--latent_dim', type=int, help='latent dimension of users and items, default=200', default=200) 16 | parser.add_argument('-wd', '--word_dim', type=int, help='dimension of word embeddings, default=200', default=200) 17 | parser.add_argument('-rd', '--rnn_dim', type=int, help='dimension of RNN hidden states, default=256', default=256) 18 | parser.add_argument('-sm', '--seq_max_len', type=int, help='seq max len of a text, default=15', default=15) 19 | parser.add_argument('-wn', '--max_word_num', type=int, help='number of words in vocabulary, default=20000', default=20000) 20 | parser.add_argument('-dk', '--dropout_keep', type=float, help='dropout ratio in RNN, default=0.8', default=0.8) 21 | 22 | parser.add_argument('-en', '--max_epoch_num', type=int, help='max epoch number, default=100', default=100) 23 | parser.add_argument('-bs', '--batch_size', type=int, help='batch size, default=128', default=128) 24 | parser.add_argument('-lr', '--learning_rate', type=float, help='learning rate, default=0.0001', default=0.0001) 25 | parser.add_argument('-rr', '--reg_rate', type=float, help='regularization rate, default=0.0001', default=0.0001) 26 | 27 | parser.add_argument('-pf', '--use_predicted_feature', type=int, help='use predicted features from PMI when testing, 0 means no, otherwise yes', default=0) 28 | parser.add_argument('-pp', '--prediction_path', type=str, help='the path for saving predictions', default=None) 29 | parser.add_argument('-tk', '--top_k', type=int, help='select top k to evaluate, default=5', default=5) 30 | args = parser.parse_args() 31 | 32 | 33 | print('-----------------------------ARGUMENTS-----------------------------') 34 | for arg in vars(args): 35 | value = getattr(args, arg) 36 | if value is None: 37 | value = '' 38 | print('{:30} {}'.format(arg, value)) 39 | print('-----------------------------ARGUMENTS-----------------------------') 40 | 41 | 42 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_device 43 | if args.data_path is None: 44 | sys.exit(get_now_time() + 'provide data_path for loading data') 45 | if args.index_dir is None: 46 | sys.exit(get_now_time() + 'provide index_dir 
for saving and loading indexes') 47 | if args.prediction_path is None: 48 | sys.exit(get_now_time() + 'provide prediction_path for saving predicted text') 49 | if not os.path.exists(args.index_dir) or len(os.listdir(args.index_dir)) == 0: 50 | split_data(args.data_path, args.index_dir, args.data_ratio) 51 | 52 | 53 | train_tuple_list, validation_tuple_list, test_tuple_list, max_rating, min_rating, user2index, item2index, word2index, \ 54 | user_list, item_list, word_list, feature_id_set, user2items_test = load_data(args.data_path, args.index_dir, args.max_word_num, 55 | args.seq_max_len, args.use_predicted_feature) 56 | mean_r = (max_rating + min_rating) / 2 57 | sentiment_num = 2 58 | 59 | 60 | model_r = NETE_r(train_tuple_list, len(user_list), len(item_list), args.rating_layer_num, args.latent_dim, args.learning_rate, 61 | args.batch_size, args.reg_rate) 62 | # first train rating prediction module 63 | previous_loss = 1e10 64 | rating_validation, rating_test = None, None 65 | for en in range(1, args.max_epoch_num + 1): 66 | print(get_now_time() + 'iteration {}'.format(en)) 67 | 68 | train_loss = model_r.train_one_epoch() 69 | print(get_now_time() + 'loss on train set: {}'.format(train_loss)) 70 | validation_loss = model_r.validate(validation_tuple_list) 71 | print(get_now_time() + 'loss on validation set: {}'.format(validation_loss)) 72 | 73 | # early stop setting 74 | if validation_loss > previous_loss: 75 | print(get_now_time() + 'early stopped') 76 | break 77 | previous_loss = validation_loss 78 | 79 | rating_validation = model_r.get_prediction(validation_tuple_list) 80 | rating_test = model_r.get_prediction(test_tuple_list) 81 | 82 | # evaluating 83 | predicted_rating = [] 84 | for (x, r_p) in zip(test_tuple_list, rating_test): 85 | predicted_rating.append((x[2], r_p)) 86 | test_rmse = root_mean_square_error(predicted_rating, max_rating, min_rating) 87 | print(get_now_time() + 'RMSE on test set: {}'.format(test_rmse)) 88 | test_mae = mean_absolute_error(predicted_rating, max_rating, min_rating) 89 | print(get_now_time() + 'MAE on test set: {}'.format(test_mae)) 90 | 91 | user2items_top = model_r.get_prediction_ranking(args.top_k, list(user2items_test.keys()), len(item_list)) 92 | ndcg = evaluate_ndcg(user2items_test, user2items_top) 93 | print(get_now_time() + 'NDCG on test set: {}'.format(ndcg)) 94 | precision, recall, f1 = evaluate_precision_recall_f1(user2items_test, user2items_top) 95 | print(get_now_time() + 'Precision on test set: {}'.format(precision)) 96 | print(get_now_time() + 'HR on test set: {}'.format(recall)) 97 | print(get_now_time() + 'F1 on test set: {}'.format(f1)) 98 | 99 | 100 | # replace the ground-truth sentiments with predicted ratings 101 | new_validation_list = [] 102 | new_test_list = [] 103 | for (x, r_p) in zip(validation_tuple_list, rating_validation): 104 | x[-1] = r_p 105 | new_validation_list.append(x) 106 | for (x, r_p) in zip(test_tuple_list, rating_test): 107 | x[-1] = r_p 108 | new_test_list.append(x) 109 | validation_tuple_list = new_validation_list 110 | test_tuple_list = new_test_list 111 | 112 | # then start to train the explanation generation module 113 | model = NETE_t(train_tuple_list, len(user_list), len(item_list), word2index, mean_r, sentiment_num, args.latent_dim, 114 | args.word_dim, args.rnn_dim, args.learning_rate, args.batch_size, args.seq_max_len) 115 | # early stop setting 116 | previous_loss = 1e10 117 | seq_prediction = None 118 | for en in range(1, args.max_epoch_num + 1): 119 | print(get_now_time() + 'iteration {}'.format(en)) 
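    # Same early-stopping scheme as the rating module above: train for one epoch,
    # check the validation loss, and stop as soon as it increases, so the
    # seq_prediction kept for evaluation comes from the last improving epoch.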
120 | 121 | train_loss = model.train_one_epoch(args.dropout_keep) 122 | print(get_now_time() + 'loss on train set: {}'.format(train_loss)) 123 | validation_loss = model.validate(validation_tuple_list) 124 | print(get_now_time() + 'loss on validation set: {}'.format(validation_loss)) 125 | 126 | # early stop setting 127 | if validation_loss > previous_loss: 128 | print(get_now_time() + 'early stopped') 129 | break 130 | previous_loss = validation_loss 131 | 132 | seq_prediction = model.get_prediction(test_tuple_list) 133 | 134 | 135 | ids_predict = [] 136 | for s_p in seq_prediction: 137 | ids = chop_before_eos(word2index, s_p) 138 | ids_predict.append(ids) 139 | 140 | PUS, NUS = unique_sentence_percent(ids_predict) 141 | print(get_now_time() + 'USN on test set: {}'.format(NUS)) 142 | print(get_now_time() + 'USR on test set: {}'.format(PUS)) 143 | 144 | feature_batch = feature_detect(ids_predict, feature_id_set) 145 | # DIV really takes time 146 | DIV = feature_diversity(feature_batch) 147 | print(get_now_time() + 'DIV on test set: {}'.format(DIV)) 148 | FCR = feature_coverage_ratio(feature_batch, feature_id_set) 149 | print(get_now_time() + 'FCR on test set: {}'.format(FCR)) 150 | 151 | feature_test = [] 152 | ids_test = [] 153 | for x in test_tuple_list: 154 | # [u, i, r, fea_id, w_list, fea, tem, p_r] 155 | feature_test.append(x[3]) 156 | ids_test.append(x[4]) 157 | FMR = feature_matching_ratio(feature_batch, feature_test) 158 | print(get_now_time() + 'FMR on test set: {}'.format(FMR)) 159 | 160 | token_predict = [ids2tokens(word_list, ids) for ids in ids_predict] 161 | token_test = [ids2tokens(word_list, ids) for ids in ids_test] 162 | BLEU_1 = bleu_score(token_test, token_predict, n_gram=1, smooth=False) 163 | print(get_now_time() + 'BLEU-1 on test set: {}'.format(BLEU_1)) 164 | BLEU_4 = bleu_score(token_test, token_predict, n_gram=4, smooth=False) 165 | print(get_now_time() + 'BLEU-4 on test set: {}'.format(BLEU_4)) 166 | 167 | text_predict = [' '.join(tokens) for tokens in token_predict] 168 | text_test = [' '.join(tokens) for tokens in token_test] 169 | ROUGE = rouge_score(text_test, text_predict) # a dictionary 170 | print(get_now_time() + 'ROUGE on test set:') 171 | for (k, v) in ROUGE.items(): 172 | print('{}: {}'.format(k, v)) 173 | 174 | formatted_out = [] 175 | for (x, s_p) in zip(test_tuple_list, seq_prediction): 176 | text = ids2sentence(word2index, word_list, s_p) 177 | formatted_out.append('{}\n{}, {}, {}\n{}\n\n'.format(x[6], x[5], x[2], x[7], text)) 178 | with open(args.prediction_path + '.test.txt', 'w', encoding='utf-8') as f: 179 | f.write(''.join(formatted_out)) 180 | print(get_now_time() + 'saved predicted text on test set') 181 | -------------------------------------------------------------------------------- /tripadviosr.1.log: -------------------------------------------------------------------------------- 1 | -----------------------------ARGUMENTS----------------------------- 2 | gpu_device 0 3 | data_path ../data/TripAdvisor/reviews.pickle 4 | data_ratio 8:1:1 5 | index_dir ../data/TripAdvisor/1/ 6 | rating_layer_num 4 7 | latent_dim 200 8 | word_dim 200 9 | rnn_dim 256 10 | seq_max_len 15 11 | max_word_num 20000 12 | dropout_keep 0.8 13 | max_epoch_num 100 14 | batch_size 128 15 | learning_rate 0.0001 16 | reg_rate 0.0001 17 | use_predicted_feature 0 18 | prediction_path tripadvisor.1 19 | top_k 5 20 | -----------------------------ARGUMENTS----------------------------- 21 | [2020-08-12 15:36:47.399983]: iteration 1 22 | [2020-08-12 15:37:00.599668]: loss on 
train set: 1.130553807747136 23 | [2020-08-12 15:37:01.158575]: loss on validation set: 0.9264054888915616 24 | [2020-08-12 15:37:01.817642]: iteration 2 25 | [2020-08-12 15:37:14.195880]: loss on train set: 0.8360102910338537 26 | [2020-08-12 15:37:14.697749]: loss on validation set: 0.8101324167175447 27 | [2020-08-12 15:37:15.318053]: iteration 3 28 | [2020-08-12 15:37:27.707913]: loss on train set: 0.7715903631915892 29 | [2020-08-12 15:37:28.241750]: loss on validation set: 0.7963375362275011 30 | [2020-08-12 15:37:28.897900]: iteration 4 31 | [2020-08-12 15:37:41.930881]: loss on train set: 0.7522262064405173 32 | [2020-08-12 15:37:42.414709]: loss on validation set: 0.7842284015611324 33 | [2020-08-12 15:37:43.015418]: iteration 5 34 | [2020-08-12 15:37:56.073890]: loss on train set: 0.7402424369317894 35 | [2020-08-12 15:37:56.593781]: loss on validation set: 0.7734414771097718 36 | [2020-08-12 15:37:57.246574]: iteration 6 37 | [2020-08-12 15:38:10.545366]: loss on train set: 0.7298189917608205 38 | [2020-08-12 15:38:11.090095]: loss on validation set: 0.7650007148935127 39 | [2020-08-12 15:38:11.745065]: iteration 7 40 | [2020-08-12 15:38:24.998353]: loss on train set: 0.7211827758253322 41 | [2020-08-12 15:38:25.550074]: loss on validation set: 0.7645158852072748 42 | [2020-08-12 15:38:26.224618]: iteration 8 43 | [2020-08-12 15:38:39.106285]: loss on train set: 0.7140957626780978 44 | [2020-08-12 15:38:39.584158]: loss on validation set: 0.748384203030075 45 | [2020-08-12 15:38:40.106112]: iteration 9 46 | [2020-08-12 15:38:52.939533]: loss on train set: 0.70633038810948 47 | [2020-08-12 15:38:53.488027]: loss on validation set: 0.7752407275824299 48 | [2020-08-12 15:38:53.488198]: early stopped 49 | [2020-08-12 15:38:53.840877]: RMSE on test set: 0.7919474844253149 50 | [2020-08-12 15:38:54.161398]: MAE on test set: 0.6060067049969704 51 | [2020-08-12 15:54:29.921824]: NDCG on test set: 0.0009998090745070702 52 | [2020-08-12 15:54:29.947348]: Precision on test set: 0.0009581881533101046 53 | [2020-08-12 15:54:29.947475]: HR on test set: 0.0013135335434987002 54 | [2020-08-12 15:54:29.947507]: F1 on test set: 0.0010351981061911381 55 | [2020-08-12 15:54:33.260223]: iteration 1 56 | [2020-08-12 15:56:43.480705]: loss on train set: 5.928447370844536 57 | [2020-08-12 15:56:50.025059]: loss on validation set: 5.266956607324393 58 | [2020-08-12 15:57:00.352093]: iteration 2 59 | [2020-08-12 15:59:10.134453]: loss on train set: 5.0373267139414875 60 | [2020-08-12 15:59:16.607376]: loss on validation set: 4.845521304080072 61 | [2020-08-12 15:59:26.768525]: iteration 3 62 | [2020-08-12 16:01:36.658872]: loss on train set: 4.753875388043001 63 | [2020-08-12 16:01:43.094468]: loss on validation set: 4.654811049511966 64 | [2020-08-12 16:01:53.309214]: iteration 4 65 | [2020-08-12 16:04:03.224136]: loss on train set: 4.58572408112193 66 | [2020-08-12 16:04:09.697834]: loss on validation set: 4.5283577242953115 67 | [2020-08-12 16:04:19.904706]: iteration 5 68 | [2020-08-12 16:06:29.364457]: loss on train set: 4.456819274648654 69 | [2020-08-12 16:06:35.959144]: loss on validation set: 4.42231089669044 70 | [2020-08-12 16:06:46.151573]: iteration 6 71 | [2020-08-12 16:08:55.505748]: loss on train set: 4.308668853165489 72 | [2020-08-12 16:09:02.009652]: loss on validation set: 4.2606889665011325 73 | [2020-08-12 16:09:12.293672]: iteration 7 74 | [2020-08-12 16:11:21.191471]: loss on train set: 4.155090634838807 75 | [2020-08-12 16:11:27.667848]: loss on validation set: 4.1422799995009445 
76 | [2020-08-12 16:11:37.987579]: iteration 8 77 | [2020-08-12 16:13:47.751501]: loss on train set: 4.042737207253363 78 | [2020-08-12 16:13:54.229551]: loss on validation set: 4.059697506524765 79 | [2020-08-12 16:14:04.389852]: iteration 9 80 | [2020-08-12 16:16:14.527614]: loss on train set: 3.9564651890193794 81 | [2020-08-12 16:16:21.061588]: loss on validation set: 3.999866931440383 82 | [2020-08-12 16:16:31.260563]: iteration 10 83 | [2020-08-12 16:18:41.802634]: loss on train set: 3.8869119161036307 84 | [2020-08-12 16:18:48.304391]: loss on validation set: 3.951555372111448 85 | [2020-08-12 16:18:58.512138]: iteration 11 86 | [2020-08-12 16:21:08.301805]: loss on train set: 3.8277145468110017 87 | [2020-08-12 16:21:14.823091]: loss on validation set: 3.913284335551534 88 | [2020-08-12 16:21:25.057006]: iteration 12 89 | [2020-08-12 16:23:35.471299]: loss on train set: 3.7767417527374545 90 | [2020-08-12 16:23:41.976348]: loss on validation set: 3.8812884721791145 91 | [2020-08-12 16:23:52.170948]: iteration 13 92 | [2020-08-12 16:26:02.448685]: loss on train set: 3.730880811417852 93 | [2020-08-12 16:26:08.988774]: loss on validation set: 3.855218724111983 94 | [2020-08-12 16:26:19.181489]: iteration 14 95 | [2020-08-12 16:28:29.674196]: loss on train set: 3.689661892686568 96 | [2020-08-12 16:28:36.079625]: loss on validation set: 3.8323904388018395 97 | [2020-08-12 16:28:46.293977]: iteration 15 98 | [2020-08-12 16:30:56.544454]: loss on train set: 3.652248691205586 99 | [2020-08-12 16:31:03.050660]: loss on validation set: 3.813087768267411 100 | [2020-08-12 16:31:13.297895]: iteration 16 101 | [2020-08-12 16:33:23.370258]: loss on train set: 3.6171105885828077 102 | [2020-08-12 16:33:29.853473]: loss on validation set: 3.7961530413942617 103 | [2020-08-12 16:33:40.096176]: iteration 17 104 | [2020-08-12 16:35:49.606092]: loss on train set: 3.584955030690831 105 | [2020-08-12 16:35:56.155881]: loss on validation set: 3.782277455144238 106 | [2020-08-12 16:36:06.368091]: iteration 18 107 | [2020-08-12 16:38:16.533380]: loss on train set: 3.554867918110349 108 | [2020-08-12 16:38:23.060930]: loss on validation set: 3.7694379940799427 109 | [2020-08-12 16:38:33.317122]: iteration 19 110 | [2020-08-12 16:40:43.370435]: loss on train set: 3.5265016311293893 111 | [2020-08-12 16:40:49.836934]: loss on validation set: 3.7576595243755917 112 | [2020-08-12 16:41:00.088695]: iteration 20 113 | [2020-08-12 16:43:10.173988]: loss on train set: 3.4997820997321227 114 | [2020-08-12 16:43:16.727458]: loss on validation set: 3.7482998682241067 115 | [2020-08-12 16:43:27.015578]: iteration 21 116 | [2020-08-12 16:45:37.291316]: loss on train set: 3.474292882142971 117 | [2020-08-12 16:45:43.754707]: loss on validation set: 3.7400107956492867 118 | [2020-08-12 16:45:53.999205]: iteration 22 119 | [2020-08-12 16:48:04.125008]: loss on train set: 3.4502589394546352 120 | [2020-08-12 16:48:10.632847]: loss on validation set: 3.7330596909494105 121 | [2020-08-12 16:48:20.880893]: iteration 23 122 | [2020-08-12 16:50:30.862362]: loss on train set: 3.427186363668619 123 | [2020-08-12 16:50:37.398473]: loss on validation set: 3.7275455890242126 124 | [2020-08-12 16:50:47.636085]: iteration 24 125 | [2020-08-12 16:52:57.333892]: loss on train set: 3.405634684494478 126 | [2020-08-12 16:53:03.909844]: loss on validation set: 3.723139159746911 127 | [2020-08-12 16:53:14.264055]: iteration 25 128 | [2020-08-12 16:55:23.852366]: loss on train set: 3.3840863628597693 129 | [2020-08-12 16:55:30.388124]: loss 
on validation set: 3.717286692791511 130 | [2020-08-12 16:55:40.696269]: iteration 26 131 | [2020-08-12 16:57:50.706508]: loss on train set: 3.3639047734453866 132 | [2020-08-12 16:57:57.227518]: loss on validation set: 3.7142539676089084 133 | [2020-08-12 16:58:07.511384]: iteration 27 134 | [2020-08-12 17:00:17.933427]: loss on train set: 3.3447995540567876 135 | [2020-08-12 17:00:24.449332]: loss on validation set: 3.711165109639406 136 | [2020-08-12 17:00:34.642097]: iteration 28 137 | [2020-08-12 17:02:44.594669]: loss on train set: 3.325935606958009 138 | [2020-08-12 17:02:51.020012]: loss on validation set: 3.7098418644730518 139 | [2020-08-12 17:03:01.251482]: iteration 29 140 | [2020-08-12 17:05:11.172983]: loss on train set: 3.3075255159409447 141 | [2020-08-12 17:05:17.749538]: loss on validation set: 3.70795406254297 142 | [2020-08-12 17:05:27.987486]: iteration 30 143 | [2020-08-12 17:07:38.448194]: loss on train set: 3.289891774751109 144 | [2020-08-12 17:07:44.917608]: loss on validation set: 3.7071932347057297 145 | [2020-08-12 17:07:55.121610]: iteration 31 146 | [2020-08-12 17:10:04.157634]: loss on train set: 3.272759637982462 147 | [2020-08-12 17:10:10.677628]: loss on validation set: 3.7060183536469227 148 | [2020-08-12 17:10:20.886247]: iteration 32 149 | [2020-08-12 17:12:27.726612]: loss on train set: 3.2560920481214 150 | [2020-08-12 17:12:34.050944]: loss on validation set: 3.7054396682467714 151 | [2020-08-12 17:12:44.267500]: iteration 33 152 | [2020-08-12 17:14:49.848877]: loss on train set: 3.239693456444874 153 | [2020-08-12 17:14:56.132534]: loss on validation set: 3.705630825248467 154 | [2020-08-12 17:14:56.133115]: early stopped 155 | [2020-08-12 17:15:53.490967]: USN on test set: 18725 156 | [2020-08-12 17:15:53.491487]: USR on test set: 0.5851013967440553 157 | [2020-08-12 17:34:14.648193]: DIV on test set: 2.242828951084817 158 | [2020-08-12 17:34:14.667225]: FCR on test set: 0.2811832875876792 159 | [2020-08-12 17:34:14.948827]: FMR on test set: 0.7701153016904665 160 | [2020-08-12 17:34:16.299148]: BLEU-1 on test set: 22.439736218717286 161 | [2020-08-12 17:34:20.134335]: BLEU-4 on test set: 3.5679012138761306 162 | [2020-08-12 17:34:23.482678]: ROUGE on test set: 163 | rouge_1/f_score: 27.499900945288005 164 | rouge_1/r_score: 24.872329356596456 165 | rouge_1/p_score: 34.95033509821185 166 | rouge_2/f_score: 7.424527714540482 167 | rouge_2/r_score: 6.800076084830639 168 | rouge_2/p_score: 9.800007562561072 169 | rouge_l/f_score: 20.862521528885274 170 | rouge_l/r_score: 21.823924564292653 171 | rouge_l/p_score: 28.284305018204964 172 | [2020-08-12 17:34:24.405509]: saved predicted text on test set 173 | -------------------------------------------------------------------------------- /tripadvisor_run.sh: -------------------------------------------------------------------------------- 1 | python -u run.py -gd 0 -dp ../data/TripAdvisor/reviews.pickle -id ../data/TripAdvisor/1/ -pf 0 -pp tripadvisor.1 >> tripadviosr.1.log -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from bleu import compute_bleu 2 | from rouge import rouge 3 | import numpy as np 4 | import datetime 5 | import random 6 | import pickle 7 | import math 8 | import os 9 | 10 | 11 | def get_now_time(): 12 | """a string of current time""" 13 | return '[' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + ']: ' 14 | 15 | 16 | def 
mean_absolute_error(predicted, max_r, min_r):
17 |     total = 0
18 |     for (r, p) in predicted:
19 |         if p > max_r:
20 |             p = max_r
21 |         if p < min_r:
22 |             p = min_r
23 | 
24 |         sub = p - r
25 |         total += abs(sub)
26 | 
27 |     return total / len(predicted)
28 | 
29 | 
30 | def mean_square_error(predicted, max_r, min_r):
31 |     total = 0
32 |     for (r, p) in predicted:
33 |         if p > max_r:
34 |             p = max_r
35 |         if p < min_r:
36 |             p = min_r
37 | 
38 |         sub = p - r
39 |         total += sub ** 2
40 | 
41 |     return total / len(predicted)
42 | 
43 | 
44 | def root_mean_square_error(predicted, max_r, min_r):
45 |     mse = mean_square_error(predicted, max_r, min_r)
46 |     return math.sqrt(mse)
47 | 
48 | 
49 | def split_data(data_path, save_dir, ratio_str):
50 |     '''
51 |     :param data_path: pickle file, a list of all instances
52 |     :param save_dir: save the indexes
53 |     :param ratio_str: in the format of train:validation:test
54 |     '''
55 | 
56 |     # process rating and review
57 |     user2item = {}
58 |     item2user = {}
59 |     user2item2idx = {}
60 |     reviews = pickle.load(open(data_path, 'rb'))
61 |     for idx, review in enumerate(reviews):
62 |         u = review['user']
63 |         i = review['item']
64 | 
65 |         if u in user2item:
66 |             user2item[u].append(i)
67 |         else:
68 |             user2item[u] = [i]
69 |         if i in item2user:
70 |             item2user[i].append(u)
71 |         else:
72 |             item2user[i] = [u]
73 | 
74 |         if u in user2item2idx:
75 |             user2item2idx[u][i] = idx
76 |         else:
77 |             user2item2idx[u] = {i: idx}
78 | 
79 |     # split data
80 |     train_set = set()
81 |     for (u, item_list) in user2item.items():
82 |         i = random.choice(item_list)
83 |         train_set.add(user2item2idx[u][i])
84 |     for (i, user_list) in item2user.items():
85 |         u = random.choice(user_list)
86 |         train_set.add(user2item2idx[u][i])
87 | 
88 |     total_num = len(reviews)
89 |     ratio = [float(r) for r in ratio_str.split(':')]
90 |     train_num = int(ratio[0] / sum(ratio) * total_num)
91 |     validation_num = int(ratio[1] / sum(ratio) * total_num)
92 | 
93 |     index_list = list(range(total_num))
94 |     while len(train_set) < train_num:
95 |         train_set.add(random.choice(index_list))
96 |     remains_list = list(set(index_list) - train_set)
97 | 
98 |     validation_set = set()
99 |     while len(validation_set) < validation_num:
100 |         validation_set.add(random.choice(remains_list))
101 |     test_set = set(remains_list) - validation_set
102 | 
103 |     def write_to_file(path, data_set):
104 |         idx_list = [str(x) for x in data_set]
105 |         with open(path, 'w', encoding='utf-8') as f:
106 |             f.write(' '.join(idx_list))
107 | 
108 |     # save data
109 |     if not os.path.exists(save_dir):
110 |         os.makedirs(save_dir)
111 |     print(get_now_time() + 'writing index data to {}'.format(save_dir))
112 |     write_to_file(save_dir + 'train.index', train_set)
113 |     write_to_file(save_dir + 'validation.index', validation_set)
114 |     write_to_file(save_dir + 'test.index', test_set)
115 | 
116 | 
117 | def two_seq_same(sa, sb):
118 |     if len(sa) != len(sb):
119 |         return False
120 |     for (wa, wb) in zip(sa, sb):
121 |         if wa != wb:
122 |             return False
123 |     return True
124 | 
125 | 
126 | def unique_sentence_percent(sequence_batch):
127 |     unique_seq = []
128 |     for seq in sequence_batch:
129 |         count = 0
130 |         for uni_seq in unique_seq:
131 |             if two_seq_same(seq, uni_seq):
132 |                 count += 1
133 |                 break
134 |         if count == 0:
135 |             unique_seq.append(seq)
136 | 
137 |     return len(unique_seq) / len(sequence_batch), len(unique_seq)
138 | 
139 | 
140 | def chop_before_eos(word2index, ids):
141 |     end = len(ids)
142 |     for idx, i in enumerate(ids):
143 |         if i == word2index['<eos>']:
144 |             end = idx
145 |             break
146 |     return ids[:end]
147 | 
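# A minimal usage sketch for the helpers above (toy, hypothetical values only; the real
# pipeline supplies actual model outputs). `predicted` holds (ground-truth rating,
# predicted rating) pairs, and generated explanations are compared as token-id lists:
#
#     predicted = [(4.0, 3.2), (5.0, 5.7), (2.0, 2.0)]
#     mae = mean_absolute_error(predicted, max_r=5.0, min_r=1.0)     # predictions clipped to [1, 5]
#     rmse = root_mean_square_error(predicted, max_r=5.0, min_r=1.0)
#
#     sequences = [[3, 7, 9], [3, 7, 9], [5, 2]]
#     usr, usn = unique_sentence_percent(sequences)    # -> (2/3, 2); cf. the USR/USN figures in the log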
148 | 
149 | def feature_detect(seq_batch, feature_set):
150 |     feature_batch = []
151 |     for ids in seq_batch:
152 |         feature_list = []
153 |         for i in ids:
154 |             if i in feature_set:
155 |                 feature_list.append(i)
156 |         feature_batch.append(feature_list)
157 | 
158 |     return feature_batch
159 | 
160 | 
161 | def feature_matching_ratio(feature_batch, test_feature):
162 |     count = 0
163 |     for (fea_list, fea) in zip(feature_batch, test_feature):
164 |         for f in fea_list:
165 |             if f == fea:
166 |                 count += 1
167 |                 break
168 | 
169 |     return count / len(feature_batch)
170 | 
171 | 
172 | def feature_coverage_ratio(feature_batch, feature_set):
173 |     feature_list = []
174 |     for fb in feature_batch:
175 |         feature_list.extend(fb)
176 | 
177 |     return len(set(feature_list)) / len(feature_set)
178 | 
179 | 
180 | def feature_diversity(feature_batch):
181 |     list_len = len(feature_batch)
182 | 
183 |     total_count = 0
184 |     for i, x in enumerate(feature_batch):
185 |         for j in range(i + 1, list_len):
186 |             y = feature_batch[j]
187 |             for k in y:
188 |                 if k in x:
189 |                     total_count += 1
190 | 
191 |     denominator = list_len * (list_len - 1) / 2
192 | 
193 |     return total_count / denominator
194 | 
195 | 
196 | def ids2tokens(word_list, ids):
197 |     result = []
198 |     for i in ids:
199 |         result.append(word_list[i])
200 | 
201 |     return result
202 | 
203 | 
204 | def bleu_score(references, generated, n_gram=4, smooth=False):
205 |     """a list of lists of tokens"""
206 |     formatted_ref = [[ref] for ref in references]
207 |     bleu_s, _, _, _, _, _ = compute_bleu(formatted_ref, generated, n_gram, smooth)
208 |     return bleu_s * 100
209 | 
210 | 
211 | def rouge_score(references, generated):
212 |     """both are a list of strings"""
213 |     score = rouge(generated, references)
214 |     rouge_s = {k: (v * 100) for (k, v) in score.items()}
215 |     '''
216 |     "rouge_1/f_score": rouge_1_f,
217 |     "rouge_1/r_score": rouge_1_r,
218 |     "rouge_1/p_score": rouge_1_p,
219 |     "rouge_2/f_score": rouge_2_f,
220 |     "rouge_2/r_score": rouge_2_r,
221 |     "rouge_2/p_score": rouge_2_p,
222 |     "rouge_l/f_score": rouge_l_f,
223 |     "rouge_l/r_score": rouge_l_r,
224 |     "rouge_l/p_score": rouge_l_p,
225 |     '''
226 |     return rouge_s
227 | 
228 | 
229 | def ids2sentence(word2index, word_list, ids):
230 |     result = []
231 |     for i in ids:
232 |         if i != word2index['<eos>']:
233 |             result.append(word_list[i])
234 |         else:
235 |             break
236 |     return ' '.join(result)
237 | 
238 | 
239 | def pad_sequence_4_generation(sequence_batch, pad_int):
240 |     '''
241 |     Pad sentences with <pad> so that each sentence of a batch has the same length
242 |     :param sequence_batch: a list of lists
243 |     :return: 2d numpy matrix, 1d numpy vector
244 |     '''
245 |     seq_len = [len(sequence) for sequence in sequence_batch]
246 |     max_seq_len = max(seq_len)
247 |     new_batch = [sequence + [pad_int] * (max_seq_len - len(sequence)) for sequence in sequence_batch]
248 |     new_batch = np.asarray(new_batch, dtype=np.int32)
249 |     new_seq_len = np.asarray(seq_len, dtype=np.int32)
250 | 
251 |     return new_batch, new_seq_len
252 | 
253 | 
254 | def evaluate_ndcg(user2items_test, user2items_top):
255 |     top_k = len(list(user2items_top.values())[0])
256 |     dcgs = [1 / math.log(i + 2) for i in range(top_k)]
257 | 
258 |     ndcg = 0
259 |     for u, test_items in user2items_test.items():
260 |         rank_list = user2items_top[u]
261 |         dcg_u = 0
262 |         for idx, item in enumerate(rank_list):
263 |             if item in test_items:
264 |                 dcg_u += dcgs[idx]
265 |         ndcg += dcg_u
266 | 
267 |     return ndcg / (sum(dcgs) * len(user2items_test))
268 | 
269 | 
270 | def evaluate_precision_recall_f1(user2items_test, user2items_top):
271 |     top_k = len(list(user2items_top.values())[0])
272 | 
273 |     precision_sum = 0
274 |     recall_sum = 0 # it is also named hit ratio
275 |     f1_sum = 0
276 |     for u, test_items in user2items_test.items():
277 |         rank_list = user2items_top[u]
278 |         hits = len(test_items & set(rank_list))
279 |         pre = hits / top_k
280 |         rec = hits / len(test_items)
281 |         precision_sum += pre
282 |         recall_sum += rec
283 |         if (pre + rec) > 0:
284 |             f1_sum += 2 * pre * rec / (pre + rec)
285 | 
286 |     precision = precision_sum / len(user2items_test)
287 |     recall = recall_sum / len(user2items_test)
288 |     f1 = f1_sum / len(user2items_test)
289 | 
290 |     return precision, recall, f1
291 | 
--------------------------------------------------------------------------------
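
The DIV, FCR, FMR, BLEU and ROUGE figures reported in tripadviosr.1.log correspond to the helpers defined in utils.py. The snippet below is a minimal, self-contained sketch of how those helpers fit together; the vocabulary, token-id sequences and feature ids are hypothetical toy values chosen only to show the expected input shapes, and it assumes it is run from the repository root so that bleu.py and rouge.py are importable.

```python
from utils import (feature_detect, feature_matching_ratio, feature_coverage_ratio,
                   feature_diversity, bleu_score, rouge_score)

# Toy data: generated and reference explanations as token-id lists, the set of
# feature (aspect) word ids, and the ground-truth feature for each test case.
word_list = ['<pad>', '<eos>', 'the', 'room', 'staff', 'was', 'clean', 'friendly']
generated_ids = [[2, 3, 5, 6], [2, 4, 5, 7]]   # "the room was clean", "the staff was friendly"
reference_ids = [[2, 3, 5, 6], [2, 4, 5, 6]]
feature_set = {3, 4}                           # ids of 'room' and 'staff'
test_features = [3, 4]                         # ground-truth feature per test case

feature_batch = feature_detect(generated_ids, feature_set)
print('FMR:', feature_matching_ratio(feature_batch, test_features))
print('FCR:', feature_coverage_ratio(feature_batch, feature_set))
print('DIV:', feature_diversity(feature_batch))

# BLEU takes token lists; ROUGE takes whitespace-joined strings.
gen_tokens = [[word_list[i] for i in ids] for ids in generated_ids]
ref_tokens = [[word_list[i] for i in ids] for ids in reference_ids]
print('BLEU-1:', bleu_score(ref_tokens, gen_tokens, n_gram=1, smooth=True))
print('BLEU-4:', bleu_score(ref_tokens, gen_tokens, n_gram=4, smooth=True))
print('ROUGE:', rouge_score([' '.join(t) for t in ref_tokens], [' '.join(t) for t in gen_tokens]))
```

Note that feature_diversity averages the number of features shared between pairs of generated explanations, so lower DIV values indicate less repeated feature usage across the generated set.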
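
utils.py also defines ranking metrics (evaluate_ndcg and evaluate_precision_recall_f1) that are not printed in the log excerpt above. Both expect per-user inputs: ground-truth items as sets and ranked top-k recommendation lists of equal length. The following is a hypothetical sketch of those input shapes, again with toy values.

```python
from utils import evaluate_ndcg, evaluate_precision_recall_f1

# Toy data: user -> set of held-out test items, and user -> ranked top-k list (k = 3 here).
user2items_test = {'u1': {10, 12}, 'u2': {30}}
user2items_top = {'u1': [10, 11, 13], 'u2': [31, 30, 33]}

print('NDCG@3:', evaluate_ndcg(user2items_test, user2items_top))
precision, recall, f1 = evaluate_precision_recall_f1(user2items_test, user2items_top)
print('Precision@3: {:.4f}, Recall@3: {:.4f}, F1@3: {:.4f}'.format(precision, recall, f1))
```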