├── README.md ├── config └── BPRMF.conf ├── models ├── __init__.py ├── __init__.pyc ├── __pycache__ │ └── __init__.cpython-36.pyc ├── item_ranking │ ├── NeuMF_cikm.py │ ├── NeuMF_cikm_p.py │ ├── NeuMF_cikm_que.py │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── NeuMF_cikm.cpython-36.pyc │ │ ├── NeuMF_cikm_p.cpython-36.pyc │ │ ├── NeuMF_cikm_que.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── bprmf.cpython-36.pyc │ │ ├── cdae.cpython-36.pyc │ │ ├── cml.cpython-36.pyc │ │ ├── gmf.cpython-36.pyc │ │ ├── jrl.cpython-36.pyc │ │ ├── lrml.cpython-36.pyc │ │ ├── mlp.cpython-36.pyc │ │ ├── neumf.cpython-36.pyc │ │ ├── neumf_my.cpython-36.pyc │ │ └── neumf_my_tail.cpython-36.pyc │ ├── bprmf.py │ ├── bprmf.pyc │ ├── cdae.py │ ├── cdae.pyc │ ├── cml.py │ ├── cml.pyc │ ├── dmf.py │ ├── dssm.py │ ├── gmf.py │ ├── gmf.pyc │ ├── jrl.py │ ├── jrl.pyc │ ├── lrml.py │ ├── lrml.pyc │ ├── mlp.py │ ├── mlp.pyc │ ├── neumf.py │ ├── neumf.pyc │ ├── neumf_my.py │ ├── neumf_my_tail.py │ ├── neurec.py │ └── widedeep.py ├── rating_prediction │ ├── __init__.py │ ├── afm.py │ ├── autorec.py │ ├── fm.py │ ├── fml.py │ ├── mf.py │ ├── nfm.py │ ├── nnmf.py │ └── nrr.py └── seq_rec │ ├── AttRec.py │ ├── Caser.py │ ├── PRME.py │ └── gru4rec.py ├── test ├── KMMD.py ├── __init__.py ├── center_loss.py ├── process_data.py ├── testSeqRec.py ├── test_cikm.py ├── test_item_ranking.py ├── test_movielen.py ├── test_myself.py └── test_rating_pred.py ├── test_cikm.py └── utils ├── __init__.py ├── __init__.pyc ├── __pycache__ └── __init__.cpython-36.pyc ├── config └── config.py ├── evaluation ├── RankingMetrics.py ├── RankingMetrics.pyc ├── RatingMetrics.py ├── SeqRecMetrics.py ├── __init__.py ├── __init__.pyc └── __pycache__ │ ├── RankingMetrics.cpython-36.pyc │ └── __init__.cpython-36.pyc ├── load_data ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── load_data_my.cpython-36.pyc │ └── load_data_ranking.cpython-36.pyc ├── load_data_cikm.py ├── load_data_content.py ├── load_data_my.py ├── load_data_ranking.py ├── load_data_ranking.pyc ├── load_data_rating.py └── load_data_seq.py └── log └── Log.py /README.md: -------------------------------------------------------------------------------- 1 | # Updating, detailed process and explanation are coming soon 2 | 3 | ## Training 4 | ``` 5 | python test_cikm.py 6 | ``` 7 | -------------------------------------------------------------------------------- /config/BPRMF.conf: -------------------------------------------------------------------------------- 1 | ratings=../dataset/filmtrust/ratings.txt 2 | ratings.setup=-columns 0 1 2 3 | recommender=BPR 4 | evaluation.setup=-ap 0.2 -b 1 5 | item.ranking=on -topN 10,20 6 | num.factors=20 7 | num.max.iter=20 8 | learnRate=-init 0.02 -max 1 9 | reg.lambda=-u 0.01 -i 0.01 -b 0.2 -s 0.2 10 | output.setup=on -dir ../results/ -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/__init__.py -------------------------------------------------------------------------------- /models/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/__init__.pyc -------------------------------------------------------------------------------- 
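The README's training command (`python test_cikm.py`) is the only documented entry point, and the script itself is not reproduced above. For orientation, the item-ranking models in `models/item_ranking` all follow the same driver pattern: construct the model with a `tf.Session`, call `build_network()`, then `execute(train_data, test_data)`. A minimal sketch, assuming a hypothetical `load_data_neg` loader in `utils/load_data/load_data_ranking.py` that returns a SciPy sparse training matrix, a per-user test dict, and the user/item counts:

```python
# Hypothetical driver sketch (test_cikm.py itself is not shown in this dump).
import tensorflow as tf

from models.item_ranking.bprmf import BPRMF
# Assumed loader name and signature; adjust to the actual functions in load_data_ranking.py.
from utils.load_data.load_data_ranking import load_data_neg

# Assumed to return (sparse train matrix, {user: [held-out items]}, num_user, num_item).
train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t")

with tf.Session() as sess:
    model = BPRMF(sess, n_user, n_item, learning_rate=0.001, batch_size=1024, epoch=50)
    model.build_network(num_factor=30)      # placeholders, embeddings, and the BPR loss
    model.execute(train_data, test_data)    # trains, evaluating every T epochs
    model.save("./checkpoints/bprmf.ckpt")  # optional checkpoint
```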
/models/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__init__.py -------------------------------------------------------------------------------- /models/item_ranking/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__init__.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/NeuMF_cikm.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/NeuMF_cikm.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/NeuMF_cikm_p.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/NeuMF_cikm_p.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/NeuMF_cikm_que.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/NeuMF_cikm_que.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/bprmf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/bprmf.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/cdae.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/cdae.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/cml.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/cml.cpython-36.pyc -------------------------------------------------------------------------------- 
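Further below in this listing, `models/item_ranking/bprmf.py` implements the pairwise BPR objective from Rendle et al.: for each user u with an observed item i and a sampled negative item j, it maximizes the log-sigmoid of the score difference between i and j, plus L2 regularization of the embeddings. A short NumPy sketch of that same loss (hypothetical embedding matrices `P` and `Q` indexed by id arrays; not part of the repository):

```python
import numpy as np

def bpr_loss(P, Q, users, pos_items, neg_items, reg_rate=0.1):
    """Batch BPR loss over (user, positive item, negative item) triples."""
    x_ui = np.sum(P[users] * Q[pos_items], axis=1)  # scores of observed items
    x_uj = np.sum(P[users] * Q[neg_items], axis=1)  # scores of sampled negatives
    log_sig = -np.log1p(np.exp(-(x_ui - x_uj)))     # log sigmoid(x_ui - x_uj)
    l2 = 0.5 * (np.sum(P ** 2) + np.sum(Q ** 2))    # same form as tf.nn.l2_loss(P) + tf.nn.l2_loss(Q)
    return -np.sum(log_sig) + reg_rate * l2
```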
/models/item_ranking/__pycache__/gmf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/gmf.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/jrl.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/jrl.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/lrml.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/lrml.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/mlp.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/mlp.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/neumf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/neumf.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/neumf_my.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/neumf_my.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/__pycache__/neumf_my_tail.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/__pycache__/neumf_my_tail.cpython-36.pyc -------------------------------------------------------------------------------- /models/item_ranking/bprmf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Bayesian Personalized Ranking Model. 3 | Reference: Rendle, Steffen, et al. "BPR: Bayesian personalized ranking from implicit feedback." Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence. AUAI Press, 2009. 
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RankingMetrics import * 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class BPRMF(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.1, epoch=500, batch_size=1024, 24 | verbose=False, T=5, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.verbose = verbose 33 | self.T = T 34 | self.display_step = display_step 35 | print("BPRMF.") 36 | 37 | def build_network(self, num_factor=30): 38 | 39 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 40 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 41 | self.neg_item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_item_id') 42 | self.y = tf.placeholder("float", [None], 'rating') 43 | 44 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor], stddev=0.01)) 45 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor], stddev=0.01)) 46 | 47 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 48 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 49 | neg_item_latent_factor = tf.nn.embedding_lookup(self.Q, self.neg_item_id) 50 | 51 | self.pred_y = tf.reduce_sum(tf.multiply(user_latent_factor, item_latent_factor), 1) 52 | self.pred_y_neg = tf.reduce_sum(tf.multiply(user_latent_factor, neg_item_latent_factor), 1) 53 | 54 | self.loss = - tf.reduce_sum(tf.log(tf.sigmoid(self.pred_y - self.pred_y_neg))) + self.reg_rate * ( 55 | tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q)) 56 | 57 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 58 | 59 | return self 60 | 61 | def prepare_data(self, train_data, test_data): 62 | ''' 63 | You must prepare the data before train and test the model 64 | :param train_data: 65 | :param test_data: 66 | :return: 67 | ''' 68 | t = train_data.tocoo() 69 | self.user = t.row.reshape(-1) 70 | self.item = t.col.reshape(-1) 71 | self.num_training = len(self.item) 72 | self.test_data = test_data 73 | self.total_batch = int(self.num_training / self.batch_size) 74 | self.neg_items = self._get_neg_items(train_data.tocsr()) 75 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 76 | print("data preparation finished.") 77 | return self 78 | 79 | def train(self): 80 | idxs = np.random.permutation(self.num_training) # shuffled ordering 81 | user_random = list(self.user[idxs]) 82 | item_random = list(self.item[idxs]) 83 | item_random_neg = [] 84 | for u in user_random: 85 | neg_i = self.neg_items[u] 86 | s = np.random.randint(len(neg_i)) 87 | item_random_neg.append(neg_i[s]) 88 | 89 | # train 90 | for i in range(self.total_batch): 91 | start_time = time.time() 92 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 93 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 94 | batch_item_neg = item_random_neg[i * self.batch_size:(i + 1) * self.batch_size] 95 | 96 | _, loss = self.sess.run((self.optimizer, self.loss), feed_dict={self.user_id: 
batch_user, 97 | self.item_id: batch_item, 98 | self.neg_item_id: batch_item_neg}) 99 | 100 | if i % self.display_step == 0: 101 | if self.verbose: 102 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 103 | print("one iteration: %s seconds." % (time.time() - start_time)) 104 | 105 | def test(self): 106 | evaluate(self) 107 | 108 | def execute(self, train_data, test_data): 109 | 110 | self.prepare_data(train_data, test_data) 111 | 112 | init = tf.global_variables_initializer() 113 | self.sess.run(init) 114 | 115 | for epoch in range(self.epochs): 116 | self.train() 117 | if (epoch) % self.T == 0: 118 | print("Epoch: %04d; " % (epoch)) 119 | self.test() 120 | 121 | def save(self, path): 122 | saver = tf.train.Saver() 123 | saver.save(self.sess, path) 124 | 125 | def predict(self, user_id, item_id): 126 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 127 | 128 | def _get_neg_items(self, data): 129 | all_items = set(np.arange(self.num_item)) 130 | neg_items = {} 131 | for u in range(self.num_user): 132 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 133 | 134 | return neg_items 135 | -------------------------------------------------------------------------------- /models/item_ranking/bprmf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/bprmf.pyc -------------------------------------------------------------------------------- /models/item_ranking/cdae.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of CDAE. 3 | Reference: Wu, Yao, et al. "Collaborative denoising auto-encoders for top-n recommender systems." Proceedings of the Ninth ACM International Conference on Web Search and Data Mining. ACM, 2016. 
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RankingMetrics import evaluate 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class CDAE(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.01, reg_rate=0.01, epoch=500, batch_size=100, 24 | verbose=False, T=1, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.verbose = verbose 33 | self.T = T 34 | self.display_step = display_step 35 | print("CDAE.") 36 | 37 | def build_network(self, hidden_neuron=500, corruption_level=0): 38 | self.corrupted_rating_matrix = tf.placeholder(dtype=tf.float32, shape=[None, self.num_item]) 39 | self.rating_matrix = tf.placeholder(dtype=tf.float32, shape=[None, self.num_item]) 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None]) 41 | self.corruption_level = corruption_level 42 | 43 | W = tf.Variable(tf.random_normal([self.num_item, hidden_neuron], stddev=0.01)) 44 | W_prime = tf.Variable(tf.random_normal([hidden_neuron, self.num_item], stddev=0.01)) 45 | V = tf.Variable(tf.random_normal([self.num_user, hidden_neuron], stddev=0.01)) 46 | 47 | b = tf.Variable(tf.random_normal([hidden_neuron], stddev=0.01)) 48 | b_prime = tf.Variable(tf.random_normal([self.num_item], stddev=0.01)) 49 | print(np.shape(tf.matmul(self.corrupted_rating_matrix, W))) 50 | print(np.shape(tf.nn.embedding_lookup(V, self.user_id))) 51 | layer_1 = tf.sigmoid(tf.matmul(self.corrupted_rating_matrix, W) + tf.nn.embedding_lookup(V, self.user_id) + b) 52 | self.layer_2 = tf.sigmoid(tf.matmul(layer_1, W_prime) + b_prime) 53 | 54 | self.loss = - tf.reduce_sum( 55 | self.rating_matrix * tf.log(self.layer_2) + (1 - self.rating_matrix) * tf.log(1 - self.layer_2)) \ 56 | + self.reg_rate * ( 57 | tf.nn.l2_loss(W) + tf.nn.l2_loss(W_prime) + tf.nn.l2_loss(V) + tf.nn.l2_loss(b) + tf.nn.l2_loss(b_prime)) 58 | 59 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 60 | 61 | def prepare_data(self, train_data, test_data): 62 | self.train_data = self._data_process(train_data) 63 | self.neg_items = self._get_neg_items(train_data) 64 | self.num_training = self.num_user 65 | self.total_batch = int(self.num_training / self.batch_size) 66 | self.test_data = test_data 67 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 68 | print("data preparation finished.") 69 | 70 | def train(self): 71 | 72 | idxs = np.random.permutation(self.num_training) # shuffled ordering 73 | 74 | for i in range(self.total_batch): 75 | start_time = time.time() 76 | if i == self.total_batch - 1: 77 | batch_set_idx = idxs[i * self.batch_size:] 78 | elif i < self.total_batch - 1: 79 | batch_set_idx = idxs[i * self.batch_size: (i + 1) * self.batch_size] 80 | 81 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={ 82 | self.corrupted_rating_matrix: self._get_corrupted_input(self.train_data[batch_set_idx, :], 83 | self.corruption_level), 84 | self.rating_matrix: self.train_data[batch_set_idx, :], 85 | self.user_id: batch_set_idx 86 | }) 87 | if self.verbose and i % self.display_step == 0: 88 | 
print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 89 | if self.verbose: 90 | print("one iteration: %s seconds." % (time.time() - start_time)) 91 | 92 | def test(self): 93 | self.reconstruction = self.sess.run(self.layer_2, feed_dict={self.corrupted_rating_matrix: self.train_data, 94 | self.user_id: range(self.num_user)}) 95 | 96 | evaluate(self) 97 | 98 | def execute(self, train_data, test_data): 99 | self.prepare_data(train_data, test_data) 100 | init = tf.global_variables_initializer() 101 | self.sess.run(init) 102 | for epoch in range(self.epochs): 103 | self.train() 104 | if (epoch) % self.T == 0: 105 | # print("Epoch: %04d; " % (epoch), end='') 106 | print("Epoch: %04d; " % (epoch)) 107 | self.test() 108 | 109 | def save(self, path): 110 | saver = tf.train.Saver() 111 | saver.save(self.sess, path) 112 | 113 | def predict(self, user_id, item_id): 114 | return np.array(self.reconstruction[np.array(user_id), np.array(item_id)]) 115 | 116 | def _data_process(self, data): 117 | return np.asmatrix(data) 118 | 119 | def _get_neg_items(self, data): 120 | neg_items = {} 121 | for u in range(self.num_user): 122 | neg_items[u] = [k for k, i in enumerate(data[u]) if data[u][k] == 0] 123 | # print(neg_items[u]) 124 | 125 | return neg_items 126 | 127 | def _get_corrupted_input(self, input, corruption_level): 128 | return np.random.binomial(n=1, p=1 - corruption_level) * input 129 | 130 | 131 | class ICDAE(): 132 | ''' 133 | Based on CDAE and I-AutoRec, I designed the following item based CDAE, it seems to perform better than CDAE slightly. 134 | ''' 135 | 136 | def __init__(self, sess, num_user, num_item, learning_rate=0.01, reg_rate=0.01, epoch=500, batch_size=300, 137 | verbose=False, T=2, display_step=1000): 138 | self.learning_rate = learning_rate 139 | self.epochs = epoch 140 | self.batch_size = batch_size 141 | self.reg_rate = reg_rate 142 | self.sess = sess 143 | self.num_user = num_user 144 | self.num_item = num_item 145 | self.verbose = verbose 146 | self.T = T 147 | self.display_step = display_step 148 | print("Item based CDAE.") 149 | 150 | def build_network(self, hidden_neuron=500, corruption_level=0): 151 | self.corrupted_interact_matrix = tf.placeholder(dtype=tf.float32, shape=[None, self.num_user]) 152 | self.interact_matrix = tf.placeholder(dtype=tf.float32, shape=[None, self.num_user]) 153 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None]) 154 | self.corruption_level = corruption_level 155 | 156 | W = tf.Variable(tf.random_normal([self.num_user, hidden_neuron], stddev=0.01)) 157 | W_prime = tf.Variable(tf.random_normal([hidden_neuron, self.num_user], stddev=0.01)) 158 | V = tf.Variable(tf.random_normal([self.num_item, hidden_neuron], stddev=0.01)) 159 | 160 | b = tf.Variable(tf.random_normal([hidden_neuron], stddev=0.01)) 161 | b_prime = tf.Variable(tf.random_normal([self.num_user], stddev=0.01)) 162 | # print(np.shape(tf.matmul(self.corrupted_interact_matrix, W))) 163 | # print(np.shape( tf.nn.embedding_lookup(V, self.item_id))) 164 | layer_1 = tf.sigmoid(tf.matmul(self.corrupted_interact_matrix, W) + b) 165 | self.layer_2 = tf.sigmoid(tf.matmul(layer_1, W_prime) + b_prime) 166 | 167 | self.loss = - tf.reduce_sum( 168 | self.interact_matrix * tf.log(self.layer_2) + (1 - self.interact_matrix) * tf.log(1 - self.layer_2)) \ 169 | + self.reg_rate * ( 170 | tf.nn.l2_loss(W) + tf.nn.l2_loss(W_prime) + tf.nn.l2_loss(b) + tf.nn.l2_loss(b_prime)) 171 | 172 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 173 | 174 | def 
prepare_data(self, train_data, test_data): 175 | self.train_data = self._data_process(train_data).transpose() 176 | self.neg_items = self._get_neg_items(train_data) 177 | self.num_training = self.num_item 178 | self.total_batch = int(self.num_training / self.batch_size) 179 | self.test_data = test_data 180 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 181 | print("data preparation finished.") 182 | 183 | def train(self): 184 | 185 | idxs = np.random.permutation(self.num_training) # shuffled ordering 186 | 187 | for i in range(self.total_batch): 188 | start_time = time.time() 189 | if i == self.total_batch - 1: 190 | batch_set_idx = idxs[i * self.batch_size:] 191 | elif i < self.total_batch - 1: 192 | batch_set_idx = idxs[i * self.batch_size: (i + 1) * self.batch_size] 193 | 194 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={ 195 | self.corrupted_interact_matrix: self._get_corrupted_input(self.train_data[batch_set_idx, :], 196 | self.corruption_level), 197 | self.interact_matrix: self.train_data[batch_set_idx, :], 198 | self.item_id: batch_set_idx 199 | }) 200 | if self.verbose and i % self.display_step == 0: 201 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 202 | if self.verbose: 203 | print("one iteration: %s seconds." % (time.time() - start_time)) 204 | 205 | def test(self): 206 | self.reconstruction = self.sess.run(self.layer_2, feed_dict={self.corrupted_interact_matrix: self.train_data, 207 | self.item_id: range(self.num_item)}).transpose() 208 | 209 | evaluate(self) 210 | 211 | def execute(self, train_data, test_data): 212 | self.prepare_data(train_data, test_data) 213 | init = tf.global_variables_initializer() 214 | self.sess.run(init) 215 | for epoch in range(self.epochs): 216 | self.train() 217 | if (epoch) % self.T == 0: 218 | print("Epoch: %04d; " % (epoch)) 219 | self.test() 220 | 221 | def save(self, path): 222 | saver = tf.train.Saver() 223 | saver.save(self.sess, path) 224 | 225 | def predict(self, user_id, item_id): 226 | return np.array(self.reconstruction[np.array(user_id), np.array(item_id)]) 227 | 228 | def _data_process(self, data): 229 | return np.asmatrix(data) 230 | 231 | def _get_neg_items(self, data): 232 | neg_items = {} 233 | for u in range(self.num_user): 234 | neg_items[u] = [k for k, i in enumerate(data[u]) if data[u][k] == 0] 235 | # print(neg_items[u]) 236 | 237 | return neg_items 238 | 239 | def _get_corrupted_input(self, input, corruption_level): 240 | return np.random.binomial(n=1, p=1 - corruption_level) * input 241 | -------------------------------------------------------------------------------- /models/item_ranking/cdae.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/cdae.pyc -------------------------------------------------------------------------------- /models/item_ranking/cml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Collaborative Metric Learning. 3 | Reference: Hsieh, Cheng-Kang, et al. "Collaborative metric learning." Proceedings of the 26th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2017. 
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RankingMetrics import * 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class CML(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.1, reg_rate=0.1, epoch=500, batch_size=500, 24 | verbose=False, T=5, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.verbose = verbose 33 | self.T = T 34 | self.display_step = display_step 35 | print("CML.") 36 | 37 | def build_network(self, num_factor=100, margin=0.5, norm_clip_value=1): 38 | 39 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 40 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 41 | self.neg_item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_item_id') 42 | self.keep_rate = tf.placeholder(tf.float32) 43 | 44 | P = tf.Variable(tf.random_normal([self.num_user, num_factor], stddev=1 / (num_factor ** 0.5)), dtype=tf.float32) 45 | Q = tf.Variable(tf.random_normal([self.num_item, num_factor], stddev=1 / (num_factor ** 0.5)), dtype=tf.float32) 46 | 47 | user_embedding = tf.nn.embedding_lookup(P, self.user_id) 48 | item_embedding = tf.nn.embedding_lookup(Q, self.item_id) 49 | neg_item_embedding = tf.nn.embedding_lookup(Q, self.neg_item_id) 50 | 51 | self.pred_distance = tf.reduce_sum( 52 | tf.nn.dropout(tf.squared_difference(user_embedding, item_embedding), self.keep_rate), 1) 53 | self.pred_distance_neg = tf.reduce_sum( 54 | tf.nn.dropout(tf.squared_difference(user_embedding, neg_item_embedding), self.keep_rate), 1) 55 | 56 | self.loss = tf.reduce_sum(tf.maximum(self.pred_distance - self.pred_distance_neg + margin, 0)) 57 | 58 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss, var_list=[P, Q]) 59 | self.clip_P = tf.assign(P, tf.clip_by_norm(P, norm_clip_value, axes=[1])) 60 | self.clip_Q = tf.assign(Q, tf.clip_by_norm(Q, norm_clip_value, axes=[1])) 61 | 62 | return self 63 | 64 | def prepare_data(self, train_data, test_data): 65 | ''' 66 | You must prepare the data before train and test the model 67 | :param train_data: 68 | :param test_data: 69 | :return: 70 | ''' 71 | t = train_data.tocoo() 72 | self.user = t.row.reshape(-1) 73 | self.item = t.col.reshape(-1) 74 | self.num_training = len(self.item) 75 | self.test_data = test_data 76 | self.total_batch = int(self.num_training / self.batch_size) 77 | self.neg_items = self._get_neg_items(train_data.tocsr()) 78 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 79 | print(self.total_batch) 80 | print("data preparation finished.") 81 | return self 82 | 83 | def train(self): 84 | idxs = np.random.permutation(self.num_training) # shuffled ordering 85 | user_random = list(self.user[idxs]) 86 | item_random = list(self.item[idxs]) 87 | item_random_neg = [] 88 | for u in user_random: 89 | neg_i = self.neg_items[u] 90 | s = np.random.randint(len(neg_i)) 91 | item_random_neg.append(neg_i[s]) 92 | 93 | # train 94 | for i in range(self.total_batch): 95 | start_time = time.time() 96 | batch_user = user_random[i * self.batch_size:(i 
+ 1) * self.batch_size] 97 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 98 | batch_item_neg = item_random_neg[i * self.batch_size:(i + 1) * self.batch_size] 99 | 100 | _, loss, _, _ = self.sess.run((self.optimizer, self.loss, self.clip_P, self.clip_Q), 101 | feed_dict={self.user_id: batch_user, 102 | self.item_id: batch_item, 103 | self.neg_item_id: batch_item_neg, 104 | self.keep_rate: 0.98}) 105 | 106 | if i % self.display_step == 0: 107 | if self.verbose: 108 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 109 | print("one iteration: %s seconds." % (time.time() - start_time)) 110 | 111 | def test(self): 112 | evaluate(self) 113 | 114 | def execute(self, train_data, test_data): 115 | 116 | self.prepare_data(train_data, test_data) 117 | 118 | init = tf.global_variables_initializer() 119 | self.sess.run(init) 120 | 121 | for epoch in range(self.epochs): 122 | self.train() 123 | if (epoch) % self.T == 0: 124 | print("Epoch: %04d; " % (epoch)) 125 | self.test() 126 | 127 | def save(self, path): 128 | saver = tf.train.Saver() 129 | saver.save(self.sess, path) 130 | 131 | def predict(self, user_id, item_id): 132 | return -self.sess.run([self.pred_distance], 133 | feed_dict={self.user_id: user_id, self.item_id: item_id, self.keep_rate: 1})[0] 134 | 135 | def _get_neg_items(self, data): 136 | all_items = set(np.arange(self.num_item)) 137 | neg_items = {} 138 | for u in range(self.num_user): 139 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 140 | 141 | return neg_items 142 | 143 | 144 | class CMLwarp(): 145 | """ 146 | To appear. 147 | 148 | 149 | """ 150 | 151 | def __init__(self, sess, num_user, num_item, learning_rate=0.1, reg_rate=0.1, epoch=500, batch_size=500, 152 | verbose=False, T=5, display_step=1000): 153 | self.learning_rate = learning_rate 154 | self.epochs = epoch 155 | self.batch_size = batch_size 156 | self.reg_rate = reg_rate 157 | self.sess = sess 158 | self.num_user = num_user 159 | self.num_item = num_item 160 | self.verbose = verbose 161 | self.T = T 162 | self.display_step = display_step 163 | print("CML warp loss.") 164 | 165 | def build_network(self, num_factor=100, margin=0.5, norm_clip_value=1): 166 | 167 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 168 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 169 | self.neg_item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_item_id') 170 | 171 | P = tf.Variable(tf.random_normal([self.num_user, num_factor], stddev=1 / (num_factor ** 0.5))) 172 | Q = tf.Variable(tf.random_normal([self.num_item, num_factor], stddev=1 / (num_factor ** 0.5))) 173 | 174 | user_embedding = tf.nn.embedding_lookup(P, self.user_id) 175 | item_embedding = tf.nn.embedding_lookup(Q, self.item_id) 176 | neg_item_embedding = tf.nn.embedding_lookup(Q, self.neg_item_id) 177 | 178 | self.pred_distance = tf.reduce_sum(tf.squared_difference(user_embedding, item_embedding), 1) 179 | self.pred_distance_neg = tf.reduce_sum(tf.squared_difference(user_embedding, neg_item_embedding), 1) 180 | 181 | self.loss = tf.reduce_sum(tf.maximum(self.pred_distance - self.pred_distance_neg + margin, 0)) 182 | 183 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss, var_list=[P, Q]) 184 | self.clip_P = tf.assign(P, tf.clip_by_norm(P, norm_clip_value, axes=[1])) 185 | self.clip_Q = tf.assign(Q, tf.clip_by_norm(Q, norm_clip_value, axes=[1])) 186 | 187 | return self 188 | 189 | def prepare_data(self, train_data, 
test_data): 190 | ''' 191 | You must prepare the data before train and test the model 192 | :param train_data: 193 | :param test_data: 194 | :return: 195 | ''' 196 | t = train_data.tocoo() 197 | self.user = t.row.reshape(-1) 198 | self.item = t.col.reshape(-1) 199 | self.num_training = len(self.item) 200 | self.test_data = test_data 201 | self.total_batch = int(self.num_training / self.batch_size) 202 | self.neg_items = self._get_neg_items(train_data.tocsr()) 203 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 204 | print("data preparation finished.") 205 | return self 206 | 207 | def train(self): 208 | idxs = np.random.permutation(self.num_training) # shuffled ordering 209 | user_random = list(self.user[idxs]) 210 | item_random = list(self.item[idxs]) 211 | item_random_neg = [] 212 | for u in user_random: 213 | neg_i = self.neg_items[u] 214 | s = np.random.randint(len(neg_i)) 215 | item_random_neg.append(neg_i[s]) 216 | 217 | # train 218 | for i in range(self.total_batch): 219 | start_time = time.time() 220 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 221 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 222 | batch_item_neg = item_random_neg[i * self.batch_size:(i + 1) * self.batch_size] 223 | 224 | _, loss, _, _ = self.sess.run((self.optimizer, self.loss, self.clip_P, self.clip_Q), 225 | feed_dict={self.user_id: batch_user, 226 | self.item_id: batch_item, 227 | self.neg_item_id: batch_item_neg}) 228 | 229 | if i % self.display_step == 0: 230 | if self.verbose: 231 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 232 | print("one iteration: %s seconds." % (time.time() - start_time)) 233 | 234 | def test(self): 235 | evaluate(self) 236 | 237 | def execute(self, train_data, test_data): 238 | 239 | self.prepare_data(train_data, test_data) 240 | 241 | init = tf.global_variables_initializer() 242 | self.sess.run(init) 243 | 244 | for epoch in range(self.epochs): 245 | self.train() 246 | if (epoch) % self.T == 0: 247 | print("Epoch: %04d; " % (epoch)) 248 | self.test() 249 | 250 | def save(self, path): 251 | saver = tf.train.Saver() 252 | saver.save(self.sess, path) 253 | 254 | def predict(self, user_id, item_id): 255 | return - self.sess.run([self.pred_distance], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 256 | 257 | def _get_neg_items(self, data): 258 | all_items = set(np.arange(self.num_item)) 259 | neg_items = {} 260 | for u in range(self.num_user): 261 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 262 | 263 | return neg_items 264 | -------------------------------------------------------------------------------- /models/item_ranking/cml.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/cml.pyc -------------------------------------------------------------------------------- /models/item_ranking/dmf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/dmf.py -------------------------------------------------------------------------------- /models/item_ranking/dssm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Deep Semantic Similarity Model with BPR. 
3 | Reference: Huang, Po-Sen, et al. "Learning deep structured semantic models for web search using clickthrough data." Proceedings of the 22nd ACM international conference on Conference on information & knowledge management. ACM, 2013. 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RankingMetrics import * 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class DSSM(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.1, epoch=500, batch_size=1024, 24 | verbose=False, T=5, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.verbose = verbose 33 | self.T = T 34 | self.display_step = display_step 35 | print("BPRMF.") 36 | 37 | def build_network(self, user_side_info, item_side_info, hidden_dim=100, output_size=30): 38 | 39 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 40 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 41 | self.neg_item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_item_id') 42 | self.y = tf.placeholder("float", [None], 'rating') 43 | 44 | self.user_side_info = tf.constant(user_side_info, dtype=tf.float32) 45 | self.item_side_info = tf.constant(item_side_info, dtype=tf.float32) 46 | 47 | user_input_dim = len(user_side_info[0]) 48 | item_input_dim = len(item_side_info[0]) 49 | 50 | user_input = tf.gather(self.user_side_info, self.user_id, axis=0) 51 | item_input = tf.gather(self.item_side_info, self.item_id, axis=0) 52 | neg_item_input = tf.gather(self.item_side_info, self.neg_item_id, axis=0) 53 | 54 | layer_1 = tf.layers.dense(inputs=user_input, units=user_input_dim, 55 | bias_initializer=tf.random_normal_initializer, 56 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 57 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 58 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dim, activation=tf.sigmoid, 59 | bias_initializer=tf.random_normal_initializer, 60 | kernel_initializer=tf.random_normal_initializer, 61 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 62 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dim, activation=tf.sigmoid, 63 | bias_initializer=tf.random_normal_initializer, 64 | kernel_initializer=tf.random_normal_initializer, 65 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 66 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dim, activation=tf.sigmoid, 67 | bias_initializer=tf.random_normal_initializer, 68 | kernel_initializer=tf.random_normal_initializer, 69 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 70 | user_output = tf.layers.dense(inputs=layer_4, units=output_size, activation=None, 71 | bias_initializer=tf.random_normal_initializer, 72 | kernel_initializer=tf.random_normal_initializer, 73 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 74 | 75 | layer_1 = tf.layers.dense(inputs=item_input, units=item_input_dim, 76 | bias_initializer=tf.random_normal_initializer, 77 | 
kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 78 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 79 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dim, activation=tf.sigmoid, 80 | bias_initializer=tf.random_normal_initializer, 81 | kernel_initializer=tf.random_normal_initializer, 82 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 83 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dim, activation=tf.sigmoid, 84 | bias_initializer=tf.random_normal_initializer, 85 | kernel_initializer=tf.random_normal_initializer, 86 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 87 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dim, activation=tf.sigmoid, 88 | bias_initializer=tf.random_normal_initializer, 89 | kernel_initializer=tf.random_normal_initializer, 90 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 91 | item_output = tf.layers.dense(inputs=layer_4, units=output_size, activation=None, 92 | bias_initializer=tf.random_normal_initializer, 93 | kernel_initializer=tf.random_normal_initializer, 94 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 95 | 96 | self.pred_rating = tf.reshape(output, [-1]) 97 | 98 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 99 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 100 | neg_item_latent_factor = tf.nn.embedding_lookup(self.Q, self.neg_item_id) 101 | 102 | self.pred_y = tf.reduce_sum(tf.multiply(user_latent_factor, item_latent_factor), 1) 103 | self.pred_y_neg = tf.reduce_sum(tf.multiply(user_latent_factor, neg_item_latent_factor), 1) 104 | 105 | self.loss = - tf.reduce_sum(tf.log(tf.sigmoid(self.pred_y - self.pred_y_neg))) + self.reg_rate * ( 106 | tf.norm(self.P) + tf.norm(self.Q)) 107 | 108 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 109 | 110 | return self 111 | 112 | def prepare_data(self, train_data, test_data): 113 | ''' 114 | You must prepare the data before train and test the model 115 | :param train_data: 116 | :param test_data: 117 | :return: 118 | ''' 119 | t = train_data.tocoo() 120 | self.user = t.row.reshape(-1) 121 | self.item = t.col.reshape(-1) 122 | self.num_training = len(self.item) 123 | self.test_data = test_data 124 | self.total_batch = int(self.num_training / self.batch_size) 125 | self.neg_items = self._get_neg_items(train_data.tocsr()) 126 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 127 | print("data preparation finished.") 128 | return self 129 | 130 | def train(self): 131 | idxs = np.random.permutation(self.num_training) # shuffled ordering 132 | user_random = list(self.user[idxs]) 133 | item_random = list(self.item[idxs]) 134 | item_random_neg = [] 135 | for u in user_random: 136 | neg_i = self.neg_items[u] 137 | s = np.random.randint(len(neg_i)) 138 | item_random_neg.append(neg_i[s]) 139 | 140 | # train 141 | for i in range(self.total_batch): 142 | start_time = time.time() 143 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 144 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 145 | batch_item_neg = item_random_neg[i * self.batch_size:(i + 1) * self.batch_size] 146 | 147 | _, loss = self.sess.run((self.optimizer, self.loss), feed_dict={self.user_id: batch_user, 148 | self.item_id: batch_item, 149 | self.neg_item_id: batch_item_neg}) 150 | 151 | if i % 
self.display_step == 0: 152 | if self.verbose: 153 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 154 | print("one iteration: %s seconds." % (time.time() - start_time)) 155 | 156 | def test(self): 157 | evaluate(self) 158 | 159 | def execute(self, train_data, test_data): 160 | 161 | self.prepare_data(train_data, test_data) 162 | 163 | init = tf.global_variables_initializer() 164 | self.sess.run(init) 165 | 166 | for epoch in range(self.epochs): 167 | self.train() 168 | if (epoch) % self.T == 0: 169 | print("Epoch: %04d; " % (epoch), end='') 170 | self.test() 171 | 172 | def save(self, path): 173 | saver = tf.train.Saver() 174 | saver.save(self.sess, path) 175 | 176 | def predict(self, user_id, item_id): 177 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 178 | 179 | def _get_neg_items(self, data): 180 | all_items = set(np.arange(self.num_item)) 181 | neg_items = {} 182 | for u in range(self.num_user): 183 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 184 | 185 | return neg_items 186 | -------------------------------------------------------------------------------- /models/item_ranking/gmf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Collaborative Filtering. 3 | Reference: He, Xiangnan, et al. "Neural collaborative filtering." Proceedings of the 26th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2017. 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | 9 | import random 10 | 11 | from utils.evaluation.RankingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class GMF(): 24 | def __init__(self, sess, num_user, num_item, learning_rate=0.5, reg_rate=0.01, epoch=500, batch_size=256, 25 | verbose=False, T=1, display_step=1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.verbose = verbose 34 | self.T = T 35 | self.display_step = display_step 36 | print("NeuMF.") 37 | 38 | def build_network(self, num_factor=10, num_neg_sample=20): 39 | self.num_neg_sample = num_neg_sample 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 41 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 42 | self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') 43 | 44 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor]), dtype=tf.float32) 45 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor]), dtype=tf.float32) 46 | 47 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 48 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 49 | GMF = tf.multiply(user_latent_factor, item_latent_factor) 50 | 51 | self.pred_y = tf.nn.sigmoid(tf.reduce_sum(GMF, axis=1)) 52 | 53 | self.loss = - tf.reduce_sum( 54 | self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 55 | + self.reg_rate * (tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q)) 56 | 57 | self.optimizer = 
tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 58 | 59 | return self 60 | 61 | def prepare_data(self, train_data, test_data): 62 | ''' 63 | You must prepare the data before train and test the model 64 | :param train_data: 65 | :param test_data: 66 | :return: 67 | ''' 68 | t = train_data.tocoo() 69 | self.user = list(t.row.reshape(-1)) 70 | self.item = list(t.col.reshape(-1)) 71 | self.label = list(t.data) 72 | self.test_data = test_data 73 | 74 | self.neg_items = self._get_neg_items(train_data.tocsr()) 75 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 76 | 77 | print("data preparation finished.") 78 | return self 79 | 80 | def train(self): 81 | 82 | item_temp = self.item[:] 83 | user_temp = self.user[:] 84 | labels_temp = self.label[:] 85 | 86 | user_append = [] 87 | item_append = [] 88 | values_append = [] 89 | for u in self.user: 90 | list_of_random_items = random.sample(self.neg_items[u], self.num_neg_sample) 91 | user_append += [u] * self.num_neg_sample 92 | item_append += list_of_random_items 93 | values_append += [0] * self.num_neg_sample 94 | 95 | item_temp += item_append 96 | user_temp += user_append 97 | labels_temp += values_append 98 | 99 | self.num_training = len(item_temp) 100 | self.total_batch = int(self.num_training / self.batch_size) 101 | # print(self.total_batch) 102 | idxs = np.random.permutation(self.num_training) # shuffled ordering 103 | user_random = list(np.array(user_temp)[idxs]) 104 | item_random = list(np.array(item_temp)[idxs]) 105 | labels_random = list(np.array(labels_temp)[idxs]) 106 | 107 | # train 108 | for i in range(self.total_batch): 109 | start_time = time.time() 110 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 111 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 112 | batch_label = labels_random[i * self.batch_size:(i + 1) * self.batch_size] 113 | 114 | _, loss = self.sess.run((self.optimizer, self.loss), 115 | feed_dict={self.user_id: batch_user, self.item_id: batch_item, self.y: batch_label}) 116 | 117 | if i % self.display_step == 0: 118 | if self.verbose: 119 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 120 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 121 | 122 | def test(self): 123 | evaluate(self) 124 | 125 | def execute(self, train_data, test_data): 126 | 127 | self.prepare_data(train_data, test_data) 128 | 129 | init = tf.global_variables_initializer() 130 | self.sess.run(init) 131 | 132 | for epoch in range(self.epochs): 133 | self.train() 134 | if (epoch) % self.T == 0: 135 | print("Epoch: %04d; " % (epoch)) 136 | self.test() 137 | 138 | def save(self, path): 139 | saver = tf.train.Saver() 140 | saver.save(self.sess, path) 141 | 142 | def predict(self, user_id, item_id): 143 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 144 | 145 | def _get_neg_items(self, data): 146 | all_items = set(np.arange(self.num_item)) 147 | neg_items = {} 148 | for u in range(self.num_user): 149 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 150 | 151 | return neg_items 152 | -------------------------------------------------------------------------------- /models/item_ranking/gmf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/gmf.pyc -------------------------------------------------------------------------------- /models/item_ranking/jrl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Joint Representation Learning . 3 | Reference: Zhang, Yongfeng, et al. "Joint representation learning for top-n recommendation with heterogeneous information sources." Proceedings of the 2017 ACM on Conference on Information and Knowledge Management. ACM, 2017. 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | 9 | import random 10 | 11 | from utils.evaluation.RankingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class JRL(): 24 | """ 25 | Here we do not use the side information. 
26 | """ 27 | 28 | def __init__(self, sess, num_user, num_item, learning_rate=0.5, reg_rate=0.01, epoch=500, batch_size=256, 29 | verbose=False, T=1, display_step=1000): 30 | self.learning_rate = learning_rate 31 | self.epochs = epoch 32 | self.batch_size = batch_size 33 | self.reg_rate = reg_rate 34 | self.sess = sess 35 | self.num_user = num_user 36 | self.num_item = num_item 37 | self.verbose = verbose 38 | self.T = T 39 | self.display_step = display_step 40 | print("NeuMF.") 41 | 42 | def build_network(self, num_factor=10, num_neg_sample=20, hidden_dimension=10): 43 | self.num_neg_sample = num_neg_sample 44 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 45 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 46 | self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') 47 | 48 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor]), dtype=tf.float32) 49 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor]), dtype=tf.float32) 50 | 51 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 52 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 53 | GMF = tf.multiply(user_latent_factor, item_latent_factor) 54 | 55 | layer_1 = tf.layers.dense(inputs=GMF, units=num_factor, kernel_initializer=tf.random_normal_initializer, 56 | activation=tf.sigmoid, 57 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 58 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension, activation=tf.sigmoid, 59 | kernel_initializer=tf.random_normal_initializer, 60 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 61 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dimension, activation=tf.sigmoid, 62 | kernel_initializer=tf.random_normal_initializer, 63 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 64 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dimension, activation=tf.sigmoid, 65 | kernel_initializer=tf.random_normal_initializer, 66 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 67 | output = tf.layers.dense(inputs=layer_4, units=hidden_dimension, activation=tf.sigmoid, 68 | kernel_initializer=tf.random_normal_initializer, 69 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 70 | 71 | self.pred_y = tf.nn.sigmoid(tf.reduce_sum(output, 1)) 72 | 73 | self.loss = - tf.reduce_sum( 74 | self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 75 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 76 | tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q)) 77 | 78 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 79 | 80 | return self 81 | 82 | def prepare_data(self, train_data, test_data): 83 | ''' 84 | You must prepare the data before train and test the model 85 | :param train_data: 86 | :param test_data: 87 | :return: 88 | ''' 89 | t = train_data.tocoo() 90 | self.user = list(t.row.reshape(-1)) 91 | self.item = list(t.col.reshape(-1)) 92 | self.label = list(t.data) 93 | self.test_data = test_data 94 | 95 | self.neg_items = self._get_neg_items(train_data.tocsr()) 96 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 97 | 98 | print("data preparation finished.") 99 | return self 100 | 101 | def train(self): 102 | 103 | item_temp = self.item[:] 104 | user_temp = self.user[:] 105 | labels_temp = self.label[:] 106 | 107 | user_append = 
[] 108 | item_append = [] 109 | values_append = [] 110 | for u in self.user: 111 | list_of_random_items = random.sample(self.neg_items[u], self.num_neg_sample) 112 | user_append += [u] * self.num_neg_sample 113 | item_append += list_of_random_items 114 | values_append += [0] * self.num_neg_sample 115 | 116 | item_temp += item_append 117 | user_temp += user_append 118 | labels_temp += values_append 119 | 120 | self.num_training = len(item_temp) 121 | self.total_batch = int(self.num_training / self.batch_size) 122 | # print(self.total_batch) 123 | idxs = np.random.permutation(self.num_training) # shuffled ordering 124 | user_random = list(np.array(user_temp)[idxs]) 125 | item_random = list(np.array(item_temp)[idxs]) 126 | labels_random = list(np.array(labels_temp)[idxs]) 127 | 128 | # train 129 | for i in range(self.total_batch): 130 | start_time = time.time() 131 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 132 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 133 | batch_label = labels_random[i * self.batch_size:(i + 1) * self.batch_size] 134 | 135 | _, loss = self.sess.run((self.optimizer, self.loss), 136 | feed_dict={self.user_id: batch_user, self.item_id: batch_item, self.y: batch_label}) 137 | 138 | if i % self.display_step == 0: 139 | if self.verbose: 140 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 141 | print("one iteration: %s seconds." % (time.time() - start_time)) 142 | 143 | def test(self): 144 | evaluate(self) 145 | 146 | def execute(self, train_data, test_data): 147 | 148 | self.prepare_data(train_data, test_data) 149 | 150 | init = tf.global_variables_initializer() 151 | self.sess.run(init) 152 | 153 | for epoch in range(self.epochs): 154 | self.train() 155 | if (epoch) % self.T == 0: 156 | print("Epoch: %04d; " % (epoch)) 157 | self.test() 158 | 159 | def save(self, path): 160 | saver = tf.train.Saver() 161 | saver.save(self.sess, path) 162 | 163 | def predict(self, user_id, item_id): 164 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 165 | 166 | def _get_neg_items(self, data): 167 | all_items = set(np.arange(self.num_item)) 168 | neg_items = {} 169 | for u in range(self.num_user): 170 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 171 | 172 | return neg_items 173 | -------------------------------------------------------------------------------- /models/item_ranking/jrl.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/jrl.pyc -------------------------------------------------------------------------------- /models/item_ranking/lrml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Latent Relational Metric Learning (LRML) 3 | WWW 2018. Authors - Yi Tay, Luu Anh Tuan, Siu Cheung Hui 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RankingMetrics import * 11 | 12 | __author__ = "Yi Tay" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Yi Tay" 18 | __email__ = "ytay017@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class LRML(): 23 | """ This is a reference implementation of the LRML model 24 | proposed in WWW'18. 
25 | Note: This was mainly adapted for the DeepRec repository 26 | and is copied from the first author's 27 | private code repository. This has NOT undergone sanity checks. 28 | """ 29 | 30 | def __init__(self, sess, num_user, num_item, learning_rate=0.1, 31 | reg_rate=0.1, epoch=500, batch_size=500, 32 | verbose=False, T=1, display_step=1000, mode=1, 33 | copy_relations=True, dist='L1', num_mem=100): 34 | """ This model takes after the CML structure implemented by Shuai. 35 | There are several new hyperparameters introduced which are explained 36 | as follows: 37 | Args: 38 | mode:`int`.1 or 2. varies the attention computation. 39 | 2 corresponds to the implementation in the paper. 40 | But 1 seems to produce better results. 41 | copy_relations: `bool`. Reuse relation vector for negative sample. 42 | dist: `str`. L1 or L2. Use L1 or L2 distance. 43 | num_mem: `int`. Controls the number of memory rows. 44 | """ 45 | self.learning_rate = learning_rate 46 | self.epochs = epoch 47 | self.batch_size = batch_size 48 | self.reg_rate = reg_rate 49 | self.sess = sess 50 | self.num_user = num_user 51 | self.num_item = num_item 52 | self.verbose = verbose 53 | self.T = T 54 | self.mode = mode 55 | self.display_step = display_step 56 | # self.init = 1 / (num_factor ** 0.5) 57 | self.num_mem = num_mem 58 | self.copy_relations = copy_relations 59 | self.dist = dist 60 | print("LRML.") 61 | 62 | def lram(self, a, b, 63 | reuse=None, initializer=None, k=10, relation=None): 64 | """ Generates relation given user (a) and item(b) 65 | """ 66 | with tf.variable_scope('lrml', reuse=reuse) as scope: 67 | if (relation is None): 68 | _dim = a.get_shape().as_list()[1] 69 | key_matrix = tf.get_variable('key_matrix', [_dim, k], 70 | initializer=initializer) 71 | memories = tf.get_variable('memory', [_dim, k], 72 | initializer=initializer) 73 | user_item_key = a * b 74 | key_attention = tf.matmul(user_item_key, key_matrix) 75 | key_attention = tf.nn.softmax(key_attention) # bsz x k 76 | if (self.mode == 1): 77 | relation = tf.matmul(key_attention, memories) 78 | elif (self.mode == 2): 79 | key_attention = tf.expand_dims(key_attention, 1) 80 | relation = key_attention * memories 81 | relation = tf.reduce_sum(relation, 2) 82 | return relation 83 | 84 | def build_network(self, num_factor=100, margin=0.5, norm_clip_value=1): 85 | """ Main computational graph 86 | """ 87 | # stddev initialize 88 | init = 1 / (num_factor ** 0.5) 89 | 90 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 91 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 92 | self.neg_item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='neg_item_id') 93 | self.keep_rate = tf.placeholder(tf.float32) 94 | 95 | P = tf.Variable(tf.random_normal([self.num_user, num_factor], stddev=init), dtype=tf.float32) 96 | Q = tf.Variable(tf.random_normal([self.num_item, num_factor], stddev=init), dtype=tf.float32) 97 | 98 | user_embedding = tf.nn.embedding_lookup(P, self.user_id) 99 | item_embedding = tf.nn.embedding_lookup(Q, self.item_id) 100 | neg_item_embedding = tf.nn.embedding_lookup(Q, self.neg_item_id) 101 | 102 | selected_memory = self.lram(user_embedding, item_embedding, 103 | reuse=None, 104 | initializer=tf.random_normal_initializer(init), 105 | k=self.num_mem) 106 | if (self.copy_relations == False): 107 | selected_memory_neg = self.lram(user_embedding, neg_item_embedding, 108 | reuse=True, 109 | initializer=tf.random_normal_initializer(init), 110 | k=self.num_mem) 111 | else: 112 | 
selected_memory_neg = selected_memory 113 | 114 | energy_pos = item_embedding - (user_embedding + selected_memory) 115 | energy_neg = neg_item_embedding - (user_embedding + selected_memory_neg) 116 | 117 | if (self.dist == 'L2'): 118 | pos_dist = tf.sqrt(tf.reduce_sum(tf.square(energy_pos), 1) + 1E-3) 119 | neg_dist = tf.sqrt(tf.reduce_sum(tf.square(energy_neg), 1) + 1E-3) 120 | elif (self.dist == 'L1'): 121 | pos_dist = tf.reduce_sum(tf.abs(energy_pos), 1) 122 | neg_dist = tf.reduce_sum(tf.abs(energy_neg), 1) 123 | 124 | self.pred_distance = pos_dist 125 | self.pred_distance_neg = neg_dist 126 | 127 | self.loss = tf.reduce_sum(tf.maximum(self.pred_distance - self.pred_distance_neg + margin, 0)) 128 | 129 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss, var_list=[P, Q]) 130 | self.clip_P = tf.assign(P, tf.clip_by_norm(P, norm_clip_value, axes=[1])) 131 | self.clip_Q = tf.assign(Q, tf.clip_by_norm(Q, norm_clip_value, axes=[1])) 132 | 133 | return self 134 | 135 | def prepare_data(self, train_data, test_data): 136 | ''' 137 | You must prepare the data before train and test the model 138 | :param train_data: 139 | :param test_data: 140 | :return: 141 | ''' 142 | t = train_data.tocoo() 143 | self.user = t.row.reshape(-1) 144 | self.item = t.col.reshape(-1) 145 | self.num_training = len(self.item) 146 | self.test_data = test_data 147 | self.total_batch = int(self.num_training / self.batch_size) 148 | self.neg_items = self._get_neg_items(train_data.tocsr()) 149 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 150 | print(self.total_batch) 151 | print("data preparation finished.") 152 | return self 153 | 154 | def train(self): 155 | idxs = np.random.permutation(self.num_training) # shuffled ordering 156 | user_random = list(self.user[idxs]) 157 | item_random = list(self.item[idxs]) 158 | item_random_neg = [] 159 | for u in user_random: 160 | neg_i = self.neg_items[u] 161 | s = np.random.randint(len(neg_i)) 162 | item_random_neg.append(neg_i[s]) 163 | 164 | # train 165 | for i in range(self.total_batch): 166 | start_time = time.time() 167 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 168 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 169 | batch_item_neg = item_random_neg[i * self.batch_size:(i + 1) * self.batch_size] 170 | 171 | _, loss, _, _ = self.sess.run((self.optimizer, self.loss, self.clip_P, self.clip_Q), 172 | feed_dict={self.user_id: batch_user, 173 | self.item_id: batch_item, 174 | self.neg_item_id: batch_item_neg, 175 | self.keep_rate: 0.98}) 176 | 177 | if i % self.display_step == 0: 178 | if self.verbose: 179 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 180 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 181 | 182 | def test(self): 183 | evaluate(self) 184 | 185 | def execute(self, train_data, test_data): 186 | 187 | self.prepare_data(train_data, test_data) 188 | 189 | init = tf.global_variables_initializer() 190 | self.sess.run(init) 191 | 192 | for epoch in range(self.epochs): 193 | self.train() 194 | if (epoch) % self.T == 0: 195 | print("Epoch: %04d; " % (epoch)) 196 | self.test() 197 | 198 | def save(self, path): 199 | saver = tf.train.Saver() 200 | saver.save(self.sess, path) 201 | 202 | def predict(self, user_id, item_id): 203 | return -self.sess.run([self.pred_distance], 204 | feed_dict={self.user_id: user_id, 205 | self.item_id: item_id, self.keep_rate: 1})[0] 206 | 207 | def _get_neg_items(self, data): 208 | all_items = set(np.arange(self.num_item)) 209 | neg_items = {} 210 | for u in range(self.num_user): 211 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 212 | 213 | return neg_items 214 | -------------------------------------------------------------------------------- /models/item_ranking/lrml.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/lrml.pyc -------------------------------------------------------------------------------- /models/item_ranking/mlp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Collaborative Filtering. 3 | Reference: He, Xiangnan, et al. "Neural collaborative filtering." Proceedings of the 26th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2017. 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | import random 10 | 11 | from utils.evaluation.RankingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class MLP(): 24 | def __init__(self, sess, num_user, num_item, learning_rate=0.5, reg_rate=0.001, epoch=500, batch_size=256, 25 | verbose=False, T=1, display_step=1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.verbose = verbose 34 | self.T = T 35 | self.display_step = display_step 36 | print("You are running MLP.") 37 | 38 | def build_network(self, num_factor_mlp=10, hidden_dimension=10, num_neg_sample=2): 39 | self.num_neg_sample = num_neg_sample 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 41 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 42 | self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') 43 | 44 | self.mlp_P = tf.Variable(tf.random_normal([self.num_user, num_factor_mlp]), dtype=tf.float32) 45 | self.mlp_Q = tf.Variable(tf.random_normal([self.num_item, num_factor_mlp]), dtype=tf.float32) 46 | 47 | mlp_user_latent_factor = tf.nn.embedding_lookup(self.mlp_P, self.user_id) 48 | mlp_item_latent_factor = tf.nn.embedding_lookup(self.mlp_Q, self.item_id) 49 | 50 | layer_1 = tf.layers.dense(inputs=tf.concat([mlp_item_latent_factor, mlp_user_latent_factor], axis=1), 51 | units=num_factor_mlp 
* 2, kernel_initializer=tf.random_normal_initializer, 52 | activation=tf.nn.relu, 53 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 54 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension * 2, activation=tf.nn.relu, 55 | kernel_initializer=tf.random_normal_initializer, 56 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 57 | MLP = tf.layers.dense(inputs=layer_2, units=hidden_dimension, activation=tf.nn.relu, 58 | kernel_initializer=tf.random_normal_initializer, 59 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 60 | 61 | self.pred_y = tf.nn.sigmoid(tf.reduce_sum(MLP, axis=1)) 62 | 63 | # self.pred_y = tf.layers.dense(inputs=MLP, units=1, activation=tf.sigmoid) 64 | 65 | self.loss = - tf.reduce_sum( 66 | self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 67 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 68 | tf.nn.l2_loss(self.mlp_P) + tf.nn.l2_loss(self.mlp_Q)) 69 | 70 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 71 | 72 | return self 73 | 74 | def prepare_data(self, train_data, test_data): 75 | ''' 76 | You must prepare the data before train and test the model 77 | :param train_data: 78 | :param test_data: 79 | :return: 80 | ''' 81 | t = train_data.tocoo() 82 | self.user = list(t.row.reshape(-1)) 83 | self.item = list(t.col.reshape(-1)) 84 | self.label = list(t.data) 85 | self.test_data = test_data 86 | 87 | self.neg_items = self._get_neg_items(train_data.tocsr()) 88 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 89 | 90 | print("data preparation finished.") 91 | return self 92 | 93 | def train(self): 94 | 95 | item_temp = self.item[:] 96 | user_temp = self.user[:] 97 | labels_temp = self.label[:] 98 | 99 | user_append = [] 100 | item_append = [] 101 | values_append = [] 102 | for u in self.user: 103 | list_of_random_items = random.sample(self.neg_items[u], self.num_neg_sample) 104 | user_append += [u] * self.num_neg_sample 105 | item_append += list_of_random_items 106 | values_append += [0] * self.num_neg_sample 107 | 108 | item_temp += item_append 109 | user_temp += user_append 110 | labels_temp += values_append 111 | 112 | self.num_training = len(item_temp) 113 | self.total_batch = int(self.num_training / self.batch_size) 114 | print(self.total_batch) 115 | idxs = np.random.permutation(self.num_training) # shuffled ordering 116 | user_random = list(np.array(user_temp)[idxs]) 117 | item_random = list(np.array(item_temp)[idxs]) 118 | labels_random = list(np.array(labels_temp)[idxs]) 119 | 120 | # train 121 | for i in range(self.total_batch): 122 | start_time = time.time() 123 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 124 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 125 | batch_label = labels_random[i * self.batch_size:(i + 1) * self.batch_size] 126 | 127 | _, loss = self.sess.run((self.optimizer, self.loss), 128 | feed_dict={self.user_id: batch_user, self.item_id: batch_item, self.y: batch_label}) 129 | 130 | if i % self.display_step == 0: 131 | if self.verbose: 132 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 133 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 134 | 135 | def test(self): 136 | evaluate(self) 137 | 138 | def execute(self, train_data, test_data): 139 | 140 | self.prepare_data(train_data, test_data) 141 | 142 | init = tf.global_variables_initializer() 143 | self.sess.run(init) 144 | 145 | for epoch in range(self.epochs): 146 | self.train() 147 | if (epoch) % self.T == 0: 148 | print("Epoch: %04d; " % (epoch)) 149 | self.test() 150 | 151 | def save(self, path): 152 | saver = tf.train.Saver() 153 | saver.save(self.sess, path) 154 | 155 | def predict(self, user_id, item_id): 156 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 157 | 158 | def _get_neg_items(self, data): 159 | all_items = set(np.arange(self.num_item)) 160 | neg_items = {} 161 | for u in range(self.num_user): 162 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 163 | 164 | return neg_items 165 | -------------------------------------------------------------------------------- /models/item_ranking/mlp.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/mlp.pyc -------------------------------------------------------------------------------- /models/item_ranking/neumf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Collaborative Filtering. 3 | Reference: He, Xiangnan, et al. "Neural collaborative filtering." Proceedings of the 26th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2017. 4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | import random 10 | 11 | from utils.evaluation.RankingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class NeuMF(): 24 | def __init__(self, sess, num_user, num_item, learning_rate=0.5, reg_rate=0.01, epoch=500, batch_size=256, 25 | verbose=True, T=1, display_step=1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.verbose = verbose 34 | self.T = T 35 | self.display_step = display_step 36 | print("NeuMF.") 37 | 38 | def build_network(self, num_factor=10, num_factor_mlp=64, hidden_dimension=10, num_neg_sample=30): 39 | self.num_neg_sample = num_neg_sample 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 41 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 42 | self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') 43 | 44 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor]), dtype=tf.float32) 45 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor]), dtype=tf.float32) 46 | 47 | self.mlp_P = tf.Variable(tf.random_normal([self.num_user, num_factor_mlp]), dtype=tf.float32) 48 | self.mlp_Q = tf.Variable(tf.random_normal([self.num_item, num_factor_mlp]), dtype=tf.float32) 49 | 50 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 51 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 52 | 
mlp_user_latent_factor = tf.nn.embedding_lookup(self.mlp_P, self.user_id) 53 | mlp_item_latent_factor = tf.nn.embedding_lookup(self.mlp_Q, self.item_id) 54 | 55 | GMF = tf.multiply(user_latent_factor, item_latent_factor) 56 | 57 | layer_1 = tf.layers.dense(inputs=tf.concat([mlp_item_latent_factor, mlp_user_latent_factor], axis=1), 58 | units=num_factor_mlp * 2, kernel_initializer=tf.random_normal_initializer, 59 | activation=tf.nn.relu, 60 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 61 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension * 8, activation=tf.nn.relu, 62 | kernel_initializer=tf.random_normal_initializer, 63 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 64 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dimension * 4, activation=tf.nn.relu, 65 | kernel_initializer=tf.random_normal_initializer, 66 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 67 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dimension * 2, activation=tf.nn.relu, 68 | kernel_initializer=tf.random_normal_initializer, 69 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 70 | MLP = tf.layers.dense(inputs=layer_4, units=hidden_dimension, activation=tf.nn.relu, 71 | kernel_initializer=tf.random_normal_initializer, 72 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 73 | 74 | self.pred_y = tf.nn.sigmoid(tf.reduce_sum(tf.concat([GMF, MLP], axis=1), 1)) 75 | 76 | # self.pred_y = tf.layers.dense(inputs=tf.concat([GMF, MLP], axis=1), units=1, activation=tf.sigmoid, kernel_initializer=tf.random_normal_initializer, kernel_regularizer= tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 77 | 78 | self.loss = - tf.reduce_sum( 79 | self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 80 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 81 | tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q) + tf.nn.l2_loss(self.mlp_P) + tf.nn.l2_loss(self.mlp_Q)) 82 | 83 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 84 | 85 | return self 86 | 87 | def prepare_data(self, train_data, test_data): 88 | ''' 89 | You must prepare the data before train and test the model 90 | :param train_data: 91 | :param test_data: 92 | :return: 93 | ''' 94 | t = train_data.tocoo() 95 | self.user = list(t.row.reshape(-1)) 96 | self.item = list(t.col.reshape(-1)) 97 | self.label = list(t.data) 98 | self.test_data = test_data 99 | 100 | self.neg_items = self._get_neg_items(train_data.tocsr()) 101 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 102 | 103 | print("data preparation finished.") 104 | return self 105 | 106 | def train(self): 107 | 108 | item_temp = self.item[:] 109 | user_temp = self.user[:] 110 | labels_temp = self.label[:] 111 | 112 | user_append = [] 113 | item_append = [] 114 | values_append = [] 115 | for u in self.user: 116 | list_of_random_items = random.sample(self.neg_items[u], self.num_neg_sample) 117 | user_append += [u] * self.num_neg_sample 118 | item_append += list_of_random_items 119 | values_append += [0] * self.num_neg_sample 120 | 121 | item_temp += item_append 122 | user_temp += user_append 123 | labels_temp += values_append 124 | 125 | self.num_training = len(item_temp) 126 | self.total_batch = int(self.num_training / self.batch_size) 127 | print(self.total_batch) 128 | idxs = np.random.permutation(self.num_training) # shuffled ordering 
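        # At this point user_temp / item_temp / labels_temp hold every observed
        # (user, item) pair with its label, plus num_neg_sample zero-labelled items
        # sampled from each user's unobserved set; idxs (computed above) is applied
        # to all three lists in lockstep below, so positives and sampled negatives
        # are shuffled together before being sliced into mini-batches.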
129 | user_random = list(np.array(user_temp)[idxs]) 130 | item_random = list(np.array(item_temp)[idxs]) 131 | labels_random = list(np.array(labels_temp)[idxs]) 132 | 133 | # train 134 | for i in range(self.total_batch): 135 | start_time = time.time() 136 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 137 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 138 | batch_label = labels_random[i * self.batch_size:(i + 1) * self.batch_size] 139 | 140 | _, loss = self.sess.run((self.optimizer, self.loss), 141 | feed_dict={self.user_id: batch_user, self.item_id: batch_item, self.y: batch_label}) 142 | 143 | if i % self.display_step == 0: 144 | if self.verbose: 145 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 146 | print("one iteration: %s seconds." % (time.time() - start_time)) 147 | 148 | def test(self): 149 | evaluate(self) 150 | 151 | def execute(self, train_data, test_data): 152 | 153 | self.prepare_data(train_data, test_data) 154 | 155 | init = tf.global_variables_initializer() 156 | self.sess.run(init) 157 | self.test() 158 | for epoch in range(self.epochs): 159 | self.train() 160 | if (epoch) % self.T == 0: 161 | print("Epoch: %04d; " % (epoch)) 162 | self.test() 163 | 164 | def save(self, path): 165 | saver = tf.train.Saver() 166 | saver.save(self.sess, path) 167 | 168 | def predict(self, user_id, item_id): 169 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 170 | 171 | def _get_neg_items(self, data): 172 | all_items = set(np.arange(self.num_item)) 173 | neg_items = {} 174 | for u in range(self.num_user): 175 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 176 | 177 | return neg_items 178 | -------------------------------------------------------------------------------- /models/item_ranking/neumf.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/neumf.pyc -------------------------------------------------------------------------------- /models/item_ranking/neumf_my.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Collaborative Filtering. 3 | Reference: He, Xiangnan, et al. "Neural collaborative filtering." Proceedings of the 26th International Conference on World Wide Web. International World Wide Web Conferences Steering Committee, 2017. 
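NeuMF fuses a GMF branch (the element-wise product of one pair of user/item embeddings) with an MLP branch (a tower over a second, concatenated pair of embeddings); the branch outputs are concatenated and squashed into a click probability. A minimal numpy sketch of that fusion, matching pred_y below (illustrative values only, not the graph variables):

    import numpy as np
    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
    gmf = np.random.randn(10)        # element-wise product, num_factor dims
    mlp = np.random.randn(10)        # last MLP layer output, hidden_dimension dims
    pred_y = sigmoid(np.concatenate([gmf, mlp]).sum())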
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | import random 10 | 11 | from utils.evaluation.RankingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class NeuMF_my(): 24 | def __init__(self, sess, num_user, num_item, learning_rate=0.5, reg_rate=0.01, epoch=500, batch_size=256, 25 | verbose=True, T=1, display_step=1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.verbose = verbose 34 | self.T = T 35 | self.display_step = display_step 36 | print("NeuMF.") 37 | 38 | def build_network(self, num_factor=10, num_factor_mlp=64, hidden_dimension=10, num_neg_sample=30): 39 | self.num_neg_sample = num_neg_sample 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 41 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 42 | self.y = tf.placeholder(dtype=tf.float32, shape=[None], name='y') 43 | 44 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor]), dtype=tf.float32) 45 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor]), dtype=tf.float32) 46 | 47 | self.mlp_P = tf.Variable(tf.random_normal([self.num_user, num_factor_mlp]), dtype=tf.float32) 48 | self.mlp_Q = tf.Variable(tf.random_normal([self.num_item, num_factor_mlp]), dtype=tf.float32) 49 | 50 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 51 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 52 | mlp_user_latent_factor = tf.nn.embedding_lookup(self.mlp_P, self.user_id) 53 | mlp_item_latent_factor = tf.nn.embedding_lookup(self.mlp_Q, self.item_id) 54 | 55 | GMF = tf.multiply(user_latent_factor, item_latent_factor) 56 | 57 | layer_1 = tf.layers.dense(inputs=tf.concat([mlp_item_latent_factor, mlp_user_latent_factor], axis=1), 58 | units=num_factor_mlp * 2, kernel_initializer=tf.random_normal_initializer, 59 | activation=tf.nn.relu, 60 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 61 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension * 8, activation=tf.nn.relu, 62 | kernel_initializer=tf.random_normal_initializer, 63 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 64 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dimension * 4, activation=tf.nn.relu, 65 | kernel_initializer=tf.random_normal_initializer, 66 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 67 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dimension * 2, activation=tf.nn.relu, 68 | kernel_initializer=tf.random_normal_initializer, 69 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 70 | MLP = tf.layers.dense(inputs=layer_4, units=hidden_dimension, activation=tf.nn.relu, 71 | kernel_initializer=tf.random_normal_initializer, 72 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 73 | 74 | self.pred_y = tf.nn.sigmoid(tf.reduce_sum(tf.concat([GMF, MLP], axis=1), 1)) 75 | 76 | # self.pred_y = tf.layers.dense(inputs=tf.concat([GMF, MLP], axis=1), units=1, activation=tf.sigmoid, kernel_initializer=tf.random_normal_initializer, kernel_regularizer= 
tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 77 | 78 | self.loss = - tf.reduce_sum( 79 | self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 80 | + tf.losses.get_regularization_loss() 81 | # self.loss = - tf.reduce_sum( 82 | # self.y * tf.log(self.pred_y + 1e-10) + (1 - self.y) * tf.log(1 - self.pred_y + 1e-10)) \ 83 | # + tf.losses.get_regularization_loss() + self.reg_rate * ( 84 | # tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q) + tf.nn.l2_loss(self.mlp_P) + tf.nn.l2_loss( 85 | # self.mlp_Q)) 86 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 87 | 88 | return self 89 | 90 | def prepare_data(self, train_data, test_data): 91 | ''' 92 | You must prepare the data before train and test the model 93 | :param train_data: 94 | :param test_data: 95 | :return: 96 | ''' 97 | t = train_data.tocoo() 98 | self.user = list(t.row.reshape(-1)) 99 | self.item = list(t.col.reshape(-1)) 100 | self.label = list(t.data) 101 | self.test_data = test_data 102 | 103 | self.neg_items = self._get_neg_items(train_data.tocsr()) 104 | self.test_users = set([u for u in self.test_data.keys() if len(self.test_data[u]) > 0]) 105 | 106 | print("data preparation finished.") 107 | return self 108 | 109 | def train(self): 110 | 111 | item_temp = self.item[:] 112 | user_temp = self.user[:] 113 | labels_temp = self.label[:] 114 | 115 | user_append = [] 116 | item_append = [] 117 | values_append = [] 118 | for u in self.user: 119 | list_of_random_items = random.sample(self.neg_items[u], self.num_neg_sample) 120 | user_append += [u] * self.num_neg_sample 121 | item_append += list_of_random_items 122 | values_append += [0] * self.num_neg_sample 123 | 124 | item_temp += item_append 125 | user_temp += user_append 126 | labels_temp += values_append 127 | 128 | self.num_training = len(item_temp) 129 | self.total_batch = int(self.num_training / self.batch_size) 130 | print(self.total_batch) 131 | idxs = np.random.permutation(self.num_training) # shuffled ordering 132 | user_random = list(np.array(user_temp)[idxs]) 133 | item_random = list(np.array(item_temp)[idxs]) 134 | labels_random = list(np.array(labels_temp)[idxs]) 135 | 136 | # train 137 | for i in range(self.total_batch): 138 | start_time = time.time() 139 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 140 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 141 | batch_label = labels_random[i * self.batch_size:(i + 1) * self.batch_size] 142 | 143 | _, loss = self.sess.run((self.optimizer, self.loss), 144 | feed_dict={self.user_id: batch_user, self.item_id: batch_item, self.y: batch_label}) 145 | 146 | if i % self.display_step == 0: 147 | if self.verbose: 148 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 149 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 150 | 151 | def test(self): 152 | evaluate(self) 153 | 154 | def execute(self, train_data, test_data): 155 | 156 | self.prepare_data(train_data, test_data) 157 | 158 | init = tf.global_variables_initializer() 159 | self.sess.run(init) 160 | self.test() 161 | for epoch in range(self.epochs): 162 | self.train() 163 | if (epoch) % self.T == 0: 164 | print("Epoch: %04d; " % (epoch)) 165 | self.test() 166 | 167 | def save(self, path): 168 | saver = tf.train.Saver() 169 | saver.save(self.sess, path) 170 | 171 | def predict(self, user_id, item_id): 172 | return self.sess.run([self.pred_y], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 173 | 174 | def _get_neg_items(self, data): 175 | all_items = set(np.arange(self.num_item)) 176 | neg_items = {} 177 | for u in range(self.num_user): 178 | neg_items[u] = list(all_items - set(data.getrow(u).nonzero()[1])) 179 | 180 | return neg_items 181 | -------------------------------------------------------------------------------- /models/item_ranking/neurec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/neurec.py -------------------------------------------------------------------------------- /models/item_ranking/widedeep.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/item_ranking/widedeep.py -------------------------------------------------------------------------------- /models/rating_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/rating_prediction/__init__.py -------------------------------------------------------------------------------- /models/rating_prediction/afm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Attention Factorization Machine. 3 | Reference: Xiao, Jun, et al. "Attentional factorization machines: Learning the weight of feature interactions via attention networks." arXiv preprint arXiv:1708.04617 (2017). 
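The model forms the element-wise product of every pair of active feature embeddings, scores each pair with a small attention network, pools the pairs with the resulting softmax weights, and projects the pooled vector to a scalar (pred_weight below) before adding the feature and global biases. A minimal numpy sketch of the attention pooling (illustrative names only, not the graph variables):

    import numpy as np
    k, n_pairs = 16, 3
    pairs = np.random.randn(n_pairs, k)             # element-wise products v_i * v_j
    W, b, p = np.random.randn(k, k), np.random.randn(k), np.random.randn(k)
    logits = np.maximum(pairs @ W + b, 0) @ p       # ReLU attention score per pair
    att = np.exp(logits - logits.max()); att /= att.sum()   # softmax over pairs
    afm = (att[:, None] * pairs).sum(0)             # k-dim attention-pooled interaction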
4 | Orginal Implementation: 5 | """ 6 | 7 | import tensorflow as tf 8 | import time 9 | from sklearn.metrics import mean_squared_error 10 | import math 11 | 12 | from utils.evaluation.RatingMetrics import * 13 | 14 | __author__ = "Shuai Zhang" 15 | __copyright__ = "Copyright 2018, The DeepRec Project" 16 | 17 | __license__ = "GPL" 18 | __version__ = "1.0.0" 19 | __maintainer__ = "Shuai Zhang" 20 | __email__ = "cheungdaven@gmail.com" 21 | __status__ = "Development" 22 | 23 | class AFM(): 24 | 25 | def __init__(self, sess, num_user, num_item, learning_rate = 0.01, reg_rate = 0.1, epoch = 500, batch_size = 4096, show_time = False, T =2, display_step= 1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.show_time = show_time 34 | self.T = T 35 | self.display_step = display_step 36 | print("Attention Factorization Machine.") 37 | 38 | 39 | def build_network(self, feature_M, valid_dim = 3, num_factor = 16, num_hidden = 64): 40 | 41 | 42 | # model dependent arguments 43 | self.train_features = tf.placeholder(tf.int32, shape=[None, None]) 44 | self.y = tf.placeholder(tf.float32, shape=[None, 1]) 45 | self.dropout_keep = tf.placeholder(tf.float32) 46 | 47 | self.feature_embeddings = tf.Variable(tf.random_normal([feature_M, num_factor], mean=0.0, stddev=0.01)) 48 | 49 | self.feature_bias = tf.Variable(tf.random_uniform([feature_M, 1], 0.0, 0.0)) 50 | self.bias = tf.Variable(tf.constant(0.0)) 51 | self.pred_weight = tf.Variable(np.ones((num_factor, 1), dtype=np.float32)) 52 | glorot = np.sqrt(2.0 / (num_factor + num_factor)) 53 | self.attention_W = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(num_factor, num_factor)),dtype=np.float32, name="attention_W") # K * AK 54 | self.attention_b = tf.Variable(np.random.normal(loc=0, scale=glorot, size=(1, num_factor)), dtype=np.float32,name="attention_b") # 1 * AK 55 | self.attention_p = tf.Variable(np.random.normal(loc=0, scale=1, size=(num_factor)), dtype=np.float32, name="attention_p") 56 | 57 | nonzero_embeddings = tf.nn.embedding_lookup(self.feature_embeddings, self.train_features) 58 | 59 | element_wise_product_list = [] 60 | count = 0 61 | 62 | for i in range(0, valid_dim): 63 | for j in range(i+1, valid_dim): 64 | element_wise_product_list.append(tf.multiply(nonzero_embeddings[:, i, :], nonzero_embeddings[:, j, :])) 65 | count += 1 66 | 67 | self.element_wise_product = tf.stack(element_wise_product_list) 68 | self.element_wise_product = tf.transpose(self.element_wise_product, perm=[1,0,2]) 69 | self.interactions = tf.reduce_sum(self.element_wise_product, 2) 70 | 71 | num_interactions = int(valid_dim * (valid_dim - 1 ) / 2) 72 | 73 | self.attention_mul = tf.reshape(tf.matmul(tf.reshape(self.element_wise_product, shape=[-1, num_factor]),self.attention_W), shape=[-1, num_interactions,num_factor]) 74 | self.attention_relu = tf.reduce_sum(tf.multiply(self.attention_p, tf.nn.relu(self.attention_mul + self.attention_b)), 2, keep_dims=True) 75 | self.attention_out = tf.nn.softmax(self.attention_relu) 76 | self.attention_out = tf.nn.dropout(self.attention_out, self.dropout_keep) 77 | 78 | self.AFM = tf.reduce_sum(tf.multiply(self.attention_out, self.element_wise_product), 1) 79 | self.AFM_FM = tf.reduce_sum(self.element_wise_product, 1) / num_interactions 80 | self.AFM = tf.nn.dropout(self.AFM, self.dropout_keep) 81 | 82 | # 83 | # self.summed_features_embedding = 
tf.reduce_sum(nonzero_embeddings, 1) 84 | # self.squared_summed_features_embedding = tf.square(self.summed_features_embedding) 85 | # self.squared_features_embedding = tf.square(nonzero_embeddings) 86 | # self.summed_squared_features_embedding = tf.reduce_sum(self.squared_features_embedding, 1) 87 | # 88 | # self.FM = 0.5 * tf.subtract(self.summed_squared_features_embedding, self.squared_summed_features_embedding) 89 | # # if batch_norm: 90 | # # self.FM = self 91 | # layer_1 = tf.layers.dense(inputs=self.FM, units=num_hidden, 92 | # bias_initializer=tf.random_normal_initializer, 93 | # kernel_initializer=tf.random_normal_initializer, activation=tf.nn.relu, 94 | # kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 95 | 96 | self.prediction = tf.matmul( self.AFM, self.pred_weight) 97 | 98 | 99 | 100 | bilinear = tf.reduce_sum(self.prediction, 1, keep_dims=True) 101 | self.f_b = tf.reduce_sum(tf.nn.embedding_lookup(self.feature_bias, self.train_features), 1) 102 | b = self.bias * tf.ones_like(self.y) 103 | self.pred_rating = tf.add_n([bilinear, self.f_b, b]) 104 | 105 | self.loss = tf.nn.l2_loss(tf.subtract(self.y, self.pred_rating)) \ 106 | + tf.contrib.layers.l2_regularizer(self.reg_rate)(self.feature_embeddings) 107 | 108 | 109 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 110 | 111 | def prepare_data(self, train_data, test_data): 112 | 113 | 114 | print("data preparation finished.") 115 | return self 116 | 117 | 118 | def train(self, train_data): 119 | self.num_training = len(train_data['Y']) 120 | total_batch = int( self.num_training/ self.batch_size) 121 | 122 | rng_state = np.random.get_state() 123 | np.random.shuffle(train_data['Y']) 124 | np.random.set_state(rng_state) 125 | np.random.shuffle(train_data['X']) 126 | # train 127 | for i in range(total_batch): 128 | start_time = time.time() 129 | batch_y = train_data['Y'][i * self.batch_size:(i + 1) * self.batch_size] 130 | batch_x = train_data['X'][i * self.batch_size:(i + 1) * self.batch_size] 131 | 132 | loss, opt = self.sess.run((self.loss, self.optimizer), feed_dict={self.train_features: batch_x, 133 | self.y: batch_y, 134 | self.dropout_keep:0.5}) 135 | if i % self.display_step == 0: 136 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 137 | if self.show_time: 138 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 139 | 140 | def test(self, test_data): 141 | # error = 0 142 | # error_mae = 0 143 | # test_set = list(test_data.keys()) 144 | # for (u, i) in test_set: 145 | # pred_rating_test = self.predict([u], [i]) 146 | # error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 147 | # error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 148 | num_example = len(test_data['Y']) 149 | feed_dict = {self.train_features: test_data['X'], self.y: test_data['Y'],self.dropout_keep: 1.0} 150 | predictions = self.sess.run((self.pred_rating), feed_dict=feed_dict) 151 | y_pred = np.reshape(predictions, (num_example,)) 152 | y_true = np.reshape(test_data['Y'], (num_example,)) 153 | predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true)) # bound the lower values 154 | predictions_bounded = np.minimum(predictions_bounded, 155 | np.ones(num_example) * max(y_true)) # bound the higher values 156 | RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded)) 157 | 158 | print("RMSE:" + str(RMSE)) 159 | 160 | def execute(self, train_data, test_data): 161 | 162 | init = tf.global_variables_initializer() 163 | self.sess.run(init) 164 | 165 | for epoch in range(self.epochs): 166 | print("Epoch: %04d;" % (epoch)) 167 | self.train(train_data) 168 | if (epoch) % self.T == 0: 169 | self.test(test_data) 170 | 171 | def save(self, path): 172 | saver = tf.train.Saver() 173 | saver.save(self.sess, path) 174 | 175 | def predict(self, user_id, item_id): 176 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 177 | 178 | -------------------------------------------------------------------------------- /models/rating_prediction/autorec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Item based AutoRec and user based AutoRec. 3 | Reference: Sedhain, Suvash, et al. "Autorec: Autoencoders meet collaborative filtering." Proceedings of the 24th International Conference on World Wide Web. ACM, 2015. 
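Both autoencoders below reconstruct a sparse rating vector r through one hidden layer, r_hat = W . sigmoid(V . r + mu) + b, and only the observed entries contribute to the loss via the 0/1 mask built with scipy.sign. IAutoRec encodes item columns (length num_user); UAutoRec encodes user columns (length num_item). A minimal sketch of the masked squared error (illustrative values only):

    import numpy as np
    r = np.array([5.0, 0.0, 3.0])                # one column of ratings, 0 = unobserved
    mask = np.sign(r)                            # 1 where a rating exists
    r_hat = np.array([4.2, 2.0, 2.5])            # reconstruction from the autoencoder
    loss = np.square((r - r_hat) * mask).sum()   # only observed entries are penalized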
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | import scipy 10 | 11 | from utils.evaluation.RatingMetrics import * 12 | 13 | __author__ = "Shuai Zhang" 14 | __copyright__ = "Copyright 2018, The DeepRec Project" 15 | 16 | __license__ = "GPL" 17 | __version__ = "1.0.0" 18 | __maintainer__ = "Shuai Zhang" 19 | __email__ = "cheungdaven@gmail.com" 20 | __status__ = "Development" 21 | 22 | 23 | class IAutoRec(): 24 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.1, epoch=500, batch_size=500, 25 | verbose=False, T=3, display_step=1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.verbose = verbose 34 | self.T = T 35 | self.display_step = display_step 36 | print("IAutoRec.") 37 | 38 | def build_network(self, hidden_neuron=500): 39 | 40 | self.rating_matrix = tf.placeholder(dtype=tf.float32, shape=[self.num_user, None]) 41 | self.rating_matrix_mask = tf.placeholder(dtype=tf.float32, shape=[self.num_user, None]) 42 | self.keep_rate_net = tf.placeholder(tf.float32) 43 | self.keep_rate_input = tf.placeholder(tf.float32) 44 | 45 | V = tf.Variable(tf.random_normal([hidden_neuron, self.num_user], stddev=0.01)) 46 | W = tf.Variable(tf.random_normal([self.num_user, hidden_neuron], stddev=0.01)) 47 | 48 | mu = tf.Variable(tf.random_normal([hidden_neuron], stddev=0.01)) 49 | b = tf.Variable(tf.random_normal([self.num_user], stddev=0.01)) 50 | layer_1 = tf.nn.dropout(tf.sigmoid(tf.expand_dims(mu, 1) + tf.matmul(V, self.rating_matrix)), 51 | self.keep_rate_net) 52 | self.layer_2 = tf.matmul(W, layer_1) + tf.expand_dims(b, 1) 53 | self.loss = tf.reduce_mean(tf.square( 54 | tf.norm(tf.multiply((self.rating_matrix - self.layer_2), self.rating_matrix_mask)))) + self.reg_rate * ( 55 | tf.square(tf.norm(W)) + tf.square(tf.norm(V))) 56 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 57 | 58 | def train(self, train_data): 59 | self.num_training = self.num_item 60 | total_batch = int(self.num_training / self.batch_size) 61 | idxs = np.random.permutation(self.num_training) # shuffled ordering 62 | 63 | for i in range(total_batch): 64 | start_time = time.time() 65 | if i == total_batch - 1: 66 | batch_set_idx = idxs[i * self.batch_size:] 67 | elif i < total_batch - 1: 68 | batch_set_idx = idxs[i * self.batch_size: (i + 1) * self.batch_size] 69 | 70 | _, loss = self.sess.run([self.optimizer, self.loss], 71 | feed_dict={self.rating_matrix: self.train_data[:, batch_set_idx], 72 | self.rating_matrix_mask: self.train_data_mask[:, batch_set_idx], 73 | self.keep_rate_net: 0.95 74 | }) 75 | if i % self.display_step == 0: 76 | if self.verbose: 77 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 78 | print("one iteration: %s seconds." 
% (time.time() - start_time)) 79 | 80 | def test(self, test_data): 81 | self.reconstruction = self.sess.run(self.layer_2, feed_dict={self.rating_matrix: self.train_data, 82 | self.rating_matrix_mask: self.train_data_mask, 83 | self.keep_rate_net: 1}) 84 | error = 0 85 | error_mae = 0 86 | test_set = list(test_data.keys()) 87 | for (u, i) in test_set: 88 | pred_rating_test = self.predict(u, i) 89 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 90 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 91 | print("RMSE:" + str(RMSE(error, len(test_set))) + "; MAE:" + str(MAE(error_mae, len(test_set)))) 92 | 93 | def execute(self, train_data, test_data): 94 | self.train_data = self._data_process(train_data) 95 | self.train_data_mask = scipy.sign(self.train_data) 96 | init = tf.global_variables_initializer() 97 | self.sess.run(init) 98 | for epoch in range(self.epochs): 99 | if self.verbose: 100 | print("Epoch: %04d;" % (epoch)) 101 | self.train(train_data) 102 | if (epoch) % self.T == 0: 103 | print("Epoch: %04d; " % (epoch), end='') 104 | self.test(test_data) 105 | 106 | def save(self, path): 107 | saver = tf.train.Saver() 108 | saver.save(self.sess, path) 109 | 110 | def predict(self, user_id, item_id): 111 | return self.reconstruction[user_id, item_id] 112 | 113 | def _data_process(self, data): 114 | output = np.zeros((self.num_user, self.num_item)) 115 | for u in range(self.num_user): 116 | for i in range(self.num_item): 117 | output[u, i] = data.get((u, i)) 118 | return output 119 | 120 | 121 | class UAutoRec(): 122 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.1, epoch=500, batch_size=200, 123 | verbose=False, T=3, display_step=1000): 124 | self.learning_rate = learning_rate 125 | self.epochs = epoch 126 | self.batch_size = batch_size 127 | self.reg_rate = reg_rate 128 | self.sess = sess 129 | self.num_user = num_user 130 | self.num_item = num_item 131 | self.verbose = verbose 132 | self.T = T 133 | self.display_step = display_step 134 | print("UAutoRec.") 135 | 136 | def build_network(self, hidden_neuron=500): 137 | 138 | self.rating_matrix = tf.placeholder(dtype=tf.float32, shape=[self.num_item, None]) 139 | self.rating_matrix_mask = tf.placeholder(dtype=tf.float32, shape=[self.num_item, None]) 140 | 141 | V = tf.Variable(tf.random_normal([hidden_neuron, self.num_item], stddev=0.01)) 142 | W = tf.Variable(tf.random_normal([self.num_item, hidden_neuron], stddev=0.01)) 143 | 144 | mu = tf.Variable(tf.random_normal([hidden_neuron], stddev=0.01)) 145 | b = tf.Variable(tf.random_normal([self.num_item], stddev=0.01)) 146 | layer_1 = tf.sigmoid(tf.expand_dims(mu, 1) + tf.matmul(V, self.rating_matrix)) 147 | self.layer_2 = tf.matmul(W, layer_1) + tf.expand_dims(b, 1) 148 | self.loss = tf.reduce_mean(tf.square( 149 | tf.norm(tf.multiply((self.rating_matrix - self.layer_2), self.rating_matrix_mask)))) + self.reg_rate * ( 150 | tf.square(tf.norm(W)) + tf.square(tf.norm(V))) 151 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 152 | 153 | def train(self, train_data): 154 | self.num_training = self.num_user 155 | total_batch = int(self.num_training / self.batch_size) 156 | idxs = np.random.permutation(self.num_training) # shuffled ordering 157 | 158 | for i in range(total_batch): 159 | start_time = time.time() 160 | if i == total_batch - 1: 161 | batch_set_idx = idxs[i * self.batch_size:] 162 | elif i < total_batch - 1: 163 | batch_set_idx = idxs[i * self.batch_size: (i + 1) * 
self.batch_size] 164 | 165 | _, loss = self.sess.run([self.optimizer, self.loss], 166 | feed_dict={self.rating_matrix: self.train_data[:, batch_set_idx], 167 | self.rating_matrix_mask: self.train_data_mask[:, batch_set_idx] 168 | }) 169 | if self.verbose and i % self.display_step == 0: 170 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 171 | if self.verbose: 172 | print("one iteration: %s seconds." % (time.time() - start_time)) 173 | 174 | def test(self, test_data): 175 | self.reconstruction = self.sess.run(self.layer_2, feed_dict={self.rating_matrix: self.train_data, 176 | self.rating_matrix_mask: 177 | self.train_data_mask}) 178 | error = 0 179 | error_mae = 0 180 | test_set = list(test_data.keys()) 181 | for (u, i) in test_set: 182 | pred_rating_test = self.predict(u, i) 183 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 184 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 185 | print("RMSE:" + str(RMSE(error, len(test_set))) + "; MAE:" + str(MAE(error_mae, len(test_set)))) 186 | 187 | def execute(self, train_data, test_data): 188 | self.train_data = self._data_process(train_data.transpose()) 189 | self.train_data_mask = scipy.sign(self.train_data) 190 | init = tf.global_variables_initializer() 191 | self.sess.run(init) 192 | for epoch in range(self.epochs): 193 | self.train(train_data) 194 | if (epoch) % self.T == 0: 195 | print("Epoch: %04d; " % (epoch), end='') 196 | self.test(test_data) 197 | 198 | def save(self, path): 199 | saver = tf.train.Saver() 200 | saver.save(self.sess, path) 201 | 202 | def predict(self, user_id, item_id): 203 | return self.reconstruction[item_id, user_id] 204 | 205 | def _data_process(self, data): 206 | output = np.zeros((self.num_item, self.num_user)) 207 | for u in range(self.num_user): 208 | for i in range(self.num_item): 209 | output[i, u] = data.get((i, u)) 210 | return output 211 | -------------------------------------------------------------------------------- /models/rating_prediction/fm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Matrix Factorization with tensorflow. 3 | Reference: Koren, Yehuda, Robert Bell, and Chris Volinsky. "Matrix factorization techniques for recommender systems." Computer 42.8 (2009). 
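The FM class defined below scores second-order feature interactions with the usual factorization-machine identity, sum over i < j of <v_i, v_j> = 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), evaluated per factor dimension and then reduced to a scalar. A minimal numpy sketch of that identity (illustrative names only, not the graph variables):

    import numpy as np
    emb = np.random.randn(3, 64)                             # embeddings of the active features
    pairwise = 0.5 * (emb.sum(0) ** 2 - (emb ** 2).sum(0))   # per-factor interaction vector
    second_order = pairwise.sum()                            # scalar second-order term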
4 | Orginal Implementation: 5 | """ 6 | 7 | import tensorflow as tf 8 | import time 9 | from sklearn.metrics import mean_squared_error 10 | import math 11 | 12 | from utils.evaluation.RatingMetrics import * 13 | 14 | __author__ = "Shuai Zhang" 15 | __copyright__ = "Copyright 2018, The DeepRec Project" 16 | 17 | __license__ = "GPL" 18 | __version__ = "1.0.0" 19 | __maintainer__ = "Shuai Zhang" 20 | __email__ = "cheungdaven@gmail.com" 21 | __status__ = "Development" 22 | 23 | class FM(): 24 | 25 | def __init__(self, sess, num_user, num_item, learning_rate = 0.05, reg_rate = 0.01, epoch = 500, batch_size = 128, show_time = False, T =2, display_step= 1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.show_time = show_time 34 | self.T = T 35 | self.display_step = display_step 36 | print("FM.") 37 | 38 | 39 | def build_network(self, feature_M, num_factor = 64): 40 | 41 | 42 | # model dependent arguments 43 | self.train_features = tf.placeholder(tf.int32, shape=[None, None]) 44 | self.y = tf.placeholder(tf.float32, shape=[None, 1]) 45 | self.dropout_keep = tf.placeholder(tf.float32) 46 | 47 | self.feature_embeddings = tf.Variable(tf.random_normal([feature_M, num_factor], mean=0.0, stddev=0.01)) 48 | 49 | self.feature_bias = tf.Variable(tf.random_uniform([feature_M, 1], 0.0, 0.0)) 50 | self.bias = tf.Variable(tf.constant(0.0)) 51 | 52 | 53 | nonzero_embeddings = tf.nn.embedding_lookup(self.feature_embeddings, self.train_features) 54 | 55 | self.summed_features_embedding = tf.reduce_sum(nonzero_embeddings, 1) 56 | self.squared_summed_features_embedding = tf.square(self.summed_features_embedding) 57 | self.squared_features_embedding = tf.square(nonzero_embeddings) 58 | self.summed_squared_features_embedding = tf.reduce_sum(self.squared_features_embedding, 1) 59 | 60 | self.FM = 0.5 * tf.subtract(self.summed_squared_features_embedding, self.squared_summed_features_embedding) 61 | # if batch_norm: 62 | # self.FM = self 63 | self.FM = tf.nn.dropout(self.FM, self.dropout_keep) 64 | 65 | bilinear = tf.reduce_sum(self.FM, 1, keep_dims=True) 66 | self.f_b = tf.reduce_sum(tf.nn.embedding_lookup(self.feature_bias, self.train_features), 1) 67 | b = self.bias * tf.ones_like(self.y) 68 | self.pred_rating = tf.add_n([bilinear, self.f_b, b]) 69 | 70 | self.loss = tf.nn.l2_loss(tf.subtract(self.y, self.pred_rating)) \ 71 | + tf.contrib.layers.l2_regularizer(self.reg_rate)(self.feature_embeddings) 72 | 73 | 74 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 75 | 76 | def prepare_data(self, train_data, test_data): 77 | 78 | 79 | print("data preparation finished.") 80 | return self 81 | 82 | 83 | def train(self, train_data): 84 | self.num_training = len(train_data['Y']) 85 | total_batch = int( self.num_training/ self.batch_size) 86 | 87 | rng_state = np.random.get_state() 88 | np.random.shuffle(train_data['Y']) 89 | np.random.set_state(rng_state) 90 | np.random.shuffle(train_data['X']) 91 | # train 92 | for i in range(total_batch): 93 | start_time = time.time() 94 | batch_y = train_data['Y'][i * self.batch_size:(i + 1) * self.batch_size] 95 | batch_x = train_data['X'][i * self.batch_size:(i + 1) * self.batch_size] 96 | print(batch_x) 97 | loss, opt = self.sess.run((self.loss, self.optimizer), feed_dict={self.train_features: batch_x, 98 | self.y: batch_y, 99 | self.dropout_keep:0.5}) 100 | if i % 
self.display_step == 0: 101 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 102 | if self.show_time: 103 | print("one iteration: %s seconds." % (time.time() - start_time)) 104 | 105 | def test(self, test_data): 106 | num_example = len(test_data['Y']) 107 | feed_dict = {self.train_features: test_data['X'], self.y: test_data['Y'],self.dropout_keep: 1.0} 108 | predictions = self.sess.run((self.pred_rating), feed_dict=feed_dict) 109 | y_pred = np.reshape(predictions, (num_example,)) 110 | y_true = np.reshape(test_data['Y'], (num_example,)) 111 | predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true)) # bound the lower values 112 | predictions_bounded = np.minimum(predictions_bounded, 113 | np.ones(num_example) * max(y_true)) # bound the higher values 114 | RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded)) 115 | 116 | print("RMSE:" + str(RMSE)) 117 | 118 | def execute(self, train_data, test_data): 119 | 120 | init = tf.global_variables_initializer() 121 | self.sess.run(init) 122 | 123 | for epoch in range(self.epochs): 124 | print("Epoch: %04d;" % (epoch)) 125 | self.train(train_data) 126 | if (epoch) % self.T == 0: 127 | self.test(test_data) 128 | 129 | def save(self, path): 130 | saver = tf.train.Saver() 131 | saver.save(self.sess, path) 132 | 133 | def predict(self, user_id, item_id): 134 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 135 | 136 | -------------------------------------------------------------------------------- /models/rating_prediction/fml.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/rating_prediction/fml.py -------------------------------------------------------------------------------- /models/rating_prediction/mf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Matrix Factorization with tensorflow. 3 | Reference: Koren, Yehuda, Robert Bell, and Chris Volinsky. "Matrix factorization techniques for recommender systems." Computer 42.8 (2009). 
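The predicted rating is the biased dot product r_hat(u, i) = p_u . q_i + b_u + b_i + mu, with the global mean mu added to pred_rating in execute() below. A minimal numpy sketch (illustrative values only):

    import numpy as np
    p_u, q_i = np.random.randn(30), np.random.randn(30)   # num_factor = 30
    b_u, b_i, mu = 0.1, -0.2, 3.5
    r_hat = p_u @ q_i + b_u + b_i + mu                     # biased dot-product prediction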
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RatingMetrics import * 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class MF(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.01, epoch=500, batch_size=128, 24 | show_time=False, T=2, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.show_time = show_time 33 | self.T = T 34 | self.display_step = display_step 35 | print("MF.") 36 | 37 | def build_network(self, num_factor=30): 38 | 39 | # model dependent arguments 40 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 41 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 42 | self.y = tf.placeholder("float", [None], 'rating') 43 | 44 | self.P = tf.Variable(tf.random_normal([self.num_user, num_factor], stddev=0.01)) 45 | self.Q = tf.Variable(tf.random_normal([self.num_item, num_factor], stddev=0.01)) 46 | 47 | self.B_U = tf.Variable(tf.random_normal([self.num_user], stddev=0.01)) 48 | self.B_I = tf.Variable(tf.random_normal([self.num_item], stddev=0.01)) 49 | 50 | user_latent_factor = tf.nn.embedding_lookup(self.P, self.user_id) 51 | item_latent_factor = tf.nn.embedding_lookup(self.Q, self.item_id) 52 | user_bias = tf.nn.embedding_lookup(self.B_U, self.user_id) 53 | item_bias = tf.nn.embedding_lookup(self.B_I, self.item_id) 54 | 55 | self.pred_rating = tf.reduce_sum(tf.multiply(user_latent_factor, item_latent_factor), 1) + user_bias + item_bias 56 | 57 | def train(self, train_data): 58 | self.num_training = len(self.rating) 59 | total_batch = int(self.num_training / self.batch_size) 60 | idxs = np.random.permutation(self.num_training) # shuffled ordering 61 | user_random = list(self.user[idxs]) 62 | item_random = list(self.item[idxs]) 63 | rating_random = list(self.rating[idxs]) 64 | # train 65 | for i in range(total_batch): 66 | start_time = time.time() 67 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 68 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 69 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size] 70 | 71 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user, 72 | self.item_id: batch_item, 73 | self.y: batch_rating 74 | }) 75 | if i % self.display_step == 0: 76 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 77 | if self.show_time: 78 | print("one iteration: %s seconds." 
% (time.time() - start_time))
79 | 
80 |     def test(self, test_data):
81 |         error = 0
82 |         error_mae = 0
83 |         test_set = list(test_data.keys())
84 |         for (u, i) in test_set:
85 |             pred_rating_test = self.predict([u], [i])
86 |             error += (float(test_data.get((u, i))) - pred_rating_test) ** 2
87 |             error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test))
88 |         print("RMSE:" + str(RMSE(error, len(test_set))[0]) + "; MAE:" + str(MAE(error_mae, len(test_set))[0]))
89 | 
90 |     def execute(self, train_data, test_data):
91 | 
92 |         t = train_data.tocoo()
93 |         self.user = t.row.reshape(-1)
94 |         self.item = t.col.reshape(-1)
95 |         self.rating = t.data
96 |         self.pred_rating += np.mean(list(self.rating))
97 |         self.loss = tf.reduce_sum(tf.square(self.y - self.pred_rating)) \
98 |                     + self.reg_rate * (
99 |             tf.nn.l2_loss(self.B_I) + tf.nn.l2_loss(self.B_U) + tf.nn.l2_loss(self.P) + tf.nn.l2_loss(self.Q))
100 |         # tf.norm(self.B_I) + tf.norm(self.B_U) + tf.norm(self.P) + tf.norm(self.Q))
101 |         # tf.reduce_sum(tf.square(P))
102 |         # tf.reduce_sum(tf.multiply(P,P))
103 |         self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
104 |         init = tf.global_variables_initializer()
105 |         self.sess.run(init)
106 | 
107 |         for epoch in range(self.epochs):
108 |             print("Epoch: %04d;" % (epoch))
109 |             self.train(train_data)
110 |             if (epoch) % self.T == 0:
111 |                 self.test(test_data)
112 | 
113 |     def save(self, path):
114 |         saver = tf.train.Saver()
115 |         saver.save(self.sess, path)
116 | 
117 |     def predict(self, user_id, item_id):
118 |         return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0]
119 | 
--------------------------------------------------------------------------------
/models/rating_prediction/nfm.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Implementation of Neural Factorization Machines (NFM) with tensorflow.
3 | Reference: He, Xiangnan, and Tat-Seng Chua. "Neural Factorization Machines for Sparse Predictive Analytics." SIGIR 2017.
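The second-order feature interactions are computed with Bi-Interaction pooling, 0.5 * ((sum_i v_i * x_i)^2 - sum_i (v_i * x_i)^2), and the pooled vector is passed through a fully-connected ReLU layer before the final prediction. build_network() expects the total number of one-hot features (feature_M), as used in test/test_rating_pred.py.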
4 | Orginal Implementation: 5 | """ 6 | 7 | import tensorflow as tf 8 | import time 9 | from sklearn.metrics import mean_squared_error 10 | import math 11 | 12 | from utils.evaluation.RatingMetrics import * 13 | 14 | __author__ = "Shuai Zhang" 15 | __copyright__ = "Copyright 2018, The DeepRec Project" 16 | 17 | __license__ = "GPL" 18 | __version__ = "1.0.0" 19 | __maintainer__ = "Shuai Zhang" 20 | __email__ = "cheungdaven@gmail.com" 21 | __status__ = "Development" 22 | 23 | class NFM(): 24 | 25 | def __init__(self, sess, num_user, num_item, learning_rate = 0.05, reg_rate = 0.01, epoch = 500, batch_size = 128, show_time = False, T =2, display_step= 1000): 26 | self.learning_rate = learning_rate 27 | self.epochs = epoch 28 | self.batch_size = batch_size 29 | self.reg_rate = reg_rate 30 | self.sess = sess 31 | self.num_user = num_user 32 | self.num_item = num_item 33 | self.show_time = show_time 34 | self.T = T 35 | self.display_step = display_step 36 | print("NFM.") 37 | 38 | 39 | def build_network(self, feature_M, num_factor = 128, num_hidden = 128): 40 | 41 | 42 | # model dependent arguments 43 | self.train_features = tf.placeholder(tf.int32, shape=[None, None]) 44 | self.y = tf.placeholder(tf.float32, shape=[None, 1]) 45 | self.dropout_keep = tf.placeholder(tf.float32) 46 | 47 | self.feature_embeddings = tf.Variable(tf.random_normal([feature_M, num_factor], mean=0.0, stddev=0.01)) 48 | 49 | self.feature_bias = tf.Variable(tf.random_uniform([feature_M, 1], 0.0, 0.0)) 50 | self.bias = tf.Variable(tf.constant(0.0)) 51 | self.pred_weight = tf.Variable(np.random.normal(loc=0, scale= np.sqrt(2.0 / (num_factor + num_hidden)), size=(num_hidden, 1)), 52 | dtype=np.float32) 53 | 54 | nonzero_embeddings = tf.nn.embedding_lookup(self.feature_embeddings, self.train_features) 55 | 56 | self.summed_features_embedding = tf.reduce_sum(nonzero_embeddings, 1) 57 | self.squared_summed_features_embedding = tf.square(self.summed_features_embedding) 58 | self.squared_features_embedding = tf.square(nonzero_embeddings) 59 | self.summed_squared_features_embedding = tf.reduce_sum(self.squared_features_embedding, 1) 60 | 61 | self.FM = 0.5 * tf.subtract( self.squared_summed_features_embedding, self.summed_squared_features_embedding) 62 | # if batch_norm: 63 | # self.FM = self 64 | layer_1 = tf.layers.dense(inputs=self.FM, units=num_hidden, 65 | bias_initializer=tf.random_normal_initializer, 66 | kernel_initializer=tf.random_normal_initializer, activation=tf.nn.relu, 67 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 68 | 69 | self.FM = tf.matmul(tf.nn.dropout(layer_1, 0.8), self.pred_weight) 70 | 71 | 72 | 73 | bilinear = tf.reduce_sum(self.FM, 1, keep_dims=True) 74 | self.f_b = tf.reduce_sum(tf.nn.embedding_lookup(self.feature_bias, self.train_features), 1) 75 | b = self.bias * tf.ones_like(self.y) 76 | self.pred_rating = tf.add_n([bilinear, self.f_b, b]) 77 | 78 | self.loss = tf.nn.l2_loss(tf.subtract(self.y, self.pred_rating)) \ 79 | + tf.contrib.layers.l2_regularizer(self.reg_rate)(self.feature_embeddings) 80 | 81 | 82 | self.optimizer = tf.train.AdagradOptimizer(self.learning_rate).minimize(self.loss) 83 | 84 | def prepare_data(self, train_data, test_data): 85 | 86 | 87 | print("data preparation finished.") 88 | return self 89 | 90 | 91 | def train(self, train_data): 92 | self.num_training = len(train_data['Y']) 93 | total_batch = int( self.num_training/ self.batch_size) 94 | 95 | rng_state = np.random.get_state() 96 | np.random.shuffle(train_data['Y']) 97 | 
np.random.set_state(rng_state) 98 | np.random.shuffle(train_data['X']) 99 | # train 100 | for i in range(total_batch): 101 | start_time = time.time() 102 | batch_y = train_data['Y'][i * self.batch_size:(i + 1) * self.batch_size] 103 | batch_x = train_data['X'][i * self.batch_size:(i + 1) * self.batch_size] 104 | 105 | loss, opt = self.sess.run((self.loss, self.optimizer), feed_dict={self.train_features: batch_x, 106 | self.y: batch_y, 107 | self.dropout_keep:0.5}) 108 | if i % self.display_step == 0: 109 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 110 | if self.show_time: 111 | print("one iteration: %s seconds." % (time.time() - start_time)) 112 | 113 | def test(self, test_data): 114 | # error = 0 115 | # error_mae = 0 116 | # test_set = list(test_data.keys()) 117 | # for (u, i) in test_set: 118 | # pred_rating_test = self.predict([u], [i]) 119 | # error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 120 | # error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 121 | num_example = len(test_data['Y']) 122 | feed_dict = {self.train_features: test_data['X'], self.y: test_data['Y'],self.dropout_keep: 1.0} 123 | predictions = self.sess.run((self.pred_rating), feed_dict=feed_dict) 124 | y_pred = np.reshape(predictions, (num_example,)) 125 | y_true = np.reshape(test_data['Y'], (num_example,)) 126 | predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true)) # bound the lower values 127 | predictions_bounded = np.minimum(predictions_bounded, 128 | np.ones(num_example) * max(y_true)) # bound the higher values 129 | RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded)) 130 | 131 | print("RMSE:" + str(RMSE)) 132 | 133 | def execute(self, train_data, test_data): 134 | 135 | init = tf.global_variables_initializer() 136 | self.sess.run(init) 137 | 138 | for epoch in range(self.epochs): 139 | print("Epoch: %04d;" % (epoch)) 140 | self.train(train_data) 141 | if (epoch) % self.T == 0 and epoch > 100: 142 | self.test(test_data) 143 | 144 | def save(self, path): 145 | saver = tf.train.Saver() 146 | saver.save(self.sess, path) 147 | 148 | def predict(self, user_id, item_id): 149 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 150 | 151 | -------------------------------------------------------------------------------- /models/rating_prediction/nnmf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Network Matrix Factorization. 3 | Reference: Dziugaite, Gintare Karolina, and Daniel M. Roy. "Neural network matrix factorization." arXiv preprint arXiv:1511.06443 (2015). 
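The network concatenates the user factor p_u, the item factor q_i and the element-wise product u_u * v_i, then feeds the result through a stack of sigmoid dense layers with a linear output unit to predict the rating.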
4 | """ 5 | 6 | import tensorflow as tf 7 | import time 8 | import numpy as np 9 | 10 | from utils.evaluation.RatingMetrics import * 11 | 12 | __author__ = "Shuai Zhang" 13 | __copyright__ = "Copyright 2018, The DeepRec Project" 14 | 15 | __license__ = "GPL" 16 | __version__ = "1.0.0" 17 | __maintainer__ = "Shuai Zhang" 18 | __email__ = "cheungdaven@gmail.com" 19 | __status__ = "Development" 20 | 21 | 22 | class NNMF(): 23 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.01, epoch=500, batch_size=256, 24 | show_time=False, T=1, display_step=1000): 25 | self.learning_rate = learning_rate 26 | self.epochs = epoch 27 | self.batch_size = batch_size 28 | self.reg_rate = reg_rate 29 | self.sess = sess 30 | self.num_user = num_user 31 | self.num_item = num_item 32 | self.show_time = show_time 33 | self.T = T 34 | self.display_step = display_step 35 | print("NNMF.") 36 | 37 | def build_network(self, num_factor_1=100, num_factor_2=10, hidden_dimension=50): 38 | print("num_factor_1=%d, num_factor_2=%d, hidden_dimension=%d" % (num_factor_1, num_factor_2, hidden_dimension)) 39 | 40 | # model dependent arguments 41 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 42 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 43 | self.y = tf.placeholder("float", [None], 'rating') 44 | 45 | P = tf.Variable(tf.random_normal([self.num_user, num_factor_1], stddev=0.01)) 46 | Q = tf.Variable(tf.random_normal([self.num_item, num_factor_1], stddev=0.01)) 47 | 48 | U = tf.Variable(tf.random_normal([self.num_user, num_factor_2], stddev=0.01)) 49 | V = tf.Variable(tf.random_normal([self.num_item, num_factor_2], stddev=0.01)) 50 | 51 | input = tf.concat(values=[tf.nn.embedding_lookup(P, self.user_id), 52 | tf.nn.embedding_lookup(Q, self.item_id), 53 | tf.multiply(tf.nn.embedding_lookup(U, self.user_id), 54 | tf.nn.embedding_lookup(V, self.item_id)) 55 | ], axis=1) 56 | 57 | layer_1 = tf.layers.dense(inputs=input, units=2 * num_factor_1 + num_factor_2, 58 | bias_initializer=tf.random_normal_initializer, 59 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 60 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 61 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension, activation=tf.sigmoid, 62 | bias_initializer=tf.random_normal_initializer, 63 | kernel_initializer=tf.random_normal_initializer, 64 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 65 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dimension, activation=tf.sigmoid, 66 | bias_initializer=tf.random_normal_initializer, 67 | kernel_initializer=tf.random_normal_initializer, 68 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 69 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dimension, activation=tf.sigmoid, 70 | bias_initializer=tf.random_normal_initializer, 71 | kernel_initializer=tf.random_normal_initializer, 72 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 73 | output = tf.layers.dense(inputs=layer_4, units=1, activation=None, 74 | bias_initializer=tf.random_normal_initializer, 75 | kernel_initializer=tf.random_normal_initializer, 76 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 77 | self.pred_rating = tf.reshape(output, [-1]) 78 | 79 | # print(np.shape(output)) 80 | # reg_losses = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 81 | self.loss = 
tf.reduce_sum(tf.square(self.y - self.pred_rating)) \ 82 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 83 | tf.norm(U) + tf.norm(V) + tf.norm(P) + tf.norm(Q)) 84 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 85 | 86 | def train(self, train_data): 87 | self.num_training = len(self.rating) 88 | total_batch = int(self.num_training / self.batch_size) 89 | idxs = np.random.permutation(self.num_training) # shuffled ordering 90 | user_random = list(self.user[idxs]) 91 | item_random = list(self.item[idxs]) 92 | rating_random = list(self.rating[idxs]) 93 | # train 94 | for i in range(total_batch): 95 | start_time = time.time() 96 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 97 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 98 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size] 99 | 100 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user, 101 | self.item_id: batch_item, 102 | self.y: batch_rating 103 | }) 104 | if i % self.display_step == 0: 105 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 106 | if self.show_time: 107 | print("one iteration: %s seconds." % (time.time() - start_time)) 108 | 109 | def test(self, test_data): 110 | error = 0 111 | error_mae = 0 112 | test_set = list(test_data.keys()) 113 | # users, items = map(list, zip(*[(1, 2), (3, 4), (5, 6)])) 114 | for (u, i) in test_set: 115 | pred_rating_test = self.predict([u], [i]) 116 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 117 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 118 | print("RMSE:" + str(RMSE(error, len(test_set))) + "; MAE:" + str(MAE(error_mae, len(test_set)))) 119 | 120 | def execute(self, train_data, test_data): 121 | init = tf.global_variables_initializer() 122 | t = train_data.tocoo() 123 | self.user = t.row.reshape(-1) 124 | self.item = t.col.reshape(-1) 125 | self.rating = t.data 126 | self.sess.run(init) 127 | for epoch in range(self.epochs): 128 | print("Epoch: %04d;" % (epoch)) 129 | self.train(train_data) 130 | if (epoch) % self.T == 0: 131 | self.test(test_data) 132 | 133 | def save(self, path): 134 | saver = tf.train.Saver() 135 | saver.save(self.sess, path) 136 | 137 | def predict(self, user_id, item_id): 138 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 139 | -------------------------------------------------------------------------------- /models/rating_prediction/nrr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Rating Regression. 3 | Reference: Piji Li, Zihao Wang, Zhaochun Ren, Lidong Bing, Wai Lam. "Neural Rating Regression with Abstractive Tips Generation for Recommendation 4 | Authors." 
https://arxiv.org/pdf/1708.00154.pdf 5 | """ 6 | 7 | 8 | 9 | 10 | import tensorflow as tf 11 | import time 12 | import numpy as np 13 | 14 | from utils.evaluation.RatingMetrics import * 15 | 16 | __author__ = "Shuai Zhang" 17 | __copyright__ = "Copyright 2018, The DeepRec Project" 18 | 19 | __license__ = "GPL" 20 | __version__ = "1.0.0" 21 | __maintainer__ = "Shuai Zhang" 22 | __email__ = "cheungdaven@gmail.com" 23 | __status__ = "Development" 24 | 25 | 26 | class NRR(): 27 | def __init__(self, sess, num_user, num_item, learning_rate=0.001, reg_rate=0.1, epoch=50, batch_size=256, 28 | show_time=False, T=1, display_step=1000): 29 | self.learning_rate = learning_rate 30 | self.epochs = epoch 31 | self.batch_size = batch_size 32 | self.reg_rate = reg_rate 33 | self.sess = sess 34 | self.num_user = num_user 35 | self.num_item = num_item 36 | self.show_time = show_time 37 | self.T = T 38 | self.display_step = display_step 39 | print("NRR.") 40 | 41 | def build_network(self, num_factor_user=40, num_factor_item=40, d=50, hidden_dimension=40): 42 | 43 | # model dependent arguments 44 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 45 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 46 | self.y = tf.placeholder("float", [None], 'rating') 47 | 48 | U = tf.Variable(tf.random_normal([self.num_user, num_factor_user], stddev=0.01)) 49 | V = tf.Variable(tf.random_normal([self.num_item, num_factor_item], stddev=0.01)) 50 | b = tf.Variable(tf.random_normal([d])) 51 | 52 | user_latent_factor = tf.nn.embedding_lookup(U, self.user_id) 53 | item_latent_factor = tf.nn.embedding_lookup(V, self.item_id) 54 | 55 | W_User = tf.Variable(tf.random_normal([num_factor_user, d], stddev=0.01)) 56 | W_Item = tf.Variable(tf.random_normal([num_factor_item, d], stddev=0.01)) 57 | 58 | input = tf.matmul(user_latent_factor, W_User) + tf.matmul(item_latent_factor, W_Item) + b 59 | 60 | layer_1 = tf.layers.dense(inputs=input, units=d, bias_initializer=tf.random_normal_initializer, 61 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 62 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 63 | layer_2 = tf.layers.dense(inputs=layer_1, units=hidden_dimension, activation=tf.sigmoid, 64 | bias_initializer=tf.random_normal_initializer, 65 | kernel_initializer=tf.random_normal_initializer, 66 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 67 | layer_3 = tf.layers.dense(inputs=layer_2, units=hidden_dimension, activation=tf.sigmoid, 68 | bias_initializer=tf.random_normal_initializer, 69 | kernel_initializer=tf.random_normal_initializer, 70 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 71 | layer_4 = tf.layers.dense(inputs=layer_3, units=hidden_dimension, activation=tf.sigmoid, 72 | bias_initializer=tf.random_normal_initializer, 73 | kernel_initializer=tf.random_normal_initializer, 74 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 75 | output = tf.layers.dense(inputs=layer_4, units=1, activation=None, 76 | bias_initializer=tf.random_normal_initializer, 77 | kernel_initializer=tf.random_normal_initializer, 78 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 79 | self.pred_rating = tf.reshape(output, [-1]) 80 | 81 | # print(np.shape(output)) 82 | reg_losses = tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 83 | self.loss = tf.reduce_sum(tf.square(self.y - self.pred_rating)) \ 84 | + 
tf.losses.get_regularization_loss() + self.reg_rate * ( 85 | tf.norm(U) + tf.norm(V) + tf.norm(b) + tf.norm(W_Item) + tf.norm(W_User)) 86 | self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 87 | 88 | def train(self, train_data): 89 | self.num_training = len(self.rating) 90 | total_batch = int(self.num_training / self.batch_size) 91 | idxs = np.random.permutation(self.num_training) # shuffled ordering 92 | user_random = list(self.user[idxs]) 93 | item_random = list(self.item[idxs]) 94 | rating_random = list(self.rating[idxs]) 95 | 96 | # train 97 | for i in range(total_batch): 98 | start_time = time.time() 99 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 100 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 101 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size] 102 | 103 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user, 104 | self.item_id: batch_item, 105 | self.y: batch_rating 106 | }) 107 | if i % self.display_step == 0: 108 | print("Index: %04d; cost= %.9f" % (i + 1, np.mean(loss))) 109 | if self.show_time: 110 | print("one iteration: %s seconds." % (time.time() - start_time)) 111 | 112 | def test(self, test_data): 113 | error = 0 114 | error_mae = 0 115 | test_set = list(test_data.keys()) 116 | for (u, i) in test_set: 117 | pred_rating_test = self.predict([u], [i], False) 118 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 119 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 120 | print("RMSE:" + str(RMSE(error, len(test_set))) + "; MAE:" + str(MAE(error_mae, len(test_set)))) 121 | 122 | def execute(self, train_data, test_data): 123 | init = tf.global_variables_initializer() 124 | t = train_data.tocoo() 125 | self.user = t.row.reshape(-1) 126 | self.item = t.col.reshape(-1) 127 | self.rating = t.data 128 | self.sess.run(init) 129 | 130 | for epoch in range(self.epochs): 131 | print("Epoch: %04d;" % (epoch)) 132 | self.train(train_data) 133 | if (epoch) % self.T == 0: 134 | self.test(test_data) 135 | 136 | def save(self, path): 137 | saver = tf.train.Saver() 138 | saver.save(self.sess, path) 139 | 140 | def predict(self, user_id, item_id, prt=True): 141 | score = self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 142 | if prt: 143 | print(score) 144 | return score 145 | -------------------------------------------------------------------------------- /models/seq_rec/gru4rec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/models/seq_rec/gru4rec.py -------------------------------------------------------------------------------- /test/KMMD.py: -------------------------------------------------------------------------------- 1 | # import random 2 | import tensorflow as tf 3 | from functools import partial 4 | # from numpy import * 5 | 6 | def compute_pairwise_distances(x, y): 7 | if not len(x.get_shape()) == len(y.get_shape()) == 2: 8 | raise ValueError('Both inputs should be matrices.') 9 | if x.get_shape().as_list()[1] != y.get_shape().as_list()[1]: 10 | raise ValueError('The number of features should be the same.') 11 | 12 | norm = lambda x: tf.reduce_sum(tf.square(x), 1) 13 | return tf.transpose(norm(tf.expand_dims(x, 2) - tf.transpose(y))) 14 | 15 | 16 | def gaussian_kernel_matrix(x, y, sigmas): 17 | beta = 1. / (2. 
* (tf.expand_dims(sigmas, 1))) 18 | dist = compute_pairwise_distances(x, y) 19 | s = tf.matmul(beta, tf.reshape(dist, (1, -1))) 20 | return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist)) 21 | 22 | 23 | def maximum_mean_discrepancy(x, y, kernel=gaussian_kernel_matrix): 24 | cost = tf.reduce_mean(kernel(x, x)) 25 | cost += tf.reduce_mean(kernel(y, y)) 26 | cost -= 2 * tf.reduce_mean(kernel(x, y)) 27 | # We do not allow the loss to become negative. 28 | cost = tf.where(cost > 0, cost, 0, name='value') 29 | return cost 30 | 31 | 32 | def KMMD(Xs,Xt): 33 | # sigmas=[1e-2,0.1,1,5,10,20,25,30,35,100] 34 | # guassian_kernel=partial(kernel,sigmas=tf.constant(sigmas)) 35 | # cost = tf.reduce_mean(guassian_kernel(Xs, Xs)) 36 | # cost += tf.reduce_mean(guassian_kernel(Xt, Xt)) 37 | # cost -= 2 * tf.reduce_mean(guassian_kernel(Xs, Xt)) 38 | # cost = tf.where(cost > 0, cost, 0) 39 | 40 | sigmas = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 20, 25, 30, 35, 100, 1e3, 1e4, 1e5, 1e6] 41 | gaussian_kernel = partial(gaussian_kernel_matrix, sigmas=tf.constant(sigmas)) 42 | cost= maximum_mean_discrepancy(Xs, Xt, kernel=gaussian_kernel) 43 | 44 | 45 | return cost 46 | 47 | def kernel(X, Y, sigmas): 48 | beta = 1.0/(2.0 * (tf.expand_dims(sigmas,1))) 49 | dist = Cal_pairwise_dist(X,Y) 50 | s = tf.matmul(beta, tf.reshape(dist,(1,-1))) 51 | return tf.reshape(tf.reduce_sum(tf.exp(-s), 0), tf.shape(dist)) -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/test/__init__.py -------------------------------------------------------------------------------- /test/center_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | from tensorflow.python.ops import array_ops 4 | from tensorflow.python.ops import gen_math_ops 5 | 6 | 7 | 8 | def self_center(data, segment_ids, num_segments,name=None): 9 | segment_ids=tf.reshape(segment_ids,[-1]) 10 | # a=tf.where(tf.equal(segment_ids,1)) 11 | # a=tf.reshape(a,[-1,]) 12 | center1=tf.gather(data,tf.reshape(tf.where(tf.equal(segment_ids,0)),[-1,])) 13 | center0=tf.gather(data,tf.reshape(tf.where(tf.equal(segment_ids,1)),[-1,])) 14 | t1=tf.reduce_mean(center1,axis=0,keep_dims=True) 15 | t2=tf.reduce_mean(center0,axis=0,keep_dims=True) 16 | return tf.concat([t1,t2],axis=0) 17 | 18 | 19 | def _unsorted_segment_N(data, segment_ids, num_segments): 20 | """ Helper function for unsorted_segment_mean/_sqrtN. Computes the number 21 | of segment entries with 0-entries set to 1 to allow division by N. 
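The per-segment counts are reshaped to [num_segments, 1, ..., 1] so they broadcast against the summed data, and are clamped to a minimum of 1.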
22 | """ 23 | # bincount doesn't support negative indices so we use unsorted_segment_sum 24 | segment_ids_shape = array_ops.shape_internal(segment_ids) 25 | ones_tensor = array_ops.ones(segment_ids_shape, dtype=data.dtype) 26 | N = gen_math_ops.unsorted_segment_sum(ones_tensor, segment_ids, num_segments) 27 | # add dimensions for all non-reduced axes 28 | ndims_output = data.shape.ndims - segment_ids.shape.ndims 29 | broadcast_shape = [num_segments] + [1] * ndims_output 30 | N = array_ops.reshape(N, broadcast_shape) 31 | return gen_math_ops.maximum(N, 1) 32 | 33 | def unsorted_segment_mean(data, segment_ids, num_segments,name=None): 34 | with ops.name_scope(name, "UnsortedSegmentMean"): 35 | data = ops.convert_to_tensor(data) 36 | segment_ids = ops.convert_to_tensor(segment_ids) 37 | N = _unsorted_segment_N(data, segment_ids, num_segments) 38 | summed = gen_math_ops.unsorted_segment_sum(data, segment_ids, num_segments) 39 | return summed / N 40 | 41 | 42 | def self_center(data, segment_ids, num_segments,name=None): 43 | segment_ids=tf.reshape(segment_ids,[-1]) 44 | # a=tf.where(tf.equal(segment_ids,1)) 45 | # a=tf.reshape(a,[-1,]) 46 | center1=tf.gather(data,tf.reshape(tf.where(tf.equal(segment_ids,0)),[-1,])) 47 | center0=tf.gather(data,tf.reshape(tf.where(tf.equal(segment_ids,1)),[-1,])) 48 | t1=tf.reduce_mean(center1,axis=0,keep_dims=True) 49 | t2=tf.reduce_mean(center0,axis=0,keep_dims=True) 50 | return tf.concat([t1,t2],axis=0) 51 | def get_center_loss(features, labels, num_classes): 52 | 53 | labels = tf.reshape(labels, [-1]) 54 | labels = tf.cast(labels, dtype=tf.int32) 55 | ############################################################## 56 | centers0=unsorted_segment_mean(features,labels,num_classes) 57 | EdgeWeights=tf.ones((num_classes,num_classes))-tf.eye(num_classes) 58 | margin=tf.constant(0.9,dtype="float32") 59 | margin1=tf.constant(0.2,dtype="float32") 60 | norm = lambda x: tf.reduce_sum(tf.square(x), 1) 61 | center_pairwise_dist = tf.transpose(norm(tf.expand_dims(centers0, 2) - tf.transpose(centers0))) 62 | loss_0= tf.reduce_sum(tf.multiply(tf.maximum(0.0, margin-center_pairwise_dist),EdgeWeights)) 63 | ########################################################################### 64 | centers_batch = tf.gather(centers0, labels) 65 | loss_1 = tf.maximum(0.0,tf.nn.l2_loss(features - centers_batch)- margin1) 66 | 67 | return (loss_0 + loss_1)/(2*10+2*2) 68 | -------------------------------------------------------------------------------- /test/process_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os 5 | import os.path 6 | import random 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 9 | import pandas as pd 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from sklearn.model_selection import train_test_split 13 | from scipy.sparse import csr_matrix 14 | n_users=6040 15 | n_items=3952 16 | def load_data_neg(path="../data/ml100k/movielens_100k.dat", header=['user_id', 'item_id', 'rating', 'category'], 17 | test_size=0.2, sep="\t"): 18 | df = pd.read_csv(path, sep=sep, names=header, engine='python') 19 | 20 | n_users = df.user_id.unique().shape[0] 21 | n_items = df.item_id.unique().shape[0] 22 | 23 | train_data, test_data = train_test_split(df, test_size=test_size) 24 | train_data = pd.DataFrame(train_data) 25 | test_data = pd.DataFrame(test_data) 26 | 27 | train_row = [] 28 | train_col = 
[] 29 | train_rating = [] 30 | 31 | for line in train_data.itertuples(): 32 | u = line[1] - 1 33 | i = line[2] - 1 34 | train_row.append(u) 35 | train_col.append(i) 36 | train_rating.append(1) 37 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 38 | 39 | # all_items = set(np.arange(n_items)) 40 | # neg_items = {} 41 | # for u in range(n_users): 42 | # neg_items[u] = list(all_items - set(train_matrix.getrow(u).nonzero()[1])) 43 | 44 | test_row = [] 45 | test_col = [] 46 | test_rating = [] 47 | for line in test_data.itertuples(): 48 | test_row.append(line[1] - 1) 49 | test_col.append(line[2] - 1) 50 | test_rating.append(1) 51 | test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 52 | 53 | test_dict = {} 54 | for u in range(n_users): 55 | test_dict[u] = test_matrix.getrow(u).nonzero()[1] 56 | 57 | print("Load data finished. Number of users:", n_users, "Number of items:", n_items) 58 | return train_matrix.todok(), test_dict, n_users, n_items 59 | 60 | 61 | path_rat="../ml-1m/ratings.dat" 62 | path_user="../ml-1m/users.dat" 63 | path_movies="../ml-1m/movies.dat" 64 | df_rating = pd.read_csv(path_rat, sep="::", header=None,names=['user_id', 'item_id', 'rating', 'time'], engine='python') 65 | df_user=pd.read_csv(path_user, sep="::", header=None,names=['user_id','Gender','Age','Occupation','Zip-code'], engine='python') 66 | df_movie=pd.read_csv(path_movies, sep="::", header=None,names=['item_id','Title','Genres'], engine='python') 67 | # print(df_rating.loc[0]) 68 | # print('=======') 69 | # print(df_user.loc[0]) 70 | # print('=======') 71 | # print(df_movie.loc[0]) 72 | 73 | # user_gender={} 74 | # for i in df_user.iterrows(): 75 | # if i[1][0] not in user_gender: 76 | # if i[1][1]=='F': 77 | # user_gender[i[1][0]]=0 78 | # else: 79 | # user_gender[i[1][0]]=1 80 | 81 | # *1: "Under 18" 82 | # *18: "18-24" 83 | # *25: "25-34" 84 | # *35: "35-44" 85 | # *45: "45-49" 86 | # *50: "50-55" 87 | # *56: "56+" 88 | # user_age={} 89 | # for i in df_user.iterrows(): 90 | # if i[1][0] not in user_age: 91 | # if i[1][2]==1: 92 | # user_age[i[1][0]]=0 93 | # elif i[1][2] == 18: 94 | # user_age[i[1][0]] = 1 95 | # elif i[1][2] == 25: 96 | # user_age[i[1][0]] = 2 97 | # elif i[1][2] == 35: 98 | # user_age[i[1][0]] = 3 99 | # elif i[1][2] == 45: 100 | # user_age[i[1][0]] = 4 101 | # elif i[1][2] == 50: 102 | # user_age[i[1][0]] = 5 103 | # elif i[1][2] == 56: 104 | # user_age[i[1][0]] = 6 105 | # else: 106 | # user_age[i[1][0]] = i[1][2] 107 | 108 | # - Genres are pipe-separated and are selected from the following genres: 109 | # 110 | # * Action 111 | # * Adventure 112 | # * Animation 113 | # * Children's 114 | # * Comedy 115 | # * Crime 116 | # * Documentary 117 | # * Drama 118 | # * Fantasy 119 | # * Film-Noir 120 | # * Horror 121 | # * Musical 122 | # * Mystery 123 | # * Romance 124 | # * Sci-Fi 125 | # * Thriller 126 | # * War 127 | # # * Western 128 | movie_style={} 129 | for i in df_movie.iterrows(): 130 | if i[1][0] not in movie_style: 131 | # print(i[1][2].split('|')) 132 | m=[] 133 | for j in i[1][2].split('|'): 134 | if j=='Action': 135 | m.append(0) 136 | if j=='Adventure': 137 | m.append(1) 138 | if j=='Animation': 139 | m.append(2) 140 | if j=='Children\'s': 141 | m.append(3) 142 | if j=='Comedy': 143 | m.append(4) 144 | if j=='Crime': 145 | m.append(5) 146 | if j=='Documentary': 147 | m.append(6) 148 | if j=='Film-Noir': 149 | m.append(7) 150 | if j=='Fantasy': 151 | m.append(8) 152 | if j=='Horror': 153 | m.append(9) 
154 | if j=='Musical': 155 | m.append(10) 156 | if j=='Mystery': 157 | m.append(11) 158 | if j=='Romance': 159 | m.append(12) 160 | if j=='Sci-Fi': 161 | m.append(13) 162 | if j=='Thriller': 163 | m.append(14) 164 | if j=='War': 165 | m.append(15) 166 | if j=='Western': 167 | m.append(16) 168 | if j=='Drama': 169 | m.append(17) 170 | movie_style[i[1][0]]=m 171 | 172 | movie_group={0:[],1:[],2:[],3:[],4:[],5:[],6:[],7:[],8:[],9:[],10:[],11:[],12:[],13:[],14:[],15:[],16:[],17:[]} 173 | for i in df_movie.iterrows(): 174 | 175 | for j in i[1][2].split('|'): 176 | if j=='Action': 177 | movie_group[0].append(i[1][0]) 178 | if j=='Adventure': 179 | movie_group[1].append(i[1][0]) 180 | if j=='Animation': 181 | movie_group[2].append(i[1][0]) 182 | if j=='Children\'s': 183 | movie_group[3].append(i[1][0]) 184 | if j=='Comedy': 185 | movie_group[4].append(i[1][0]) 186 | if j=='Crime': 187 | movie_group[5].append(i[1][0]) 188 | if j=='Documentary': 189 | movie_group[6].append(i[1][0]) 190 | if j=='Film-Noir': 191 | movie_group[7].append(i[1][0]) 192 | if j=='Fantasy': 193 | movie_group[8].append(i[1][0]) 194 | if j=='Horror': 195 | movie_group[9].append(i[1][0]) 196 | if j=='Musical': 197 | movie_group[10].append(i[1][0]) 198 | if j=='Mystery': 199 | movie_group[11].append(i[1][0]) 200 | if j=='Romance': 201 | movie_group[12].append(i[1][0]) 202 | if j=='Sci-Fi': 203 | movie_group[13].append(i[1][0]) 204 | if j=='Thriller': 205 | movie_group[14].append(i[1][0]) 206 | if j=='War': 207 | movie_group[15].append(i[1][0]) 208 | if j=='Western': 209 | movie_group[16].append(i[1][0]) 210 | if j=='Drama': 211 | movie_group[17].append(i[1][0]) 212 | 213 | 214 | # mclicks={} 215 | # labels={} 216 | # j=0 217 | # for i in df_rating.iterrows(): 218 | # j=j+1 219 | # if i[1][0] not in clicks: 220 | # m=[] 221 | # n=[] 222 | # m.append(i[1][1]) 223 | # n.append(1) 224 | # clicks[i[1][0]]=m 225 | # labels[i[1][0]]=n 226 | # else: 227 | # clicks[i[1][0]].append(i[1][1]) 228 | # labels[i[1][0]].append(1) 229 | # if j==100: 230 | # break 231 | ## generate long-tail 232 | train_row = [] 233 | train_col = [] 234 | train_rating = [] 235 | 236 | for line in df_rating.itertuples(): 237 | u = line[1] - 1 238 | i = line[2] - 1 239 | train_row.append(u) 240 | train_col.append(i) 241 | # print(max(train_row)) 242 | # print(max(train_col)) 243 | train_rating.append(1) 244 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 245 | train=train_matrix.A 246 | sum_m=np.sum(train,axis=0) 247 | cold=np.where(sum_m==0) 248 | s=[] 249 | for i,k in enumerate(sum_m): 250 | s.append((k,i)) 251 | long_tail=sorted(s,reverse=True) 252 | hot = [x[1] for x in long_tail[0:800]] 253 | long_item = [x[1] for x in long_tail[800:]] 254 | # # x_id=[] 255 | # # y=[] 256 | # # for i in long_tail: 257 | # # x_id.append(i[1]) 258 | # # y.append(i[0]) 259 | # # x=list(np.arange(len(sum))) 260 | # # plt.plot(x,y) 261 | # # plt.show() 262 | # 263 | # #generate source train 264 | # 265 | neg_items={} 266 | all_items = set(np.arange(n_items))-set(cold[0]) 267 | source_train={} 268 | 269 | j=0 270 | for i in df_rating.iterrows(): 271 | print(i) 272 | item_append = [] 273 | item_append.append(i[1][1]) 274 | neg_items[i[1][0]-1] = list(all_items - set(train_matrix.getrow(i[1][0]-1).nonzero()[1])) 275 | list_of_random_items=random.sample(neg_items[i[1][0]-1], 2) 276 | item_append += list_of_random_items 277 | source_train[j]=(i[1][0],item_append) 278 | j=j+1 279 | # if j==100: 280 | # # # 281 | # break 282 | output = 
open('source_train.txt', 'w') 283 | for i in source_train: 284 | output.write(str(source_train[i][0])) 285 | output.write(';;') 286 | for j in source_train[i][1]: 287 | output.write(str(j)) 288 | output.write('|') 289 | output.write('\n') 290 | output.close() 291 | # target_train={} 292 | # k=0 293 | # for i in df_rating.iterrows(): 294 | # a=[] 295 | # for j in movie_style[i[1][1]]: 296 | # a+=movie_group[j] 297 | # # print(i[1][0]-1) 298 | # # print('========') 299 | # # print(k) 300 | # s=source_train[k] 301 | # # a=a-set(s) 302 | # target_train[k]=(i[1][0],list(random.sample(a, 6))) 303 | # k=k+1 304 | # if k==100: 305 | # break 306 | # output = open('target_train.txt', 'w') 307 | # for i in target_train: 308 | # output.write(str(target_train[i][0])) 309 | # output.write(';;') 310 | # for j in target_train[i][1]: 311 | # output.write(str(j)) 312 | # output.write('|') 313 | # output.write('\n') 314 | # output.close() 315 | 316 | print('sucess') -------------------------------------------------------------------------------- /test/testSeqRec.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 7 | from models.seq_rec.Caser import Caser 8 | from models.seq_rec.AttRec import AttRec 9 | from models.seq_rec.PRME import PRME 10 | from utils.load_data.load_data_seq import DataSet 11 | from utils.load_data.load_data_ranking import * 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='DeepRec') 16 | parser.add_argument('--model', choices=['Caser','PRME', 'AttRec'], default = 'AttRec') 17 | parser.add_argument('--epochs', type=int, default=1000) 18 | parser.add_argument('--num_factors', type=int, default=10) 19 | parser.add_argument('--display_step', type=int, default=256) 20 | parser.add_argument('--batch_size', type=int, default=1024 ) #128 for unlimpair 21 | parser.add_argument('--learning_rate', type=float, default=1e-3) #1e-4 for unlimpair 22 | parser.add_argument('--reg_rate', type=float, default=0.1) #0.01 for unlimpair 23 | return parser.parse_args() 24 | 25 | 26 | if __name__ == '__main__': 27 | args = parse_args() 28 | epochs = args.epochs 29 | learning_rate = args.learning_rate 30 | reg_rate = args.reg_rate 31 | num_factors = args.num_factors 32 | display_step = args.display_step 33 | batch_size = args.batch_size 34 | 35 | 36 | config = tf.ConfigProto() 37 | config.gpu_options.allow_growth = True 38 | 39 | 40 | 41 | with tf.Session(config=config) as sess: 42 | model = None 43 | # Model selection 44 | 45 | if args.model == "Caser": 46 | train_data = DataSet(path="../data/ml100k/temp/train.dat", sep="\t", 47 | header=['user', 'item', 'rating', 'time'], 48 | isTrain=True, seq_len=5, target_len=3, num_users=943, num_items=1682) 49 | test_data = DataSet(path="../data/ml100k/temp/test.dat", sep="\t", 50 | header=['user', 'item', 'rating', 'time'], 51 | user_map=train_data.user_map, item_map=train_data.item_map) 52 | # train_data = DataSet(path="../Data/ml100k/seq/train.dat", isTrain=True) 53 | # test_data = DataSet(path="../Data/ml100k/seq/test.dat", user_map=train_data.user_map, item_map=train_data.item_map) 54 | model = Caser(sess, train_data.num_user, train_data.num_item) 55 | model.build_network(L = train_data.sequences.L, num_T=train_data.sequences.T) 56 | model.execute(train_data, test_data) 57 | if args.model == "PRME": 58 | train_data = 
DataSet(path="../data/ml100k/temp/train.dat", sep="\t",header=['user', 'item', 'rating', 'time'],isTrain=True, seq_len=1, target_len=1) 59 | test_data = DataSet(path="../data/ml100k/temp/test.dat", sep="\t", header=['user', 'item', 'rating', 'time'], user_map=train_data.user_map, item_map=train_data.item_map) 60 | model = PRME(sess, train_data.num_user, train_data.num_item) 61 | model.build_network(L = train_data.sequences.L, num_T=train_data.sequences.T) 62 | model.execute(train_data, test_data) 63 | if args.model == "AttRec": 64 | train_data = DataSet(path="../data/ml100k/temp/train.dat", sep="\t",header=['user', 'item', 'rating', 'time'],isTrain=True, seq_len=5, target_len=3, num_users=943, num_items=1682) 65 | test_data = DataSet(path="../data/ml100k/temp/test.dat", sep="\t", header=['user', 'item', 'rating', 'time'], user_map=train_data.user_map, item_map=train_data.item_map) 66 | model = AttRec(sess, train_data.num_user, train_data.num_item) 67 | # print(train_data.user_map) 68 | # print(train_data.item_map) 69 | model.build_network(L = train_data.sequences.L, num_T=train_data.sequences.T) 70 | model.execute(train_data, test_data) 71 | -------------------------------------------------------------------------------- /test/test_cikm.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os.path 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 7 | 8 | from models.item_ranking.cdae import ICDAE 9 | from models.item_ranking.bprmf import BPRMF 10 | from models.item_ranking.cml import CML 11 | from models.item_ranking.neumf import NeuMF 12 | from models.item_ranking.gmf import GMF 13 | from models.item_ranking.jrl import JRL 14 | from models.item_ranking.mlp import MLP 15 | from models.item_ranking.lrml import LRML 16 | from models.item_ranking.neumf_my import NeuMF_my 17 | from models.item_ranking.neumf_my_tail import NeuMF_my_tail 18 | from models.item_ranking.NeuMF_cikm import NeuMF_my_cikm 19 | from models.item_ranking.NeuMF_cikm_que import NeuMF_my_cikm_que 20 | from models.item_ranking.NeuMF_cikm_p import NeuMF_my_cikm_p 21 | # from utils.load_data.load_data_ranking import * 22 | from utils.load_data.load_data_my import * 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='DeepRec') 26 | parser.add_argument('--model', choices=['CDAE', 'CML', 'NeuMF', 'GMF', 'MLP', 'BPRMF', 'JRL', 'LRML'], 27 | default='NeuMF_my_cikm_p') 28 | parser.add_argument('--epochs', type=int, default=40) 29 | parser.add_argument('--num_factors', type=int, default=10) 30 | parser.add_argument('--display_step', type=int, default=1000) 31 | parser.add_argument('--batch_size', type=int, default=1024) # 128 for unlimpair 32 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 33 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 34 | parser.add_argument('--A2C_weight', type=float, default=100) # 0.01 for unlimpair 35 | parser.add_argument('--center_weight', type=float, default=0.001) # 0.01 for unlimpair 36 | parser.add_argument('--pseudo_weight', type=float, default=0.001) # 0.01 for unlimpair 37 | return parser.parse_args() 38 | 39 | 40 | if __name__ == '__main__': 41 | args = parse_args() 42 | epochs = args.epochs 43 | learning_rate = args.learning_rate 44 | reg_rate = args.reg_rate 45 | num_factors = args.num_factors 46 | display_step = args.display_step 47 | batch_size = args.batch_size 48 | 49 | 
# train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t") 50 | # train_data, test_data, n_user, n_item = load_data_myneg(test_size=0.2, sep=";;") 51 | train_data, test_data , n_qids, test_data_hot, test_data_long, hot_item, long_item, hot_dic, long_dic = load_data_myneg_cikm() 52 | config = tf.ConfigProto() 53 | config.gpu_options.allow_growth = True 54 | 55 | with tf.Session(config=config) as sess: 56 | model = None 57 | # Model selection 58 | if args.model == "CDAE": 59 | train_data, test_data, n_user, n_item = load_data_all(test_size=0.2, sep="\t") 60 | model = ICDAE(sess, n_user, n_item) 61 | if args.model == "CML": 62 | model = CML(sess, n_user, n_item) 63 | if args.model == "LRML": 64 | model = LRML(sess, n_user, n_item) 65 | if args.model == "BPRMF": 66 | model = BPRMF(sess, n_user, n_item) 67 | if args.model == "NeuMF": 68 | model = NeuMF(sess, n_user, n_item) 69 | if args.model == "GMF": 70 | model = GMF(sess, n_user, n_item) 71 | if args.model == "MLP": 72 | model = MLP(sess, n_user, n_item) 73 | if args.model == "JRL": 74 | model = JRL(sess, n_user, n_item) 75 | if args.model == "NeuMF_my": 76 | model = NeuMF_my(sess, n_user, n_item) 77 | if args.model == "NeuMF_my_tail": 78 | model = NeuMF_my_tail(sess, n_user, n_item) 79 | 80 | if args.model == "NeuMF_my_cikm": 81 | model = NeuMF_my_cikm(sess, 1, 1) 82 | 83 | if args.model == "NeuMF_my_cikm_query": 84 | model = NeuMF_my_cikm_que(sess, 1, 1) 85 | if args.model == "NeuMF_my_cikm_p": 86 | model = NeuMF_my_cikm_p(sess, 1, 1,epoch=epochs,A2C_weight=args.A2C_weight,center_weight=args.center_weight,pseudo_weight=args.pseudo_weight) 87 | # build and execute the model 88 | if model is not None: 89 | model.build_network_my() 90 | # model.execute(train_data, test_data) 91 | model.execute_my(train_data, test_data, n_qids, test_data_hot, test_data_long, hot_item,long_item, hot_dic, long_dic) 92 | -------------------------------------------------------------------------------- /test/test_item_ranking.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os.path 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 7 | 8 | from models.item_ranking.cdae import ICDAE 9 | from models.item_ranking.bprmf import BPRMF 10 | from models.item_ranking.cml import CML 11 | from models.item_ranking.neumf import NeuMF 12 | from models.item_ranking.gmf import GMF 13 | from models.item_ranking.jrl import JRL 14 | from models.item_ranking.mlp import MLP 15 | from models.item_ranking.lrml import LRML 16 | 17 | from utils.load_data.load_data_ranking import * 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='DeepRec') 22 | parser.add_argument('--model', choices=['CDAE', 'CML', 'NeuMF', 'GMF', 'MLP', 'BPRMF', 'JRL', 'LRML'], 23 | default='NeuMF') 24 | parser.add_argument('--epochs', type=int, default=1000) 25 | parser.add_argument('--num_factors', type=int, default=10) 26 | parser.add_argument('--display_step', type=int, default=1000) 27 | parser.add_argument('--batch_size', type=int, default=1024) # 128 for unlimpair 28 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 29 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 30 | return parser.parse_args() 31 | 32 | 33 | if __name__ == '__main__': 34 | args = parse_args() 35 | epochs = args.epochs 36 | learning_rate = args.learning_rate 37 | reg_rate = args.reg_rate 38 | 
num_factors = args.num_factors 39 | display_step = args.display_step 40 | batch_size = args.batch_size 41 | 42 | train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t") 43 | 44 | config = tf.ConfigProto() 45 | config.gpu_options.allow_growth = True 46 | 47 | with tf.Session(config=config) as sess: 48 | model = None 49 | # Model selection 50 | if args.model == "CDAE": 51 | train_data, test_data, n_user, n_item = load_data_all(test_size=0.2, sep="\t") 52 | model = ICDAE(sess, n_user, n_item) 53 | if args.model == "CML": 54 | model = CML(sess, n_user, n_item) 55 | if args.model == "LRML": 56 | model = LRML(sess, n_user, n_item) 57 | if args.model == "BPRMF": 58 | model = BPRMF(sess, n_user, n_item) 59 | if args.model == "NeuMF": 60 | model = NeuMF(sess, n_user, n_item) 61 | if args.model == "GMF": 62 | model = GMF(sess, n_user, n_item) 63 | if args.model == "MLP": 64 | model = MLP(sess, n_user, n_item) 65 | if args.model == "JRL": 66 | model = JRL(sess, n_user, n_item) 67 | # build and execute the model 68 | if model is not None: 69 | model.build_network() 70 | model.execute(train_data, test_data) 71 | -------------------------------------------------------------------------------- /test/test_movielen.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os 5 | import os.path 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 7 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | from models.item_ranking.cdae import ICDAE 10 | from models.item_ranking.bprmf import BPRMF 11 | from models.item_ranking.cml import CML 12 | from models.item_ranking.neumf import NeuMF 13 | from models.item_ranking.gmf import GMF 14 | from models.item_ranking.jrl import JRL 15 | from models.item_ranking.mlp import MLP 16 | from models.item_ranking.lrml import LRML 17 | from models.item_ranking.neumf_my import NeuMF_my 18 | from models.item_ranking.neumf_my_tail import NeuMF_my_tail 19 | # from utils.load_data.load_data_ranking import * 20 | from utils.load_data.load_data_my import * 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='DeepRec') 24 | parser.add_argument('--model', choices=['CDAE', 'CML', 'NeuMF', 'GMF', 'MLP', 'BPRMF', 'JRL', 'LRML'], 25 | default='NeuMF_my_tail') 26 | parser.add_argument('--epochs', type=int, default=1000) 27 | parser.add_argument('--num_factors', type=int, default=10) 28 | parser.add_argument('--display_step', type=int, default=1000) 29 | parser.add_argument('--batch_size', type=int, default=1024) # 128 for unlimpair 30 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 31 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 32 | return parser.parse_args() 33 | 34 | 35 | if __name__ == '__main__': 36 | args = parse_args() 37 | epochs = args.epochs 38 | learning_rate = args.learning_rate 39 | reg_rate = args.reg_rate 40 | num_factors = args.num_factors 41 | display_step = args.display_step 42 | batch_size = args.batch_size 43 | # train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t") 44 | # train_data, test_data, n_user, n_item = load_data_myneg(test_size=0.2, sep=";;") 45 | train_data, test_data, n_user, n_item, test_data_hot, test_data_long,hot_item,long_item = load_data_myneg_tail(test_size=0.2, sep=";;") 46 | config = tf.ConfigProto() 47 | config.gpu_options.allow_growth = True 48 | 49 | with tf.Session(config=config) as 
sess: 50 | model = None 51 | # Model selection 52 | if args.model == "CDAE": 53 | train_data, test_data, n_user, n_item = load_data_all(test_size=0.2, sep="\t") 54 | model = ICDAE(sess, n_user, n_item) 55 | if args.model == "CML": 56 | model = CML(sess, n_user, n_item) 57 | if args.model == "LRML": 58 | model = LRML(sess, n_user, n_item) 59 | if args.model == "BPRMF": 60 | model = BPRMF(sess, n_user, n_item) 61 | if args.model == "NeuMF": 62 | model = NeuMF(sess, n_user, n_item) 63 | if args.model == "GMF": 64 | model = GMF(sess, n_user, n_item) 65 | if args.model == "MLP": 66 | model = MLP(sess, n_user, n_item) 67 | if args.model == "JRL": 68 | model = JRL(sess, n_user, n_item) 69 | if args.model == "NeuMF_my": 70 | model = NeuMF_my(sess, n_user, n_item) 71 | if args.model == "NeuMF_my_tail": 72 | model = NeuMF_my_tail(sess, n_user, n_item) 73 | # build and execute the model 74 | if model is not None: 75 | model.build_network_my() 76 | # model.execute(train_data, test_data) 77 | model.execute_my(train_data, test_data, test_data_hot, test_data_long, hot_item,long_item) 78 | -------------------------------------------------------------------------------- /test/test_myself.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os 5 | import os.path 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 7 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | from models.item_ranking.cdae import ICDAE 10 | from models.item_ranking.bprmf import BPRMF 11 | from models.item_ranking.cml import CML 12 | from models.item_ranking.neumf import NeuMF 13 | from models.item_ranking.gmf import GMF 14 | from models.item_ranking.jrl import JRL 15 | from models.item_ranking.mlp import MLP 16 | from models.item_ranking.lrml import LRML 17 | 18 | from utils.load_data.load_data_ranking import * 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description='DeepRec') 23 | parser.add_argument('--model', choices=['CDAE', 'CML', 'NeuMF', 'GMF', 'MLP', 'BPRMF', 'JRL', 'LRML'], 24 | default='LRML') 25 | parser.add_argument('--epochs', type=int, default=1000) 26 | parser.add_argument('--num_factors', type=int, default=10) 27 | parser.add_argument('--display_step', type=int, default=1000) 28 | parser.add_argument('--batch_size', type=int, default=1024) # 128 for unlimpair 29 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 30 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 31 | return parser.parse_args() 32 | 33 | 34 | if __name__ == '__main__': 35 | args = parse_args() 36 | epochs = args.epochs 37 | learning_rate = args.learning_rate 38 | reg_rate = args.reg_rate 39 | num_factors = args.num_factors 40 | display_step = args.display_step 41 | batch_size = args.batch_size 42 | 43 | train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t") 44 | 45 | config = tf.ConfigProto() 46 | config.gpu_options.allow_growth = True 47 | 48 | with tf.Session(config=config) as sess: 49 | model = None 50 | # Model selection 51 | if args.model == "CDAE": 52 | train_data, test_data, n_user, n_item = load_data_all(test_size=0.2, sep="\t") 53 | model = ICDAE(sess, n_user, n_item) 54 | if args.model == "CML": 55 | model = CML(sess, n_user, n_item) 56 | if args.model == "LRML": 57 | model = LRML(sess, n_user, n_item) 58 | if args.model == "BPRMF": 59 | model = BPRMF(sess, n_user, n_item) 60 | if args.model == "NeuMF": 61 
| model = NeuMF(sess, n_user, n_item) 62 | if args.model == "GMF": 63 | model = GMF(sess, n_user, n_item) 64 | if args.model == "MLP": 65 | model = MLP(sess, n_user, n_item) 66 | if args.model == "JRL": 67 | model = JRL(sess, n_user, n_item) 68 | # build and execute the model 69 | if model is not None: 70 | model.build_network() 71 | model.execute(train_data, test_data) 72 | -------------------------------------------------------------------------------- /test/test_rating_pred.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os.path 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 7 | 8 | from models.rating_prediction.nnmf import NNMF 9 | from models.rating_prediction.mf import MF 10 | from models.rating_prediction.nrr import NRR 11 | from models.rating_prediction.autorec import * 12 | from models.rating_prediction.nfm import NFM 13 | from utils.load_data.load_data_rating import * 14 | from utils.load_data.load_data_content import * 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='nnRec') 18 | parser.add_argument('--model', choices=['MF', 'NNMF', 'NRR', 'I-AutoRec', 'U-AutoRec', 'FM', 'NFM', 'AFM'], default='U-AutoRec') 19 | parser.add_argument('--epochs', type=int, default=1000) 20 | parser.add_argument('--num_factors', type=int, default=10) 21 | parser.add_argument('--display_step', type=int, default=1000) 22 | parser.add_argument('--batch_size', type=int, default=256) # 128 for unlimpair 23 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 24 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 25 | return parser.parse_args() 26 | 27 | 28 | if __name__ == '__main__': 29 | args = parse_args() 30 | epochs = args.epochs 31 | learning_rate = args.learning_rate 32 | reg_rate = args.reg_rate 33 | num_factors = args.num_factors 34 | display_step = args.display_step 35 | batch_size = args.batch_size 36 | 37 | train_data, test_data, n_user, n_item = load_data_rating(path="../Data/ml100k/movielens_100k.dat", 38 | header=['user_id', 'item_id', 'rating', 't'], 39 | test_size=0.1, sep="\t") 40 | 41 | 42 | config = tf.ConfigProto() 43 | config.gpu_options.allow_growth = True 44 | 45 | with tf.Session(config=config) as sess: 46 | model = None 47 | # Model selection 48 | if args.model == "MF": 49 | model = MF(sess, n_user, n_item, batch_size=batch_size) 50 | if args.model == "NNMF": 51 | model = NNMF(sess, n_user, n_item, learning_rate=learning_rate) 52 | if args.model == "NRR": 53 | model = NRR(sess, n_user, n_item) 54 | if args.model == "I-AutoRec": 55 | model = IAutoRec(sess, n_user, n_item) 56 | if args.model == "U-AutoRec": 57 | model = UAutoRec(sess, n_user, n_item) 58 | if args.model == "NFM": 59 | train_data, test_data, feature_M = load_data_fm() 60 | n_user = 957 61 | n_item = 4082 62 | model = NFM(sess, n_user, n_item) 63 | model.build_network(feature_M) 64 | model.execute(train_data, test_data) 65 | # build and execute the model 66 | if model is not None: 67 | model.build_network() 68 | model.execute(train_data, test_data) 69 | -------------------------------------------------------------------------------- /test_cikm.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | import sys 4 | import os.path 5 | 6 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 7 | 8 | from 
models.item_ranking.cdae import ICDAE 9 | from models.item_ranking.bprmf import BPRMF 10 | from models.item_ranking.cml import CML 11 | from models.item_ranking.neumf import NeuMF 12 | from models.item_ranking.gmf import GMF 13 | from models.item_ranking.jrl import JRL 14 | from models.item_ranking.mlp import MLP 15 | from models.item_ranking.lrml import LRML 16 | from models.item_ranking.neumf_my import NeuMF_my 17 | from models.item_ranking.neumf_my_tail import NeuMF_my_tail 18 | from models.item_ranking.NeuMF_cikm import NeuMF_my_cikm 19 | from models.item_ranking.NeuMF_cikm_que import NeuMF_my_cikm_que 20 | from models.item_ranking.NeuMF_cikm_p import NeuMF_my_cikm_p 21 | # from utils.load_data.load_data_ranking import * 22 | from utils.load_data.load_data_my import * 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='DeepRec') 26 | parser.add_argument('--model', choices=['CDAE', 'CML', 'NeuMF', 'GMF', 'MLP', 'BPRMF', 'JRL', 'LRML'], 27 | default='NeuMF_my_cikm_p') 28 | parser.add_argument('--epochs', type=int, default=40) 29 | parser.add_argument('--num_factors', type=int, default=10) 30 | parser.add_argument('--display_step', type=int, default=1000) 31 | parser.add_argument('--batch_size', type=int, default=1024) # 128 for unlimpair 32 | parser.add_argument('--learning_rate', type=float, default=1e-3) # 1e-4 for unlimpair 33 | parser.add_argument('--reg_rate', type=float, default=0.1) # 0.01 for unlimpair 34 | parser.add_argument('--A2C_weight', type=float, default=100) # 0.01 for unlimpair 35 | parser.add_argument('--center_weight', type=float, default=0.001) # 0.01 for unlimpair 36 | parser.add_argument('--pseudo_weight', type=float, default=0.001) # 0.01 for unlimpair 37 | return parser.parse_args() 38 | 39 | 40 | if __name__ == '__main__': 41 | args = parse_args() 42 | epochs = args.epochs 43 | learning_rate = args.learning_rate 44 | reg_rate = args.reg_rate 45 | num_factors = args.num_factors 46 | display_step = args.display_step 47 | batch_size = args.batch_size 48 | 49 | # train_data, test_data, n_user, n_item = load_data_neg(test_size=0.2, sep="\t") 50 | # train_data, test_data, n_user, n_item = load_data_myneg(test_size=0.2, sep=";;") 51 | train_data, test_data , n_qids, test_data_hot, test_data_long, hot_item, long_item, hot_dic, long_dic = load_data_myneg_cikm() 52 | config = tf.ConfigProto() 53 | config.gpu_options.allow_growth = True 54 | 55 | with tf.Session(config=config) as sess: 56 | model = None 57 | # Model selection 58 | if args.model == "CDAE": 59 | train_data, test_data, n_user, n_item = load_data_all(test_size=0.2, sep="\t") 60 | model = ICDAE(sess, n_user, n_item) 61 | if args.model == "CML": 62 | model = CML(sess, n_user, n_item) 63 | if args.model == "LRML": 64 | model = LRML(sess, n_user, n_item) 65 | if args.model == "BPRMF": 66 | model = BPRMF(sess, n_user, n_item) 67 | if args.model == "NeuMF": 68 | model = NeuMF(sess, n_user, n_item) 69 | if args.model == "GMF": 70 | model = GMF(sess, n_user, n_item) 71 | if args.model == "MLP": 72 | model = MLP(sess, n_user, n_item) 73 | if args.model == "JRL": 74 | model = JRL(sess, n_user, n_item) 75 | if args.model == "NeuMF_my": 76 | model = NeuMF_my(sess, n_user, n_item) 77 | if args.model == "NeuMF_my_tail": 78 | model = NeuMF_my_tail(sess, n_user, n_item) 79 | 80 | if args.model == "NeuMF_my_cikm": 81 | model = NeuMF_my_cikm(sess, 1, 1) 82 | 83 | if args.model == "NeuMF_my_cikm_query": 84 | model = NeuMF_my_cikm_que(sess, 1, 1) 85 | if args.model == "NeuMF_my_cikm_p": 86 | model = 
NeuMF_my_cikm_p(sess, 1, 1,epoch=epochs,A2C_weight=args.A2C_weight,center_weight=args.center_weight,pseudo_weight=args.pseudo_weight) 87 | # build and execute the model 88 | if model is not None: 89 | model.build_network_my() 90 | # model.execute(train_data, test_data) 91 | model.execute_my(train_data, test_data, n_qids, test_data_hot, test_data_long, hot_item,long_item, hot_dic, long_dic) 92 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/__init__.py -------------------------------------------------------------------------------- /utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/__init__.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/config/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/config/config.py -------------------------------------------------------------------------------- /utils/evaluation/RankingMetrics.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/evaluation/RankingMetrics.pyc -------------------------------------------------------------------------------- /utils/evaluation/RatingMetrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RMSE(error, num): 5 | return np.sqrt(error / num) 6 | 7 | 8 | def MAE(error_mae, num): 9 | return (error_mae / num) 10 | -------------------------------------------------------------------------------- /utils/evaluation/SeqRecMetrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Evaluation Metrics for Top N Recommendation 4 | """ 5 | 6 | import numpy as np 7 | import time 8 | from numpy.linalg import norm 9 | 10 | __author__ = "Shuai Zhang" 11 | __copyright__ = "Copyright 2018, The DeepRec Project" 12 | 13 | __license__ = "GPL" 14 | __version__ = "1.0.0" 15 | __maintainer__ = "Shuai Zhang" 16 | __email__ = "cheungdaven@gmail.com" 17 | __status__ = "Development" 18 | 19 | 20 | import math 21 | 22 | # efficient version 23 | def precision_recall_ndcg_at_k(k, rankedlist, test_matrix): 24 | idcg_k = 0 25 | dcg_k = 0 26 | n_k = k if len(test_matrix) > k else len(test_matrix) 27 | for i in range(n_k): 28 | idcg_k += 1 / math.log(i + 2, 2) 29 | 30 | b1 = rankedlist 31 | b2 = test_matrix 32 | s2 = set(b2) 33 | hits = [ (idx, val) for idx, val in enumerate(b1) if val in s2] 34 | 35 | count = len(hits) 36 | 37 | count_test = len(test_matrix) 38 | 39 | 40 | for c in range(count): 41 | dcg_k += 1 / math.log(hits[c][0] + 2, 2) 42 | 43 | return count, float(count / len(test_matrix)), float(dcg_k / 
idcg_k) 44 | 45 | # def hitratio(k, rankedlist, test_matrix) 46 | 47 | def map_mrr_ndcg(rankedlist, test_matrix): 48 | ap = 0 49 | map = 0 50 | dcg = 0 51 | idcg = 0 52 | mrr = 0 53 | for i in range(len(test_matrix)): 54 | idcg += 1 / math.log(i + 2, 2) 55 | 56 | b1 = rankedlist 57 | b2 = test_matrix 58 | s2 = set(b2) 59 | hits = [ (idx, val) for idx, val in enumerate(b1) if val in s2] 60 | # for idx, vale in enumerate(b1): 61 | # print(idx, vale) 62 | count = len(hits) 63 | 64 | 65 | for c in range(count): 66 | ap += (c+1) / (hits[c][0] + 1) 67 | dcg += 1 / math.log(hits[c][0] + 2, 2) 68 | 69 | if count != 0: 70 | mrr = 1 / (hits[0][0] + 1) 71 | 72 | if count != 0: 73 | map = ap / count 74 | 75 | max = len(b1) - 1 76 | if count != 0: 77 | count_test = max - hits[0][0] 78 | else: 79 | count_test = 0 80 | auc = 1.0 * count_test /max 81 | return auc, mrr, float(dcg / idcg) 82 | 83 | 84 | def hitratio_at_k(): 85 | print(" test") 86 | 87 | def ndcg_at_k(): 88 | print("test") 89 | 90 | 91 | def evaluate1(self): 92 | pred_ratings_10 = {} 93 | pred_ratings_50 = {} 94 | pred_ratings = {} 95 | ranked_list = {} 96 | p_at_5 = [] 97 | hr_at_50 = [] 98 | r_at_5 = [] 99 | r_at_10 = [] 100 | map = [] 101 | mrr = [] 102 | auc = [] 103 | ndcg = [] 104 | ndcg_at_5 = [] 105 | ndcg_at_10 = [] 106 | start_time = time.time() 107 | user_factors, seq_factors, user_factors_2, _ = self.getUserParam(self.test_users) 108 | item_factors_1, item_factors_2, bias_item = self.getItemParam(np.expand_dims(np.arange(self.num_item), axis=1)) 109 | 110 | # 111 | print(np.shape(user_factors)) 112 | # print(np.shape(seq_factors)) 113 | print(np.shape(item_factors_1)) 114 | # print(np.shape(item_factors_2)) 115 | # print(type(user_factors)) 116 | # print(type(seq_factors)) 117 | # print(type(item_factors_1)) 118 | # print(type(item_factors_2)) 119 | 120 | # print(np.shape(user_factors[:,None]- item_factors_1)) 121 | # print(np.shape(bias_item)) 122 | results = - self.alpha * np.sum((user_factors[:,None]- item_factors_1)**2, axis=2) \ 123 | - (1- self.alpha) * np.sum((seq_factors[:,None] - item_factors_2)**2, axis=2) 124 | #results = - np.sum((seq_factors[:,None] + user_factors[:,None]- item_factors_2)**2, axis=2) 125 | 126 | #- np.reshape(bias_item, [ np.shape(bias_item)[1], np.shape(bias_item)[0]]) 127 | # print(np.shape(results)) 128 | # print(time.time() - start_time) 129 | for u in self.test_users: 130 | user_ids = [] 131 | user_neg_items = self.neg_items[u] 132 | item_ids = [] 133 | scores = [] 134 | 135 | 136 | for j in user_neg_items: 137 | item_ids.append(j) 138 | user_ids.append(u) 139 | 140 | 141 | scores.append(results[u, j]) 142 | 143 | 144 | #scores = self.predict(user_ids, item_ids) 145 | #print(np.shape(scores)) 146 | #print( scores) 147 | # 148 | 149 | # print(type(scores)) 150 | # print(scores) 151 | # print(np.shape(scores)) 152 | # print(ratings) 153 | neg_item_index = list(zip(item_ids, scores)) 154 | 155 | ranked_list[u] = sorted(neg_item_index, key=lambda tup: tup[1], reverse=True) 156 | 157 | # print(ranked_list[u]) 158 | pred_ratings[u] = [r[0] for r in ranked_list[u]] 159 | pred_ratings_50[u] = pred_ratings[u][:50] 160 | 161 | 162 | hr, _, _ = precision_recall_ndcg_at_k(50, pred_ratings_50[u], self.test_data[u]) 163 | # if hr > 0: 164 | # print(u) 165 | # print(self.test_sequences[u, :]) 166 | # print( self.test_data[u]) 167 | # print(seq_weights[u]) 168 | auc_t, mrr_t, _ = map_mrr_ndcg(pred_ratings[u], self.test_data[u]) 169 | 170 | hr_at_50.append(hr) 171 | mrr.append(mrr_t) 172 | auc.append(auc_t) 173 
| print(np.sum(hr_at_50)) 174 | print("------------------------") 175 | print("HR@50:" + str(np.mean(hr_at_50))) 176 | print("MRR:" + str(np.mean(mrr))) 177 | print("AUC:" + str(np.mean(auc))) 178 | 179 | def evaluate_caser(self): 180 | pred_ratings_10 = {} 181 | pred_ratings_50 = {} 182 | pred_ratings = {} 183 | ranked_list = {} 184 | hr_at_50 = [] 185 | p_at_10 = [] 186 | r_at_5 = [] 187 | r_at_10 = [] 188 | map = [] 189 | mrr = [] 190 | ndcg = [] 191 | ndcg_at_5 = [] 192 | ndcg_at_10 = [] 193 | all_users = np.arange(500) 194 | 195 | user_factors = self.getUserParam(self.test_users) 196 | item_factors, bias_item = self.getItemParam(np.expand_dims(np.arange(self.num_item), axis=1)) 197 | 198 | # 199 | 200 | # print(np.shape(item_factors_1)) 201 | # print(np.shape(item_factors_2)) 202 | # print(type(user_factors)) 203 | # print(type(seq_factors)) 204 | # print(type(item_factors_1)) 205 | # print(type(item_factors_2)) 206 | 207 | # print(np.shape(user_factors[:,None]- item_factors_1)) 208 | # print( bias_item[0] ) 209 | #res = tf.reduce_sum(tf.multiply(x, self.w_items), 2) + self.b_items 210 | 211 | # print(np.shape(bias_item)) 212 | # print(np.shape( np.dot(user_factors, item_factors.T))) 213 | results = np.dot(user_factors, item_factors.T) + bias_item 214 | 215 | # - self.alpha * np.sum((user_factors[:, None] - item_factors_1) ** 2, axis=2) \ 216 | # - (1 - self.alpha) * np.sum((seq_factors[:, None] - item_factors_2) ** 2, axis=2) 217 | 218 | for u in self.test_users:#all_users:# 219 | user_ids = [] 220 | user_neg_items = self.neg_items[u] # self.all_items 221 | item_ids = [] 222 | scores = [] 223 | for j in user_neg_items: 224 | item_ids.append(j) 225 | user_ids.append(u) 226 | scores.append(results[u, j]) 227 | #scores = self.predict(user_ids, item_ids) 228 | # print(type(scores)) 229 | # print(scores) 230 | # print(np.shape(scores)) 231 | # print(ratings) 232 | neg_item_index = list(zip(item_ids, scores)) 233 | 234 | ranked_list[u] = sorted(neg_item_index, key=lambda tup: tup[1], reverse=True) 235 | 236 | # print(ranked_list[u]) 237 | pred_ratings[u] = [r[0] for r in ranked_list[u]] 238 | pred_ratings_50[u] = pred_ratings[u][:50] 239 | 240 | hr, _, _ = precision_recall_ndcg_at_k(50, pred_ratings_50[u], self.test_data[u]) 241 | _, mrr_t, _ = map_mrr_ndcg(pred_ratings[u], self.test_data[u]) 242 | 243 | hr_at_50.append(hr) 244 | mrr.append(mrr_t) 245 | print(np.sum(hr_at_50)) 246 | print("------------------------") 247 | print("HR@50:" + str(np.mean(hr_at_50))) 248 | print("MRR:" + str(np.mean(mrr))) 249 | 250 | def evaluate(self): 251 | pred_ratings_10 = {} 252 | pred_ratings_50 = {} 253 | pred_ratings = {} 254 | ranked_list = {} 255 | hr_at_50 = [] 256 | p_at_10 = [] 257 | r_at_5 = [] 258 | r_at_10 = [] 259 | map = [] 260 | mrr = [] 261 | ndcg = [] 262 | ndcg_at_5 = [] 263 | ndcg_at_10 = [] 264 | all_users = np.arange(500) 265 | for u in self.test_users:#all_users:# 266 | user_ids = [] 267 | user_neg_items = self.neg_items[u] # self.all_items 268 | item_ids = [] 269 | #scores = [] 270 | for j in user_neg_items: 271 | item_ids.append(j) 272 | user_ids.append(u) 273 | 274 | scores = self.predict(user_ids, item_ids) 275 | # print(type(scores)) 276 | # print(scores) 277 | # print(np.shape(scores)) 278 | # print(ratings) 279 | neg_item_index = list(zip(item_ids, scores)) 280 | 281 | ranked_list[u] = sorted(neg_item_index, key=lambda tup: tup[1], reverse=True) 282 | 283 | # print(ranked_list[u]) 284 | pred_ratings[u] = [r[0] for r in ranked_list[u]] 285 | pred_ratings_50[u] = 
pred_ratings[u][:50] 286 | 287 | hr, _, _ = precision_recall_ndcg_at_k(50, pred_ratings_50[u], self.test_data[u]) 288 | _, mrr_t, _ = map_mrr_ndcg(pred_ratings[u], self.test_data[u]) 289 | 290 | hr_at_50.append(hr) 291 | mrr.append(mrr_t) 292 | print(np.sum(hr_at_50)) 293 | print("------------------------") 294 | print("HR@50:" + str(np.mean(hr_at_50))) 295 | print("MRR:" + str(np.mean(mrr))) 296 | -------------------------------------------------------------------------------- /utils/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/evaluation/__init__.py -------------------------------------------------------------------------------- /utils/evaluation/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/evaluation/__init__.pyc -------------------------------------------------------------------------------- /utils/evaluation/__pycache__/RankingMetrics.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/evaluation/__pycache__/RankingMetrics.cpython-36.pyc -------------------------------------------------------------------------------- /utils/evaluation/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/evaluation/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/load_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/__init__.py -------------------------------------------------------------------------------- /utils/load_data/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/__init__.pyc -------------------------------------------------------------------------------- /utils/load_data/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /utils/load_data/__pycache__/load_data_my.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/__pycache__/load_data_my.cpython-36.pyc -------------------------------------------------------------------------------- /utils/load_data/__pycache__/load_data_ranking.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/__pycache__/load_data_ranking.cpython-36.pyc -------------------------------------------------------------------------------- 
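The helpers in utils/evaluation/SeqRecMetrics.py above are called once per user by the evaluate* routines: despite its name, precision_recall_ndcg_at_k returns the hit count, recall@k and nDCG@k of a truncated ranked list, while map_mrr_ndcg returns AUC, MRR and nDCG over the full list. A minimal usage sketch on a toy ranked list follows; it assumes it is run from the test/ directory like the other test scripts so the repository root is importable, and the values in the comments are hand-computed for this toy input only.

```
import sys
import os.path

# mirror the path setup used by the test scripts so `utils` is importable
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

from utils.evaluation.SeqRecMetrics import precision_recall_ndcg_at_k, map_mrr_ndcg

ranked = [5, 2, 8]    # item ids ordered by predicted score, best first
relevant = [2, 9]     # ground-truth items for this user

# hit count, recall@k and nDCG@k for the top-3 of the ranked list
hits, recall, ndcg_at_k = precision_recall_ndcg_at_k(3, ranked, relevant)
print(hits, recall, ndcg_at_k)   # 1  0.5  ~0.387 (item 2 is recovered at rank 2)

# AUC, MRR and nDCG over the whole ranked list
auc, mrr, ndcg = map_mrr_ndcg(ranked, relevant)
print(auc, mrr, ndcg)            # 0.5  0.5  ~0.387
```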
/utils/load_data/load_data_content.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from sklearn.model_selection import train_test_split 5 | from scipy.sparse import csr_matrix 6 | 7 | features = {} 8 | 9 | def load_data_fm(path=""): 10 | 11 | train_file = "../Data/frappe/frappe.train.libfm" 12 | test_file = "../Data/frappe/frappe.test.libfm" 13 | 14 | 15 | count_num_feature_field(train_file) 16 | count_num_feature_field(test_file) 17 | features_M = len(features) 18 | 19 | train_data = read_data(train_file) 20 | test_data = read_data(test_file) 21 | 22 | return train_data, test_data, features_M 23 | 24 | 25 | def count_num_feature_field(file): 26 | f = open(file) 27 | line = f.readline() 28 | i = len(features) 29 | while line: 30 | elements = line.strip().split(' ') 31 | for e in elements[1:]: 32 | if e not in features: 33 | features[e] = i 34 | i = i + 1 35 | line = f.readline() 36 | f.close() 37 | 38 | def read_data(file): 39 | f = open(file) 40 | X = [] 41 | Y = [] 42 | 43 | line = f.readline() 44 | while line: 45 | elements = line.strip().split(' ') 46 | Y.append([float(elements[0])]) 47 | X.append([ features[e] for e in elements[1:]]) 48 | line = f.readline() 49 | f.close() 50 | Data_Dict = {} 51 | Data_Dict['Y'] = Y 52 | Data_Dict['X'] = X 53 | return Data_Dict 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /utils/load_data/load_data_ranking.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from sklearn.model_selection import train_test_split 5 | from scipy.sparse import csr_matrix 6 | 7 | 8 | def load_data_all(path="../data/ml100k/movielens_100k.dat", header=['user_id', 'item_id', 'rating', 'time'], 9 | test_size=0.2, sep="\t"): 10 | df = pd.read_csv(path, sep=sep, names=header, engine='python') 11 | 12 | n_users = df.user_id.unique().shape[0] 13 | n_items = df.item_id.unique().shape[0] 14 | 15 | train_data, test_data = train_test_split(df, test_size=test_size) 16 | train_data = pd.DataFrame(train_data) 17 | test_data = pd.DataFrame(test_data) 18 | 19 | train_row = [] 20 | train_col = [] 21 | train_rating = [] 22 | 23 | train_dict = {} 24 | for line in train_data.itertuples(): 25 | u = line[1] - 1 26 | i = line[2] - 1 27 | train_dict[(u, i)] = 1 28 | 29 | for u in range(n_users): 30 | for i in range(n_items): 31 | train_row.append(u) 32 | train_col.append(i) 33 | if (u, i) in train_dict.keys(): 34 | train_rating.append(1) 35 | else: 36 | train_rating.append(0) 37 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 38 | all_items = set(np.arange(n_items)) 39 | 40 | neg_items = {} 41 | train_interaction_matrix = [] 42 | for u in range(n_users): 43 | neg_items[u] = list(all_items - set(train_matrix.getrow(u).nonzero()[1])) 44 | train_interaction_matrix.append(list(train_matrix.getrow(u).toarray()[0])) 45 | 46 | test_row = [] 47 | test_col = [] 48 | test_rating = [] 49 | for line in test_data.itertuples(): 50 | test_row.append(line[1] - 1) 51 | test_col.append(line[2] - 1) 52 | test_rating.append(1) 53 | test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 54 | 55 | test_dict = {} 56 | for u in range(n_users): 57 | test_dict[u] = test_matrix.getrow(u).nonzero()[1] 58 | 59 | print("Load data finished. 
Number of users:", n_users, "Number of items:", n_items) 60 | 61 | return train_interaction_matrix, test_dict, n_users, n_items 62 | 63 | 64 | def load_data_neg(path="../data/ml100k/movielens_100k.dat", header=['user_id', 'item_id', 'rating', 'category'], 65 | test_size=0.2, sep="\t"): 66 | df = pd.read_csv(path, sep=sep, names=header, engine='python') 67 | 68 | n_users = df.user_id.unique().shape[0] 69 | n_items = df.item_id.unique().shape[0] 70 | 71 | train_data, test_data = train_test_split(df, test_size=test_size) 72 | train_data = pd.DataFrame(train_data) 73 | test_data = pd.DataFrame(test_data) 74 | 75 | train_row = [] 76 | train_col = [] 77 | train_rating = [] 78 | 79 | for line in train_data.itertuples(): 80 | u = line[1] - 1 81 | i = line[2] - 1 82 | train_row.append(u) 83 | train_col.append(i) 84 | train_rating.append(1) 85 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 86 | 87 | # all_items = set(np.arange(n_items)) 88 | # neg_items = {} 89 | # for u in range(n_users): 90 | # neg_items[u] = list(all_items - set(train_matrix.getrow(u).nonzero()[1])) 91 | 92 | test_row = [] 93 | test_col = [] 94 | test_rating = [] 95 | for line in test_data.itertuples(): 96 | test_row.append(line[1] - 1) 97 | test_col.append(line[2] - 1) 98 | test_rating.append(1) 99 | test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 100 | 101 | test_dict = {} 102 | for u in range(n_users): 103 | test_dict[u] = test_matrix.getrow(u).nonzero()[1] 104 | 105 | print("Load data finished. Number of users:", n_users, "Number of items:", n_items) 106 | return train_matrix.todok(), test_dict, n_users, n_items 107 | 108 | 109 | def load_data_separately(path_train=None, path_test=None, path_val=None, header=['user_id', 'item_id', 'rating'], 110 | sep=" ", n_users=0, n_items=0): 111 | n_users = n_users 112 | n_items = n_items 113 | print("start") 114 | train_matrix = None 115 | if path_train is not None: 116 | train_data = pd.read_csv(path_train, sep=sep, names=header, engine='python') 117 | print("Load data finished. Number of users:", n_users, "Number of items:", n_items) 118 | 119 | train_row = [] 120 | train_col = [] 121 | train_rating = [] 122 | 123 | for line in train_data.itertuples(): 124 | u = line[1] # - 1 125 | i = line[2] # - 1 126 | train_row.append(u) 127 | train_col.append(i) 128 | train_rating.append(1) 129 | 130 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 131 | 132 | print("Load data finished. 
Number of users:", n_users, "Number of items:", n_items) 133 | test_dict = None 134 | if path_test is not None: 135 | test_data = pd.read_csv(path_test, sep=sep, names=header, engine='python') 136 | test_row = [] 137 | test_col = [] 138 | test_rating = [] 139 | for line in test_data.itertuples(): 140 | test_row.append(line[1]) 141 | i = line[2] # - 1 142 | test_col.append(i) 143 | test_rating.append(1) 144 | 145 | test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 146 | 147 | test_dict = {} 148 | for u in range(n_users): 149 | test_dict[u] = test_matrix.getrow(u).nonzero()[1] 150 | all_items = set(np.arange(n_items)) 151 | train_interaction_matrix = [] 152 | for u in range(n_users): 153 | train_interaction_matrix.append(list(train_matrix.getrow(u).toarray()[0])) 154 | 155 | if path_val is not None: 156 | val_data = pd.read_csv(path_val, sep=sep, names=header, engine='python') 157 | 158 | print("end") 159 | return train_interaction_matrix, test_dict, n_users, n_items 160 | -------------------------------------------------------------------------------- /utils/load_data/load_data_ranking.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/load_data/load_data_ranking.pyc -------------------------------------------------------------------------------- /utils/load_data/load_data_rating.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from sklearn.model_selection import train_test_split 5 | from scipy.sparse import csr_matrix 6 | 7 | 8 | def load_data_rating(path="../data/ml100k/movielens_100k.dat", header=['user_id', 'item_id', 'rating', 'category'], 9 | test_size=0.1, sep="\t"): 10 | ''' 11 | Loading the data for rating prediction task 12 | :param path: the path of the dataset, datasets should be in the CSV format 13 | :param header: the header of the CSV format, the first three should be: user_id, item_id, rating 14 | :param test_size: the test ratio, default 0.1 15 | :param sep: the seperator for csv colunms, defalut space 16 | :return: 17 | ''' 18 | 19 | df = pd.read_csv(path, sep=sep, names=header, engine='python') 20 | 21 | n_users = df.user_id.unique().shape[0] 22 | n_items = df.item_id.unique().shape[0] 23 | 24 | train_data, test_data = train_test_split(df, test_size=test_size) 25 | train_data = pd.DataFrame(train_data) 26 | test_data = pd.DataFrame(test_data) 27 | 28 | train_row = [] 29 | train_col = [] 30 | train_rating = [] 31 | 32 | for line in train_data.itertuples(): 33 | u = line[1] - 1 34 | i = line[2] - 1 35 | train_row.append(u) 36 | train_col.append(i) 37 | train_rating.append(line[3]) 38 | train_matrix = csr_matrix((train_rating, (train_row, train_col)), shape=(n_users, n_items)) 39 | 40 | test_row = [] 41 | test_col = [] 42 | test_rating = [] 43 | for line in test_data.itertuples(): 44 | test_row.append(line[1] - 1) 45 | test_col.append(line[2] - 1) 46 | test_rating.append(line[3]) 47 | test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 48 | print("Load data finished. 
Number of users:", n_users, "Number of items:", n_items) 49 | return train_matrix.todok(), test_matrix.todok(), n_users, n_items 50 | -------------------------------------------------------------------------------- /utils/load_data/load_data_seq.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | from sklearn.model_selection import train_test_split 5 | from scipy.sparse import csr_matrix 6 | 7 | 8 | class DataSet(): 9 | class SeqData(): 10 | 11 | def __init__(self, user_ids, sequences, targets=None): 12 | self.user_ids = user_ids 13 | self.sequences = sequences 14 | self.targets = targets 15 | self.L = sequences.shape[1] 16 | self.T = None 17 | 18 | if np.any(targets): 19 | self.T = targets.shape[1] 20 | 21 | def __init__(self, path="../../Data/ml1m/seq/train.txt", header=['user', 'item', 'rating'], sep=" ", seq_len=1, 22 | target_len=1, isTrain=False, user_map=None, 23 | item_map=None, num_users=None, num_items=None): 24 | self.path = path 25 | self.header = header 26 | self.sep = sep 27 | self.seq_len = seq_len 28 | self.target_len = target_len 29 | self.isTrain = isTrain 30 | 31 | if not user_map and not item_map: 32 | user_map = dict() 33 | item_map = dict() 34 | 35 | self.num_user = 0 36 | self.num_item = 0 37 | else: 38 | self.num_user = len(user_map) 39 | self.num_item = len(item_map) 40 | 41 | # TODO: 1. remove cold start user with less than 5 items; 42 | 43 | # TODO: 2.split the data into 70-10-20 based on the timestamp 44 | 45 | df_train = pd.read_csv(self.path, sep=self.sep, names=self.header) 46 | ''' 47 | if not num_users and not num_items: 48 | n_users = df_train.user.unique().shape[0] 49 | n_items = df_train.item.unique().shape[0] 50 | else: 51 | n_users = num_users #7390 #43117 #df_train.user.unique().shape[0] 52 | n_items = num_items #10159#26018 #df_train.item.unique().shape[0] 53 | 54 | print("Load data finished. Number of users:", n_users, "Number of items:", n_items) 55 | ''' 56 | train_data = pd.DataFrame(df_train) 57 | 58 | self.user_ids = list() 59 | self.item_ids = list() 60 | 61 | train_row = [] 62 | train_col = [] 63 | train_rating = [] 64 | 65 | for line in train_data.itertuples(): 66 | self.user_ids.append(line[1]) 67 | self.item_ids.append(line[2]) 68 | 69 | for u in self.user_ids: 70 | if u not in user_map: 71 | user_map[u] = self.num_user 72 | self.num_user += 1 73 | 74 | for i in self.item_ids: 75 | if i not in item_map: 76 | item_map[i] = self.num_item 77 | self.num_item += 1 78 | if num_users and num_items: 79 | self.num_user = num_users 80 | self.num_item = num_items 81 | print("....Load data finished. 
Number of users:", self.num_user, "Number of items:", self.num_item) 82 | self.user_map = user_map 83 | self.item_map = item_map 84 | 85 | self.user_ids = np.array([self.user_map[u] for u in self.user_ids]) 86 | self.item_ids = np.array([self.item_map[i] for i in self.item_ids]) 87 | print(len(self.item_ids)) 88 | 89 | if isTrain: 90 | self.load_data_seq() 91 | # else: 92 | # self.num_item += 1 93 | 94 | def load_data_seq(self): 95 | for k, v in self.item_map.items(): 96 | self.item_map[k] = v + 1 97 | self.item_ids = self.item_ids + 1 98 | self.num_item += 1 99 | 100 | max_seq_len = self.seq_len + self.target_len 101 | 102 | sort_indices = np.lexsort((self.user_ids,)) 103 | 104 | u_ids = self.user_ids[sort_indices] 105 | i_ids = self.item_ids[sort_indices] 106 | 107 | u_ids, indices, counts = np.unique(u_ids, return_index=True, return_counts=True) 108 | 109 | num_subsequences = sum([c - max_seq_len + 1 if c >= max_seq_len else 1 for c in counts]) 110 | 111 | sequences = np.zeros((num_subsequences, self.seq_len)) 112 | sequences_targets = np.zeros((num_subsequences, self.target_len)) 113 | 114 | sequence_users = np.empty(num_subsequences) 115 | 116 | test_sequences = np.zeros((self.num_user, self.seq_len)) 117 | test_users = np.empty(self.num_user) 118 | 119 | _uid = None 120 | # print(u_ids) 121 | # print(len(i_ids)) 122 | for i, (uid, item_seq) in enumerate(self._generate_sequences(u_ids, 123 | i_ids, 124 | indices, 125 | max_seq_len)): 126 | if uid != _uid: 127 | test_sequences[uid][:] = item_seq[-self.seq_len:] 128 | test_users[uid] = uid 129 | _uid = uid 130 | sequences_targets[i][:] = item_seq[-self.target_len:] 131 | sequences[i][:] = item_seq[:self.seq_len] 132 | sequence_users[i] = uid 133 | 134 | self.sequences = self.SeqData(sequence_users, sequences, sequences_targets) 135 | self.test_sequences = self.SeqData(test_users, test_sequences) 136 | 137 | # user_seq = [] 138 | # for i in range(len(indices)): 139 | # start_idx = indices[1] 140 | # 141 | # if i >= len(indices) - 1: 142 | # stop_idx = None 143 | # else: 144 | # stop_idx = indices[ i + 1] 145 | # 146 | # # seq = [] 147 | # tensor = i_ids[start_idx:stop_idx] 148 | # if len(tensor) - max_seq_len >= 0: 149 | # for j in range(len(tensor), 0, -1): 150 | # if j - max_seq_len >= 0: 151 | # user_seq.append((u_ids[i], tensor[j - max_seq_len:j])) 152 | # else: 153 | # break 154 | # else: 155 | # user_seq.append((u_ids[i],tensor)) 156 | # 157 | # _uid = None 158 | # for i, (uid, item_seq) in enumerate(user_seq): 159 | # if uid != _uid: 160 | # test_sequences[uid][:] = item_seq[-sequence_len:] 161 | # test_users[uid] = uid 162 | # _uid = uid 163 | # sequence_targets[i][:] = item_seq[-target_len:] 164 | # sequences[i][:] = item_seq[:sequence_len] 165 | # sequence_users[i] = uid 166 | 167 | 168 | 169 | def _sliding_window(self, tensor, window_size, step_size=1): 170 | if len(tensor) - window_size >= 0: 171 | for i in range(len(tensor), 0, -step_size): 172 | if i - window_size >= 0: 173 | yield tensor[i - window_size:i] 174 | else: 175 | break 176 | else: 177 | yield tensor 178 | 179 | def _generate_sequences(self, user_ids, item_ids, 180 | indices, 181 | max_sequence_length): 182 | for i in range(len(indices)): 183 | 184 | start_idx = indices[i] 185 | 186 | if i >= len(indices) - 1: 187 | stop_idx = None 188 | else: 189 | stop_idx = indices[i + 1] 190 | 191 | for seq in self._sliding_window(item_ids[start_idx:stop_idx], 192 | max_sequence_length): 193 | yield (user_ids[i], seq) 194 | 195 | def tocsr(self): 196 | 197 | row = 
self.user_ids 198 | col = self.item_ids 199 | data = np.ones(len(row)) 200 | 201 | return csr_matrix((data, (row, col)), shape=(self.num_user, self.num_item)) 202 | 203 | 204 | 205 | 206 | 207 | # 208 | # for u in range(n_users): 209 | # for i in range(n_items): 210 | # train_row.append(u) 211 | # train_col.append(i) 212 | # if (u, i) in train_dict.keys(): 213 | # train_rating.append(1) 214 | # else: 215 | # train_rating.append(0) 216 | # 217 | # all_items = set(np.arange(n_items)) 218 | # 219 | # neg_items = {} 220 | # train_interaction_matrix = [] 221 | # for u in range(n_users): 222 | # neg_items[u] = list(all_items - set(train_matrix.getrow(u).nonzero()[1])) 223 | # train_interaction_matrix.append(list(train_matrix.getrow(u).toarray()[0])) 224 | # 225 | # test_row = [] 226 | # test_col = [] 227 | # test_rating = [] 228 | # for line in test_data.itertuples(): 229 | # test_row.append(line[1] - 1) 230 | # test_col.append(line[2] - 1) 231 | # test_rating.append(1) 232 | # test_matrix = csr_matrix((test_rating, (test_row, test_col)), shape=(n_users, n_items)) 233 | # 234 | # test_dict = {} 235 | # for u in range(n_users): 236 | # test_dict[u] = test_matrix.getrow(u).nonzero()[1] 237 | # 238 | # print("Load data finished. Number of users:", n_users, "Number of items:", n_items) 239 | # 240 | # 241 | # 242 | # num_subsequences = sum([ c - max]) 243 | # 244 | # return train_interaction_matrix, test_dict, n_users, n_items 245 | 246 | # def remove_cold_start_user(): 247 | # print() 248 | # 249 | # 250 | # def train_valid_test_split(time_order=True): 251 | # # if time_order: 252 | # 253 | # print() 254 | 255 | # if __name__ == '__main__': 256 | # 257 | 258 | def data_preprocess(path, path_save, sep="\t", header = ['user_id', 'item_id', 'rating', 'timestampe']): 259 | 260 | #TODO: leave the recent one for test, seperately the data into two parts. 
261 | df = pd.read_csv(path, sep=sep, names=header, engine='python') 262 | test_items = {} 263 | n_users = df.user_id.unique().shape[0] # 943 # 6040 #.user_id.unique().shape[0] 264 | n_items = df.item_id.unique().shape[0] # 1682 # 3952 ##df.item_id.unique().shape[0] 265 | print("Number of users: %d; Number of items: %d;" % (n_users, n_items)) 266 | train_items = {} 267 | user_set = set() 268 | for line in df.itertuples(): 269 | u = line[1] 270 | i = line[2] 271 | user_set.add(u) 272 | train_items.setdefault(u, []).append((u, i, line[3],line[4])) 273 | if u not in test_items: 274 | test_items[u] = (i, line[3], line[4]) 275 | else: 276 | if test_items[u][2] < line[4]: 277 | test_items[u] = (i, line[3], line[4]) 278 | 279 | 280 | test_data = [(key, value[0], value[1], value[2]) for key, value in test_items.items()] 281 | test_data_map = {} 282 | for i in range(len(test_data)): 283 | test_data_map[test_data[i][0]] = test_data[i] 284 | 285 | test_file = open(path_save+"test.dat", 'a', encoding='utf-8') 286 | test_writer = csv.writer(test_file, delimiter='\t', lineterminator='\n', quoting=csv.QUOTE_MINIMAL) 287 | for i in test_data: 288 | #test_writer.writerow([i[0] - 1 , i[1]-1 , i[2]]) 289 | test_writer.writerow([i[0] , i[1] , i[2], i[3]]) 290 | 291 | train_file = open(path_save+"train.dat", 'a', encoding='utf-8') 292 | train_writer = csv.writer(train_file, delimiter='\t', lineterminator='\n', quoting=csv.QUOTE_MINIMAL) 293 | 294 | for u in user_set: 295 | sorted_items = sorted(train_items[u ], key=lambda tup: tup[3], reverse=False) 296 | #print(sorted_items) 297 | 298 | for i in sorted_items: 299 | #print(test_data[u]) 300 | #print(sorted_items[i]) 301 | #print(u) 302 | if i != test_data_map[u]: 303 | train_writer.writerow([u , i[1], i[2], i[3]]) 304 | -------------------------------------------------------------------------------- /utils/log/Log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/A-bone1/ESAM/6c96a8c860f23bf2bae9091be5a20d889c293f95/utils/log/Log.py --------------------------------------------------------------------------------
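The training entry point that ties these loaders, metrics and models together is test_cikm.py (also mirrored under test/). Its parser defaults to the NeuMF_my_cikm_p model together with the additional loss weights --A2C_weight, --center_weight and --pseudo_weight; note that the cikm variants do not appear in the --model choices list, and since argparse only validates values supplied on the command line, the default is the only way to reach them. A minimal invocation sketch using only the flags and default values defined in parse_args:

```
# run with all parser defaults (NeuMF_my_cikm_p, 40 epochs, batch size 1024)
python test_cikm.py

# the same run with the extra loss weights spelled out explicitly
python test_cikm.py --epochs 40 --batch_size 1024 --learning_rate 1e-3 \
    --A2C_weight 100 --center_weight 0.001 --pseudo_weight 0.001
```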