├── base ├── __init__.py ├── seq_recommender.py ├── ssl_interface.py ├── torch_interface.py ├── tf_interface.py ├── recommender.py └── graph_recommender.py ├── conf ├── __init__.py └── CPTPP.conf ├── data ├── __init__.py ├── feature.py ├── data.py ├── sequence.py ├── graph.py ├── augmentor.py ├── loader.py ├── social.py └── ui_graph.py ├── dataset ├── gowalla │ └── process.py ├── douban │ └── split.py └── ml-1M │ └── split.py ├── README.MD ├── util ├── logger.py ├── loss_tf.py ├── loss_torch.py ├── structure.py ├── conf.py ├── sampler.py ├── algorithm.py └── evaluation.py ├── main.py ├── SELFRec.py └── model └── graph └── CPTPP.py /base/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /conf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/feature.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /base/seq_recommender.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /base/ssl_interface.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/data.py: -------------------------------------------------------------------------------- 1 | class Data(object): 2 | def __init__(self, conf, training, test): 3 | self.config = conf 4 | self.training_data = training[:] 5 | self.test_data = test[:] 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /dataset/gowalla/process.py: -------------------------------------------------------------------------------- 1 | record = [] 2 | with open('train.txt') as f: 3 | for line in f: 4 | items = line.strip().split() 5 | for i in items[1:]: 6 | record.append(items[0]+' '+i+' 1\n') 7 | with open('train.txt','w') as f: 8 | f.writelines(record) -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | The source codes take [SELFRec](https://github.com/Coder-Yu/SELFRec) as the backbone to implement baselines and our proposed method. Please follow the detailed instructions in SELFRec to run the codes. The hyper-parameter settings are provided in our paper. 2 | 3 | Please cite both our work and [SELFRec](https://github.com/Coder-Yu/SELFRec) if you would like to use our source codes. 
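For a quick start with the configuration shipped in this repository (`./conf/CPTPP.conf`, whose active block points at `./dataset/gowalla/train.txt`), the model can be launched either by running `python main.py` and entering `CPTPP` at the prompt, or programmatically. The snippet below is a minimal sketch that mirrors what `main.py` does after the prompt:

```python
# Equivalent to running `python main.py` and typing "CPTPP" at the interactive prompt.
from SELFRec import SELFRec
from util.conf import ModelConf

conf = ModelConf('./conf/CPTPP.conf')  # dataset paths and hyper-parameters live here
SELFRec(conf).execute()                # imports model/graph/CPTPP.py, then pre-trains and trains
```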
-------------------------------------------------------------------------------- /base/torch_interface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class TorchGraphInterface(object): 4 | def __init__(self): 5 | pass 6 | 7 | @staticmethod 8 | def convert_sparse_mat_to_tensor(X): 9 | coo = X.tocoo() 10 | i = torch.LongTensor([coo.row, coo.col]) 11 | v = torch.from_numpy(coo.data).float() 12 | return torch.sparse.FloatTensor(i, v, coo.shape) -------------------------------------------------------------------------------- /dataset/douban/split.py: -------------------------------------------------------------------------------- 1 | import random 2 | random.seed(12345) 3 | train = [] 4 | test = [] 5 | test_ratio=0.2 6 | with open('ratings.txt') as f: 7 | for line in f: 8 | items = line.strip().split() 9 | if random.random()>test_ratio: 10 | train.append(line) 11 | else: 12 | test.append(line) 13 | 14 | with open('train.txt','w') as f: 15 | f.writelines(train) 16 | 17 | with open('test.txt','w') as f: 18 | f.writelines(test) 19 | 20 | -------------------------------------------------------------------------------- /dataset/ml-1M/split.py: -------------------------------------------------------------------------------- 1 | import random 2 | train = [] 3 | test = [] 4 | with open('ratings.dat') as f: 5 | for line in f: 6 | items = line.strip().split('::') 7 | new_line = ' '.join(items[:-1])+'\n' 8 | if int(items[-2])<4: 9 | continue 10 | if random.random() > 0.2: 11 | train.append(new_line) 12 | else: 13 | test.append(new_line) 14 | 15 | with open('train.txt','w') as f: 16 | f.writelines(train) 17 | 18 | with open('test.txt','w') as f: 19 | f.writelines(test) -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | 5 | class Log(object): 6 | def __init__(self,module,filename): 7 | self.logger = logging.getLogger(module) 8 | self.logger.setLevel(level=logging.INFO) 9 | if not os.path.exists('./log/'): 10 | os.makedirs('./log/') 11 | handler = logging.FileHandler('./log/'+filename+'.log') 12 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | handler.setFormatter(formatter) 14 | self.logger.addHandler(handler) 15 | 16 | def add(self,text): 17 | self.logger.info(text) 18 | -------------------------------------------------------------------------------- /base/tf_interface.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | 5 | class TFGraphInterface(object): 6 | def __init__(self): 7 | pass 8 | 9 | @staticmethod 10 | def convert_sparse_mat_to_tensor(adj): 11 | row, col = adj.nonzero() 12 | indices = np.array(list(zip(row, col))) 13 | adj_tensor = tf.SparseTensor(indices=indices, values=adj.data, dense_shape=adj.shape) 14 | return adj_tensor 15 | 16 | @staticmethod 17 | def convert_sparse_mat_to_tensor_inputs(X): 18 | coo = X.tocoo() 19 | indices = np.mat([coo.row, coo.col]).transpose() 20 | return indices, coo.data, coo.shape -------------------------------------------------------------------------------- /data/sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from data.data import Data 3 | 4 | 5 | class Sequence(Data): 6 | def __init__(self, conf, training, test): 7 | 
super(Sequence, self).__init__(conf, training, test) 8 | self.item = {} 9 | self.id2item = {} 10 | self.__generate_set() 11 | self.raw_seq_num = len(self.training_data) 12 | self.item_num = len(self.item) 13 | 14 | def __generate_set(self): 15 | for seq in self.training_data: 16 | for item in seq: 17 | if item not in self.item: 18 | self.item[item] = len(self.item) 19 | self.id2item[self.item[item]] = item 20 | 21 | 22 | 23 | def get_item_id(self, i): 24 | if i in self.item: 25 | return self.item[i] 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /data/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | 4 | 5 | class Graph(object): 6 | def __init__(self): 7 | pass 8 | 9 | @staticmethod 10 | def normalize_graph_mat(adj_mat): 11 | shape = adj_mat.get_shape() 12 | rowsum = np.array(adj_mat.sum(1)) 13 | if shape[0] == shape[1]: 14 | d_inv = np.power(rowsum, -0.5).flatten() 15 | d_inv[np.isinf(d_inv)] = 0. 16 | d_mat_inv = sp.diags(d_inv) 17 | norm_adj_tmp = d_mat_inv.dot(adj_mat) 18 | norm_adj_mat = norm_adj_tmp.dot(d_mat_inv) 19 | else: 20 | d_inv = np.power(rowsum, -1).flatten() 21 | d_inv[np.isinf(d_inv)] = 0. 22 | d_mat_inv = sp.diags(d_inv) 23 | norm_adj_mat = d_mat_inv.dot(adj_mat) 24 | return norm_adj_mat 25 | 26 | def convert_to_laplacian_mat(self, adj_mat): 27 | pass 28 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from SELFRec import SELFRec 2 | from util.conf import ModelConf 3 | 4 | if __name__ == '__main__': 5 | # Register your model here 6 | baseline = ['LightGCN','MF'] 7 | graph_models = ['SGL', 'SimGCL', 'BUIR', 'SelfCF', 'NCL', 'CPTPP'] 8 | sequential_models = [] 9 | 10 | print('=' * 80) 11 | print(' SELFRec: A library for self-supervised recommendation. 
') 12 | print('=' * 80) 13 | 14 | print('Baseline Models:') 15 | print(' '.join(baseline)) 16 | print('-' * 80) 17 | print('Graph-Based Models:') 18 | print(' '.join(graph_models)) 19 | 20 | print('=' * 80) 21 | model = input('Please enter the model you want to run:') 22 | import time 23 | 24 | s = time.time() 25 | if model in baseline or model in graph_models or model in sequential_models: 26 | conf = ModelConf('./conf/' + model + '.conf') 27 | else: 28 | print('Wrong model name!') 29 | exit(-1) 30 | rec = SELFRec(conf) 31 | rec.execute() 32 | e = time.time() 33 | print("Running time: %f s" % (e - s)) 34 | -------------------------------------------------------------------------------- /conf/CPTPP.conf: -------------------------------------------------------------------------------- 1 | # training.set=./dataset/ml-1M/train.txt 2 | # test.set=./dataset/ml-1M/test.txt 3 | # model.name=CPTPP 4 | # model.type=graph 5 | # item.ranking=-topN 5,20 6 | # embbedding.size=64 7 | # num.max.preepoch=10 8 | # num.max.epoch=100 9 | # batch_size=512 10 | # learnRate=0.003 11 | # reg.lambda=0.0001 12 | # CPTPP=-n_layer 2 -lambda 0.1 -droprate 0.1 -augtype 1 -temp 0.2 -inputs_type 2 -prompt_size 256 13 | # output.setup=-dir ./results/ 14 | 15 | 16 | # training.set=./dataset/douban/train.txt 17 | # test.set=./dataset/douban/test.txt 18 | # model.name=CPTPP 19 | # model.type=graph 20 | # item.ranking=-topN 5,20 21 | # embbedding.size=64 22 | # num.max.preepoch=10 23 | # num.max.epoch=100 24 | # batch_size=512 25 | # learnRate=0.001 26 | # reg.lambda=0.0001 27 | # CPTPP=-n_layer 2 -lambda 0.1 -droprate 0.1 -augtype 1 -temp 0.2 -inputs_type 2 -prompt_size 256 28 | # output.setup=-dir ./results/ 29 | 30 | 31 | training.set=./dataset/gowalla/train.txt 32 | test.set=./dataset/gowalla/test.txt 33 | model.name=CPTPP 34 | model.type=graph 35 | item.ranking=-topN 5,20 36 | embbedding.size=64 37 | num.max.preepoch=10 38 | num.max.epoch=100 39 | batch_size=2048 40 | learnRate=0.001 41 | reg.lambda=0.0001 42 | CPTPP=-n_layer 2 -lambda 0.1 -droprate 0.1 -augtype 1 -temp 0.2 -inputs_type 2 -prompt_size 256 43 | output.setup=-dir ./results/ -------------------------------------------------------------------------------- /util/loss_tf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def bpr_loss(user_emb, pos_item_emb, neg_item_emb): 5 | score = tf.reduce_sum(tf.multiply(user_emb, pos_item_emb), 1) - tf.reduce_sum(tf.multiply(user_emb, neg_item_emb), 1) 6 | loss = -tf.reduce_sum(tf.log(tf.sigmoid(score) + 10e-8)) 7 | return loss 8 | 9 | 10 | def InfoNCE(view1, view2, temperature): 11 | pos_score = tf.reduce_sum(tf.multiply(view1, view2), axis=1) 12 | ttl_score = tf.matmul(view1, view2, transpose_a=False, transpose_b=True) 13 | pos_score = tf.exp(pos_score / temperature) 14 | ttl_score = tf.reduce_sum(tf.exp(ttl_score / temperature), axis=1) 15 | cl_loss = -tf.reduce_sum(tf.log(pos_score / ttl_score)) 16 | return cl_loss 17 | 18 | 19 | # Sampled Softmax 20 | def ssm_loss(user_emb, pos_item_emb, neg_item_emb): 21 | user_emb = tf.nn.l2_normalize(user_emb, 1) 22 | pos_item_emb = tf.nn.l2_normalize(pos_item_emb, 1) 23 | neg_item_emb = tf.nn.l2_normalize(neg_item_emb, 1) 24 | pos_score = tf.reduce_sum(tf.multiply(user_emb, pos_item_emb), 1) 25 | ttl_score = tf.matmul(user_emb, neg_item_emb, transpose_a=False, transpose_b=True) 26 | ttl_score = tf.concat([tf.reshape(pos_score, (-1, 1)), ttl_score], axis=1) 27 | pos_score = tf.exp(pos_score / 0.2) 28 | 
ttl_score = tf.reduce_sum(tf.exp(ttl_score / 0.2), axis=1) 29 | return -tf.reduce_mean(tf.log(pos_score / ttl_score)) 30 | -------------------------------------------------------------------------------- /SELFRec.py: -------------------------------------------------------------------------------- 1 | from data.loader import FileIO 2 | 3 | 4 | class SELFRec(object): 5 | def __init__(self, config): 6 | self.social_data = [] 7 | self.feature_data = [] 8 | self.config = config 9 | if config['model.type'] == 'sequential': 10 | self.training_data, self.test_data = FileIO.load_data_set(config['sequence.data'], config['model.type']) 11 | else: 12 | self.training_data = FileIO.load_data_set(config['training.set'], config['model.type']) 13 | self.test_data = FileIO.load_data_set(config['test.set'], config['model.type']) 14 | 15 | self.kwargs = {} 16 | if config.contain('social.data'): 17 | social_data = FileIO.load_social_data(self.config['social.data']) 18 | self.kwargs['social.data'] = social_data 19 | # if config.contains('feature.data'): 20 | # self.social_data = FileIO.loadFeature(config,self.config['feature.data']) 21 | print('Reading data and preprocessing...') 22 | 23 | def execute(self): 24 | # import the model module 25 | import_str = 'from model.'+ self.config['model.type'] +'.' + self.config['model.name'] + ' import ' + self.config['model.name'] 26 | exec(import_str) 27 | recommender = self.config['model.name'] + '(self.config,self.training_data,self.test_data,**self.kwargs)' 28 | eval(recommender).execute() 29 | -------------------------------------------------------------------------------- /data/augmentor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import scipy.sparse as sp 4 | 5 | class GraphAugmentor(object): 6 | def __init__(self): 7 | pass 8 | 9 | @staticmethod 10 | def node_dropout(sp_adj, drop_rate): 11 | """Input: a sparse adjacency matrix and a dropout rate.""" 12 | adj_shape = sp_adj.get_shape() 13 | row_idx, col_idx = sp_adj.nonzero() 14 | drop_user_idx = random.sample(range(adj_shape[0]), int(adj_shape[0] * drop_rate)) 15 | drop_item_idx = random.sample(range(adj_shape[1]), int(adj_shape[1] * drop_rate)) 16 | indicator_user = np.ones(adj_shape[0], dtype=np.float32) 17 | indicator_item = np.ones(adj_shape[1], dtype=np.float32) 18 | indicator_user[drop_user_idx] = 0. 19 | indicator_item[drop_item_idx] = 0. 
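        # The 0/1 keep-indicators built above are expanded into diagonal matrices below;
        # left-multiplying the binary interaction matrix by the user diagonal zeroes the
        # rows of dropped users, and right-multiplying by the item diagonal zeroes the
        # columns of dropped items, so node dropout reduces to two sparse matrix products.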
20 | diag_indicator_user = sp.diags(indicator_user) 21 | diag_indicator_item = sp.diags(indicator_item) 22 | mat = sp.csr_matrix( 23 | (np.ones_like(row_idx, dtype=np.float32), (row_idx, col_idx)), 24 | shape=(adj_shape[0], adj_shape[1])) 25 | mat_prime = diag_indicator_user.dot(mat).dot(diag_indicator_item) 26 | return mat_prime 27 | 28 | @staticmethod 29 | def edge_dropout(sp_adj, drop_rate): 30 | """Input: a sparse user-item adjacency matrix and a dropout rate.""" 31 | adj_shape = sp_adj.get_shape() 32 | edge_count = sp_adj.count_nonzero() 33 | row_idx, col_idx = sp_adj.nonzero() 34 | keep_idx = random.sample(range(edge_count), int(edge_count * (1 - drop_rate))) 35 | user_np = np.array(row_idx)[keep_idx] 36 | item_np = np.array(col_idx)[keep_idx] 37 | edges = np.ones_like(user_np, dtype=np.float32) 38 | dropped_adj = sp.csr_matrix((edges, (user_np, item_np)), shape=adj_shape) 39 | return dropped_adj 40 | 41 | -------------------------------------------------------------------------------- /util/loss_torch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def bpr_loss(user_emb, pos_item_emb, neg_item_emb): 6 | pos_score = torch.mul(user_emb, pos_item_emb).sum(dim=1) 7 | neg_score = torch.mul(user_emb, neg_item_emb).sum(dim=1) 8 | loss = -torch.log(10e-8 + torch.sigmoid(pos_score - neg_score)) 9 | return torch.mean(loss) 10 | 11 | 12 | def l2_reg_loss(reg, *args): 13 | emb_loss = 0 14 | for emb in args: 15 | emb_loss += torch.norm(emb, p=2) 16 | return emb_loss * reg 17 | 18 | 19 | def batch_softmax_loss(user_emb, item_emb, temperature): 20 | user_emb, item_emb = F.normalize(user_emb, dim=1), F.normalize(item_emb, dim=1) 21 | pos_score = (user_emb * item_emb).sum(dim=-1) 22 | pos_score = torch.exp(pos_score / temperature) 23 | ttl_score = torch.matmul(user_emb, item_emb.transpose(0, 1)) 24 | ttl_score = torch.exp(ttl_score / temperature).sum(dim=1) 25 | loss = -torch.log(pos_score / ttl_score) 26 | return torch.mean(loss) 27 | 28 | 29 | def InfoNCE(view1, view2, temperature): 30 | view1, view2 = F.normalize(view1, dim=1), F.normalize(view2, dim=1) 31 | pos_score = (view1 * view2).sum(dim=-1) 32 | pos_score = torch.exp(pos_score / temperature) 33 | ttl_score = torch.matmul(view1, view2.transpose(0, 1)) 34 | ttl_score = torch.exp(ttl_score / temperature).sum(dim=1) 35 | cl_loss = -torch.log(pos_score / ttl_score) 36 | return torch.mean(cl_loss) 37 | 38 | 39 | def kl_divergence(p_logit, q_logit): 40 | p = F.softmax(p_logit, dim=-1) 41 | kl = torch.sum(p * (F.log_softmax(p_logit, dim=-1) - F.log_softmax(q_logit, dim=-1)), 1) 42 | return torch.mean(kl) 43 | 44 | def js_divergence(p_logit, q_logit): 45 | p = F.softmax(p_logit, dim=-1) 46 | q = F.softmax(q_logit, dim=-1) 47 | kl_p = torch.sum(p * (F.log_softmax(p_logit, dim=-1) - F.log_softmax(q_logit, dim=-1)), 1) 48 | kl_q = torch.sum(q * (F.log_softmax(q_logit, dim=-1) - F.log_softmax(p_logit, dim=-1)), 1) 49 | return torch.mean(kl_p+kl_q) -------------------------------------------------------------------------------- /util/structure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class SparseMatrix(): 5 | def __init__(self,triple): 6 | self.matrix_user = {} 7 | self.matrix_item = {} 8 | for item in triple: 9 | if item[0] not in self.matrix_user: 10 | self.matrix_user[item[0]] = {} 11 | if item[1] not in self.matrix_item: 12 | self.matrix_item[item[1]] = {} 13 | 
self.matrix_user[item[0]][item[1]] = item[2] 14 | self.matrix_item[item[1]][item[0]] = item[2] 15 | self.elemNum = len(triple) 16 | self.size = len(self.matrix_user), len(self.matrix_item) 17 | 18 | def row(self,r): 19 | if r not in self.matrix_user: 20 | return {} 21 | else: 22 | return self.matrix_user[r] 23 | 24 | def col(self,c): 25 | if c not in self.matrix_item: 26 | return {} 27 | else: 28 | return self.matrix_item[c] 29 | 30 | def dense_row(self,r): 31 | if r not in self.matrix_user: 32 | return np.zeros((1,self.size[1])) 33 | else: 34 | array = np.zeros((1,self.size[1])) 35 | ind = list(self.matrix_user[r].keys()) 36 | val = list(self.matrix_user[r].values()) 37 | array[0][ind] = val 38 | return array 39 | 40 | def dense_col(self,c): 41 | if c not in self.matrix_item: 42 | return np.zeros((1,self.size[0])) 43 | else: 44 | array = np.zeros((1,self.size[0])) 45 | ind = list(self.matrix_item[c].keys()) 46 | val = list(self.matrix_item[c].values()) 47 | array[0][ind] = val 48 | return array 49 | 50 | def elem(self,r,c): 51 | if not self.contain(r,c): 52 | return 0 53 | return self.matrix_user[r][c] 54 | 55 | def contain(self,r,c): 56 | if r in self.matrix_user and c in self.matrix_user[r]: 57 | return True 58 | return False 59 | 60 | def elem_count(self): 61 | return self.elemNum 62 | 63 | def size(self): 64 | return self.size 65 | 66 | 67 | -------------------------------------------------------------------------------- /data/loader.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from os import remove 3 | from re import split 4 | 5 | 6 | class FileIO(object): 7 | def __init__(self): 8 | pass 9 | 10 | @staticmethod 11 | def write_file(dir, file, content, op='w'): 12 | if not os.path.exists(dir): 13 | os.makedirs(dir) 14 | with open(dir + file, op) as f: 15 | f.writelines(content) 16 | 17 | @staticmethod 18 | def delete_file(file_path): 19 | if os.path.exists(file_path): 20 | remove(file_path) 21 | 22 | @staticmethod 23 | def load_data_set(file, dtype): 24 | data = [] 25 | if dtype == 'graph': 26 | with open(file) as f: 27 | for line in f: 28 | items = split(' ', line.strip()) 29 | user_id = items[0] 30 | item_id = items[1] 31 | weight = items[2] 32 | data.append([user_id, item_id, float(weight)]) 33 | 34 | if dtype == 'sequential': 35 | training_data, test_data = [], [] 36 | with open(file) as f: 37 | for line in f: 38 | items = split(':', line.strip()) 39 | user_id = items[0] 40 | seq = items[1].strip().split() 41 | training_data.append(seq[:-1]) 42 | test_data.append(seq[-1]) 43 | data = (training_data, test_data) 44 | return data 45 | 46 | @staticmethod 47 | def load_user_list(file): 48 | user_list = [] 49 | print('loading user List...') 50 | with open(file) as f: 51 | for line in f: 52 | user_list.append(line.strip().split()[0]) 53 | return user_list 54 | 55 | @staticmethod 56 | def load_social_data(file): 57 | social_data = [] 58 | print('loading social data...') 59 | with open(file) as f: 60 | for line in f: 61 | items = split(' ', line.strip()) 62 | user1 = items[0] 63 | user2 = items[1] 64 | if len(items) < 3: 65 | weight = 1 66 | else: 67 | weight = float(items[2]) 68 | social_data.append([user1, user2, weight]) 69 | return social_data 70 | -------------------------------------------------------------------------------- /util/conf.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | 4 | class ModelConf(object): 5 | def __init__(self,file): 6 | self.config = 
{} 7 | self.read_configuration(file) 8 | 9 | def __getitem__(self, item): 10 | if not self.contain(item): 11 | print('parameter '+item+' is not found in the configuration file!') 12 | exit(-1) 13 | return self.config[item] 14 | 15 | def contain(self,key): 16 | return key in self.config 17 | 18 | def read_configuration(self,file): 19 | if not os.path.exists(file): 20 | print('config file is not found!') 21 | raise IOError 22 | with open(file) as f: 23 | for ind,line in enumerate(f): 24 | if line.strip()!='': 25 | try: 26 | key,value=line.strip().split('=') 27 | self.config[key]=value 28 | except ValueError: 29 | print('config file is not in the correct format! Error Line:%d' % ind) 30 | 31 | 32 | class OptionConf(object): 33 | def __init__(self,content): 34 | self.line = content.strip().split(' ') 35 | self.options = {} 36 | self.mainOption = False 37 | if self.line[0] == 'on': 38 | self.mainOption = True 39 | elif self.line[0] == 'off': 40 | self.mainOption = False 41 | for i,item in enumerate(self.line): 42 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit(): 43 | ind = i+1 44 | for j,sub in enumerate(self.line[ind:]): 45 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit(): 46 | ind = j 47 | break 48 | if j == len(self.line[ind:])-1: 49 | ind=j+1 50 | break 51 | try: 52 | self.options[item] = ' '.join(self.line[i+1:i+1+ind]) 53 | except IndexError: 54 | self.options[item] = 1 55 | 56 | def __getitem__(self, item): 57 | if not self.contain(item): 58 | print('parameter '+item+' is invalid!') 59 | exit(-1) 60 | return self.options[item] 61 | 62 | def keys(self): 63 | return self.options.keys() 64 | 65 | def is_main_on(self): 66 | return self.mainOption 67 | 68 | def contain(self,key): 69 | return key in self.options 70 | 71 | 72 | -------------------------------------------------------------------------------- /util/sampler.py: -------------------------------------------------------------------------------- 1 | from random import shuffle,randint,choice 2 | 3 | 4 | def next_batch_pairwise(data,batch_size): 5 | training_data = data.training_data 6 | shuffle(training_data) 7 | batch_id = 0 8 | data_size = len(training_data) 9 | while batch_id < data_size: 10 | if batch_id + batch_size <= data_size: 11 | users = [training_data[idx][0] for idx in range(batch_id, batch_size + batch_id)] 12 | items = [training_data[idx][1] for idx in range(batch_id, batch_size + batch_id)] 13 | batch_id += batch_size 14 | else: 15 | users = [training_data[idx][0] for idx in range(batch_id, data_size)] 16 | items = [training_data[idx][1] for idx in range(batch_id, data_size)] 17 | batch_id = data_size 18 | u_idx, i_idx, j_idx = [], [], [] 19 | item_list = list(data.item.keys()) 20 | for i, user in enumerate(users): 21 | i_idx.append(data.item[items[i]]) 22 | u_idx.append(data.user[user]) 23 | neg_item = choice(item_list) 24 | while neg_item in data.training_set_u[user]: 25 | neg_item = choice(item_list) 26 | j_idx.append(data.item[neg_item]) 27 | yield u_idx, i_idx, j_idx 28 | 29 | 30 | def next_batch_pointwise(data,batch_size): 31 | training_data = data.training_data 32 | data_size = len(training_data) 33 | batch_id = 0 34 | while batch_id < data_size: 35 | if batch_id + batch_size <= data_size: 36 | users = [training_data[idx][0] for idx in range(batch_id, batch_size + batch_id)] 37 | items = [training_data[idx][1] for idx in range(batch_id, batch_size + batch_id)] 38 | batch_id += batch_size 39 | else: 40 | users = [training_data[idx][0] for idx in 
range(batch_id, data_size)] 41 | items = [training_data[idx][1] for idx in range(batch_id, data_size)] 42 | batch_id = data_size 43 | u_idx, i_idx, y = [], [], [] 44 | for i, user in enumerate(users): 45 | i_idx.append(data.item[items[i]]) 46 | u_idx.append(data.user[user]) 47 | y.append(1) 48 | for instance in range(4): 49 | item_j = randint(0, data.item_num - 1) 50 | while data.id2item[item_j] in data.training_set_u[user]: 51 | item_j = randint(0, data.item_num - 1) 52 | u_idx.append(data.user[user]) 53 | i_idx.append(item_j) 54 | y.append(0) 55 | yield u_idx, i_idx, y -------------------------------------------------------------------------------- /base/recommender.py: -------------------------------------------------------------------------------- 1 | from data.data import Data 2 | from util.conf import OptionConf 3 | from util.logger import Log 4 | from os.path import abspath 5 | from time import strftime, localtime, time 6 | 7 | 8 | class Recommender(object): 9 | def __init__(self, conf, training_set, test_set, **kwargs): 10 | self.config = conf 11 | self.data = Data(self.config, training_set, test_set) 12 | self.model_name = self.config['model.name'] 13 | self.ranking = OptionConf(self.config['item.ranking']) 14 | self.emb_size = int(self.config['embbedding.size']) 15 | self.maxEpoch = int(self.config['num.max.epoch']) 16 | self.maxPreEpoch = int(self.config['num.max.preepoch']) 17 | self.batch_size = int(self.config['batch_size']) 18 | self.lRate = float(self.config['learnRate']) 19 | self.reg = float(self.config['reg.lambda']) 20 | self.output = OptionConf(self.config['output.setup']) 21 | current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) 22 | self.model_log = Log(self.model_name, self.model_name + ' ' + current_time) 23 | self.result = [] 24 | self.recOutput = [] 25 | 26 | def initializing_log(self): 27 | self.model_log.add('### model configuration ###') 28 | for k in self.config.config: 29 | self.model_log.add(k + '=' + self.config[k]) 30 | 31 | def print_model_info(self): 32 | print('Model:', self.config['model.name']) 33 | print('Training Set:', abspath(self.config['training.set'])) 34 | print('Test Set:', abspath(self.config['test.set'])) 35 | print('Embedding Dimension:', self.emb_size) 36 | print('Maximum Epoch:', self.maxEpoch) 37 | print('Learning Rate:', self.lRate) 38 | print('Batch Size:', self.batch_size) 39 | print('Regularization Parameter: reg %.4f' % self.reg) 40 | parStr = '' 41 | if self.config.contain(self.config['model.name']): 42 | args = OptionConf(self.config[self.config['model.name']]) 43 | for key in args.keys(): 44 | parStr += key[1:] + ':' + args[key] + ' ' 45 | print('Specific parameters:', parStr) 46 | 47 | def build(self): 48 | pass 49 | 50 | def train(self): 51 | pass 52 | 53 | def predict(self, u): 54 | pass 55 | 56 | def test(self): 57 | pass 58 | 59 | def save(self): 60 | pass 61 | 62 | def load(self): 63 | pass 64 | 65 | def evaluate(self, rec_list): 66 | pass 67 | 68 | def execute(self): 69 | self.initializing_log() 70 | self.print_model_info() 71 | print('Initializing and building model...') 72 | self.build() 73 | print('Training Model...') 74 | self.train() 75 | print('Testing...') 76 | rec_list = self.test() 77 | print('Evaluating...') 78 | self.evaluate(rec_list) 79 | -------------------------------------------------------------------------------- /data/social.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from data.graph import Graph 3 | import 
numpy as np 4 | import scipy.sparse as sp 5 | 6 | 7 | class Relation(Graph): 8 | def __init__(self, conf, relation, user): 9 | super().__init__() 10 | self.config = conf 11 | self.social_user = {} 12 | self.relation = relation 13 | self.followees = defaultdict(dict) 14 | self.followers = defaultdict(dict) 15 | self.user = user 16 | self.__initialize() 17 | 18 | def __initialize(self): 19 | idx = [] 20 | for n, pair in enumerate(self.relation): 21 | if pair[0] not in self.user or pair[1] not in self.user: 22 | idx.append(n) 23 | for item in reversed(idx): 24 | del self.relation[item] 25 | for line in self.relation: 26 | user1, user2, weight = line 27 | # add relations to dict 28 | self.followees[user1][user2] = weight 29 | self.followers[user2][user1] = weight 30 | 31 | def get_social_mat(self): 32 | row, col, entries = [], [], [] 33 | for pair in self.relation: 34 | row += [self.user[pair[0]]] 35 | col += [self.user[pair[1]]] 36 | entries += [1.0] 37 | social_mat = sp.csr_matrix((entries, (row, col)), shape=(len(self.user), len(self.user)), dtype=np.float32) 38 | return social_mat 39 | 40 | def get_birectional_social_mat(self): 41 | social_mat = self.get_social_mat() 42 | bi_social_mat = social_mat.multiply(social_mat) 43 | return bi_social_mat 44 | 45 | def convert_to_laplacian_mat(self, adj_mat): 46 | adj_shape = adj_mat.get_shape() 47 | (row_np_keep, col_np_keep) = adj_mat.nonzero() 48 | ratings_keep = adj_mat.data 49 | tmp_adj = sp.csr_matrix((ratings_keep, (row_np_keep, col_np_keep)), shape=adj_shape, dtype=np.float32) 50 | return self.normalize_graph_mat(tmp_adj) 51 | 52 | def weight(self, u1, u2): 53 | if u1 in self.followees and u2 in self.followees[u1]: 54 | return self.followees[u1][u2] 55 | else: 56 | return 0 57 | 58 | def get_followers(self, u): 59 | if u in self.followers: 60 | return self.followers[u] 61 | else: 62 | return {} 63 | 64 | def get_followees(self, u): 65 | if u in self.followees: 66 | return self.followees[u] 67 | else: 68 | return {} 69 | 70 | def has_followee(self, u1, u2): 71 | if u1 in self.followees: 72 | if u2 in self.followees[u1]: 73 | return True 74 | else: 75 | return False 76 | return False 77 | 78 | def has_follower(self, u1, u2): 79 | if u1 in self.followers: 80 | if u2 in self.followers[u1]: 81 | return True 82 | else: 83 | return False 84 | return False 85 | 86 | def size(self): 87 | return len(self.followers), len(self.relation) 88 | -------------------------------------------------------------------------------- /util/algorithm.py: -------------------------------------------------------------------------------- 1 | from numpy.linalg import norm 2 | from math import sqrt, exp 3 | from numba import jit 4 | 5 | 6 | def l1(x): 7 | return norm(x, ord=1) 8 | 9 | 10 | def l2(x): 11 | return norm(x) 12 | 13 | 14 | def common(x1, x2): 15 | # find common ratings 16 | overlap = (x1 != 0) & (x2 != 0) 17 | new_x1 = x1[overlap] 18 | new_x2 = x2[overlap] 19 | return new_x1, new_x2 20 | 21 | 22 | def cosine_sp(x1, x2): 23 | 'x1,x2 are dicts,this version is for sparse representation' 24 | total = 0 25 | denom1 = 0 26 | denom2 = 0 27 | try: 28 | for k in x1: 29 | if k in x2: 30 | total += x1[k] * x2[k] 31 | denom1 += x1[k] ** 2 32 | denom2 += x2[k] ** 2 33 | return total / (sqrt(denom1) * sqrt(denom2)) 34 | except ZeroDivisionError: 35 | return 0 36 | 37 | 38 | def euclidean_sp(x1, x2): 39 | 'x1,x2 are dicts,this version is for sparse representation' 40 | total = 0 41 | try: 42 | for k in x1: 43 | if k in x2: 44 | total += x1[k] ** 2 - x2[k] ** 2 45 | return 1 / 
total 46 | except ZeroDivisionError: 47 | return 0 48 | 49 | 50 | def cosine(x1, x2): 51 | # find common ratings 52 | # new_x1, new_x2 = common(x1,x2) 53 | # compute the cosine similarity between two vectors 54 | total = x1.dot(x2) 55 | denom = sqrt(x1.dot(x1) * x2.dot(x2)) 56 | try: 57 | return total / denom 58 | except ZeroDivisionError: 59 | return 0 60 | 61 | # return cosine_similarity(x1,x2)[0][0] 62 | 63 | 64 | def pearson_sp(x1, x2): 65 | total = 0 66 | denom1 = 0 67 | denom2 = 0 68 | overlapped = False 69 | try: 70 | mean1 = sum(x1.values()) / len(x1) 71 | mean2 = sum(x2.values()) / len(x2) 72 | for k in x1: 73 | if k in x2: 74 | total += (x1[k] - mean1) * (x2[k] - mean2) 75 | denom1 += (x1[k] - mean1) ** 2 76 | denom2 += (x2[k] - mean2) ** 2 77 | overlapped = True 78 | return total / (sqrt(denom1) * sqrt(denom2)) 79 | except ZeroDivisionError: 80 | if overlapped: 81 | return 1 82 | return 0 83 | 84 | 85 | def euclidean(x1, x2): 86 | # find common ratings 87 | new_x1, new_x2 = common(x1, x2) 88 | # compute the euclidean between two vectors 89 | diff = new_x1 - new_x2 90 | denom = sqrt((diff.dot(diff))) 91 | try: 92 | return 1 / denom 93 | except ZeroDivisionError: 94 | return 0 95 | 96 | 97 | def pearson(x1, x2): 98 | # find common ratings 99 | # new_x1, new_x2 = common(x1, x2) 100 | # compute the pearson similarity between two vectors 101 | # ind1 = new_x1 > 0 102 | # ind2 = new_x2 > 0 103 | try: 104 | mean_x1 = x1.sum() / len(x1) 105 | mean_x2 = x2.sum() / len(x2) 106 | new_x1 = x1 - mean_x1 107 | new_x2 = x2 - mean_x2 108 | total = new_x1.dot(new_x2) 109 | denom = sqrt((new_x1.dot(new_x1)) * (new_x2.dot(new_x2))) 110 | return total / denom 111 | except ZeroDivisionError: 112 | return 0 113 | 114 | 115 | def similarity(x1, x2, sim): 116 | if sim == 'pcc': 117 | return pearson_sp(x1, x2) 118 | if sim == 'euclidean': 119 | return euclidean_sp(x1, x2) 120 | else: 121 | return cosine_sp(x1, x2) 122 | 123 | 124 | def normalize(vec, maxVal, minVal): 125 | 'get the normalized value using min-max normalization' 126 | if maxVal > minVal: 127 | return (vec - minVal) / (maxVal - minVal) 128 | elif maxVal == minVal: 129 | return vec / maxVal 130 | else: 131 | print('error... 
maximum value is less than minimum value.') 132 | raise ArithmeticError 133 | 134 | 135 | def sigmoid(val): 136 | return 1 / (1 + exp(-val)) 137 | 138 | 139 | def denormalize(vec, max_val, min_val): 140 | return min_val + (vec - 0.01) * (max_val - min_val) 141 | 142 | 143 | @jit(nopython=True) 144 | def find_k_largest(K, candidates): 145 | n_candidates = [] 146 | for iid, score in enumerate(candidates[:K]): 147 | n_candidates.append((iid, score)) 148 | n_candidates.sort(key=lambda d: d[1], reverse=True) 149 | k_largest_scores = [item[1] for item in n_candidates] 150 | ids = [item[0] for item in n_candidates] 151 | # find the K biggest scores 152 | for iid, score in enumerate(candidates): 153 | ind = K 154 | l = 0 155 | r = K - 1 156 | if k_largest_scores[r] < score: 157 | while r >= l: 158 | mid = int((r - l) / 2) + l 159 | if k_largest_scores[mid] >= score: 160 | l = mid + 1 161 | elif k_largest_scores[mid] < score: 162 | r = mid - 1 163 | if r < l: 164 | ind = r 165 | break 166 | # move the items backwards 167 | if ind < K - 2: 168 | k_largest_scores[ind + 2:] = k_largest_scores[ind + 1:-1] 169 | ids[ind + 2:] = ids[ind + 1:-1] 170 | if ind < K - 1: 171 | k_largest_scores[ind + 1] = score 172 | ids[ind + 1] = iid 173 | return ids, k_largest_scores 174 | -------------------------------------------------------------------------------- /util/evaluation.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | class Metric(object): 5 | def __init__(self): 6 | pass 7 | 8 | @staticmethod 9 | def hits(origin, res): 10 | hit_count = {} 11 | for user in origin: 12 | items = list(origin[user].keys()) 13 | predicted = [item[0] for item in res[user]] 14 | hit_count[user] = len(set(items).intersection(set(predicted))) 15 | return hit_count 16 | 17 | @staticmethod 18 | def hit_ratio(origin, hits): 19 | """ 20 | Note: This type of hit ratio calculates the fraction: 21 | (# retrieved interactions in the test set / #all the interactions in the test set) 22 | """ 23 | total_num = 0 24 | for user in origin: 25 | items = list(origin[user].keys()) 26 | total_num += len(items) 27 | hit_num = 0 28 | for user in hits: 29 | hit_num += hits[user] 30 | return hit_num/total_num 31 | 32 | # # @staticmethod 33 | # def hit_ratio(origin, hits): 34 | # """ 35 | # Note: This type of hit ratio calculates the fraction: 36 | # (# users who are recommended items in the test set / #all the users in the test set) 37 | # """ 38 | # hit_num = 0 39 | # for user in hits: 40 | # if hits[user] > 0: 41 | # hit_num += 1 42 | # return hit_num / len(origin) 43 | 44 | @staticmethod 45 | def precision(hits, N): 46 | prec = sum([hits[user] for user in hits]) 47 | return prec / (len(hits) * N) 48 | 49 | @staticmethod 50 | def recall(hits, origin): 51 | recall_list = [hits[user]/len(origin[user]) for user in hits] 52 | recall = sum(recall_list) / len(recall_list) 53 | return recall 54 | 55 | @staticmethod 56 | def F1(prec, recall): 57 | if (prec + recall) != 0: 58 | return 2 * prec * recall / (prec + recall) 59 | else: 60 | return 0 61 | 62 | @staticmethod 63 | def MAE(res): 64 | error = 0 65 | count = 0 66 | for entry in res: 67 | error+=abs(entry[2]-entry[3]) 68 | count+=1 69 | if count==0: 70 | return error 71 | return error/count 72 | 73 | @staticmethod 74 | def RMSE(res): 75 | error = 0 76 | count = 0 77 | for entry in res: 78 | error += (entry[2] - entry[3])**2 79 | count += 1 80 | if count==0: 81 | return error 82 | return math.sqrt(error/count) 83 | 84 | @staticmethod 85 | def 
NDCG(origin,res,N): 86 | sum_NDCG = 0 87 | for user in res: 88 | DCG = 0 89 | IDCG = 0 90 | #1 = related, 0 = unrelated 91 | for n, item in enumerate(res[user]): 92 | if item[0] in origin[user]: 93 | DCG+= 1.0/math.log(n+2) 94 | for n, item in enumerate(list(origin[user].keys())[:N]): 95 | IDCG+=1.0/math.log(n+2) 96 | sum_NDCG += DCG / IDCG 97 | return sum_NDCG / len(res) 98 | 99 | # @staticmethod 100 | # def MAP(origin, res, N): 101 | # sum_prec = 0 102 | # for user in res: 103 | # hits = 0 104 | # precision = 0 105 | # for n, item in enumerate(res[user]): 106 | # if item[0] in origin[user]: 107 | # hits += 1 108 | # precision += hits / (n + 1.0) 109 | # sum_prec += precision / min(len(origin[user]), N) 110 | # return sum_prec / len(res) 111 | 112 | # @staticmethod 113 | # def AUC(origin, res, rawRes): 114 | # 115 | # from random import choice 116 | # sum_AUC = 0 117 | # for user in origin: 118 | # count = 0 119 | # larger = 0 120 | # itemList = rawRes[user].keys() 121 | # for item in origin[user]: 122 | # item2 = choice(itemList) 123 | # count += 1 124 | # try: 125 | # if rawRes[user][item] > rawRes[user][item2]: 126 | # larger += 1 127 | # except KeyError: 128 | # count -= 1 129 | # if count: 130 | # sum_AUC += float(larger) / count 131 | # 132 | # return float(sum_AUC) / len(origin) 133 | 134 | 135 | def ranking_evaluation(origin, res, N): 136 | measure = [] 137 | for n in N: 138 | predicted = {} 139 | for user in res: 140 | predicted[user] = res[user][:n] 141 | indicators = [] 142 | if len(origin) != len(predicted): 143 | print('The Lengths of test set and predicted set do not match!') 144 | exit(-1) 145 | hits = Metric.hits(origin, predicted) 146 | hr = Metric.hit_ratio(origin, hits) 147 | indicators.append('Hit Ratio:' + str(hr) + '\n') 148 | prec = Metric.precision(hits, n) 149 | indicators.append('Precision:' + str(prec) + '\n') 150 | recall = Metric.recall(hits, origin) 151 | indicators.append('Recall:' + str(recall) + '\n') 152 | # F1 = Metric.F1(prec, recall) 153 | # indicators.append('F1:' + str(F1) + '\n') 154 | #MAP = Measure.MAP(origin, predicted, n) 155 | #indicators.append('MAP:' + str(MAP) + '\n') 156 | NDCG = Metric.NDCG(origin, predicted, n) 157 | indicators.append('NDCG:' + str(NDCG) + '\n') 158 | # AUC = Measure.AUC(origin,res,rawRes) 159 | # measure.append('AUC:' + str(AUC) + '\n') 160 | measure.append('Top ' + str(n) + '\n') 161 | measure += indicators 162 | return measure 163 | 164 | def rating_evaluation(res): 165 | measure = [] 166 | mae = Metric.MAE(res) 167 | measure.append('MAE:' + str(mae) + '\n') 168 | rmse = Metric.RMSE(res) 169 | measure.append('RMSE:' + str(rmse) + '\n') 170 | return measure -------------------------------------------------------------------------------- /base/graph_recommender.py: -------------------------------------------------------------------------------- 1 | from base.recommender import Recommender 2 | from data.ui_graph import Interaction 3 | from util.algorithm import find_k_largest 4 | from time import strftime, localtime, time 5 | from data.loader import FileIO 6 | from os.path import abspath 7 | from util.evaluation import ranking_evaluation 8 | import sys 9 | 10 | 11 | class GraphRecommender(Recommender): 12 | def __init__(self, conf, training_set, test_set, **kwargs): 13 | super(GraphRecommender, self).__init__(conf, training_set, test_set, **kwargs) 14 | self.data = Interaction(conf, training_set, test_set) 15 | self.bestPerformance = [] 16 | top = self.ranking['-topN'].split(',') 17 | self.topN = [int(num) for num in top] 
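        # item.ranking is parsed by OptionConf, so "-topN 5,20" becomes topN = [5, 20];
        # max_N below is the largest cut-off, i.e. the list length test() asks find_k_largest for.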
18 | self.max_N = max(self.topN) 19 | 20 | def print_model_info(self): 21 | super(GraphRecommender, self).print_model_info() 22 | # # print dataset statistics 23 | print('Training Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.training_size())) 24 | print('Test Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.test_size())) 25 | print('=' * 80) 26 | 27 | def build(self): 28 | pass 29 | 30 | def train(self): 31 | pass 32 | 33 | def predict(self, u): 34 | pass 35 | 36 | def test(self): 37 | def process_bar(num, total): 38 | rate = float(num) / total 39 | ratenum = int(50 * rate) 40 | r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2) 41 | sys.stdout.write(r) 42 | sys.stdout.flush() 43 | 44 | # predict 45 | rec_list = {} 46 | user_count = len(self.data.test_set) 47 | for i, user in enumerate(self.data.test_set): 48 | candidates = self.predict(user) 49 | # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0]) 50 | rated_list, li = self.data.user_rated(user) 51 | for item in rated_list: 52 | candidates[self.data.item[item]] = -10e8 53 | ids, scores = find_k_largest(self.max_N, candidates) 54 | item_names = [self.data.id2item[iid] for iid in ids] 55 | rec_list[user] = list(zip(item_names, scores)) 56 | if i % 1000 == 0: 57 | process_bar(i, user_count) 58 | process_bar(user_count, user_count) 59 | print('') 60 | return rec_list 61 | 62 | def evaluate(self, rec_list): 63 | self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n') 64 | for user in self.data.test_set: 65 | line = user + ':' 66 | for item in rec_list[user]: 67 | line += ' (' + item[0] + ',' + str(item[1]) + ')' 68 | if item[0] in self.data.test_set[user]: 69 | line += '*' 70 | line += '\n' 71 | self.recOutput.append(line) 72 | current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) 73 | # output prediction result 74 | out_dir = self.output['-dir'] 75 | file_name = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt' 76 | FileIO.write_file(out_dir, file_name, self.recOutput) 77 | print('The result has been output to ', abspath(out_dir), '.') 78 | file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt' 79 | self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN) 80 | self.model_log.add('###Evaluation Results###') 81 | self.model_log.add(self.result) 82 | FileIO.write_file(out_dir, file_name, self.result) 83 | print('The result of %s:\n%s' % (self.model_name, ''.join(self.result))) 84 | 85 | def fast_evaluation(self, epoch): 86 | print('evaluating the model...') 87 | rec_list = self.test() 88 | measure = ranking_evaluation(self.data.test_set, rec_list, [self.max_N]) 89 | if len(self.bestPerformance) > 0: 90 | count = 0 91 | performance = {} 92 | for m in measure[1:]: 93 | k, v = m.strip().split(':') 94 | performance[k] = float(v) 95 | for k in self.bestPerformance[1]: 96 | if self.bestPerformance[1][k] > performance[k]: 97 | count += 1 98 | else: 99 | count -= 1 100 | if count < 0: 101 | self.bestPerformance[1] = performance 102 | self.bestPerformance[0] = epoch + 1 103 | self.save() 104 | else: 105 | self.bestPerformance.append(epoch + 1) 106 | performance = {} 107 | for m in measure[1:]: 108 | k, v = m.strip().split(':') 109 | performance[k] = float(v) 110 | self.bestPerformance.append(performance) 111 | self.save() 112 | print('-' * 120) 113 | print('Quick 
Ranking Performance ' + ' (Top-' + str(self.max_N) + ' Item Recommendation)') 114 | measure = [m.strip() for m in measure[1:]] 115 | print('*Current Performance*') 116 | print('Epoch:', str(epoch + 1) + ',', ' | '.join(measure)) 117 | bp = '' 118 | # for k in self.bestPerformance[1]: 119 | # bp+=k+':'+str(self.bestPerformance[1][k])+' | ' 120 | bp += 'Hit Ratio' + ':' + str(self.bestPerformance[1]['Hit Ratio']) + ' | ' 121 | bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + ' | ' 122 | bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + ' | ' 123 | # bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | ' 124 | bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG']) 125 | print('*Best Performance* ') 126 | print('Epoch:', str(self.bestPerformance[0]) + ',', bp) 127 | print('-' * 120) 128 | return measure 129 | -------------------------------------------------------------------------------- /data/ui_graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import defaultdict 3 | from data.data import Data 4 | from data.graph import Graph 5 | import scipy.sparse as sp 6 | import pickle 7 | 8 | class Interaction(Data,Graph): 9 | def __init__(self, conf, training, test): 10 | Graph.__init__(self) 11 | Data.__init__(self,conf,training,test) 12 | 13 | self.user = {} 14 | self.item = {} 15 | self.id2user = {} 16 | self.id2item = {} 17 | self.training_set_u = defaultdict(dict) 18 | self.training_set_i = defaultdict(dict) 19 | self.test_set = defaultdict(dict) 20 | self.test_set_item = set() 21 | self.__generate_set() 22 | self.user_num = len(self.training_set_u) 23 | self.item_num = len(self.training_set_i) 24 | self.ui_adj = self.__create_sparse_bipartite_adjacency() 25 | self.norm_adj = self.normalize_graph_mat(self.ui_adj) 26 | self.interaction_mat = self.__create_sparse_interaction_matrix() 27 | # popularity_user = {} 28 | # for u in self.user: 29 | # popularity_user[self.user[u]] = len(self.training_set_u[u]) 30 | # popularity_item = {} 31 | # for u in self.item: 32 | # popularity_item[self.item[u]] = len(self.training_set_i[u]) 33 | 34 | 35 | def __generate_set(self): 36 | for entry in self.training_data: 37 | user, item, rating = entry 38 | if user not in self.user: 39 | self.user[user] = len(self.user) 40 | self.id2user[self.user[user]] = user 41 | if item not in self.item: 42 | self.item[item] = len(self.item) 43 | self.id2item[self.item[item]] = item 44 | # userList.append 45 | self.training_set_u[user][item] = rating 46 | self.training_set_i[item][user] = rating 47 | for entry in self.test_data: 48 | user, item, rating = entry 49 | if user not in self.user: 50 | continue 51 | self.test_set[user][item] = rating 52 | self.test_set_item.add(item) 53 | 54 | def __create_sparse_bipartite_adjacency(self, self_connection=False): 55 | ''' 56 | return a sparse adjacency matrix with the shape (user number + item number, user number + item number) 57 | ''' 58 | n_nodes = self.user_num + self.item_num 59 | row_idx = [self.user[pair[0]] for pair in self.training_data] 60 | col_idx = [self.item[pair[1]] for pair in self.training_data] 61 | user_np = np.array(row_idx) 62 | item_np = np.array(col_idx) 63 | ratings = np.ones_like(user_np, dtype=np.float32) 64 | tmp_adj = sp.csr_matrix((ratings, (user_np, item_np + self.user_num)), shape=(n_nodes, n_nodes),dtype=np.float32) 65 | adj_mat = tmp_adj + tmp_adj.T 66 | if self_connection: 67 | adj_mat += sp.eye(n_nodes) 68 | return adj_mat 69 | 
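    # convert_to_laplacian_mat below lifts a (user x item) matrix into the full
    # (user+item) x (user+item) bipartite adjacency [[0, R], [R^T, 0]] and then applies
    # the symmetric D^{-1/2} A D^{-1/2} normalization from Graph.normalize_graph_mat;
    # SGL_Encoder.random_graph_augment uses it to turn a dropped interaction matrix
    # back into a propagation matrix.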
70 | def convert_to_laplacian_mat(self, adj_mat): 71 | adj_shape = adj_mat.get_shape() 72 | n_nodes = adj_shape[0]+adj_shape[1] 73 | (user_np_keep, item_np_keep) = adj_mat.nonzero() 74 | ratings_keep = adj_mat.data 75 | tmp_adj = sp.csr_matrix((ratings_keep, (user_np_keep, item_np_keep + adj_shape[0])),shape=(n_nodes, n_nodes),dtype=np.float32) 76 | tmp_adj = tmp_adj + tmp_adj.T 77 | return self.normalize_graph_mat(tmp_adj) 78 | 79 | def __create_sparse_interaction_matrix(self): 80 | """ 81 | return a sparse adjacency matrix with the shape (user number, item number) 82 | """ 83 | row, col, entries = [], [], [] 84 | for pair in self.training_data: 85 | row += [self.user[pair[0]]] 86 | col += [self.item[pair[1]]] 87 | entries += [1.0] 88 | interaction_mat = sp.csr_matrix((entries, (row, col)), shape=(self.user_num,self.item_num),dtype=np.float32) 89 | return interaction_mat 90 | 91 | def get_user_id(self, u): 92 | if u in self.user: 93 | return self.user[u] 94 | 95 | def get_item_id(self, i): 96 | if i in self.item: 97 | return self.item[i] 98 | 99 | def training_size(self): 100 | return len(self.user), len(self.item), len(self.training_data) 101 | 102 | def test_size(self): 103 | return len(self.test_set), len(self.test_set_item), len(self.test_data) 104 | 105 | def contain(self, u, i): 106 | 'whether user u rated item i' 107 | if u in self.user and i in self.training_set_u[u]: 108 | return True 109 | else: 110 | return False 111 | 112 | def contain_user(self, u): 113 | 'whether user is in training set' 114 | if u in self.user: 115 | return True 116 | else: 117 | return False 118 | 119 | def contain_item(self, i): 120 | """whether item is in training set""" 121 | if i in self.item: 122 | return True 123 | else: 124 | return False 125 | 126 | def user_rated(self, u): 127 | return list(self.training_set_u[u].keys()), list(self.training_set_u[u].values()) 128 | 129 | def item_rated(self, i): 130 | return list(self.training_set_i[i].keys()), list(self.training_set_i[i].values()) 131 | 132 | def row(self, u): 133 | u = self.id2user[u] 134 | k, v = self.user_rated(u) 135 | vec = np.zeros(len(self.item)) 136 | # print vec 137 | for pair in zip(k, v): 138 | iid = self.item[pair[0]] 139 | vec[iid] = pair[1] 140 | return vec 141 | 142 | def col(self, i): 143 | i = self.id2item[i] 144 | k, v = self.item_rated(i) 145 | vec = np.zeros(len(self.user)) 146 | # print vec 147 | for pair in zip(k, v): 148 | uid = self.user[pair[0]] 149 | vec[uid] = pair[1] 150 | return vec 151 | 152 | def matrix(self): 153 | m = np.zeros((len(self.user), len(self.item))) 154 | for u in self.user: 155 | k, v = self.user_rated(u) 156 | vec = np.zeros(len(self.item)) 157 | # print vec 158 | for pair in zip(k, v): 159 | iid = self.item[pair[0]] 160 | vec[iid] = pair[1] 161 | m[self.user[u]] = vec 162 | return m 163 | -------------------------------------------------------------------------------- /model/graph/CPTPP.py: -------------------------------------------------------------------------------- 1 | ########################## 2 | # This code take SGL, implemented by Coder-Yu on Github, as the backbone. 
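# Four components are defined below: CPTPP (the recommender and its training loop),
# Prompts_Generator and Fusion_MLP (the prompt-generation and fusion MLPs), and
# SGL_Encoder (the graph encoder pre-trained with graph augmentation + InfoNCE).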
3 | ########################## 4 | 5 | 6 | from turtle import forward 7 | import torch 8 | torch.manual_seed(12345) 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from base.graph_recommender import GraphRecommender 12 | from util.conf import OptionConf 13 | from util.sampler import next_batch_pairwise 14 | from base.torch_interface import TorchGraphInterface 15 | from util.loss_torch import bpr_loss, l2_reg_loss, InfoNCE 16 | from data.augmentor import GraphAugmentor 17 | from sklearn.decomposition import NMF 18 | import numpy as np 19 | 20 | # Paper: self-supervised graph learning for recommendation. SIGIR'21 21 | 22 | 23 | class CPTPP(GraphRecommender): 24 | def __init__(self, conf, training_set, test_set): 25 | super(CPTPP, self).__init__(conf, training_set, test_set) 26 | 27 | args = OptionConf(self.config['CPTPP']) 28 | self.cl_rate = float(args['-lambda']) 29 | aug_type = self.aug_type = int(args['-augtype']) 30 | drop_rate = float(args['-droprate']) 31 | self.n_layers = int(args['-n_layer']) 32 | temp = float(args['-temp']) 33 | self.inputs_type = int(args['-inputs_type']) 34 | prompt_size = int(args['-prompt_size']) 35 | 36 | self.model = SGL_Encoder(self.data, self.emb_size, drop_rate, self.n_layers, temp, aug_type) 37 | self.prompts_generator = Prompts_Generator(self.emb_size, prompt_size).cuda() 38 | self.fusion_mlp = Fusion_MLP(self.emb_size, prompt_size).cuda() 39 | 40 | if self.inputs_type == 0: 41 | self.interaction_mat = TorchGraphInterface.convert_sparse_mat_to_tensor(self.data.interaction_mat).cuda() 42 | if self.inputs_type == 2: 43 | # small dataset 44 | # self.adj_sparse = TorchGraphInterface.convert_sparse_mat_to_tensor(self.data.ui_adj) 45 | # self.ui_high_order = torch.sparse.mm(self.adj_sparse, self.adj_sparse.to_dense()).cuda() 46 | 47 | # big dataset Ciao 48 | self.sparse_norm_adj = TorchGraphInterface.convert_sparse_mat_to_tensor(self.data.norm_adj).cuda() 49 | 50 | def _pre_train(self): 51 | pre_trained_model = self.model.cuda() 52 | optimizer = torch.optim.Adam(pre_trained_model.parameters(), lr=self.lRate) 53 | 54 | print('############## Pre-Training Phase ##############') 55 | for epoch in range(self.maxPreEpoch): 56 | dropped_adj1 = pre_trained_model.graph_reconstruction() 57 | dropped_adj2 = pre_trained_model.graph_reconstruction() 58 | 59 | for n, batch in enumerate(next_batch_pairwise(self.data, self.batch_size)): 60 | user_idx, pos_idx, neg_idx = batch 61 | cl_loss = pre_trained_model.cal_cl_loss([user_idx,pos_idx],dropped_adj1,dropped_adj2) 62 | batch_loss = cl_loss 63 | # Backward and optimize 64 | optimizer.zero_grad() 65 | if epoch == self.maxEpoch-1: 66 | batch_loss.backward(retain_graph=True) 67 | else: 68 | batch_loss.backward() 69 | optimizer.step() 70 | if n % 100==0: 71 | print('pre-training:', epoch + 1, 'batch', n, 'cl_loss', cl_loss.item()) 72 | 73 | def _csr_to_pytorch_dense(self, csr): 74 | array = csr.toarray() 75 | dense = torch.Tensor(array) 76 | return dense.cuda() 77 | 78 | def _prompts_generation(self, item_emb, user_emb): 79 | if self.inputs_type == 0: 80 | inputs = self._historical_records(item_emb) 81 | # elif self.inputs_type == 1: 82 | # inputs = self._adjacency_matrix_factorization() 83 | elif self.inputs_type == 2: 84 | inputs = self._high_order_u_relations(item_emb, user_emb) 85 | prompts = self.prompts_generator(inputs) 86 | return prompts 87 | 88 | def _historical_records(self, item_emb): 89 | inputs = torch.mm(self.interaction_mat, item_emb) 90 | return inputs 91 | 92 | # def 
_adjacency_matrix_factorization(self): 93 | # adjacency_matrix = self.data.interaction_mat 94 | # adjacency_matrix = adjacency_matrix.toarray() 95 | 96 | # print('######### Adjacency Matrix Factorization #############') 97 | # nmf = NMF(n_components=self.emb_size) 98 | # user_profiles = nmf.fit_transform(adjacency_matrix) 99 | # inputs = torch.Tensor(user_profiles).cuda() 100 | # return inputs 101 | 102 | def _high_order_u_relations(self, item_emb, user_emb): 103 | # small dataset 104 | # emb = torch.cat((user_emb, item_emb), 0) 105 | # inputs = torch.sparse.mm(self.ui_high_order, emb) 106 | # inputs = inputs[:self.data.user_num, :] 107 | # return inputs 108 | 109 | # big dataset Ciao 110 | ego_embeddings = torch.cat((user_emb, item_emb), 0) 111 | all_embeddings = [ego_embeddings] 112 | for k in range(self.n_layers): 113 | ego_embeddings = torch.sparse.mm(self.sparse_norm_adj, ego_embeddings) 114 | all_embeddings.append(ego_embeddings) 115 | all_embeddings = torch.stack(all_embeddings, dim=1) 116 | all_embeddings = torch.mean(all_embeddings, dim=1) 117 | inputs, item_all_embeddings = torch.split(all_embeddings, [self.data.user_num, self.data.item_num]) 118 | return inputs 119 | 120 | def _prompts_u_embeddings_fusion(self, prompts, user_emb): 121 | prompts_user_emb = torch.cat((prompts, user_emb), 1) 122 | prompted_user_emb = self.fusion_mlp(prompts_user_emb) 123 | return prompted_user_emb 124 | 125 | def train(self): 126 | self._pre_train() 127 | 128 | model = self.model.cuda() 129 | optimizer = torch.optim.Adam(model.parameters(), lr=self.lRate) 130 | 131 | if self.inputs_type == 1: 132 | nmf = NMF(n_components=self.emb_size, max_iter=1000) 133 | self.user_profiles = torch.Tensor(nmf.fit_transform(self.data.interaction_mat.toarray())).cuda() 134 | 135 | print('############## Downstream Training Phase ##############') 136 | for epoch in range(self.maxEpoch): 137 | # dropped_adj1 = model.graph_reconstruction() 138 | # dropped_adj2 = model.graph_reconstruction() 139 | for n, batch in enumerate(next_batch_pairwise(self.data, self.batch_size)): 140 | user_emb, item_emb = model() 141 | if self.inputs_type == 0 or self.inputs_type == 2: 142 | prompts = self._prompts_generation(item_emb, user_emb) 143 | else: 144 | prompts = self.prompts_generator(self.user_profiles) 145 | prompted_user_emb = self._prompts_u_embeddings_fusion(prompts, user_emb) 146 | 147 | user_idx, pos_idx, neg_idx = batch 148 | # rec_user_emb, rec_item_emb = model() 149 | rec_user_emb, rec_item_emb = prompted_user_emb, item_emb 150 | user_emb, pos_item_emb, neg_item_emb = rec_user_emb[user_idx], rec_item_emb[pos_idx], rec_item_emb[neg_idx] 151 | rec_loss = bpr_loss(user_emb, pos_item_emb, neg_item_emb) 152 | # cl_loss = self.cl_rate * model.cal_cl_loss([user_idx,pos_idx],dropped_adj1,dropped_adj2) 153 | batch_loss = rec_loss + l2_reg_loss(self.reg, user_emb, pos_item_emb) #+ cl_loss 154 | # Backward and optimize 155 | 156 | batch_loss.backward() 157 | optimizer.step() 158 | optimizer.zero_grad() 159 | 160 | if n % 100==0: 161 | print('training:', epoch + 1, 'batch', n, 'rec_loss:', rec_loss.item())#, 'cl_loss', cl_loss.item()) 162 | with torch.no_grad(): 163 | user_emb, self.item_emb = self.model() 164 | if self.inputs_type == 0 or self.inputs_type == 2: 165 | prompts = self._prompts_generation(self.item_emb, user_emb) 166 | else: 167 | prompts = self.prompts_generator(self.user_profiles) 168 | prompted_user_emb = self._prompts_u_embeddings_fusion(prompts, user_emb) 169 | self.user_emb = prompted_user_emb 170 | if epoch>=5: 171 
    def train(self):
        self._pre_train()

        model = self.model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.lRate)

        if self.inputs_type == 1:
            nmf = NMF(n_components=self.emb_size, max_iter=1000)
            self.user_profiles = torch.Tensor(nmf.fit_transform(self.data.interaction_mat.toarray())).cuda()

        print('############## Downstream Training Phase ##############')
        for epoch in range(self.maxEpoch):
            # dropped_adj1 = model.graph_reconstruction()
            # dropped_adj2 = model.graph_reconstruction()
            for n, batch in enumerate(next_batch_pairwise(self.data, self.batch_size)):
                user_emb, item_emb = model()
                if self.inputs_type == 0 or self.inputs_type == 2:
                    prompts = self._prompts_generation(item_emb, user_emb)
                else:
                    prompts = self.prompts_generator(self.user_profiles)
                prompted_user_emb = self._prompts_u_embeddings_fusion(prompts, user_emb)

                user_idx, pos_idx, neg_idx = batch
                # rec_user_emb, rec_item_emb = model()
                rec_user_emb, rec_item_emb = prompted_user_emb, item_emb
                user_emb, pos_item_emb, neg_item_emb = rec_user_emb[user_idx], rec_item_emb[pos_idx], rec_item_emb[neg_idx]
                rec_loss = bpr_loss(user_emb, pos_item_emb, neg_item_emb)
                # cl_loss = self.cl_rate * model.cal_cl_loss([user_idx, pos_idx], dropped_adj1, dropped_adj2)
                batch_loss = rec_loss + l2_reg_loss(self.reg, user_emb, pos_item_emb)  # + cl_loss
                # Backward and optimize
                batch_loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                if n % 100 == 0:
                    print('training:', epoch + 1, 'batch', n, 'rec_loss:', rec_loss.item())  # , 'cl_loss', cl_loss.item()
            with torch.no_grad():
                user_emb, self.item_emb = self.model()
                if self.inputs_type == 0 or self.inputs_type == 2:
                    prompts = self._prompts_generation(self.item_emb, user_emb)
                else:
                    prompts = self.prompts_generator(self.user_profiles)
                prompted_user_emb = self._prompts_u_embeddings_fusion(prompts, user_emb)
                self.user_emb = prompted_user_emb
            if epoch >= 5:
                self.fast_evaluation(epoch)
        self.user_emb, self.item_emb = self.best_user_emb, self.best_item_emb

        #### save user embeddings
        # np_user_emb = self.user_emb.cpu().numpy()
        # np.save('./user_emb/cptpp-r-gowalla.npy', np_user_emb)

    def save(self):
        with torch.no_grad():
            best_user_emb, self.best_item_emb = self.model.forward()
            if self.inputs_type == 0 or self.inputs_type == 2:
                prompts = self._prompts_generation(self.best_item_emb, best_user_emb)
            else:
                prompts = self.prompts_generator(self.user_profiles)
            prompted_user_emb = self._prompts_u_embeddings_fusion(prompts, best_user_emb)
            self.best_user_emb = prompted_user_emb

    def predict(self, u):
        u = self.data.get_user_id(u)
        score = torch.matmul(self.user_emb[u], self.item_emb.transpose(0, 1))
        return score.cpu().numpy()


class Prompts_Generator(nn.Module):
    def __init__(self, emb_size, prompt_size):
        super(Prompts_Generator, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(emb_size, prompt_size), nn.Linear(prompt_size, prompt_size)])
        self.activation = nn.Tanh()
        # self.activation = nn.Sigmoid()

    def forward(self, inputs):
        prompts = inputs
        for i in range(len(self.layers)):
            prompts = self.layers[i](prompts)
            prompts = self.activation(prompts)
        return prompts


class Fusion_MLP(nn.Module):
    def __init__(self, emb_size, prompt_size):
        super(Fusion_MLP, self).__init__()
        self.layers = nn.ModuleList([nn.Linear(emb_size + prompt_size, emb_size), nn.Linear(emb_size, emb_size)])
        self.activation = nn.Tanh()

    def forward(self, x):
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            x = self.activation(x)
        return x

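
# SGL_Encoder (below) is the graph contrastive pre-training backbone: LightGCN-style
# propagation over the normalized user-item adjacency. graph_reconstruction() and
# random_graph_augment() build node- or edge-dropped views of the interaction graph,
# and cal_cl_loss() contrasts the two views with an InfoNCE loss.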
class SGL_Encoder(nn.Module):
    def __init__(self, data, emb_size, drop_rate, n_layers, temp, aug_type):
        super(SGL_Encoder, self).__init__()
        self.data = data
        self.drop_rate = drop_rate
        self.emb_size = emb_size
        self.n_layers = n_layers
        self.temp = temp
        self.aug_type = aug_type
        self.norm_adj = data.norm_adj
        self.embedding_dict = self._init_model()
        self.sparse_norm_adj = TorchGraphInterface.convert_sparse_mat_to_tensor(self.norm_adj).cuda()

    def _init_model(self):
        initializer = nn.init.xavier_uniform_
        embedding_dict = nn.ParameterDict({
            'user_emb': nn.Parameter(initializer(torch.empty(self.data.user_num, self.emb_size))),
            'item_emb': nn.Parameter(initializer(torch.empty(self.data.item_num, self.emb_size))),
        })
        return embedding_dict

    def graph_reconstruction(self):
        # aug_type 0/1 share one dropped graph across layers; otherwise a separate
        # dropped graph is drawn for every propagation layer.
        if self.aug_type == 0 or self.aug_type == 1:
            dropped_adj = self.random_graph_augment()
        else:
            dropped_adj = []
            for k in range(self.n_layers):
                dropped_adj.append(self.random_graph_augment())
        return dropped_adj

    def random_graph_augment(self):
        dropped_mat = None
        if self.aug_type == 0:
            dropped_mat = GraphAugmentor.node_dropout(self.data.interaction_mat, self.drop_rate)
        elif self.aug_type == 1 or self.aug_type == 2:
            dropped_mat = GraphAugmentor.edge_dropout(self.data.interaction_mat, self.drop_rate)
        dropped_mat = self.data.convert_to_laplacian_mat(dropped_mat)
        return TorchGraphInterface.convert_sparse_mat_to_tensor(dropped_mat).cuda()

    def forward(self, perturbed_adj=None):
        ego_embeddings = torch.cat([self.embedding_dict['user_emb'], self.embedding_dict['item_emb']], 0)
        all_embeddings = [ego_embeddings]
        for k in range(self.n_layers):
            if perturbed_adj is not None:
                if isinstance(perturbed_adj, list):
                    ego_embeddings = torch.sparse.mm(perturbed_adj[k], ego_embeddings)
                else:
                    ego_embeddings = torch.sparse.mm(perturbed_adj, ego_embeddings)
            else:
                ego_embeddings = torch.sparse.mm(self.sparse_norm_adj, ego_embeddings)
            all_embeddings.append(ego_embeddings)
        all_embeddings = torch.stack(all_embeddings, dim=1)
        all_embeddings = torch.mean(all_embeddings, dim=1)
        user_all_embeddings, item_all_embeddings = torch.split(all_embeddings, [self.data.user_num, self.data.item_num])
        return user_all_embeddings, item_all_embeddings

    def cal_cl_loss(self, idx, perturbed_mat1, perturbed_mat2):
        u_idx = torch.unique(torch.Tensor(idx[0]).type(torch.long)).cuda()
        i_idx = torch.unique(torch.Tensor(idx[1]).type(torch.long)).cuda()
        user_view_1, item_view_1 = self.forward(perturbed_mat1)
        user_view_2, item_view_2 = self.forward(perturbed_mat2)
        view1 = torch.cat((user_view_1[u_idx], item_view_1[i_idx]), 0)
        view2 = torch.cat((user_view_2[u_idx], item_view_2[i_idx]), 0)
        # user_cl_loss = InfoNCE(user_view_1[u_idx], user_view_2[u_idx], self.temp)
        # item_cl_loss = InfoNCE(item_view_1[i_idx], item_view_2[i_idx], self.temp)
        # return user_cl_loss + item_cl_loss
        return InfoNCE(view1, view2, self.temp)
--------------------------------------------------------------------------------
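
For reference, below is a minimal, hypothetical sketch (not part of the repository) of how the two prompt modules defined in model/graph/CPTPP.py compose: Prompts_Generator maps pre-trained user representations to prompts, and Fusion_MLP fuses those prompts back into the user embeddings, mirroring _prompts_u_embeddings_fusion. The sizes, the random tensor, and the import path are illustrative assumptions only (they presume the repo root is on PYTHONPATH).

import torch
from model.graph.CPTPP import Prompts_Generator, Fusion_MLP    # assumed import path

emb_size, prompt_size, n_users = 64, 128, 1000                 # illustrative sizes
generator = Prompts_Generator(emb_size, prompt_size)           # emb_size -> prompt_size
fusion = Fusion_MLP(emb_size, prompt_size)                     # (prompt_size + emb_size) -> emb_size

user_emb = torch.randn(n_users, emb_size)                      # stand-in for pre-trained user embeddings
prompts = generator(user_emb)                                  # (n_users, prompt_size) personalized prompts
prompted_user_emb = fusion(torch.cat((prompts, user_emb), 1))  # (n_users, emb_size) prompted user embeddings
print(prompted_user_emb.shape)                                 # torch.Size([1000, 64])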