├── DiffGraph-Rec ├── DataHandler.py ├── Model.py ├── Utils │ ├── TimeLogger.py │ └── Utils.py ├── data │ ├── ijcai_15 │ │ ├── test_mat.pkl │ │ ├── train_mat_buy.pkl │ │ ├── train_mat_cart.pkl │ │ ├── train_mat_click.pkl │ │ └── train_mat_fav.pkl │ ├── retail_rocket │ │ ├── test_mat.pkl │ │ ├── train_mat_buy.pkl │ │ ├── train_mat_cart.pkl │ │ └── train_mat_view.pkl │ └── tmall │ │ ├── test_mat.pkl │ │ ├── train_mat_buy.pkl │ │ ├── train_mat_cart.pkl │ │ ├── train_mat_fav.pkl │ │ └── train_mat_pv.pkl ├── main.py └── params.py ├── DiffGraph_NC ├── DataHandler.py ├── Model.py ├── Utils │ ├── TimeLogger.py │ └── Utils.py ├── data │ ├── DBLP │ │ ├── a_feat.npz │ │ ├── apa.npz │ │ ├── apcpa.npz │ │ ├── aptpa.npz │ │ ├── labels.npy │ │ ├── nei_p.npy │ │ ├── p_feat.npz │ │ ├── pa.txt │ │ ├── pc.txt │ │ ├── pos.npz │ │ ├── pt.txt │ │ ├── t_feat.npz │ │ ├── test_20.npy │ │ ├── test_40.npy │ │ ├── test_60.npy │ │ ├── train_20.npy │ │ ├── train_40.npy │ │ ├── train_60.npy │ │ ├── val_20.npy │ │ ├── val_40.npy │ │ └── val_60.npy │ └── aminer │ │ ├── labels.npy │ │ ├── nei_a.npy │ │ ├── nei_r.npy │ │ ├── pa.txt │ │ ├── pap.npz │ │ ├── pos.npz │ │ ├── pr.txt │ │ ├── prp.npz │ │ ├── test_20.npy │ │ ├── test_40.npy │ │ ├── test_60.npy │ │ ├── train_20.npy │ │ ├── train_40.npy │ │ ├── train_60.npy │ │ ├── val_20.npy │ │ ├── val_40.npy │ │ └── val_60.npy ├── main.py ├── params.py └── script.sh ├── HDL.jpg └── README.md /DiffGraph-Rec/DataHandler.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import datetime 3 | import numpy as np 4 | from scipy.sparse import csr_matrix, coo_matrix, dok_matrix 5 | from params import args 6 | import scipy.sparse as sp 7 | import dgl 8 | from Utils.TimeLogger import log 9 | import torch as t 10 | import torch 11 | import torch.utils.data as data 12 | import torch.utils.data as dataloader 13 | device = "cuda" if t.cuda.is_available() else "cpu" 14 | class DataHandler: 15 | def __init__(self): 16 
| 17 | if args.data == 'ijcai_15': 18 | self.predir = './data/ijcai_15/' 19 | self.behaviors = ['click', 'fav', 'cart', 'buy'] 20 | self.beh_meta_path = ['buy', 'click_buy', 'click_fav_buy', 'click_fav_cart_buy'] 21 | 22 | elif args.data == 'tmall': 23 | self.predir = './data/tmall/' 24 | self.behaviors = ['pv', 'fav', 'cart', 'buy'] 25 | self.beh_meta_path = ['buy', 'pv_buy', 'pv_fav_buy', 'pv_fav_cart_buy'] 26 | elif args.data == 'retail_rocket': 27 | self.predir = './data/retail_rocket/' 28 | self.behaviors = ['view', 'cart', 'buy'] 29 | self.beh_meta_path = ['buy', 'view_buy', 'view_cart_buy'] 30 | 31 | self.train_file = self.predir + 'train_mat_' 32 | self.val_file = self.predir + 'test_mat.pkl' 33 | self.test_file = self.predir + 'test_mat.pkl' 34 | 35 | def _load_data(self): 36 | test_mode = 'lightgc' 37 | self.t_max = -1 38 | self.t_min = 0x7FFFFFFF 39 | self.time_number = -1 40 | 41 | self.user_num = -1 42 | self.item_num = -1 43 | self.behavior_mats = {} 44 | self.behaviors_data = {} 45 | for i in range(0, len(self.behaviors)): 46 | with open(self.train_file + self.behaviors[i] + '.pkl', 'rb') as fs: 47 | data = pickle.load(fs) 48 | 49 | if self.behaviors[i] == 'buy': 50 | self.train_mat = data 51 | self.trainLabel = 1 * (self.train_mat != 0) 52 | self.labelP = np.squeeze(np.array(np.sum(self.trainLabel, axis=0))) 53 | continue 54 | self.behaviors_data[i] = 1*(data != 0) 55 | if data.get_shape()[0] > self.user_num: 56 | self.user_num = data.get_shape()[0] 57 | if data.get_shape()[1] > self.item_num: 58 | self.item_num = data.get_shape()[1] 59 | if data.data.max() > self.t_max: 60 | self.t_max = data.data.max() 61 | if data.data.min() < self.t_min: 62 | self.t_min = data.data.min() 63 | self.test_mat = pickle.load(open(self.test_file, 'rb')) 64 | self.userNum = self.behaviors_data[0].shape[0] 65 | self.itemNum = self.behaviors_data[0].shape[1] 66 | self._data2mat() 67 | if test_mode == 'muti': 68 | self.target_adj = self._dataTargetmat() 69 | elif 
test_mode == 'lightgcn': 70 | self.target_adj = self._make_bitorch_adj(self.train_mat) 71 | else: 72 | self.target_adj = self.makeBiAdj(self.train_mat,self.userNum,self.itemNum).to(device) 73 | for i in range(0, len(self.behaviors_data)): 74 | self.behavior_mats[i] = self.makeBiAdj(self.behaviors_data[i],self.userNum,self.itemNum).to(device) 75 | 76 | 77 | self.beh_degree_list = [] 78 | for i in range(len(self.behaviors_data)): 79 | self.beh_degree_list.append(torch.tensor(((self.behaviors_data[i] != 0) * 1).sum(axis=-1)).cuda()) 80 | # self.beh_degree_list.append(torch.tensor(((self.train_mat != 0) * 1).sum(axis=-1)).cuda()) 81 | 82 | def _data2mat(self): 83 | time = datetime.datetime.now() 84 | print("Start building: ", time) 85 | for i in range(0, len(self.behaviors_data)): 86 | self.behaviors_data[i] = 1*(self.behaviors_data[i] != 0) 87 | self.behavior_mats[i] = self._get_use(self.behaviors_data[i]) 88 | time = datetime.datetime.now() 89 | print("End building: ", time) 90 | def _dataTargetmat(self): 91 | target_adj = 1*(self.train_mat!=0) 92 | target_adj = self._get_use(target_adj) 93 | return target_adj 94 | 95 | def _get_use(self, behaviors_data): 96 | behavior_mats = {} 97 | behaviors_data = (behaviors_data != 0) * 1 98 | behavior_mats['A'] = self._matrix_to_tensor(self._normalize_adj(behaviors_data)) 99 | behavior_mats['AT'] = self._matrix_to_tensor(self._normalize_adj(behaviors_data.T)) 100 | behavior_mats['A_ori'] = None 101 | return behavior_mats 102 | 103 | def _normalize_adj(self, adj): 104 | """Symmetrically normalize adjacency matrix.""" 105 | adj = sp.coo_matrix(adj) 106 | rowsum = np.array(adj.sum(1)) 107 | rowsum_diag = sp.diags(np.power(rowsum+1e-8, -0.5).flatten()) 108 | colsum = np.array(adj.sum(0)) 109 | colsum_diag = sp.diags(np.power(colsum+1e-8, -0.5).flatten()) 110 | return rowsum_diag*adj*colsum_diag 111 | 112 | def _matrix_to_tensor(self, cur_matrix): 113 | if type(cur_matrix) != sp.coo_matrix: 114 | cur_matrix = cur_matrix.tocoo() 115 | 
indices = torch.from_numpy(np.vstack((cur_matrix.row, cur_matrix.col)).astype(np.int64)) 116 | values = torch.from_numpy(cur_matrix.data) 117 | shape = torch.Size(cur_matrix.shape) 118 | return torch.sparse.FloatTensor(indices, values, shape).to(torch.float32).cuda() 119 | 120 | def makeBiAdj(self, mat,n_user,n_item): 121 | a = sp.csr_matrix((n_user, n_user)) 122 | b = sp.csr_matrix((n_item, n_item)) 123 | mat = sp.vstack([sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])]) 124 | mat = (mat != 0) * 1.0 125 | mat = mat.tocoo() 126 | edge_src,edge_dst = mat.nonzero() 127 | ui_graph = dgl.graph(data=(edge_src, edge_dst), 128 | idtype=torch.int32, 129 | num_nodes=mat.shape[0] 130 | ) 131 | 132 | return ui_graph 133 | 134 | def load_data(self): 135 | 136 | self._load_data() 137 | args.user_num, args.item_num = self.train_mat.shape 138 | 139 | # with open(self.predir + 'user_group_10000.pkl', 'rb') as fs: 140 | # self.grup_list = pickle.load(fs) 141 | 142 | # trnData = TrnData(self.train_mat) 143 | # self.train_dataloader = dataloader.DataLoader(trnData, batch_size=args.batch, shuffle=True, num_workers=0) 144 | # tstData = TstData(self.test_mat, self.train_mat) 145 | # self.test_dataloader = dataloader.DataLoader(tstData, batch_size=args.tstBat, shuffle=False, num_workers=0) 146 | 147 | 148 | test_data = AllRankTestData(self.test_mat, self.train_mat) 149 | self.test_dataloader = dataloader.DataLoader(test_data, batch_size=args.tstBat, shuffle=False, num_workers=0) 150 | train_dataset = PairwiseTrnData(self.trainLabel.tocoo()) 151 | self.train_dataloader = dataloader.DataLoader(train_dataset, batch_size=args.batch, shuffle=True, num_workers=4, pin_memory=True) 152 | 153 | 154 | 155 | def _normalize_biadj(self, mat): 156 | """Laplacian normalization for mat in coo_matrix 157 | 158 | Args: 159 | mat (scipy.sparse.coo_matrix): the un-normalized adjacent matrix 160 | 161 | Returns: 162 | scipy.sparse.coo_matrix: normalized adjacent matrix 163 | """ 164 | # Add epsilon to 
avoid divide by zero 165 | degree = np.array(mat.sum(axis=-1)) + 1e-10 166 | d_inv_sqrt = np.reshape(np.power(degree, -0.5), [-1]) 167 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0 168 | d_inv_sqrt_mat = sp.diags(d_inv_sqrt) 169 | return mat.dot(d_inv_sqrt_mat).transpose().dot(d_inv_sqrt_mat).tocoo() 170 | 171 | def _make_bitorch_adj(self, mat): 172 | """Transform uni-directional adjacent matrix in coo_matrix into bi-directional adjacent matrix in torch.sparse.FloatTensor 173 | 174 | Args: 175 | mat (coo_matrix): the uni-directional adjacent matrix 176 | 177 | Returns: 178 | torch.sparse.FloatTensor: the bi-directional matrix 179 | """ 180 | if type(mat) != sp.csr_matrix: 181 | mat = mat.tocsr() 182 | a = csr_matrix((self.userNum, self.userNum)) 183 | b = csr_matrix((self.itemNum, self.itemNum)) 184 | mat = sp.vstack([sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])]) 185 | mat = (mat != 0) * 1.0 186 | # mat = (mat + sp.eye(mat.shape[0])) * 1.0# MARK 187 | mat = self._normalize_biadj(mat) 188 | 189 | # make torch tensor 190 | idxs = t.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64)) 191 | vals = t.from_numpy(mat.data.astype(np.float32)) 192 | shape = t.Size(mat.shape) 193 | return t.sparse.FloatTensor(idxs, vals, shape).cuda() 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | class TrnData(data.Dataset): 203 | def __init__(self, coomat): 204 | coomat = coomat.tocoo() 205 | self.rows = coomat.row 206 | self.cols = coomat.col 207 | self.dokmat = coomat.todok() 208 | self.negs = np.zeros(len(self.rows)).astype(np.int32) 209 | 210 | def negSampling(self): 211 | for i in range(len(self.rows)): 212 | u = self.rows[i] 213 | while True: 214 | iNeg = np.random.randint(args.item_num) 215 | if (u, iNeg) not in self.dokmat: 216 | break 217 | self.negs[i] = iNeg 218 | 219 | def __len__(self): 220 | return len(self.rows) 221 | 222 | def __getitem__(self, idx): 223 | return self.rows[idx], self.cols[idx], self.negs[idx] 224 | 225 | class TstData(data.Dataset): 226 | 
def __init__(self, coomat, trnMat): 227 | coomat = coomat.tocoo() 228 | self.csrmat = (trnMat.tocsr() != 0) * 1.0 229 | 230 | tstLocs = [None] * coomat.shape[0] 231 | tstUsrs = set() 232 | for i in range(len(coomat.data)): 233 | row = coomat.row[i] 234 | col = coomat.col[i] 235 | if tstLocs[row] is None: 236 | tstLocs[row] = list() 237 | tstLocs[row].append(col) 238 | tstUsrs.add(row) 239 | tstUsrs = np.array(list(tstUsrs)) 240 | self.tstUsrs = tstUsrs 241 | self.user_pos_lists = tstLocs 242 | 243 | def __len__(self): 244 | return len(self.tstUsrs) 245 | 246 | def __getitem__(self, idx): 247 | return self.tstUsrs[idx], np.reshape(self.csrmat[self.tstUsrs[idx]].toarray(), [-1]) 248 | 249 | 250 | class AllRankTestData(data.Dataset): 251 | def __init__(self, coomat, trn_mat): 252 | self.csrmat = (trn_mat.tocsr() != 0) * 1.0 253 | 254 | user_pos_lists = [list() for i in range(coomat.shape[0])] 255 | # user_pos_lists = set() 256 | test_users = set() 257 | for i in range(len(coomat.data)): 258 | row = coomat.row[i] 259 | col = coomat.col[i] 260 | user_pos_lists[row].append(col) 261 | test_users.add(row) 262 | self.test_users = np.array(list(test_users)) 263 | self.user_pos_lists = user_pos_lists 264 | 265 | def __len__(self): 266 | return len(self.test_users) 267 | 268 | def __getitem__(self, idx): 269 | pck_user = self.test_users[idx] 270 | pck_mask = self.csrmat[pck_user].toarray() 271 | pck_mask = np.reshape(pck_mask, [-1]) 272 | return pck_user, pck_mask 273 | 274 | 275 | class PairwiseTrnData(data.Dataset): 276 | def __init__(self, coomat): 277 | self.rows = coomat.row 278 | self.cols = coomat.col 279 | self.dokmat = coomat.todok() 280 | self.negs = np.zeros(len(self.rows)).astype(np.int32) 281 | 282 | def negSampling(self): 283 | for i in range(len(self.rows)): 284 | u = self.rows[i] 285 | while True: 286 | iNeg = np.random.randint(args.item_num) 287 | if (u, iNeg) not in self.dokmat: 288 | break 289 | self.negs[i] = iNeg 290 | 291 | def __len__(self): 292 | return 
len(self.rows) 293 | 294 | def __getitem__(self, idx): 295 | return self.rows[idx], self.cols[idx], self.negs[idx] 296 | 297 | 298 | class DiffusionData(data.Dataset): 299 | def __init__(self,y_data): 300 | self.y_data = y_data 301 | self.x_data = np.arange(0,len(y_data)) 302 | def __len__(self): 303 | return len(self.x_data) 304 | 305 | def __getitem__(self, idx): 306 | return self.x_data[idx],self.y_data[idx] 307 | 308 | 309 | 310 | -------------------------------------------------------------------------------- /DiffGraph-Rec/Model.py: -------------------------------------------------------------------------------- 1 | from statistics import mean 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from params import args 6 | import numpy as np 7 | import math 8 | from Utils.Utils import * 9 | import dgl.function as fn 10 | init = nn.init.xavier_uniform_ 11 | uniformInit = nn.init.uniform 12 | from torch.nn.init import xavier_uniform_ 13 | device = t.device('cuda:0' if t.cuda.is_available() else 'cpu') 14 | #Models 15 | 16 | class HGDM(nn.Module): 17 | def __init__(self,data_handler): 18 | super(HGDM, self).__init__() 19 | self.n_user = data_handler.userNum 20 | self.n_item = data_handler.itemNum 21 | self.behavior_mats = data_handler.behavior_mats 22 | self.target_adj = data_handler.target_adj 23 | self.n_hid = args.latdim 24 | self.n_layers = args.gcn_layer 25 | self.embedding_dict = self.init_weight(self.n_user, self.n_item, self.n_hid) 26 | self.act = nn.LeakyReLU(0.5, inplace=True) 27 | self.layers = nn.ModuleList() 28 | 29 | self.hter_layers = nn.ModuleList() 30 | self.weight = False 31 | for i in range(0, self.n_layers): 32 | self.layers.append(DGLLayer(self.n_hid, self.n_hid, weight=self.weight, bias=False, activation=self.act)) 33 | for i in range(0,len(self.behavior_mats)): 34 | single_layers = nn.ModuleList() 35 | for i in range(0, self.n_layers): 36 | single_layers.append(DGLLayer(self.n_hid, self.n_hid, 
weight=self.weight, bias=False, activation=self.act)) 37 | self.hter_layers.append(single_layers) 38 | self.diffusion_process = GaussianDiffusion(args.noise_scale, args.noise_min, args.noise_max, args.steps).to(device) 39 | out_dims = eval(args.dims) + [args.latdim] 40 | in_dims = out_dims[::-1] 41 | self.usr_denoiser = Denoise(in_dims, out_dims, args.d_emb_size, norm=args.norm).to(device) 42 | self.item_denoiser = Denoise(in_dims, out_dims, args.d_emb_size, norm=args.norm).to(device) 43 | self.final_act = nn.LeakyReLU(negative_slope=0.5) 44 | 45 | def init_weight(self, userNum, itemNum, hide_dim): 46 | initializer = nn.init.xavier_uniform_ 47 | 48 | embedding_dict = nn.ParameterDict({ 49 | 'user_emb': nn.Parameter(initializer(torch.empty(userNum, hide_dim))), 50 | 'item_emb': nn.Parameter(initializer(torch.empty(itemNum, hide_dim))), 51 | }) 52 | return embedding_dict 53 | def forward(self): 54 | 55 | init_embedding = torch.concat([self.embedding_dict['user_emb'],self.embedding_dict['item_emb']],axis=0) 56 | init_heter_embedding = torch.concat([self.embedding_dict['user_emb'],self.embedding_dict['item_emb']],axis=0) 57 | all_embeddings = [init_embedding] 58 | all_heter_embeddings = [] 59 | 60 | 61 | for i, layer in enumerate(self.layers): 62 | if i == 0: 63 | embeddings = layer(self.target_adj, self.embedding_dict['user_emb'], self.embedding_dict['item_emb']) 64 | else: 65 | embeddings = layer(self.target_adj, embeddings[:self.n_user], embeddings[self.n_user:]) 66 | 67 | norm_embeddings = F.normalize(embeddings, p=2, dim=1) 68 | 69 | all_embeddings += [norm_embeddings] 70 | ui_embeddings = sum(all_embeddings) 71 | 72 | 73 | for i in range(0,len(self.behavior_mats)): 74 | sub_heter_embeddings = [init_heter_embedding] 75 | for j, layer in enumerate(self.layers): 76 | if j == 0: 77 | embeddings = layer(self.behavior_mats[i] , self.embedding_dict['user_emb'], self.embedding_dict['item_emb']) 78 | else: 79 | embeddings = layer(self.behavior_mats[i] , 
embeddings[:self.n_user], embeddings[self.n_user:]) 80 | 81 | norm_embeddings = F.normalize(embeddings, p=2, dim=1) 82 | 83 | sub_heter_embeddings += [norm_embeddings] 84 | sub_heter_embeddings = sum(sub_heter_embeddings) 85 | all_heter_embeddings.append(sub_heter_embeddings) 86 | 87 | all_heter_embeddings = sum(all_heter_embeddings) 88 | 89 | target_user_embedding,target_item_embedding = ui_embeddings[:self.n_user],ui_embeddings[self.n_user:] 90 | heter_user_embedding,heter_item_embedding = all_heter_embeddings[:self.n_user],all_heter_embeddings[self.n_user:] 91 | 92 | return target_user_embedding,target_item_embedding,heter_user_embedding,heter_item_embedding 93 | 94 | def cal_loss(self,ancs,poss,negs): 95 | usrEmbeds, itmEmbeds, h_usrEmbeds, h_itemEmbeds = self.forward() 96 | u_diff_loss,diff_usrEmbeds= self.diffusion_process.training_losses2(self.usr_denoiser, usrEmbeds, h_usrEmbeds, ancs) 97 | i_diff_loss,diff_itemEmbeds = self.diffusion_process.training_losses2(self.item_denoiser, itmEmbeds, h_itemEmbeds, poss) 98 | diff_loss = (u_diff_loss.mean()+i_diff_loss.mean()) 99 | usrEmbeds = usrEmbeds+diff_usrEmbeds 100 | itmEmbeds = itmEmbeds+diff_itemEmbeds 101 | 102 | ancEmbeds = usrEmbeds[ancs] 103 | posEmbeds = itmEmbeds[poss] 104 | negEmbeds = itmEmbeds[negs] 105 | 106 | 107 | scoreDiff = pairPredict(ancEmbeds, posEmbeds, negEmbeds) 108 | bprLoss = - (scoreDiff).sigmoid().log().sum() / args.batch 109 | regLoss = ((torch.norm(ancEmbeds) ** 2 + torch.norm(posEmbeds) ** 2 + torch.norm(negEmbeds) ** 2) * args.reg)/args.batch 110 | loss = bprLoss + regLoss + diff_loss 111 | return loss,bprLoss,regLoss,diff_loss 112 | def predict(self): 113 | usrEmbeds, itmEmbeds, h_usrEmbeds, h_itemEmbeds = self.forward() 114 | denoised_u = self.diffusion_process.p_sample(self.usr_denoiser, h_usrEmbeds, args.sampling_steps) 115 | denoised_i = self.diffusion_process.p_sample(self.item_denoiser, h_itemEmbeds, args.sampling_steps) 116 | usrEmbeds = usrEmbeds+denoised_u 117 | itmEmbeds 
= itmEmbeds+denoised_i 118 | return usrEmbeds,itmEmbeds 119 | 120 | class DGLLayer(nn.Module): 121 | def __init__(self, 122 | in_feats, 123 | out_feats, 124 | weight=False, 125 | bias=False, 126 | activation=None): 127 | super(DGLLayer, self).__init__() 128 | self.bias = bias 129 | self._in_feats = in_feats 130 | self._out_feats = out_feats 131 | self.weight = weight 132 | if self.weight: 133 | self.u_w = nn.Parameter(torch.Tensor(in_feats, out_feats)) 134 | self.v_w = nn.Parameter(torch.Tensor(in_feats, out_feats)) 135 | # self.e_w = nn.Parameter(t.Tensor(in_feats, out_feats)) 136 | xavier_uniform_(self.u_w) 137 | xavier_uniform_(self.v_w) 138 | # init.xavier_uniform_(self.e_w) 139 | self._activation = activation 140 | 141 | # def forward(self, graph, feat): 142 | def forward(self, graph, u_f, v_f): 143 | with graph.local_scope(): 144 | if self.weight: 145 | u_f = torch.mm(u_f, self.u_w) 146 | v_f = torch.mm(v_f, self.v_w) 147 | # e_f = t.mm(e_f, self.e_w) 148 | node_f = torch.cat([u_f, v_f], dim=0) 149 | # D^-1/2 150 | # degs = graph.out_degrees().to(feat.device).float().clamp(min=1) 151 | degs = graph.out_degrees().to(u_f.device).float().clamp(min=1) 152 | norm = torch.pow(degs, -0.5).view(-1, 1) 153 | # norm = norm.view(-1,1) 154 | # shp = norm.shape + (1,) * (feat.dim() - 1) 155 | # norm = t.reshape(norm, shp) 156 | 157 | node_f = node_f * norm 158 | 159 | graph.ndata['n_f'] = node_f 160 | # graph.edata['e_f'] = e_f 161 | graph.update_all(fn.copy_u(u='n_f', out='m'), reduce_func=fn.sum(msg='m', out='n_f')) 162 | 163 | rst = graph.ndata['n_f'] 164 | 165 | degs = graph.in_degrees().to(u_f.device).float().clamp(min=1) 166 | norm = torch.pow(degs, -0.5).view(-1, 1) 167 | # shp = norm.shape + (1,) * (feat.dim() - 1) 168 | # norm = t.reshape(norm, shp) 169 | rst = rst * norm 170 | 171 | if self._activation is not None: 172 | rst = self._activation(rst) 173 | 174 | return rst 175 | 176 | class Denoise(nn.Module): 177 | def __init__(self, in_dims, out_dims, emb_size, 
norm=False, dropout=0.5): 178 | super(Denoise, self).__init__() 179 | self.in_dims = in_dims 180 | self.out_dims = out_dims 181 | self.time_emb_dim = emb_size 182 | self.norm = norm 183 | 184 | self.emb_layer = nn.Linear(self.time_emb_dim, self.time_emb_dim) 185 | 186 | in_dims_temp = [self.in_dims[0] + self.time_emb_dim] + self.in_dims[1:] 187 | 188 | out_dims_temp = self.out_dims 189 | 190 | self.in_layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out in zip(in_dims_temp[:-1], in_dims_temp[1:])]) 191 | self.out_layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out in zip(out_dims_temp[:-1], out_dims_temp[1:])]) 192 | 193 | self.drop = nn.Dropout(dropout) 194 | self.init_weights() 195 | 196 | def init_weights(self): 197 | for layer in self.in_layers: 198 | size = layer.weight.size() 199 | std = np.sqrt(2.0 / (size[0] + size[1])) 200 | layer.weight.data.normal_(0.0, std) 201 | layer.bias.data.normal_(0.0, 0.001) 202 | 203 | for layer in self.out_layers: 204 | size = layer.weight.size() 205 | std = np.sqrt(2.0 / (size[0] + size[1])) 206 | layer.weight.data.normal_(0.0, std) 207 | layer.bias.data.normal_(0.0, 0.001) 208 | 209 | size = self.emb_layer.weight.size() 210 | std = np.sqrt(2.0 / (size[0] + size[1])) 211 | self.emb_layer.weight.data.normal_(0.0, std) 212 | self.emb_layer.bias.data.normal_(0.0, 0.001) 213 | 214 | def forward(self, x, timesteps, mess_dropout=True): 215 | freqs = torch.exp(-math.log(10000) * torch.arange(start=0, end=self.time_emb_dim//2, dtype=torch.float32) / (self.time_emb_dim//2)).to(device) 216 | temp = timesteps[:, None].float() * freqs[None] 217 | time_emb = torch.cat([torch.cos(temp), torch.sin(temp)], dim=-1) 218 | if self.time_emb_dim % 2: 219 | time_emb = torch.cat([time_emb, torch.zeros_like(time_emb[:, :1])], dim=-1) 220 | emb = self.emb_layer(time_emb) 221 | if self.norm: 222 | x = F.normalize(x) 223 | if mess_dropout: 224 | x = self.drop(x) 225 | h = torch.cat([x, emb], dim=-1) 226 | for i, layer in 
enumerate(self.in_layers): 227 | h = layer(h) 228 | h = torch.tanh(h) 229 | for i, layer in enumerate(self.out_layers): 230 | h = layer(h) 231 | if i != len(self.out_layers) - 1: 232 | h = torch.tanh(h) 233 | 234 | return h 235 | 236 | class GaussianDiffusion(nn.Module): 237 | def __init__(self, noise_scale, noise_min, noise_max, steps, beta_fixed=True): 238 | super(GaussianDiffusion, self).__init__() 239 | 240 | self.noise_scale = noise_scale 241 | self.noise_min = noise_min 242 | self.noise_max = noise_max 243 | self.steps = steps 244 | 245 | 246 | self.history_num_per_term = 10 247 | self.Lt_history = torch.zeros(steps, 10, dtype=torch.float64).to(device) 248 | self.Lt_count = torch.zeros(steps, dtype=int).to(device) 249 | 250 | if noise_scale != 0: 251 | self.betas = torch.tensor(self.get_betas(), dtype=torch.float64).to(device) 252 | if beta_fixed: 253 | self.betas[0] = 0.0001 254 | 255 | self.calculate_for_diffusion() 256 | 257 | def get_betas(self): 258 | start = self.noise_scale * self.noise_min 259 | end = self.noise_scale * self.noise_max 260 | variance = np.linspace(start, end, self.steps, dtype=np.float64) 261 | alpha_bar = 1 - variance 262 | betas = [] 263 | betas.append(1 - alpha_bar[0]) 264 | for i in range(1, self.steps): 265 | betas.append(min(1 - alpha_bar[i] / alpha_bar[i-1], 0.999)) 266 | return np.array(betas) 267 | 268 | def calculate_for_diffusion(self): 269 | alphas = 1.0 - self.betas 270 | self.alphas_cumprod = torch.cumprod(alphas, axis=0).to(device) 271 | self.alphas_cumprod_prev = torch.cat([torch.tensor([1.0]).to(device), self.alphas_cumprod[:-1]]).to(device) 272 | self.alphas_cumprod_next = torch.cat([self.alphas_cumprod[1:], torch.tensor([0.0]).to(device)]).to(device) 273 | 274 | self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod) 275 | self.sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - self.alphas_cumprod) 276 | self.log_one_minus_alphas_cumprod = torch.log(1.0 - self.alphas_cumprod) 277 | self.sqrt_recip_alphas_cumprod = 
torch.sqrt(1.0 / self.alphas_cumprod) 278 | self.sqrt_recipm1_alphas_cumprod = torch.sqrt(1.0 / self.alphas_cumprod - 1) 279 | 280 | self.posterior_variance = ( 281 | self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod) 282 | ) 283 | self.posterior_log_variance_clipped = torch.log(torch.cat([self.posterior_variance[1].unsqueeze(0), self.posterior_variance[1:]])) 284 | self.posterior_mean_coef1 = (self.betas * torch.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)) 285 | self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * torch.sqrt(alphas) / (1.0 - self.alphas_cumprod)) 286 | 287 | def p_sample(self, model, x_start, steps): 288 | if steps == 0: 289 | x_t = x_start 290 | else: 291 | t = torch.tensor([steps-1] * x_start.shape[0]).to(device) 292 | x_t = self.q_sample(x_start, t) 293 | 294 | indices = list(range(self.steps))[::-1] 295 | 296 | for i in indices: 297 | t = torch.tensor([i] * x_t.shape[0]).to(device) 298 | model_mean, model_log_variance = self.p_mean_variance(model, x_t, t) 299 | x_t = model_mean 300 | return x_t 301 | 302 | def q_sample(self, x_start, t, noise=None): 303 | if noise is None: 304 | noise = torch.randn_like(x_start) 305 | return self._extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + self._extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise 306 | 307 | def _extract_into_tensor(self, arr, timesteps, broadcast_shape): 308 | arr = arr.to(device) 309 | res = arr[timesteps].float() 310 | while len(res.shape) < len(broadcast_shape): 311 | res = res[..., None] 312 | return res.expand(broadcast_shape) 313 | 314 | def p_mean_variance(self, model, x, t): 315 | model_output = model(x, t, False) 316 | 317 | model_variance = self.posterior_variance 318 | model_log_variance = self.posterior_log_variance_clipped 319 | 320 | model_variance = self._extract_into_tensor(model_variance, t, x.shape) 321 | model_log_variance = 
self._extract_into_tensor(model_log_variance, t, x.shape) 322 | 323 | model_mean = (self._extract_into_tensor(self.posterior_mean_coef1, t, x.shape) * model_output + self._extract_into_tensor(self.posterior_mean_coef2, t, x.shape) * x) 324 | 325 | return model_mean, model_log_variance 326 | 327 | def training_losses(self, model, targetEmbeds, x_start): 328 | batch_size = x_start.size(0) 329 | ts = torch.randint(0, self.steps, (batch_size,)).long().to(device) 330 | noise = torch.randn_like(x_start) 331 | if self.noise_scale != 0: 332 | x_t = self.q_sample(targetEmbeds, ts, noise) 333 | else: 334 | x_t = x_start 335 | 336 | model_output = model(x_t, ts) 337 | mse = self.mean_flat((targetEmbeds - model_output) ** 2) 338 | # mse = cal_infonce_loss(targetEmbeds,model_output,args.temp) 339 | 340 | weight = self.SNR(ts - 1) - self.SNR(ts) 341 | weight = torch.where((ts == 0), 1.0, weight) 342 | 343 | diff_loss = weight * mse 344 | # cal_loss = cal_infonce_loss(model_output,targetEmbeds,args.temp) 345 | # return diff_loss, cal_loss,model_output 346 | return diff_loss,model_output 347 | 348 | def training_losses2(self, model, targetEmbeds, x_start, batch): 349 | batch_size = x_start.size(0) 350 | device = x_start.device 351 | # ts, pt = self.sample_timesteps(batch_size, device,'importance') 352 | ts = torch.randint(0, self.steps, (batch_size,)).long().to(device) 353 | noise = torch.randn_like(x_start) 354 | if self.noise_scale != 0: 355 | x_t = self.q_sample(x_start, ts, noise) 356 | else: 357 | x_t = x_start 358 | 359 | model_output = model(x_t, ts) 360 | mse = self.mean_flat((targetEmbeds - model_output) ** 2) 361 | # mse = cal_infonce_loss(targetEmbeds,model_output,args.temp) 362 | weight = self.SNR(ts - 1) - self.SNR(ts) 363 | weight = torch.where((ts == 0), 1.0, weight) 364 | diff_loss = weight * mse 365 | diff_loss = diff_loss[batch] 366 | return diff_loss,model_output 367 | 368 | def mean_flat(self, tensor): 369 | return tensor.mean(dim=list(range(1, 
len(tensor.shape)))) 370 | 371 | def SNR(self, t): 372 | self.alphas_cumprod = self.alphas_cumprod.to(device) 373 | return self.alphas_cumprod[t] / (1 - self.alphas_cumprod[t]) 374 | def sample_timesteps(self, batch_size, device, method='uniform', uniform_prob=0.001): 375 | if method == 'importance': # importance sampling 376 | if not (self.Lt_count == self.history_num_per_term).all(): 377 | return self.sample_timesteps(batch_size, device, method='uniform') 378 | 379 | Lt_sqrt = torch.sqrt(torch.mean(self.Lt_history ** 2, axis=-1)) 380 | pt_all = Lt_sqrt / torch.sum(Lt_sqrt) 381 | pt_all *= 1 - uniform_prob 382 | pt_all += uniform_prob / len(pt_all) 383 | 384 | assert pt_all.sum(-1) - 1. < 1e-5 385 | 386 | t = torch.multinomial(pt_all, num_samples=batch_size, replacement=True) 387 | pt = pt_all.gather(dim=0, index=t) * len(pt_all) 388 | 389 | 390 | return t, pt 391 | 392 | elif method == 'uniform': # uniform sampling 393 | t = torch.randint(0, self.steps, (batch_size,), device=device).long() 394 | pt = torch.ones_like(t).float() 395 | 396 | return t, pt 397 | 398 | else: 399 | raise ValueError -------------------------------------------------------------------------------- /DiffGraph-Rec/Utils/TimeLogger.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | logmsg = '' 4 | timemark = dict() 5 | saveDefault = False 6 | def log(msg, save=None, oneline=False): 7 | global logmsg 8 | global saveDefault 9 | time = datetime.datetime.now() 10 | tem = '%s: %s' % (time, msg) 11 | if save != None: 12 | if save: 13 | logmsg += tem + '\n' 14 | elif saveDefault: 15 | logmsg += tem + '\n' 16 | if oneline: 17 | print(tem, end='\r') 18 | else: 19 | print(tem) 20 | 21 | if __name__ == '__main__': 22 | log('') -------------------------------------------------------------------------------- /DiffGraph-Rec/Utils/Utils.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | 
import torch.nn.functional as F 3 | 4 | def innerProduct(usrEmbeds, itmEmbeds): 5 | return t.sum(usrEmbeds * itmEmbeds, dim=-1) 6 | 7 | def pairPredict(ancEmbeds, posEmbeds, negEmbeds): 8 | return innerProduct(ancEmbeds, posEmbeds) - innerProduct(ancEmbeds, negEmbeds) 9 | 10 | def calcRegLoss(model): 11 | ret = 0 12 | for W in model.parameters(): 13 | ret += W.norm(2).square() 14 | return ret 15 | def reg_pick_embeds(embeds_list): 16 | reg_loss = 0 17 | for embeds in embeds_list: 18 | reg_loss += embeds.square().sum() 19 | return reg_loss 20 | 21 | def contrast(nodes, allEmbeds, allEmbeds2=None): 22 | if allEmbeds2 is not None: 23 | pckEmbeds = allEmbeds[nodes] 24 | scores = t.log(t.exp(pckEmbeds @ allEmbeds2.T).sum(-1)).mean() 25 | else: 26 | uniqNodes = t.unique(nodes) 27 | pckEmbeds = allEmbeds[uniqNodes] 28 | scores = t.log(t.exp(pckEmbeds @ allEmbeds.T).sum(-1)).mean() 29 | return scores 30 | 31 | def cal_infonce_loss(embeds1, embeds2, temp): 32 | """ InfoNCE loss over ALL rows of embeds1/embeds2 (no node selection). Returns the per-sample loss vector, UNREDUCED - unlike cal_infonce_loss_spec_nodes, callers must reduce (e.g. .mean()) themselves. 33 | """ 34 | embeds1 = F.normalize(embeds1 + 1e-8, p=2) 35 | embeds2 = F.normalize(embeds2 + 1e-8, p=2) 36 | pckEmbeds1 = embeds1 37 | pckEmbeds2 = embeds2 38 | nume = t.exp(t.sum(pckEmbeds1 * pckEmbeds2, dim=-1) / temp) 39 | deno = t.exp(pckEmbeds1 @ embeds2.T / temp).sum(-1) + 1e-8 40 | return -t.log(nume / deno) 41 | 42 | def cal_infonce_loss_spec_nodes(embeds1, embeds2, nodes, temp): 43 | """ InfoNCE loss restricted to the given nodes; returns the mean loss over the selected nodes. 44 | """ 45 | embeds1 = F.normalize(embeds1 + 1e-8, p=2) 46 | embeds2 = F.normalize(embeds2 + 1e-8, p=2) 47 | pckEmbeds1 = embeds1[nodes] 48 | pckEmbeds2 = embeds2[nodes] 49 | nume = t.exp(t.sum(pckEmbeds1 * pckEmbeds2, dim=-1) / temp) 50 | deno = t.exp(pckEmbeds1 @ embeds2.T / temp).sum(-1) + 1e-8 51 | return -t.log(nume / deno).mean() -------------------------------------------------------------------------------- /DiffGraph-Rec/data/ijcai_15/test_mat.pkl:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/ijcai_15/test_mat.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/ijcai_15/train_mat_buy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/ijcai_15/train_mat_buy.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/ijcai_15/train_mat_cart.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/ijcai_15/train_mat_cart.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/ijcai_15/train_mat_click.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/ijcai_15/train_mat_click.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/ijcai_15/train_mat_fav.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/ijcai_15/train_mat_fav.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/retail_rocket/test_mat.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/retail_rocket/test_mat.pkl 
-------------------------------------------------------------------------------- /DiffGraph-Rec/data/retail_rocket/train_mat_buy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/retail_rocket/train_mat_buy.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/retail_rocket/train_mat_cart.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/retail_rocket/train_mat_cart.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/retail_rocket/train_mat_view.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/retail_rocket/train_mat_view.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/tmall/test_mat.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/tmall/test_mat.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/tmall/train_mat_buy.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/tmall/train_mat_buy.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/tmall/train_mat_cart.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/tmall/train_mat_cart.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/tmall/train_mat_fav.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/tmall/train_mat_fav.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/data/tmall/train_mat_pv.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph-Rec/data/tmall/train_mat_pv.pkl -------------------------------------------------------------------------------- /DiffGraph-Rec/main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import Utils.TimeLogger as logger 3 | from Utils.TimeLogger import log 4 | from params import args 5 | from Model import HGDM 6 | from DataHandler import DataHandler 7 | import numpy as np 8 | import pickle 9 | from Utils.Utils import * 10 | import os 11 | import logging 12 | import sys 13 | device = t.device('cuda:0' if t.cuda.is_available() else 'cpu') 14 | 15 | 16 | class Coach: 17 | def __init__(self, handler): 18 | self.handler = handler 19 | print('USER', self.handler.userNum, 'ITEM', self.handler.itemNum) 20 | print('NUM OF INTERACTIONS', self.handler.train_dataloader.dataset.__len__()) 21 | self.metrics = dict() 22 | mets = ['Loss', 'preLoss', 'Recall', 'NDCG'] 23 | for met in mets: 24 | self.metrics['Train' + met] = list() 25 | self.metrics['Test' + met] = list() 26 | 27 | def makePrint(self, name, ep, reses, save): 28 | ret = 'Epoch %d/%d, %s: ' % (ep, args.epoch, name) 29 | for metric in reses: 30 | val = reses[metric] 31 | ret += '%s = %.4f, ' % 
(metric, val) 32 | tem = name + metric 33 | if save and tem in self.metrics: 34 | self.metrics[tem].append(val) 35 | ret = ret[:-2] + ' ' 36 | return ret 37 | def makePrintAllK(self, name, ep, reses, save): 38 | ret = 'Epoch %d/%d, %s: ' % (ep, args.epoch, name) 39 | for metric in reses: 40 | val = reses[metric] 41 | ret += '%s = %s, ' % (metric, str(val)) 42 | tem = name + metric 43 | if save and tem in self.metrics: 44 | self.metrics[tem].append(val) 45 | ret = ret[:-2] + ' ' 46 | return ret 47 | 48 | def run(self): 49 | self.prepareModel() 50 | log('Model Prepared') 51 | log('Model Initialized') 52 | 53 | recallMax = 0 54 | ndcgMax = 0 55 | bestEpoch = 0 56 | 57 | wait = 0 58 | 59 | #file save setting 60 | log_format = '%(asctime)s %(message)s' 61 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 62 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 63 | log_save = './History/' + args.data + '/' 64 | log_file = f'{args.data}_hid_{args.latdim}_layer_{args.gcn_layer}_' + \ 65 | f'lr_{args.lr}_difflr_{args.difflr}_diff_dim_{args.dims}_reg_{args.reg}_batch_{args.batch}_diffstep_{args.steps}_T_dim_{args.d_emb_size}_noise_scale_{args.noise_scale}_'+\ 66 | f'new_data' 67 | fname = f'{log_file}.txt' 68 | fh = logging.FileHandler(os.path.join(log_save, fname)) 69 | fh.setFormatter(logging.Formatter(log_format)) 70 | logger = logging.getLogger() 71 | logger.addHandler(fh) 72 | logger.info(args) 73 | logger.info('================') 74 | args.save_path = args.data + '/'+log_file 75 | 76 | 77 | 78 | for ep in range(0, args.epoch): 79 | tstFlag = (ep % 1 == 0) 80 | reses = self.trainEpoch() 81 | log(self.makePrint('Train', ep, reses, tstFlag)) 82 | if tstFlag: 83 | reses = self.testEpoch() 84 | if (reses['Recall'] > recallMax): 85 | recallMax = reses['Recall'] 86 | ndcgMax = reses['NDCG'] 87 | bestEpoch = ep 88 | wait = 0 89 | self.saveModel() 90 | else: 91 | wait+=1 92 | # log(self.makePrint('Test', ep, reses, tstFlag)) 93 | logger.info(self.makePrint('Test', ep, 
reses, tstFlag)) 94 | self.saveHistory() 95 | print() 96 | if wait >= args.patience: 97 | print(f'Early stop at epoch {ep}, best epoch {bestEpoch}') 98 | break 99 | print('Best epoch : ', bestEpoch, ' , Recall@20 : ', recallMax, ' , NDCG@20 : ', ndcgMax) 100 | 101 | 102 | def prepareModel(self): 103 | 104 | self.model = HGDM(self.handler).to(device) 105 | self.opt = torch.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=0) 106 | 107 | def trainEpoch(self): 108 | trnLoader = self.handler.train_dataloader 109 | 110 | trnLoader.dataset.negSampling() 111 | epLoss, epRecLoss, eDiffLoss, eSSLoss = 0, 0, 0 ,0 112 | steps = trnLoader.dataset.__len__() // args.batch 113 | mode = 'all_diff' 114 | for i, tem in enumerate(trnLoader): 115 | 116 | ancs, poss, negs = tem 117 | ancs = ancs.long().cuda() 118 | poss = poss.long().cuda() 119 | negs = negs.long().cuda() 120 | self.opt.zero_grad() 121 | loss,bprLoss,regLoss,diff_loss = self.model.cal_loss(ancs,poss,negs) 122 | epLoss += loss.item() 123 | epRecLoss += bprLoss.item() 124 | eDiffLoss += diff_loss.item() 125 | 126 | 127 | loss.backward() 128 | self.opt.step() 129 | log('Step %d/%d: loss = %.3f, diffLoss = %.3f,regLoss = %.3f' % (i, steps, loss, diff_loss , regLoss), save=False, oneline=True) 130 | 131 | 132 | ret = dict() 133 | ret['Loss'] = epLoss / steps 134 | ret['recLoss'] = epRecLoss / steps 135 | ret['diffLoss'] = eDiffLoss / steps 136 | return ret 137 | 138 | def testEpoch(self): 139 | tstLoader = self.handler.test_dataloader 140 | epRecall, epNdcg = [0] * 2 141 | i = 0 142 | num = tstLoader.dataset.__len__() 143 | steps = num // args.tstBat 144 | self.model.eval() 145 | 146 | with torch.no_grad(): 147 | usrEmbeds, itmEmbeds = self.model.predict() 148 | user_emb = usrEmbeds.cpu().numpy() 149 | item_emb = itmEmbeds.cpu().numpy() 150 | emb_save = './'+args.data+'hdl_emb.pkl' 151 | with open(emb_save,'wb') as f: 152 | pickle.dump({'user_embed':user_emb,'item_embed':item_emb},f) 153 | for usr, trnMask in 
tstLoader: 154 | i += 1 155 | usr = usr.long().cuda() 156 | trnMask = trnMask.cuda() 157 | 158 | allPreds = t.mm(usrEmbeds[usr], t.transpose(itmEmbeds, 1, 0)) * (1 - trnMask) - trnMask * 1e8 159 | _, topLocs = t.topk(allPreds, args.topk) 160 | recall, ndcg = self.calcRes(topLocs.cpu().numpy(), self.handler.test_dataloader.dataset.user_pos_lists, usr) 161 | epRecall += recall 162 | epNdcg += ndcg 163 | log('Steps %d/%d: recall = %.2f, ndcg = %.2f ' % (i, steps, recall, ndcg), save=False, oneline=True) 164 | ret = dict() 165 | ret['Recall'] = epRecall / num 166 | ret['NDCG'] = epNdcg / num 167 | return ret 168 | def calcRes(self, topLocs, tstLocs, batIds): 169 | assert topLocs.shape[0] == len(batIds) 170 | allRecall = allNdcg = 0 171 | recallBig = 0 172 | ndcgBig = 0 173 | for i in range(len(batIds)): 174 | temTopLocs = list(topLocs[i]) 175 | temTstLocs = tstLocs[batIds[i]] 176 | tstNum = len(temTstLocs) 177 | maxDcg = np.sum([np.reciprocal(np.log2(loc + 2)) for loc in range(min(tstNum, args.topk))]) 178 | recall = dcg = 0 179 | for val in temTstLocs: 180 | if val in temTopLocs: 181 | recall += 1 182 | dcg += np.reciprocal(np.log2(temTopLocs.index(val) + 2)) 183 | recall = recall / tstNum 184 | ndcg = dcg / maxDcg 185 | allRecall += recall 186 | allNdcg += ndcg 187 | return allRecall, allNdcg 188 | 189 | def saveHistory(self): 190 | if args.epoch == 0: 191 | return 192 | with open('./History/' + args.save_path + '.his', 'wb') as fs: 193 | pickle.dump(self.metrics, fs) 194 | 195 | 196 | def saveModel(self): 197 | content = { 198 | 'model': self.model, 199 | } 200 | t.save(content, './Models/' + args.save_path + '.mod') 201 | log('Model Saved: %s' % args.save_path) 202 | 203 | def loadModel(self): 204 | 205 | ckp = t.load('./Models/' + args.load_model ) 206 | self.model = ckp['model'] 207 | self.opt = t.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=0) 208 | 209 | # with open('../History/' + args.load_model + '.his', 'rb') as fs: 210 | # self.metrics = 
pickle.load(fs) 211 | log('Model Loaded') 212 | def test(self): 213 | self.prepareModel() 214 | log('Model Prepared') 215 | log('Model Initialized') 216 | 217 | recallMax = 0 218 | ndcgMax = 0 219 | bestEpoch = 0 220 | 221 | args.load_model = args.data+'/'+'temp.mod' 222 | self.loadModel() 223 | 224 | reses = self.testEpoch() 225 | 226 | log(self.makePrint('Test', 1, reses, 1)) 227 | 228 | 229 | if __name__ == '__main__': 230 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 231 | logger.saveDefault = True 232 | log('Start') 233 | handler = DataHandler() 234 | handler.load_data() 235 | log('Load Data') 236 | 237 | coach = Coach(handler) 238 | coach.run() 239 | # coach.test() -------------------------------------------------------------------------------- /DiffGraph-Rec/params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def ParseArgs(): 4 | parser = argparse.ArgumentParser(description='Model Params') 5 | parser.add_argument('--lr', default=1e-3, type=float, help='learning rate') 6 | parser.add_argument('--difflr', default=1e-3, type=float, help='learning rate') 7 | parser.add_argument('--batch', default=2048, type=int, help='batch size') 8 | parser.add_argument('--tstBat', default=1024, type=int, help='number of users in a testing batch') 9 | parser.add_argument('--reg', default=3e-2, type=float, help='weight decay regularizer') 10 | parser.add_argument('--patience', type=int, default=20) 11 | #retain params 12 | parser.add_argument('--threshold', default=0.5, type=float, help='threshold to filter users') 13 | parser.add_argument('--data', default='retail_rocket', type=str, help='name of dataset') 14 | parser.add_argument('--save_path', default='tem', help='file name to save model and training record') 15 | 16 | 17 | #gcn_setting 18 | parser.add_argument('--epoch', default=100, type=int, help='number of epochs') 19 | parser.add_argument('--decay', default=0.96, type=float, help='weight decay rate') 
20 | parser.add_argument('--decay_step', type=int, default=1) 21 | parser.add_argument('--init', default=False, type=bool, help='whether initial embedding') 22 | parser.add_argument('--latdim', default=64, type=int, help='embedding size') 23 | parser.add_argument('--gcn_layer', default=2, type=int, help='number of gcn layers') 24 | parser.add_argument('--uugcn_layer', default=2, type=int, help='number of gcn layers') 25 | parser.add_argument('--load_model', default=None, help='model name to load') 26 | parser.add_argument('--topk', default=20, type=int, help='K of top K') 27 | parser.add_argument('--dropRate', default=0.5, type=float, help='rate for dropout layer') 28 | parser.add_argument('--gpu', default='0', type=str, help='indicates which gpu to use') 29 | 30 | 31 | #diffusion setting 32 | parser.add_argument('--dims', type=str, default='[64]') 33 | parser.add_argument('--d_emb_size', type=int, default=8) 34 | parser.add_argument('--norm', type=bool, default=True) 35 | parser.add_argument('--steps', type=int, default=200) 36 | parser.add_argument('--noise_scale', type=float, default=1e-4) 37 | parser.add_argument('--noise_min', type=float, default=0.0001) 38 | parser.add_argument('--noise_max', type=float, default=0.001) 39 | parser.add_argument('--sampling_steps', type=int, default=0) 40 | 41 | 42 | 43 | return parser.parse_args() 44 | args = ParseArgs() 45 | 46 | 47 | #tmall :lr 1e-3 batch:4096 2048 layer:2 reg:3e-2 steps:200 noise-scale:1e-4 48 | 49 | #retail_rocket :lr 1e-3 batch:4096 2048 layer:2 reg:3e-2 steps:150 noise-scale:1e-4 -------------------------------------------------------------------------------- /DiffGraph_NC/DataHandler.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from scipy.sparse import csr_matrix, coo_matrix, dok_matrix 4 | from params import args 5 | import scipy.sparse as sp 6 | import dgl 7 | from Utils.TimeLogger import log 8 | import torch as t 9 | 
from sklearn.preprocessing import OneHotEncoder 10 | device = "cuda:0" if t.cuda.is_available() else "cpu" 11 | 12 | class DataHandler: 13 | def __init__(self): 14 | 15 | if args.data == 'DBLP': 16 | predir = './data/DBLP/' 17 | if args.data == 'aminer': 18 | predir = './data/aminer/' 19 | self.predir = predir 20 | 21 | 22 | def loadOneFile(self, filename): 23 | with open(filename, 'rb') as fs: 24 | ret = (pickle.load(fs) != 0).astype(np.float32) 25 | if type(ret) != coo_matrix: 26 | ret = sp.coo_matrix(ret) 27 | return ret 28 | 29 | def normalizeAdj(self, mat): 30 | degree = np.array(mat.sum(axis=-1)) 31 | dInvSqrt = np.reshape(np.power(degree, -0.5), [-1]) 32 | dInvSqrt[np.isinf(dInvSqrt)] = 0.0 33 | dInvSqrtMat = sp.diags(dInvSqrt) 34 | return mat.dot(dInvSqrtMat).transpose().dot(dInvSqrtMat).tocoo() 35 | 36 | def makeTorchAdj(self, mat): 37 | # make ui adj 38 | user,item = mat.shape[0],mat.shape[1] 39 | a = sp.csr_matrix((user, user)) 40 | b = sp.csr_matrix((item, item)) 41 | mat = sp.vstack([sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])]) 42 | mat = (mat != 0) * 1.0 43 | # mat = (mat + sp.eye(mat.shape[0])) * 1.0 44 | mat = self.normalizeAdj(mat) 45 | 46 | # make cuda tensor 47 | idxs = t.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64)) 48 | vals = t.from_numpy(mat.data.astype(np.float32)) 49 | shape = t.Size(mat.shape) 50 | return t.sparse.FloatTensor(idxs, vals, shape).to(device) 51 | 52 | def makeTorchuAdj(self, mat): 53 | """Create tensor-based adjacency matrix for user social graph. 54 | 55 | Args: 56 | mat: Adjacency matrix. 57 | 58 | Returns: 59 | Tensor-based adjacency matrix. 
60 | """ 61 | mat = (mat != 0) * 1.0 62 | mat = (mat + sp.eye(mat.shape[0])) * 1.0 63 | mat = self.normalizeAdj(mat) 64 | 65 | # make cuda tensor 66 | idxs = t.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64)) 67 | vals = t.from_numpy(mat.data.astype(np.float32)) 68 | shape = t.Size(mat.shape) 69 | return t.sparse.FloatTensor(idxs, vals, shape).to(device) 70 | def makeBiAdj(self, mat): 71 | n_user = mat.shape[0] 72 | n_item = mat.shape[1] 73 | a = sp.csr_matrix((n_user, n_user)) 74 | b = sp.csr_matrix((n_item, n_item)) 75 | mat = sp.vstack([sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])]) 76 | mat = (mat != 0) * 1.0 77 | mat = mat.tocoo() 78 | edge_src,edge_dst = mat.nonzero() 79 | ui_graph = dgl.graph(data=(edge_src, edge_dst), 80 | idtype=t.int32, 81 | num_nodes=mat.shape[0] 82 | ) 83 | 84 | return ui_graph 85 | 86 | 87 | def LoadData(self): 88 | if args.data == 'DBLP': 89 | features_list,apa_mat,ata_mat,ava_mat,train,val,test,labels = self.load_dblp_data() 90 | self.feature_list = t.FloatTensor(features_list).to(device) 91 | 92 | self.hete_adj1 = dgl.from_scipy(apa_mat).to(device) 93 | self.hete_adj2 = dgl.from_scipy(ata_mat).to(device) 94 | self.hete_adj3 = dgl.from_scipy(ava_mat).to(device) 95 | self.train_idx = train 96 | self.val_idx = val 97 | self.test_idx = test 98 | self.labels = labels 99 | 100 | 101 | 102 | # self.train_idx_generator = index_generator(batch_size=args.batch, indices=self.train_idx) 103 | # self.val_idx_generator = index_generator(batch_size=args.batch, indices=self.val_idx, shuffle=False) 104 | # self.test_idx_generator = index_generator(batch_size=args.batch, indices=self.test_idx, shuffle=False) 105 | self.he_adjs = [self.hete_adj1,self.hete_adj2,self.hete_adj3] 106 | if args.data == 'Freebase': 107 | features_list,mam_mat,mdm_mat,mwm_mat,train,val,test,labels = self.load_Freebase_data() 108 | self.feature_list = t.FloatTensor(features_list).to(device) 109 | 110 | self.hete_adj1 = dgl.from_scipy(mam_mat).to(device) 111 
| self.hete_adj2 = dgl.from_scipy(mdm_mat).to(device) 112 | self.hete_adj3 = dgl.from_scipy(mwm_mat).to(device) 113 | self.train_idx = train 114 | self.val_idx = val 115 | self.test_idx = test 116 | self.labels = labels 117 | 118 | self.he_adjs = [self.hete_adj1,self.hete_adj2,self.hete_adj3] 119 | 120 | if args.data == 'aminer': 121 | features_list,pap_mat,prp_mat,pos_mat,train,val,test,labels = self.load_aminer_data() 122 | self.feature_list = t.FloatTensor(features_list).to(device) 123 | 124 | self.hete_adj1 = dgl.from_scipy(pap_mat).to(device) 125 | self.hete_adj2 = dgl.from_scipy(prp_mat).to(device) 126 | self.hete_adj3 = dgl.from_scipy(pos_mat).to(device) 127 | self.train_idx = train 128 | self.val_idx = val 129 | self.test_idx = test 130 | self.labels = labels 131 | 132 | self.he_adjs = [self.hete_adj1,self.hete_adj2,self.hete_adj3] 133 | 134 | 135 | 136 | 137 | def load_dblp_data(self): 138 | features_a = sp.load_npz(self.predir + 'a_feat.npz').astype("float32") 139 | # features_1 = sp.load_npz(self.predir + '/features_1.npz').toarray() 140 | # features_2 = sp.load_npz(self.predir + '/features_2.npy') 141 | features_a = t.FloatTensor(preprocess_features(features_a)) 142 | 143 | apa_mat=sp.load_npz(self.predir + "apa.npz") 144 | ata_mat=sp.load_npz(self.predir + "apcpa.npz") 145 | ava_mat=sp.load_npz(self.predir + "aptpa.npz") 146 | labels = np.load(self.predir + 'labels.npy') 147 | labels = encode_onehot(labels) 148 | labels= t.FloatTensor(labels).to(device) 149 | train = [np.load(self.predir + "train_" + str(i) + ".npy") for i in args.ratio] 150 | test = [np.load(self.predir + "test_" + str(i) + ".npy") for i in args.ratio] 151 | val = [np.load(self.predir + "val_" + str(i) + ".npy") for i in args.ratio] 152 | train = [t.LongTensor(i) for i in train] 153 | val = [t.LongTensor(i) for i in val] 154 | test = [t.LongTensor(i) for i in test] 155 | 156 | return features_a,apa_mat,ata_mat,ava_mat,train,val,test,labels 157 | 158 | def load_Freebase_data(self): 159 
| type_num = [3492, 2502, 33401, 4459] 160 | 161 | # features_1 = sp.load_npz(self.predir + '/features_1.npz').toarray() 162 | # features_2 = sp.load_npz(self.predir + '/features_2.npy') 163 | features_m = sp.eye(type_num[0]) 164 | features_m=t.FloatTensor(preprocess_features(features_m)) 165 | mam = sp.load_npz(self.predir + "mam.npz") 166 | mdm = sp.load_npz(self.predir + "mdm.npz") 167 | mwm = sp.load_npz(self.predir + "mwm.npz") 168 | labels = np.load(self.predir + 'labels.npy') 169 | labels = encode_onehot(labels) 170 | labels= t.FloatTensor(labels).to(device) 171 | train = [np.load(self.predir + "train_" + str(i) + ".npy") for i in args.ratio] 172 | test = [np.load(self.predir + "test_" + str(i) + ".npy") for i in args.ratio] 173 | val = [np.load(self.predir + "val_" + str(i) + ".npy") for i in args.ratio] 174 | train = [t.LongTensor(i) for i in train] 175 | val = [t.LongTensor(i) for i in val] 176 | test = [t.LongTensor(i) for i in test] 177 | 178 | return features_m,mam,mdm,mwm,train,val,test,labels 179 | 180 | def load_aminer_data(self): 181 | type_num = [6564, 13329, 35890] 182 | 183 | # features_1 = sp.load_npz(self.predir + '/features_1.npz').toarray() 184 | # features_2 = sp.load_npz(self.predir + '/features_2.npy') 185 | features_p = sp.eye(type_num[0]) 186 | features_p=t.FloatTensor(preprocess_features(features_p)) 187 | pap = sp.load_npz(self.predir + "pap.npz") 188 | prp = sp.load_npz(self.predir + "prp.npz") 189 | pos = sp.load_npz(self.predir + "pos.npz") 190 | labels = np.load(self.predir + 'labels.npy') 191 | labels = encode_onehot(labels) 192 | labels= t.FloatTensor(labels).to(device) 193 | train = [np.load(self.predir + "train_" + str(i) + ".npy") for i in args.ratio] 194 | test = [np.load(self.predir + "test_" + str(i) + ".npy") for i in args.ratio] 195 | val = [np.load(self.predir + "val_" + str(i) + ".npy") for i in args.ratio] 196 | train = [t.LongTensor(i) for i in train] 197 | val = [t.LongTensor(i) for i in val] 198 | test = 
[t.LongTensor(i) for i in test] 199 | 200 | return features_p,pap,prp,pos,train,val,test,labels 201 | 202 | 203 | 204 | def preprocess_features(features): 205 | """Row-normalize feature matrix and convert to tuple representation""" 206 | rowsum = np.array(features.sum(1)) 207 | r_inv = np.power(rowsum, -1).flatten() 208 | r_inv[np.isinf(r_inv)] = 0. 209 | r_mat_inv = sp.diags(r_inv) 210 | features = r_mat_inv.dot(features) 211 | return features.todense() 212 | def encode_onehot(labels): 213 | labels = labels.reshape(-1, 1) 214 | enc = OneHotEncoder() 215 | enc.fit(labels) 216 | labels_onehot = enc.transform(labels).toarray() 217 | return labels_onehot 218 | 219 | class index_generator: 220 | def __init__(self, batch_size, num_data=None, indices=None, shuffle=True): 221 | if num_data is not None: 222 | self.num_data = num_data 223 | self.indices = np.arange(num_data) 224 | if indices is not None: 225 | self.num_data = len(indices) 226 | self.indices = np.copy(indices) 227 | self.batch_size = batch_size 228 | self.iter_counter = 0 229 | self.shuffle = shuffle 230 | if shuffle: 231 | np.random.shuffle(self.indices) 232 | 233 | def next(self): 234 | if self.num_iterations_left() <= 0: 235 | self.reset() 236 | self.iter_counter += 1 237 | return np.copy(self.indices[(self.iter_counter - 1) * self.batch_size:self.iter_counter * self.batch_size]) 238 | 239 | def num_iterations(self): 240 | return int(np.ceil(self.num_data / self.batch_size)) 241 | 242 | def num_iterations_left(self): 243 | return self.num_iterations() - self.iter_counter 244 | 245 | def reset(self): 246 | if self.shuffle: 247 | np.random.shuffle(self.indices) 248 | self.iter_counter = 0 249 | 250 | 251 | -------------------------------------------------------------------------------- /DiffGraph_NC/Model.py: -------------------------------------------------------------------------------- 1 | from statistics import mean 2 | import torch 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from 
params import args 6 | from sklearn.metrics import roc_auc_score 7 | import numpy as np 8 | import math 9 | 10 | from Utils.Utils import cal_infonce_loss 11 | import dgl.function as fn 12 | from dgl.nn.pytorch import GraphConv 13 | init = nn.init.xavier_uniform_ 14 | uniformInit = nn.init.uniform 15 | from torch.nn.init import xavier_normal_, constant_, xavier_uniform_ 16 | device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 17 | 18 | class HGDM(nn.Module): 19 | def __init__(self,f_dim): 20 | super(HGDM, self).__init__() 21 | out_dims = eval(args.dims) + [args.latdim] 22 | in_dims = out_dims[::-1] 23 | self.user_denoise_model = Denoise(in_dims, out_dims, args.d_emb_size, norm=args.norm) 24 | self.diffusion_model = GaussianDiffusion(args.noise_scale, args.noise_min, args.noise_max, args.steps) 25 | 26 | self.act = nn.LeakyReLU(0.5, inplace=True) 27 | self.helayers1 = nn.ModuleList() 28 | self.helayers2 = nn.ModuleList() 29 | self.main_layers = nn.ModuleList() 30 | self.weight = False 31 | for i in range(0, args.gcn_layer): 32 | self.helayers1.append(UUGCNLayer(args.latdim, args.latdim, weight=self.weight, bias=False, activation=self.act)) 33 | for i in range(0, args.gcn_layer): 34 | self.helayers2.append(UUGCNLayer(args.latdim, args.latdim, weight=self.weight, bias=False, activation=self.act)) 35 | for i in range(0, args.uugcn_layer): 36 | self.main_layers.append(UUGCNLayer(args.latdim, args.latdim, weight=self.weight, bias=False, activation=self.act)) 37 | 38 | self.transform_layer = torch.nn.Linear(f_dim,args.latdim,bias=True) 39 | nn.init.xavier_normal_(self.transform_layer.weight, gain=1.414) 40 | self.dense = torch.nn.Linear(args.latdim,4) 41 | 42 | 43 | self.pool = 'sum' 44 | def forward(self, he_adjs,feature_list, is_training=True): 45 | 46 | 47 | embed = self.transform_layer(feature_list) 48 | target_embedding = [embed] 49 | source_embeddings1 = [embed] 50 | source_embeddings2= [embed] 51 | 52 | for i, layer in 
enumerate(self.main_layers): 53 | embeddings = layer(he_adjs[0], target_embedding[-1]) 54 | norm_embeddings = F.normalize(embeddings, p=2, dim=1) 55 | target_embedding += [norm_embeddings] 56 | 57 | target_embedding = sum(target_embedding) 58 | 59 | for i, layer in enumerate(self.helayers1): 60 | embeddings = layer(he_adjs[1], source_embeddings1[-1]) 61 | norm_embeddings = F.normalize(embeddings, p=2, dim=1) 62 | source_embeddings1 += [norm_embeddings] 63 | 64 | source_embeddings1 = sum(source_embeddings1) 65 | 66 | for i, layer in enumerate(self.helayers2): 67 | embeddings = layer(he_adjs[2], source_embeddings2[-1]) 68 | norm_embeddings = F.normalize(embeddings, p=2, dim=1) 69 | source_embeddings2 += [norm_embeddings] 70 | 71 | source_embeddings2 = sum(source_embeddings2) 72 | 73 | return source_embeddings1,source_embeddings2,target_embedding 74 | 75 | def cal_loss(self, ancs, label,he_adjs,initial_feature): 76 | source_embeddings1,source_embeddings2,target_embedding = self.forward(he_adjs,initial_feature) 77 | 78 | source_embeddings = source_embeddings1+source_embeddings2 79 | 80 | diff_loss,diff_embeddings= self.diffusion_model.training_losses2(self.user_denoise_model, target_embedding, source_embeddings, ancs) 81 | diff_loss = diff_loss.mean() 82 | all_embeddings = target_embedding+diff_embeddings 83 | scores = self.dense(all_embeddings) 84 | scores = F.log_softmax(scores,dim=1) 85 | 86 | batch_u = scores[ancs] 87 | batch_label = torch.argmax(label[ancs], dim=-1) 88 | nll_loss = F.nll_loss(batch_u,batch_label) 89 | return nll_loss,diff_loss 90 | def get_embeds(self, ancs, label,he_adjs,initial_feature): 91 | source_embeddings1,source_embeddings2,target_embedding = self.forward(he_adjs,initial_feature) 92 | 93 | source_embeddings = source_embeddings1+source_embeddings2 94 | 95 | diff_embeddings= self.diffusion_model.p_sample(self.user_denoise_model, source_embeddings, args.sampling_steps) 96 | 97 | all_embeddings = target_embedding+diff_embeddings 98 | 99 | 
# NOTE(review): this chunk begins inside the model class defined earlier in the
# file; the first statement is the tail of a method whose header is not visible.
        return all_embeddings[ancs]

    def get_allembeds(self, he_adjs, initial_feature):
        """Build final node embeddings and classification scores.

        The two auxiliary-view embeddings are summed, refined by the reverse
        diffusion process, added to the target-view embedding, and scored by
        the dense output head.

        Returns:
            (all_embeddings, scores)
        """
        source_embeddings1,source_embeddings2,target_embedding = self.forward(he_adjs,initial_feature)

        # Merge the auxiliary (source) views by element-wise addition.
        source_embeddings = source_embeddings1+source_embeddings2

        # Denoise the merged source view with the trained diffusion model.
        diff_embeddings= self.diffusion_model.p_sample(self.user_denoise_model, source_embeddings, args.sampling_steps)
        all_embeddings = target_embedding+diff_embeddings
        scores = self.dense(all_embeddings)
        return all_embeddings,scores

class DGLLayer(nn.Module):
    """One symmetrically normalised GCN propagation step (D^-1/2 A D^-1/2)
    over a bipartite DGL graph whose node set is the concatenation of the two
    partitions passed to forward() as ``u_f`` and ``v_f``."""

    def __init__(self,
                 in_feats,
                 out_feats,
                 weight=False,
                 bias=False,
                 activation=None):
        super(DGLLayer, self).__init__()
        self.bias = bias  # stored but no bias term is applied in forward()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self.weight = weight
        if self.weight:
            # One linear projection per node partition.
            self.u_w = nn.Parameter(torch.Tensor(in_feats, out_feats))
            self.v_w = nn.Parameter(torch.Tensor(in_feats, out_feats))
            xavier_uniform_(self.u_w)
            xavier_uniform_(self.v_w)
        self._activation = activation

    def forward(self, graph, u_f, v_f):
        """Propagate features and return embeddings for all graph nodes
        (u-partition rows first, then v-partition rows)."""
        with graph.local_scope():
            if self.weight:
                u_f = torch.mm(u_f, self.u_w)
                v_f = torch.mm(v_f, self.v_w)
            node_f = torch.cat([u_f, v_f], dim=0)
            # Source-side normalisation: D_out^-1/2 (degrees clamped >= 1).
            degs = graph.out_degrees().to(u_f.device).float().clamp(min=1)
            norm = torch.pow(degs, -0.5).view(-1, 1)

            node_f = node_f * norm

            graph.ndata['n_f'] = node_f
            # Sum-aggregate neighbour messages.
            graph.update_all(fn.copy_u(u='n_f', out='m'), reduce_func=fn.sum(msg='m', out='n_f'))

            rst = graph.ndata['n_f']

            # Destination-side normalisation: D_in^-1/2.
            degs = graph.in_degrees().to(u_f.device).float().clamp(min=1)
            norm = torch.pow(degs, -0.5).view(-1, 1)
            rst = rst * norm

            if self._activation is not None:
                rst = self._activation(rst)

            return rst

class UUGCNLayer(nn.Module):
    """Same normalised propagation as DGLLayer, but over a homogeneous
    (single-partition) graph: forward() takes a single feature matrix."""

    def __init__(self,
                 in_feats,
                 out_feats,
                 weight=False,
                 bias=False,
                 activation=None):
        super(UUGCNLayer, self).__init__()
        self.bias = bias  # stored but unused below
        self._in_feats = in_feats
        self._out_feats = out_feats
        self.weight = weight
        if self.weight:
            self.u_w = nn.Parameter(torch.Tensor(in_feats, out_feats))
            # NOTE(review): DGLLayer initialises with xavier_uniform_; here the
            # bare name `init` is called instead — confirm the file's import
            # binds `init` to a callable initialiser.
            init(self.u_w)
        self._activation = activation

    def forward(self, graph, u_f):
        """Propagate ``u_f`` over ``graph`` with symmetric D^-1/2 normalisation."""
        with graph.local_scope():
            if self.weight:
                u_f = torch.mm(u_f, self.u_w)
            node_f = u_f
            # Source-side normalisation: D_out^-1/2.
            degs = graph.out_degrees().to(u_f.device).float().clamp(min=1)
            norm = torch.pow(degs, -0.5).view(-1, 1)

            node_f = node_f * norm

            graph.ndata['n_f'] = node_f
            graph.update_all(fn.copy_u(u='n_f', out='m'), reduce_func=fn.sum(msg='m', out='n_f'))

            rst = graph.ndata['n_f']

            # Destination-side normalisation: D_in^-1/2.
            degs = graph.in_degrees().to(u_f.device).float().clamp(min=1)
            norm = torch.pow(degs, -0.5).view(-1, 1)
            rst = rst * norm

            if self._activation is not None:
                rst = self._activation(rst)

            return rst


class Denoise(nn.Module):
    def __init__(self, in_dims, out_dims,
                 # (continuation of Denoise.__init__'s signature, opened above)
                 emb_size, norm=False, dropout=0.5):
        """MLP denoiser conditioned on a sinusoidal timestep embedding.

        in_dims/out_dims: layer-width lists for the encoder/decoder MLPs.
        emb_size: width of the timestep embedding.
        norm: L2-normalise the input before denoising.
        dropout: input message-dropout rate.
        """
        super(Denoise, self).__init__()
        self.in_dims = in_dims
        self.out_dims = out_dims
        self.time_emb_dim = emb_size
        self.norm = norm

        self.emb_layer = nn.Linear(self.time_emb_dim, self.time_emb_dim)

        # The first layer also consumes the concatenated timestep embedding.
        in_dims_temp = [self.in_dims[0] + self.time_emb_dim] + self.in_dims[1:]

        out_dims_temp = self.out_dims

        self.in_layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out in zip(in_dims_temp[:-1], in_dims_temp[1:])])
        self.out_layers = nn.ModuleList([nn.Linear(d_in, d_out) for d_in, d_out in zip(out_dims_temp[:-1], out_dims_temp[1:])])

        self.drop = nn.Dropout(dropout)
        self.init_weights()

    def init_weights(self):
        """Xavier-style normal init for weights, small normal init for biases."""
        for layer in self.in_layers:
            size = layer.weight.size()
            std = np.sqrt(2.0 / (size[0] + size[1]))
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, 0.001)

        for layer in self.out_layers:
            size = layer.weight.size()
            std = np.sqrt(2.0 / (size[0] + size[1]))
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, 0.001)

        size = self.emb_layer.weight.size()
        std = np.sqrt(2.0 / (size[0] + size[1]))
        self.emb_layer.weight.data.normal_(0.0, std)
        self.emb_layer.bias.data.normal_(0.0, 0.001)

    def forward(self, x, timesteps, mess_dropout=True):
        """Predict the denoised embedding for noisy input ``x`` at ``timesteps``."""
        # Transformer-style sinusoidal encoding of the timestep.
        freqs = torch.exp(-math.log(10000) * torch.arange(start=0, end=self.time_emb_dim//2, dtype=torch.float32) / (self.time_emb_dim//2)).to(device)
        temp = timesteps[:, None].float() * freqs[None]
        time_emb = torch.cat([torch.cos(temp), torch.sin(temp)], dim=-1)
        if self.time_emb_dim % 2:
            # Pad odd embedding widths with a zero column.
            time_emb = torch.cat([time_emb, torch.zeros_like(time_emb[:, :1])], dim=-1)
        emb = self.emb_layer(time_emb)
        if self.norm:
            x = F.normalize(x)
        if mess_dropout:
            x = self.drop(x)
        h = torch.cat([x, emb], dim=-1)
        for i, layer in enumerate(self.in_layers):
            h = layer(h)
            h = torch.tanh(h)
        for i, layer in enumerate(self.out_layers):
            h = layer(h)
            if i != len(self.out_layers) - 1:
                h = torch.tanh(h)

        return h

class GaussianDiffusion(nn.Module):
    """Gaussian diffusion schedule and (deterministic) reverse sampling,
    DiffRec-style, over embedding vectors."""

    def __init__(self, noise_scale, noise_min, noise_max, steps, beta_fixed=True):
        super(GaussianDiffusion, self).__init__()

        self.noise_scale = noise_scale
        self.noise_min = noise_min
        self.noise_max = noise_max
        self.steps = steps

        # noise_scale == 0 means "no diffusion": no schedule is built.
        if noise_scale != 0:
            self.betas = torch.tensor(self.get_betas(), dtype=torch.float64).to(device)
            if beta_fixed:
                # Pin the first beta to a small constant for stability.
                self.betas[0] = 0.0001

            self.calculate_for_diffusion()

    def get_betas(self):
        """Linear variance schedule converted to betas, each clipped at 0.999."""
        start = self.noise_scale * self.noise_min
        end = self.noise_scale * self.noise_max
        variance = np.linspace(start, end, self.steps, dtype=np.float64)
        alpha_bar = 1 - variance
        betas = []
        betas.append(1 - alpha_bar[0])
        for i in range(1, self.steps):
            betas.append(min(1 - alpha_bar[i] / alpha_bar[i-1], 0.999))
        return np.array(betas)

    def calculate_for_diffusion(self):
        """Precompute cumulative-product quantities used by q/p sampling."""
        alphas = 1.0 - self.betas
        self.alphas_cumprod = torch.cumprod(alphas, axis=0).to(device)
        # alpha_bar_{t-1} (prepend 1) and alpha_bar_{t+1} (append 0).
        self.alphas_cumprod_prev = torch.cat([torch.tensor([1.0]).to(device), self.alphas_cumprod[:-1]]).to(device)
        self.alphas_cumprod_next = torch.cat([self.alphas_cumprod[1:], torch.tensor([0.0]).to(device)]).to(device)

        self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod)
        self.sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - self.alphas_cumprod)
        self.log_one_minus_alphas_cumprod = torch.log(1.0 - self.alphas_cumprod)
        self.sqrt_recip_alphas_cumprod = torch.sqrt(1.0 / self.alphas_cumprod)
        self.sqrt_recipm1_alphas_cumprod = torch.sqrt(1.0 / self.alphas_cumprod - 1)

        self.posterior_variance = (
            self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
        )
        # First entry replaced by the second to avoid log(0).
        self.posterior_log_variance_clipped = torch.log(torch.cat([self.posterior_variance[1].unsqueeze(0), self.posterior_variance[1:]]))
        self.posterior_mean_coef1 = (self.betas * torch.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod))
        self.posterior_mean_coef2 = ((1.0 - self.alphas_cumprod_prev) * torch.sqrt(alphas) / (1.0 - self.alphas_cumprod))

    def p_sample(self, model, x_start, steps):
        """Corrupt x_start `steps` times, then run the reverse chain.

        Sampling is deterministic: only the posterior mean is used, the
        variance returned by p_mean_variance is discarded.
        """
        if steps == 0:
            x_t = x_start
        else:
            t = torch.tensor([steps-1] * x_start.shape[0]).to(device)
            x_t = self.q_sample(x_start, t)

        # NOTE(review): the reverse loop always runs over all self.steps,
        # regardless of the `steps` argument used for corruption above —
        # confirm this asymmetry is intended.
        indices = list(range(self.steps))[::-1]

        for i in indices:
            t = torch.tensor([i] * x_t.shape[0]).to(device)
            model_mean, model_log_variance = self.p_mean_variance(model, x_t, t)
            x_t = model_mean
        return x_t

    def q_sample(self, x_start, t, noise=None):
        """Forward process: sample x_t ~ q(x_t | x_0) at per-row timesteps t."""
        if noise is None:
            noise = torch.randn_like(x_start)
        return self._extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + self._extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise

    def _extract_into_tensor(self, arr, timesteps, broadcast_shape):
        """Gather arr[timesteps] and broadcast to ``broadcast_shape``."""
        arr = arr.to(device)
        res = arr[timesteps].float()
        while len(res.shape) < len(broadcast_shape):
            res = res[..., None]
        return res.expand(broadcast_shape)

    def p_mean_variance(self, model, x, t):
        """Posterior mean/log-variance with the model output taken as x_0."""
        model_output = model(x, t, False)

        model_variance = self.posterior_variance
        model_log_variance = self.posterior_log_variance_clipped

        model_variance = self._extract_into_tensor(model_variance, t, x.shape)
        model_log_variance = self._extract_into_tensor(model_log_variance, t, x.shape)

        model_mean = (self._extract_into_tensor(self.posterior_mean_coef1, t, x.shape) * model_output + self._extract_into_tensor(self.posterior_mean_coef2, t, x.shape) * x)

        return model_mean, model_log_variance

    def training_losses(self, model, targetEmbeds, x_start):
        """SNR-weighted diffusion MSE against targetEmbeds.

        NOTE(review): the corruption is applied to targetEmbeds while x_start
        only fixes the batch/noise shape — confirm this is intended
        (training_losses2 below corrupts x_start instead).
        """
        batch_size = x_start.size(0)
        ts = torch.randint(0, self.steps, (batch_size,)).long().to(device)
        noise = torch.randn_like(x_start)
        if self.noise_scale != 0:
            x_t = self.q_sample(targetEmbeds, ts, noise)
        else:
            x_t = x_start

        model_output = model(x_t, ts)
        mse = self.mean_flat((targetEmbeds - model_output) ** 2)

        # Importance weight from the SNR difference; forced to 1.0 at t == 0.
        weight = self.SNR(ts - 1) - self.SNR(ts)
        weight = torch.where((ts == 0), 1.0, weight)

        diff_loss = weight * mse
        return diff_loss,model_output

    def training_losses2(self, model, targetEmbeds, x_start, batch):
        """Same as training_losses but corrupts x_start and restricts the
        returned loss to the ``batch`` indices."""
        batch_size = x_start.size(0)
        ts = torch.randint(0, self.steps, (batch_size,)).long().to(device)
        noise = torch.randn_like(x_start)
        if self.noise_scale != 0:
            x_t = self.q_sample(x_start, ts, noise)
        else:
            x_t = x_start

        model_output = model(x_t, ts)
        mse = self.mean_flat((targetEmbeds - model_output) ** 2)
        weight = self.SNR(ts - 1) - self.SNR(ts)
        weight = torch.where((ts == 0), 1.0, weight)
        diff_loss = weight * mse
        diff_loss = diff_loss[batch]

        return diff_loss,model_output

    def mean_flat(self, tensor):
        """Mean over all non-batch dimensions."""
        return tensor.mean(dim=list(range(1, len(tensor.shape))))

    def SNR(self,
            # (continuation of GaussianDiffusion.SNR's signature, opened above)
            t):
        """Signal-to-noise ratio alpha_bar_t / (1 - alpha_bar_t)."""
        self.alphas_cumprod = self.alphas_cumprod.to(device)
        return self.alphas_cumprod[t] / (1 - self.alphas_cumprod[t])


class MAE(nn.Module):
    """Masked auto-encoder: randomly masks input features and reconstructs
    the target embeddings with an encoder/decoder MLP."""

    def __init__(self, in_dims, out_dims, norm=False, dropout=0.5):
        super(MAE, self).__init__()

        self.in_dims = in_dims
        self.out_dims = out_dims
        assert out_dims[0] == in_dims[-1], "In and out dimensions must equal to each other."
        self.norm = norm

        in_dims_temp = self.in_dims
        out_dims_temp = self.out_dims

        self.in_layers = nn.ModuleList([nn.Linear(d_in, d_out) \
            for d_in, d_out in zip(in_dims_temp[:-1], in_dims_temp[1:])])
        self.out_layers = nn.ModuleList([nn.Linear(d_in, d_out) \
            for d_in, d_out in zip(out_dims_temp[:-1], out_dims_temp[1:])])

        self.drop = nn.Dropout(dropout)  # constructed but not used in forward()
        self.init_weights()

    def init_weights(self):
        """Xavier normal init for weights, small normal init for biases."""
        for layer in self.in_layers:
            size = layer.weight.size()
            fan_out = size[0]
            fan_in = size[1]
            std = np.sqrt(2.0 / (fan_in + fan_out))
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, 0.001)

        for layer in self.out_layers:
            size = layer.weight.size()
            fan_out = size[0]
            fan_in = size[1]
            std = np.sqrt(2.0 / (fan_in + fan_out))
            layer.weight.data.normal_(0.0, std)
            layer.bias.data.normal_(0.0, 0.001)

    def forward(self, targetEmbeds,x, ancs,is_training=True):
        """Mask-and-reconstruct. Returns (loss, h) while training, else h."""
        x_start = x
        if self.norm:
            x = F.normalize(x)
        # Random keep-mask, keep probability 0.8.
        # NOTE(review): the mask is sampled on CPU then moved to `device`, and
        # it is applied regardless of is_training — confirm eval-time masking
        # is intended.
        keepsample = (torch.rand(x.shape) < 0.8) * 1.0
        keepsample = keepsample.to(device)
        h = x * keepsample

        for i, layer in enumerate(self.in_layers):
            h = layer(h)
            h = torch.tanh(h)

        for i, layer in enumerate(self.out_layers):
            h = layer(h)
            if i != len(self.out_layers) - 1:
                h = torch.tanh(h)
        diff_loss = self.training_losses(targetEmbeds, h,ancs)
        if is_training:
            return diff_loss,h
        return h

    def training_losses(self, targetEmbeds, model_output,batch):
        """Per-node reconstruction MSE restricted to the ``batch`` indices."""
        mse = self.mean_flat((targetEmbeds - model_output) ** 2)
        diff_loss = mse[batch]
        return diff_loss

    def mean_flat(self,tensor):
        """Mean over all non-batch dimensions."""
        return tensor.mean(dim=list(range(1, len(tensor.shape))))

-------------------------------------------------------------------------------- /DiffGraph_NC/Utils/TimeLogger.py: --------------------------------------------------------------------------------
import datetime

# Module-level buffer for saved log lines and module defaults.
logmsg = ''
timemark = dict()
saveDefault = False

def log(msg, save=None, oneline=False):
    """Print ``msg`` with a timestamp, optionally accumulating it in logmsg.

    save=None falls back to the module-wide saveDefault; oneline rewrites the
    same terminal line (carriage return) instead of emitting a newline.
    """
    global logmsg
    global saveDefault
    time = datetime.datetime.now()
    tem = '%s: %s' % (time, msg)
    if save != None:
        if save:
            logmsg += tem + '\n'
    elif saveDefault:
        logmsg += tem + '\n'
    if oneline:
        print(tem, end='\r')
    else:
        print(tem)

if __name__ == '__main__':
    log('')
-------------------------------------------------------------------------------- /DiffGraph_NC/Utils/Utils.py: --------------------------------------------------------------------------------
import torch as t
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, normalized_mutual_info_score, adjusted_rand_score
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
import numpy as np

def innerProduct(usrEmbeds, itmEmbeds):
    # Row-wise dot product of the two embedding matrices.
    return
t.sum(usrEmbeds * itmEmbeds, dim=-1)

def pairPredict(ancEmbeds, posEmbeds, negEmbeds):
    """BPR-style pairwise score: positive minus negative inner products."""
    return innerProduct(ancEmbeds, posEmbeds) - innerProduct(ancEmbeds, negEmbeds)

def calcRegLoss(model):
    """Sum of squared L2 norms over every model parameter."""
    ret = 0
    for W in model.parameters():
        ret += W.norm(2).square()
    return ret

def reg_pick_embeds(embeds_list):
    """Sum of squared entries over a list of embedding tensors."""
    reg_loss = 0
    for embeds in embeds_list:
        reg_loss += embeds.square().sum()
    return reg_loss

def contrast(nodes, allEmbeds, allEmbeds2=None):
    """Mean log-sum-exp similarity of picked nodes against all embeddings.

    With a second embedding table, compares nodes' rows of allEmbeds against
    allEmbeds2; otherwise deduplicates nodes and compares within allEmbeds.
    """
    if allEmbeds2 is not None:
        pckEmbeds = allEmbeds[nodes]
        scores = t.log(t.exp(pckEmbeds @ allEmbeds2.T).sum(-1)).mean()
    else:
        uniqNodes = t.unique(nodes)
        pckEmbeds = allEmbeds[uniqNodes]
        scores = t.log(t.exp(pckEmbeds @ allEmbeds.T).sum(-1)).mean()
    return scores

def cal_infonce_loss(embeds1, embeds2, temp):
    """ InfoNCE Loss (specify nodes for contrastive learning)
    """
    # NOTE(review): unlike cal_infonce_loss_spec_nodes below, this variant
    # returns the per-sample loss vector (no .mean()) — confirm callers reduce it.
    embeds1 = F.normalize(embeds1 + 1e-8, p=2)
    embeds2 = F.normalize(embeds2 + 1e-8, p=2)
    pckEmbeds1 = embeds1
    pckEmbeds2 = embeds2
    nume = t.exp(t.sum(pckEmbeds1 * pckEmbeds2, dim=-1) / temp)
    deno = t.exp(pckEmbeds1 @ embeds2.T / temp).sum(-1) + 1e-8
    return -t.log(nume / deno)

def cal_infonce_loss_spec_nodes(embeds1, embeds2, nodes, temp):
    """ InfoNCE Loss (specify nodes for contrastive learning)
    """
    embeds1 = F.normalize(embeds1 + 1e-8, p=2)
    embeds2 = F.normalize(embeds2 + 1e-8, p=2)
    pckEmbeds1 = embeds1[nodes]
    pckEmbeds2 = embeds2[nodes]
    nume = t.exp(t.sum(pckEmbeds1 * pckEmbeds2, dim=-1) / temp)
    deno = t.exp(pckEmbeds1 @ embeds2.T / temp).sum(-1) + 1e-8
    return -t.log(nume / deno).mean()


def evaluate_results_nc(embeddings, labels, num_classes):
    """Evaluate embeddings by SVM classification and K-means clustering.

    The printed train-size labels [0.8, 0.6, 0.4, 0.2] correspond to
    1 - test_size for svm_test's default test_sizes (0.2, 0.4, 0.6, 0.8).
    """
    print('SVM test')
    svm_macro_f1_list, svm_micro_f1_list = svm_test(embeddings, labels)
    print('Macro-F1: ' + ', '.join(['{:.6f}~{:.6f} ({:.1f})'.format(macro_f1_mean, macro_f1_std, train_size) for
                                    (macro_f1_mean, macro_f1_std), train_size in
                                    zip(svm_macro_f1_list, [0.8, 0.6, 0.4, 0.2])]))
    print('Micro-F1: ' + ', '.join(['{:.6f}~{:.6f} ({:.1f})'.format(micro_f1_mean, micro_f1_std, train_size) for
                                    (micro_f1_mean, micro_f1_std), train_size in
                                    zip(svm_micro_f1_list, [0.8, 0.6, 0.4, 0.2])]))
    print('K-means test')
    nmi_mean, nmi_std, ari_mean, ari_std = kmeans_test(embeddings, labels, num_classes)
    print('NMI: {:.6f}~{:.6f}'.format(nmi_mean, nmi_std))
    print('ARI: {:.6f}~{:.6f}'.format(ari_mean, ari_std))

    return svm_macro_f1_list, svm_micro_f1_list, nmi_mean, nmi_std, ari_mean, ari_std

def svm_test(X, y, test_sizes=(0.2, 0.4, 0.6, 0.8), repeat=10):
    """LinearSVC macro/micro F1 (mean, std) per test size over `repeat` splits."""
    random_states = [182318 + i for i in range(repeat)]
    result_macro_f1_list = []
    result_micro_f1_list = []
    for test_size in test_sizes:
        macro_f1_list = []
        micro_f1_list = []
        for i in range(repeat):
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, shuffle=True, random_state=random_states[i])
            svm = LinearSVC(dual=False)
            svm.fit(X_train, y_train)
            y_pred = svm.predict(X_test)
            macro_f1 = f1_score(y_test, y_pred, average='macro')
            micro_f1 = f1_score(y_test, y_pred, average='micro')
            macro_f1_list.append(macro_f1)
            micro_f1_list.append(micro_f1)
        result_macro_f1_list.append((np.mean(macro_f1_list), np.std(macro_f1_list)))
        result_micro_f1_list.append((np.mean(micro_f1_list), np.std(micro_f1_list)))
    return result_macro_f1_list, result_micro_f1_list

def kmeans_test(X, y, n_clusters, repeat=10):
    """K-means NMI/ARI (mean, std) over `repeat` runs (unseeded KMeans)."""
    nmi_list = []
    ari_list = []
    for _ in range(repeat):
        kmeans = KMeans(n_clusters=n_clusters)
        y_pred = kmeans.fit_predict(X)
        nmi_score = normalized_mutual_info_score(y, y_pred, average_method='arithmetic')
        ari_score = adjusted_rand_score(y, y_pred)
        nmi_list.append(nmi_score)
        ari_list.append(ari_score)
    return np.mean(nmi_list), np.std(nmi_list), np.mean(ari_list), np.std(ari_list)


def evaluate(embeds, scores,ratio, idx_train, idx_val, idx_test, label, nb_classes,
             isTest=True):
    """Accuracy and macro/micro F1 on the val and test index sets.

    NOTE(review): `embeds` (beyond hid_units), `ratio`, `idx_train` and
    `isTest` are accepted but unused here.
    """
    hid_units = embeds.shape[1]
    val_logits = scores[idx_val]
    test_logits = scores[idx_test]

    # Labels are one-hot; recover class indices.
    val_lbls = t.argmax(label[idx_val], dim=-1)
    test_lbls = t.argmax(label[idx_test], dim=-1)

    preds = t.argmax(val_logits, dim=1)

    val_acc = t.sum(preds == val_lbls).float() / val_lbls.shape[0]
    val_f1_macro = f1_score(val_lbls.cpu(), preds.cpu(), average='macro')
    val_f1_micro = f1_score(val_lbls.cpu(), preds.cpu(), average='micro')

    # test
    preds = t.argmax(test_logits, dim=1)

    test_acc = t.sum(preds == test_lbls).float() / test_lbls.shape[0]
    test_f1_macro = f1_score(test_lbls.cpu(), preds.cpu(), average='macro')
    test_f1_micro = f1_score(test_lbls.cpu(), preds.cpu(), average='micro')

    return val_acc,val_f1_macro,val_f1_micro,test_acc,test_f1_macro,test_f1_micro,test_logits

-------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/a_feat.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/a_feat.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/apa.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/apa.npz
-------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/apcpa.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/apcpa.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/aptpa.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/aptpa.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/labels.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/nei_p.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/nei_p.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/p_feat.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/p_feat.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/pos.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/pos.npz 
-------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/t_feat.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/t_feat.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/test_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/test_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/test_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/test_40.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/test_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/test_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/train_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/train_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/train_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/train_40.npy 
-------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/train_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/train_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/val_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/val_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/val_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/val_40.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/DBLP/val_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/DBLP/val_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/labels.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/nei_a.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/nei_a.npy 
-------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/nei_r.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/nei_r.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/pap.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/pap.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/pos.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/pos.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/prp.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/prp.npz -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/test_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/test_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/test_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/test_40.npy 
-------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/test_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/test_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/train_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/train_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/train_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/train_40.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/train_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/train_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/val_20.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/val_20.npy -------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/val_40.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/val_40.npy 
-------------------------------------------------------------------------------- /DiffGraph_NC/data/aminer/val_60.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/DiffGraph_NC/data/aminer/val_60.npy -------------------------------------------------------------------------------- /DiffGraph_NC/main.py: -------------------------------------------------------------------------------- 1 | import torch as t 2 | import Utils.TimeLogger as logger 3 | from Utils.TimeLogger import log 4 | from params import args 5 | from Model import HGDM 6 | from sklearn.metrics import roc_auc_score 7 | from torch.nn.functional import softmax 8 | from DataHandler import DataHandler,index_generator 9 | import numpy as np 10 | import pickle 11 | from Utils.Utils import * 12 | from Utils.Utils import contrast 13 | import os 14 | import logging 15 | import datetime 16 | import sys 17 | 18 | 19 | device = t.device('cuda:0' if t.cuda.is_available() else 'cpu') 20 | 21 | class Coach: 22 | def __init__(self, handler): 23 | self.handler = handler 24 | 25 | self.metrics = dict() 26 | mets = ['bceLoss','AUC'] 27 | for met in mets: 28 | self.metrics['Train' + met] = list() 29 | self.metrics['Test' + met] = list() 30 | 31 | def makePrint(self, name, ep, reses, save): 32 | ret = 'Epoch %d/%d, %s: ' % (ep, args.epoch, name) 33 | for metric in reses: 34 | val = reses[metric] 35 | ret += '%s = %.4f, ' % (metric, val) 36 | tem = name + metric 37 | if save and tem in self.metrics: 38 | self.metrics[tem].append(val) 39 | ret = ret[:-2] + ' ' 40 | return ret 41 | 42 | def run(self): 43 | for ratio in range(len(self.handler.train_idx)): 44 | log('Ratio Type: '+str(ratio)) 45 | accs = [] 46 | micro_f1s = [] 47 | macro_f1s = [] 48 | macro_f1s_val = [] 49 | auc_score_list = [] 50 | for repeat in range(10): 51 | self.prepareModel() 52 | log('Repeat: '+str(repeat)) 53 | 54 | macroMax = 0 55 
| 56 | 57 | 58 | log_format = '%(asctime)s %(message)s' 59 | logging.basicConfig(stream=sys.stdout, level=logging.INFO, 60 | format=log_format, datefmt='%m/%d %I:%M:%S %p') 61 | log_save = './History/' 62 | log_file = f'{args.data}_' + \ 63 | f'lr_{args.lr}_batch_{args.batch}_noise_scale_{args.noise_scale}_step_{args.steps}_ratio_{ratio}_public' 64 | fname = f'{log_file}.txt' 65 | fh = logging.FileHandler(os.path.join(log_save, fname)) 66 | fh.setFormatter(logging.Formatter(log_format)) 67 | logger = logging.getLogger() 68 | logger.addHandler(fh) 69 | # logger.info(args) 70 | # logger.info('================') 71 | args.save_path = log_file 72 | 73 | val_accs = [] 74 | val_micro_f1s = [] 75 | val_macro_f1s = [] 76 | test_accs = [] 77 | test_micro_f1s = [] 78 | test_macro_f1s = [] 79 | logits_list = [] 80 | test_lbls = t.argmax(self.label[self.test_idx[ratio]], dim=-1) 81 | for ep in range(args.epoch): 82 | tstFlag = (ep % 1 == 0) 83 | reses = self.trainEpoch(ratio) 84 | # log(self.makePrint('Train', ep, reses, tstFlag)) 85 | if tstFlag: 86 | val_reses,test_reses = self.testEpoch(ratio) 87 | val_accs.append(val_reses['acc'].item()) 88 | val_macro_f1s.append(val_reses['macro']) 89 | val_micro_f1s.append(val_reses['micro']) 90 | 91 | test_accs.append(test_reses['acc'].item()) 92 | test_macro_f1s.append(test_reses['macro']) 93 | test_micro_f1s.append(test_reses['micro']) 94 | logits_list.append(test_reses['logits']) 95 | # print("\t[Val_Classification] Macro-F1_epoch: {:.4f} Micro-F1_epoch: {:.4f} Test-acc_epoch: {:.4f}" 96 | # .format(val_reses['macro'], 97 | # val_reses['micro'], 98 | # val_reses['acc'] 99 | # ) 100 | # ) 101 | 102 | 103 | # print("\t[Test_Classification] Macro-F1_epoch: {:.4f} Micro-F1_epoch: {:.4f} Test-acc_epoch: {:.4f}" 104 | # .format(test_reses['macro'], 105 | # test_reses['micro'], 106 | # test_reses['acc'] 107 | # ) 108 | # ) 109 | 110 | 111 | 112 | # log(self.makePrint('Test', ep, reses, tstFlag)) 113 | # if (val_reses['macro'] > macroMax): 
114 | # macroMax = test_reses['macro'] 115 | # self.saveModel() 116 | # logger.info(self.makePrint('Test', ep, test_reses, tstFlag)) 117 | # self.saveHistory() 118 | 119 | 120 | # self.saveHistory() 121 | 122 | max_iter = test_accs.index(max(test_accs)) 123 | accs.append(test_accs[max_iter]) 124 | max_iter = test_macro_f1s.index(max(test_macro_f1s)) 125 | macro_f1s.append(test_macro_f1s[max_iter]) 126 | macro_f1s_val.append(val_macro_f1s[max_iter]) 127 | 128 | max_iter = test_micro_f1s.index(max(test_micro_f1s)) 129 | micro_f1s.append(test_micro_f1s[max_iter]) 130 | 131 | best_logits = logits_list[max_iter] 132 | best_proba = softmax(best_logits, dim=1) 133 | auc_score_list.append(roc_auc_score(y_true=test_lbls.detach().cpu().numpy(), 134 | y_score=best_proba.detach().cpu().numpy(), 135 | multi_class='ovr' 136 | )) 137 | 138 | # print("\t[Test_Classification] Macro-F1_one_time: {:.4f} Micro-F1_one_time: {:.4f} Test-AUC_one_time: {:.4f}" 139 | # .format(macro_f1s[-1], 140 | # micro_f1s[-1], 141 | # auc_score_list[-1] 142 | # ) 143 | # ) 144 | 145 | 146 | logger.info("\t[Classification] Macro-F1: {:.4f} var: {:.4f} Micro-F1_mean: {:.4f} var: {:.4f} auc {:.4f}" 147 | .format(np.mean(macro_f1s), 148 | np.std(macro_f1s), 149 | np.mean(micro_f1s), 150 | np.std(micro_f1s), 151 | np.mean(auc_score_list), 152 | np.std(auc_score_list))) 153 | 154 | def prepareModel(self): 155 | self.initial_feature = self.handler.feature_list 156 | self.dim = self.initial_feature.shape[1] 157 | self.train_idx = self.handler.train_idx 158 | self.test_idx = self.handler.test_idx 159 | self.val_idx = self.handler.val_idx 160 | self.label = self.handler.labels 161 | self.nbclasses = self.label.shape[1] 162 | 163 | self.model = HGDM(self.dim).to(device) 164 | self.opt = t.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=0) 165 | 166 | def trainEpoch(self,i): 167 | 168 | trnLoader = index_generator(batch_size=args.batch, indices=self.train_idx[i]) 169 | 170 | epBCELoss, epDFLoss = 0, 0 
171 | self.label = self.handler.labels 172 | steps = trnLoader.num_iterations() 173 | 174 | for i in range(trnLoader.num_iterations()): 175 | train_idx_batch = trnLoader.next() 176 | train_idx_batch.sort() 177 | ancs=t.LongTensor(train_idx_batch) 178 | 179 | nll_loss,diffloss = self.model.cal_loss(ancs, self.label,self.handler.he_adjs,self.initial_feature) 180 | 181 | loss = nll_loss + diffloss 182 | epBCELoss += nll_loss.item() 183 | 184 | epDFLoss += diffloss.item() 185 | 186 | self.opt.zero_grad() 187 | loss.backward() 188 | self.opt.step() 189 | # log('Step %d/%d: bceloss = %.3f, diffloss = %.3f ' % (i, steps, nll_loss,diffloss), save=False, 190 | # oneline=True) 191 | ret = dict() 192 | ret['bceLoss'] = epBCELoss / steps 193 | ret['diffLoss'] = epDFLoss / steps 194 | 195 | 196 | return ret 197 | 198 | 199 | def testEpoch(self,i): 200 | labels = self.handler.labels 201 | test_idx = self.handler.test_idx[i] 202 | with t.no_grad(): 203 | 204 | embeds,scores = self.model.get_allembeds(self.handler.he_adjs,self.initial_feature) 205 | val_acc,val_f1_macro,val_f1_micro,test_acc,test_f1_macro,test_f1_micro,test_logits=evaluate(embeds,scores, args.ratio[i], self.train_idx[i], self.val_idx[i], self.test_idx[i], labels, self.nbclasses) 206 | val_ret = dict() 207 | val_ret['acc'] = val_acc 208 | val_ret['macro'] = val_f1_macro 209 | val_ret['micro'] = val_f1_micro 210 | 211 | test_ret = dict() 212 | test_ret['acc'] = test_acc 213 | test_ret['macro'] = test_f1_macro 214 | test_ret['micro'] = test_f1_micro 215 | test_ret['logits'] = test_logits 216 | return val_ret,test_ret 217 | 218 | 219 | 220 | 221 | def saveHistory(self): 222 | if args.epoch == 0: 223 | return 224 | with open('./History/' + args.save_path + '.his', 'wb') as fs: 225 | pickle.dump(self.metrics, fs) 226 | 227 | 228 | def saveModel(self): 229 | content = { 230 | 'model': self.model, 231 | } 232 | t.save(content, './Models/' + args.save_path + '.mod') 233 | log('Model Saved: %s' % args.save_path) 234 | 235 | 
def loadModel(self): 236 | 237 | ckp = t.load('./Models/' + args.load_model ) 238 | self.model = ckp['model'] 239 | self.opt = t.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=0) 240 | log('Model Loaded') 241 | 242 | 243 | 244 | if __name__ == '__main__': 245 | # os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 246 | logger.saveDefault = True 247 | log('Start') 248 | handler = DataHandler() 249 | handler.LoadData() 250 | log('Load Data') 251 | 252 | coach = Coach(handler) 253 | coach.run() 254 | # coach.test() -------------------------------------------------------------------------------- /DiffGraph_NC/params.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def ParseArgs(): 4 | parser = argparse.ArgumentParser(description='Model Params') 5 | parser.add_argument('--lr', default=3e-3, type=float, help='learning rate')#tune source:1e-3 6 | parser.add_argument('--difflr', default=1e-3, type=float, help='learning rate') 7 | parser.add_argument('--batch', default=8, type=int, help='batch size') 8 | parser.add_argument('--patience', type=int, default=20) 9 | parser.add_argument('--ratio', type=int, default=[20, 40, 60]) 10 | #retain params 11 | parser.add_argument('--threshold', default=0.5, type=float, help='threshold to filter users') 12 | parser.add_argument('--data', default='aminer', type=str, help='name of dataset') 13 | parser.add_argument('--save_path', default='tem', help='file name to save model and training record') 14 | parser.add_argument('--task_name', default='retain7', type=str, help='specific task') 15 | parser.add_argument('--eval_hdfs', default='', type=str, help='eval hdfs path to save the posterior result') 16 | parser.add_argument('--input_hdfs1', default='', type=str, help='dataset_input') 17 | parser.add_argument('--input_hdfs2', default='', type=str, help='dataset_input') 18 | parser.add_argument('--output_model1', default='', type=str, help='output_model1') 19 | 
parser.add_argument('--tb_log_dir', default='', type=str, help='tb_log_dir') 20 | 21 | 22 | #ssl setting 23 | # parser.add_argument('--ssl_reg', default=1, type=float, help='contrastive regularizer') 24 | # parser.add_argument('--temp', default=1, type=float, help='temperature for ssl') 25 | # parser.add_argument('--eps', default=0.2, type=float, help='scaled weight as reward') 26 | 27 | #gat setting 28 | parser.add_argument('--head', default=4, type=int, help='number of heads in attention') 29 | 30 | 31 | #gcn_setting 32 | parser.add_argument('--epoch', default=100, type=int, help='number of epochs') 33 | parser.add_argument('--decay', default=0.96, type=float, help='weight decay rate') 34 | parser.add_argument('--decay_step', type=int, default=1) 35 | parser.add_argument('--init', default=False, type=bool, help='whether initial embedding') 36 | parser.add_argument('--latdim', default=64, type=int, help='embedding size') 37 | parser.add_argument('--gcn_layer', default=3, type=int, help='number of gcn layers')# 38 | parser.add_argument('--uugcn_layer', default=3, type=int, help='number of gcn layers') 39 | parser.add_argument('--load_model', default=None, help='model name to load') 40 | parser.add_argument('--topk', default=20, type=int, help='K of top K') 41 | parser.add_argument('--dropRate', default=0.5, type=float, help='rate for dropout layer') 42 | parser.add_argument('--gpu', default='2', type=str, help='indicates which gpu to use') 43 | 44 | 45 | #diffusion setting 46 | parser.add_argument('--dims', type=str, default='[64]') 47 | parser.add_argument('--d_emb_size', type=int, default=8) 48 | parser.add_argument('--norm', type=bool, default=True) 49 | parser.add_argument('--steps', type=int, default=200)#tune 5 20 50 100 150 50 | parser.add_argument('--noise_scale', type=float, default=1e-5) #tune 51 | parser.add_argument('--noise_min', type=float, default=0.0001) 52 | parser.add_argument('--noise_max', type=float, default=0.001) 53 | 
parser.add_argument('--sampling_steps', type=int, default=0) 54 | 55 | 56 | 57 | return parser.parse_args() 58 | args = ParseArgs() 59 | 60 | 61 | #tmall :lr 1e-3 batch:4096 2048 layer:2 reg:3e-2 steps:200 noise-scale:1e-4 62 | 63 | #retail_rocket :lr 1e-3 batch:4096 2048 layer:2 reg:3e-2 steps:150 noise-scale:1e-4 -------------------------------------------------------------------------------- /DiffGraph_NC/script.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0,1,2 python main.py -------------------------------------------------------------------------------- /HDL.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HKUDS/DiffGraph/e00c946bcf70f74badd9419034d1c0b2532936f7/HDL.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WSDM'2025: DiffGraph: Heterogeneous Graph Diffusion Model 2 | ![poster](https://github.com/Zongwei9888/Experiment_Images/blob/4ea67a9d8c5f02e78c1b5a77855c16710c6bb819/HGDM_images/image.jpeg) 3 | >Framework 4 | ![model](./HDL.jpg) 5 | ## Abstract 6 | Recent advances in Graph Neural Networks (GNNs) have signifi- 7 | cantly improved modeling of graph-structured data. However, tradi- 8 | tional GNNs face challenges in dealing with complex heterogeneous 9 | structures commonly found in real-world applications. While re- 10 | cent studies have addressed dependencies among heterogeneous 11 | interactions, two major challenges persist: 1) Noisy data within 12 | heterogeneous structures can impair the learning of embeddings 13 | and negatively affect graph learning tasks; 2) Existing methods fail 14 | to capture complex semantic transitions among heterogeneous re- 15 | lations, impacting downstream predictions. 
To address these issues, 16 | we introduce a novel framework, Heterogeneous Graph Diffusion 17 | Model (DiffGraph), which incorporates a cross-view denoising strat- 18 | egy. This strategy effectively transforms auxiliary heterogeneous 19 | data into the target semantic space to distill task-relevant infor- 20 | mation. Our approach features a latent heterogeneous graph dif- 21 | fusion mechanism, which manages noise through an innovative 22 | forward and backward diffusion process. This method simulta- 23 | neously achieves heterogeneous graph denoising and cross-type 24 | transition, and also eases the challenges of graph generation by 25 | leveraging its latent-space diffusion process. We validated our pro- 26 | posed framework through comprehensive experiments on both 27 | public and industrial datasets. The evaluation results demonstrate 28 | that DiffGraph outperforms existing methods in both link predic- 29 | tion and node classification tasks, showcasing its robustness and 30 | efficiency in processing heterogeneous graphs. 31 | ## Environment 32 | - python=3.8 33 | - torch=1.12.1 34 | - numpy=1.23.1 35 | - scipy=1.9.1 36 | - dgl=1.0.2+cu113 37 | ## Code Structures 38 | #### The folder DiffGraph-Rec presents the code and datasets for link prediction (Recommendation), while DiffGraph_NC contains the code and datasets for the node classification task. 39 | . 40 | ├──DiffGraph-Rec 41 | ├── DataHandler.py 42 | ├── main.py 43 | ├── params.py 44 | ├── Utils 45 | │ ├── TimeLogger.py 46 | │ ├── Utils.py 47 | ├── Model.py 48 | ├──DiffGraph_NC 49 | ├──DataHandler.py 50 | ├── main.py 51 | ├── params.py 52 | ├── Utils 53 | │ ├── TimeLogger.py 54 | │ ├── Utils.py 55 | ├── Model.py 56 | └── README 57 | ## Datasets 58 | We evaluate HGDM on both the link prediction 59 | and node classification tasks. For link prediction, we utilize three 60 | publicly available datasets collected from real-world commercial 61 | platforms: Tmall, Retailrocket, and IJCAI.
For the node classifi- 62 | cation task, we use two public datasets, DBLP and AMiner, which 63 | focus on publications and academic social ties, as well as an Indus- 64 | try dataset for user classification collected from a popular game 65 | platform. Statistics of these datasets are in Table 1. Below are the 66 | detailed descriptions of the experimental datasets. 67 | 68 | | Dataset | User \# | Item \# | Link \# | Interaction Types | 69 | |---------------|---------|---------|------------|---------------------------------| 70 | | Tmall | 31882 | 31232 | 1,451,29 | View, Favorite, Cart, Purchase | 71 | | Retail Rocket | 2174 | 30113 | 97,381 | View, Cart, Transaction | 72 | | IJCAI | 17435 | 35920 | 799,368 | View, Favorite, Cart, Purchase | 73 | | Industry | 1M | 361 | 23,890,445 | Purchase, Friend, Complete Task | 74 | 75 | | | Node | Metapath | | Node | Metapath | 76 | |---|---|---|---|---|---| 77 | | DBLP | Author:4057 | APA | AMiner | paper:6564 | PAP | 78 | | | Paper:14328 | APCPA | | author:13329 | PRP | 79 | | | Conference:20 | APTPA | | Reference:35890 | POS | 80 | | | Term:7723 | | | | 81 | 82 | 83 | 84 | --------------------------------------------------------------------------------