├── README.md
├── aug.py
├── dataset
│   ├── BlogCatalog.mat
│   ├── Flickr.mat
│   ├── citeseer.mat
│   ├── cora.mat
│   ├── make_folder
│   └── pubmed.mat
├── diff_citeseer.npy.7z
├── model.py
├── run.py
└── utils.py

/README.md:
--------------------------------------------------------------------------------
Code for "Reconstruction Enhanced Multi-View Contrastive Learning for Anomaly Detection on Attributed Networks".
Thank you for reading. You can contact me at zhang6jq@gmail.com.
--------------------------------------------------------------------------------
/aug.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.sparse as sp
import scipy.sparse.linalg  # ensures sp.linalg is available


def gdc(A: sp.csr_matrix, alpha: float, eps: float):
    """Graph diffusion convolution: personalized-PageRank diffusion of A.

    alpha is the teleport (restart) probability; eps is the sparsification
    threshold applied to the diffusion matrix.
    """
    N = A.shape[0]
    # Add self-loops and symmetrically normalize.
    A_loop = sp.eye(N) + A
    D_loop_vec = A_loop.sum(0).A1
    D_loop_vec_invsqrt = 1 / np.sqrt(D_loop_vec)
    D_loop_invsqrt = sp.diags(D_loop_vec_invsqrt)
    T_sym = D_loop_invsqrt @ A_loop @ D_loop_invsqrt
    # Closed-form PPR diffusion: S = alpha * (I - (1 - alpha) * T_sym)^-1.
    S = alpha * sp.linalg.inv((sp.eye(N) - (1 - alpha) * T_sym).tocsc())
    # Threshold small entries, then column-normalize.
    S_tilde = S.multiply(S >= eps)
    D_tilde_vec = S_tilde.sum(0).A1
    T_S = S_tilde / D_tilde_vec
    return T_S
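

# Example usage (a sketch mirroring the commented-out snippet in run.py; the
# dataset name and output path are illustrative):
#
#   from utils import load_mat
#   adj, *_ = load_mat('citeseer')
#   diff = gdc(adj, alpha=0.01, eps=0.0001)
#   np.save('diff_citeseer.npy', diff)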
--------------------------------------------------------------------------------
/dataset/BlogCatalog.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/dataset/BlogCatalog.mat
--------------------------------------------------------------------------------
/dataset/Flickr.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/dataset/Flickr.mat
--------------------------------------------------------------------------------
/dataset/citeseer.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/dataset/citeseer.mat
--------------------------------------------------------------------------------
/dataset/cora.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/dataset/cora.mat
--------------------------------------------------------------------------------
/dataset/make_folder:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/dataset/pubmed.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/dataset/pubmed.mat
--------------------------------------------------------------------------------
/diff_citeseer.npy.7z:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Zjer12/Sub/44570f655483e166a1044914581576a9553db002/diff_citeseer.npy.7z
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class GCN(nn.Module):
    def __init__(self, in_ft, out_ft, act, dropout, bias=True):
        super(GCN, self).__init__()
        self.fc = nn.Linear(in_ft, out_ft, bias=False)
        self.act = nn.PReLU() if act == 'prelu' else act
        # Note: dropout is accepted for interface compatibility but is not
        # applied inside this layer.

        if bias:
            self.bias = nn.Parameter(torch.FloatTensor(out_ft))
            self.bias.data.fill_(0.0)
        else:
            self.register_parameter('bias', None)

        for m in self.modules():
            self.weights_init(m)

    def weights_init(self, m):
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0.0)

    def forward(self, seq, adj, sparse=False):
        seq_fts = self.fc(seq)
        if sparse:
            out = torch.unsqueeze(torch.spmm(adj, torch.squeeze(seq_fts, 0)), 0)
        else:
            out = torch.bmm(adj, seq_fts)
        if self.bias is not None:
            out += self.bias

        return self.act(out)


class AvgReadout(nn.Module):
    def __init__(self):
        super(AvgReadout, self).__init__()

    def forward(self, seq):
        return torch.mean(seq, 1)


class MaxReadout(nn.Module):
    def __init__(self):
        super(MaxReadout, self).__init__()

    def forward(self, seq):
        return torch.max(seq, 1).values


class MinReadout(nn.Module):
    def __init__(self):
        super(MinReadout, self).__init__()

    def forward(self, seq):
        return torch.min(seq, 1).values


class WSReadout(nn.Module):
    def __init__(self):
        super(WSReadout, self).__init__()

    def forward(self, seq, query):
        query = query.permute(0, 2, 1)
        sim = torch.matmul(seq, query)
        sim = F.softmax(sim, dim=1)
        sim = sim.repeat(1, 1, 64)  # assumes embedding_dim == 64
        out = torch.mul(seq, sim)
        out = torch.sum(out, 1)
        return out


class Discriminator(nn.Module):
    def __init__(self, n_h, negsamp_round):
        super(Discriminator, self).__init__()
        self.f_k = nn.Bilinear(n_h, n_h, 1)

        for m in self.modules():
            self.weights_init(m)

        self.negsamp_round = negsamp_round

    def weights_init(self, m):
        if isinstance(m, nn.Bilinear):
            torch.nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0.0)

    def forward(self, c, h_pl):
        scs = []
        # positive pairs: each node embedding with its own subgraph summary
        scs.append(self.f_k(h_pl, c))

        # negative pairs: shift the stack of summaries so each node embedding
        # is scored against another subgraph's summary
        c_mi = c
        for _ in range(self.negsamp_round):
            c_mi = torch.cat((c_mi[-2:-1, :], c_mi[:-1, :]), 0)
            scs.append(self.f_k(h_pl, c_mi))

        logits = torch.cat(tuple(scs))

        return logits
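

# Illustration of the shift used for negative sampling (values illustrative):
# for a batch of four summaries [c0, c1, c2, c3], one round yields
# [c2, c0, c1, c2]:
#
#   c = torch.arange(4.).unsqueeze(1)                  # [[0.], [1.], [2.], [3.]]
#   torch.cat((c[-2:-1, :], c[:-1, :]), 0).squeeze(1)  # tensor([2., 0., 1., 2.])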


class Model(nn.Module):
    def __init__(self, n_in, n_h, activation, negsamp_round, readout, dropout):
        super(Model, self).__init__()
        self.read_mode = readout
        self.gcn = GCN(n_in, n_h, activation, dropout)
        self.hidden_size = 128
        # Decoder: reconstruct the target node's raw attributes from the
        # concatenated embeddings of its subgraph neighbours. The input width
        # n_h * 3 assumes subgraph_size = 4 (three neighbour rows).
        self.network = nn.Sequential(
            nn.Linear(n_h * 3, self.hidden_size),
            nn.PReLU(),
            nn.Linear(self.hidden_size, self.hidden_size),
            nn.PReLU(),
            nn.Linear(self.hidden_size, n_in),
            nn.PReLU()
        )
        if readout == 'max':
            self.read = MaxReadout()
        elif readout == 'min':
            self.read = MinReadout()
        elif readout == 'avg':
            self.read = AvgReadout()
        elif readout == 'weighted_sum':
            self.read = WSReadout()

        self.disc = Discriminator(n_h, negsamp_round)

    def forward(self, seq1, adj, seq2, sparse=False):
        h_1 = self.gcn(seq1, adj, sparse)
        h_row = self.gcn(seq2, adj, sparse)

        # Reconstruct the target node from its neighbour embeddings only
        # (the last two rows are the masked row and the target itself).
        sub_size = h_row.shape[1]
        batch = h_row.shape[0]
        neighbours = h_row[:, :sub_size - 2, :]
        input_nei = neighbours.reshape(batch, -1)
        now = self.network(input_nei)

        if self.read_mode != 'weighted_sum':
            c = self.read(h_1[:, :-1, :])
            h_mv = h_1[:, -1, :]
        else:
            h_mv = h_1[:, -1, :]
            c = self.read(h_1[:, :-1, :], h_1[:, -2:-1, :])

        ret = self.disc(c, h_mv)

        return now, ret
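

# Shape check for the decoder input (sizes illustrative, using the default
# subgraph_size = 4, i.e. five rows after padding, and embedding_dim = 64):
#
#   h_row = torch.randn(300, 5, 64)
#   h_row[:, :5 - 2, :].reshape(300, -1).shape   # torch.Size([300, 192]) == n_h * 3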
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from aug import *
from model import *
from utils import *
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import MinMaxScaler

import random
import os
import dgl

import argparse
from tqdm import tqdm

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Set arguments
parser = argparse.ArgumentParser(description='Sub')
parser.add_argument('--dataset', type=str, default='citeseer')  # 'BlogCatalog' 'Flickr' 'cora' 'citeseer' 'pubmed'
parser.add_argument('--lr', type=float)
parser.add_argument('--weight_decay', type=float, default=0.0)
parser.add_argument('--seed', type=int, default=2)
parser.add_argument('--embedding_dim', type=int, default=64)
parser.add_argument('--num_epoch', type=int)
parser.add_argument('--drop_prob', type=float, default=0.0)
parser.add_argument('--batch_size', type=int, default=300)
parser.add_argument('--subgraph_size', type=int, default=4)
parser.add_argument('--readout', type=str, default='avg')  # max min avg weighted_sum
parser.add_argument('--auc_test_rounds', type=int, default=300)
parser.add_argument('--negsamp_ratio', type=int, default=1)
parser.add_argument('--dropout', type=float, default=0.5)

args = parser.parse_args()

if args.lr is None:
    if args.dataset in ['cora', 'citeseer', 'pubmed', 'Flickr']:
        args.lr = 1e-3
    elif args.dataset == 'BlogCatalog':
        args.lr = 3e-3

if args.num_epoch is None:
    if args.dataset in ['cora', 'citeseer', 'pubmed']:
        args.num_epoch = 100
    elif args.dataset in ['BlogCatalog', 'Flickr']:
        args.num_epoch = 400

batch_size = args.batch_size
subgraph_size = args.subgraph_size

print('Dataset: ', args.dataset)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Set random seed
dgl.random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
random.seed(args.seed)
os.environ['PYTHONHASHSEED'] = str(args.seed)
os.environ['OMP_NUM_THREADS'] = '1'
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Load and preprocess data
adj, features, labels, idx_train, idx_val, \
idx_test, ano_label, str_ano_label, attr_ano_label = load_mat(args.dataset)

# If the diffusion matrix has not been precomputed, generate it with:
#   diff = gdc(adj, alpha=0.01, eps=0.0001)
#   np.save('diff_citeseer.npy', diff)   # replace 'citeseer' with the dataset name
# If it has been precomputed (the repository ships diff_citeseer.npy.7z; extract it first):
diff = np.load('./diff_citeseer.npy', allow_pickle=True)

b_adj = sp.csr_matrix(diff)
b_adj = (b_adj + sp.eye(b_adj.shape[0])).todense()
dgl_graph = adj_to_dgl_graph(adj)
raw_feature = features.todense()
features, _ = preprocess_features(features)
nb_nodes = features.shape[0]
ft_size = features.shape[1]
adj = normalize_adj(adj)
adj = (adj + sp.eye(adj.shape[0])).todense()

features = torch.FloatTensor(features[np.newaxis])
raw_feature = torch.FloatTensor(raw_feature[np.newaxis])

adj = torch.FloatTensor(adj[np.newaxis])
b_adj = torch.FloatTensor(b_adj[np.newaxis])

# Initialize model and optimiser
model = Model(ft_size, args.embedding_dim, 'prelu', args.negsamp_ratio, args.readout, args.dropout)
optimiser = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

if torch.cuda.is_available():
    print('Using CUDA')
    model.to(device)
    features = features.to(device)
    raw_feature = raw_feature.to(device)
    adj = adj.to(device)
    b_adj = b_adj.to(device)

if torch.cuda.is_available():
    b_xent = nn.BCEWithLogitsLoss(reduction='none', pos_weight=torch.tensor([args.negsamp_ratio]).to(device))
else:
    b_xent = nn.BCEWithLogitsLoss(reduction='none', pos_weight=torch.tensor([args.negsamp_ratio]))

cnt_wait = 0
best = 1e9
best_t = 0
batch_num = nb_nodes // batch_size + 1

mse_loss = nn.MSELoss(reduction='mean')

# Train model
with tqdm(total=args.num_epoch) as pbar:
    pbar.set_description('Training')
    for epoch in range(args.num_epoch):

        model.train()

        all_idx = list(range(nb_nodes))
        random.shuffle(all_idx)
        total_loss = 0.

        # Resample a fixed-size RWR subgraph around every node each epoch.
        subgraphs = generate_rwr_subgraph(dgl_graph, subgraph_size)

        for batch_idx in range(batch_num):

            optimiser.zero_grad()

            is_final_batch = (batch_idx == (batch_num - 1))

            if not is_final_batch:
                idx = all_idx[batch_idx * batch_size: (batch_idx + 1) * batch_size]
            else:
                idx = all_idx[batch_idx * batch_size:]

            cur_batch_size = len(idx)

            # Labels: ones for the positive pairs, zeros for the negatives.
            lbl = torch.unsqueeze(
                torch.cat((torch.ones(cur_batch_size), torch.zeros(cur_batch_size * args.negsamp_ratio))), 1)

            ba = []
            bf = []
            br = []
            raw = []

            added_adj_zero_row = torch.zeros((cur_batch_size, 1, subgraph_size))
            added_adj_zero_col = torch.zeros((cur_batch_size, subgraph_size + 1, 1))
            added_adj_zero_col[:, -1, :] = 1.
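            # Padding scheme: the adjacency gains one zero row plus a column
            # that self-connects the appended last slot, and a zero feature
            # row masks the target's attributes at its original position. The
            # target's raw features survive only in the appended last row,
            # whose embedding is contrasted against the masked-subgraph readout.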
            added_feat_zero_row = torch.zeros((cur_batch_size, 1, ft_size))

            if torch.cuda.is_available():
                lbl = lbl.to(device)
                added_adj_zero_row = added_adj_zero_row.to(device)
                added_adj_zero_col = added_adj_zero_col.to(device)
                added_feat_zero_row = added_feat_zero_row.to(device)

            # Slice out each node's subgraph from both views.
            for i in idx:
                cur_adj = adj[:, subgraphs[i], :][:, :, subgraphs[i]]
                cur_adj_r = b_adj[:, subgraphs[i], :][:, :, subgraphs[i]]
                cur_feat = features[:, subgraphs[i], :]
                raw_f = raw_feature[:, subgraphs[i], :]
                ba.append(cur_adj)
                bf.append(cur_feat)
                raw.append(raw_f)
                br.append(cur_adj_r)

            ba = torch.cat(ba)
            br = torch.cat(br)
            ba = torch.cat((ba, added_adj_zero_row), dim=1)
            ba = torch.cat((ba, added_adj_zero_col), dim=2)

            br = torch.cat((br, added_adj_zero_row), dim=1)
            br = torch.cat((br, added_adj_zero_col), dim=2)

            bf = torch.cat(bf)
            bf = torch.cat((bf[:, :-1, :], added_feat_zero_row, bf[:, -1:, :]), dim=1)

            raw = torch.cat(raw)
            raw = torch.cat((raw[:, :-1, :], added_feat_zero_row, raw[:, -1:, :]), dim=1)

            # Two views: the normalized adjacency (ba) and the diffusion matrix (br).
            now1, logits = model(bf, ba, raw)
            now2, logits2 = model(bf, br, raw)
            batch = now1.shape[0]

            # Reconstruction loss against the target node's raw attributes.
            loss_re = 0.5 * (mse_loss(now1, raw[:, -1, :]) + mse_loss(now2, raw[:, -1, :]))

            # Contrastive (BCE) losses for both views.
            loss_all1 = b_xent(logits, lbl)
            loss_all2 = b_xent(logits2, lbl)
            loss_bce = (loss_all1 + loss_all2) / 2

            # Cross-view consistency on the positive-pair logits.
            h_1 = F.normalize(logits[:batch, :], dim=1, p=2)
            h_2 = F.normalize(logits2[:batch, :], dim=1, p=2)
            coloss2 = 2 - 2 * (h_1 * h_2).sum(dim=-1).mean()

            loss = torch.mean(loss_bce) + coloss2 + 0.6 * loss_re

            loss.backward()
            optimiser.step()

            loss = loss.detach().cpu().numpy()
            total_loss += loss

        mean_loss = total_loss / batch_num

        if mean_loss < best:
            best = mean_loss
            best_t = epoch
            cnt_wait = 0
            torch.save(model.state_dict(), 'best_model.pkl')
        else:
            cnt_wait += 1

        pbar.update(1)

# Test model
print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_model.pkl'))
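
# Scoring: each test round resamples subgraphs; per batch, the contrastive
# score is the (sigmoid) negative-pair logit minus the positive-pair logit,
# the reconstruction score is the distance between the decoded and the raw
# target attributes, both are min-max normalized per batch, and they are
# combined as
#
#   ano_scores = ano_score_co + 0.6 * score_re
#
# Final node scores average over auc_test_rounds rounds.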
multi_round_ano_score = np.zeros((args.auc_test_rounds, nb_nodes))

with tqdm(total=args.auc_test_rounds) as pbar_test:
    pbar_test.set_description('Testing')
    for round in range(args.auc_test_rounds):

        all_idx = list(range(nb_nodes))
        random.shuffle(all_idx)

        subgraphs = generate_rwr_subgraph(dgl_graph, subgraph_size)

        for batch_idx in range(batch_num):

            is_final_batch = (batch_idx == (batch_num - 1))

            if not is_final_batch:
                idx = all_idx[batch_idx * batch_size: (batch_idx + 1) * batch_size]
            else:
                idx = all_idx[batch_idx * batch_size:]

            cur_batch_size = len(idx)

            ba = []
            bf = []
            br = []
            raw = []

            # Same padding/anonymization as in training.
            added_adj_zero_row = torch.zeros((cur_batch_size, 1, subgraph_size))
            added_adj_zero_col = torch.zeros((cur_batch_size, subgraph_size + 1, 1))
            added_adj_zero_col[:, -1, :] = 1.
            added_feat_zero_row = torch.zeros((cur_batch_size, 1, ft_size))

            if torch.cuda.is_available():
                added_adj_zero_row = added_adj_zero_row.to(device)
                added_adj_zero_col = added_adj_zero_col.to(device)
                added_feat_zero_row = added_feat_zero_row.to(device)

            for i in idx:
                cur_adj = adj[:, subgraphs[i], :][:, :, subgraphs[i]]
                cur_adj2 = b_adj[:, subgraphs[i], :][:, :, subgraphs[i]]
                cur_feat = features[:, subgraphs[i], :]
                raw_f = raw_feature[:, subgraphs[i], :]
                ba.append(cur_adj)
                br.append(cur_adj2)
                bf.append(cur_feat)
                raw.append(raw_f)

            ba = torch.cat(ba)
            ba = torch.cat((ba, added_adj_zero_row), dim=1)
            ba = torch.cat((ba, added_adj_zero_col), dim=2)
            br = torch.cat(br)
            br = torch.cat((br, added_adj_zero_row), dim=1)
            br = torch.cat((br, added_adj_zero_col), dim=2)

            bf = torch.cat(bf)
            bf = torch.cat((bf[:, :-1, :], added_feat_zero_row, bf[:, -1:, :]), dim=1)
            raw = torch.cat(raw)
            raw = torch.cat((raw[:, :-1, :], added_feat_zero_row, raw[:, -1:, :]), dim=1)

            with torch.no_grad():
                now1, logits = model(bf, ba, raw)
                now2, logits2 = model(bf, br, raw)

            logits = torch.sigmoid(torch.squeeze(logits))
            logits2 = torch.sigmoid(torch.squeeze(logits2))

            pdist = nn.PairwiseDistance(p=2)
            scaler1 = MinMaxScaler()
            scaler2 = MinMaxScaler()

            # Contrastive score: negative-pair minus positive-pair confidence.
            ano_score1 = - (logits[:cur_batch_size] - logits[cur_batch_size:]).cpu().numpy()
            ano_score2 = - (logits2[:cur_batch_size] - logits2[cur_batch_size:]).cpu().numpy()
            score2 = (ano_score1 + ano_score2) / 2

            # Reconstruction score: distance between decoded and raw attributes.
            score1 = (pdist(now1, raw[:, -1, :]) + pdist(now2, raw[:, -1, :])) / 2
            score1 = score1.cpu().numpy()

            ano_score_co = scaler1.fit_transform(score2.reshape(-1, 1)).reshape(-1)
            score_re = scaler2.fit_transform(score1.reshape(-1, 1)).reshape(-1)
            ano_scores = ano_score_co + 0.6 * score_re
            multi_round_ano_score[round, idx] = ano_scores

        pbar_test.update(1)

ano_score_final = np.mean(multi_round_ano_score, axis=0)
auc = roc_auc_score(ano_label, ano_score_final)
print('AUC:{:.4f}'.format(auc))
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np
import networkx as nx
import scipy.sparse as sp
import torch
import scipy.io as sio
import random
import dgl


def sparse_to_tuple(sparse_mx, insert_batch=False):
    """Convert sparse matrix to tuple representation.

    Set insert_batch=True to insert a batch dimension.
    """
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        if insert_batch:
            coords = np.vstack((np.zeros(mx.row.shape[0]), mx.row, mx.col)).transpose()
            values = mx.data
            shape = (1,) + mx.shape
        else:
            coords = np.vstack((mx.row, mx.col)).transpose()
            values = mx.data
            shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx


def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation."""
    rowsum = np.array(features.sum(1))
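    # Rows that sum to zero would yield inf under the reciprocal below; zero
    # them out so isolated nodes keep all-zero features instead of NaNs.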
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features.todense(), sparse_to_tuple(features)


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()


def dense_to_one_hot(labels_dense, num_classes):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot


def load_mat(dataset, train_rate=0.3, val_rate=0.1):
    """Load a .mat dataset and split node indices into train/val/test."""
    data = sio.loadmat("./dataset/{}.mat".format(dataset))
    label = data['Label'] if ('Label' in data) else data['gnd']
    attr = data['Attributes'] if ('Attributes' in data) else data['X']
    network = data['Network'] if ('Network' in data) else data['A']
    adj = sp.csr_matrix(network)
    feat = sp.lil_matrix(attr)
    labels = np.squeeze(np.array(data['Class'], dtype=np.int64) - 1)
    num_classes = np.max(labels) + 1
    labels = dense_to_one_hot(labels, num_classes)
    ano_labels = np.squeeze(np.array(label))

    if 'str_anomaly_label' in data:
        str_ano_labels = np.squeeze(np.array(data['str_anomaly_label']))
        attr_ano_labels = np.squeeze(np.array(data['attr_anomaly_label']))
    else:
        str_ano_labels = None
        attr_ano_labels = None

    num_node = adj.shape[0]
    num_train = int(num_node * train_rate)
    num_val = int(num_node * val_rate)
    all_idx = list(range(num_node))
    random.shuffle(all_idx)
    idx_train = all_idx[:num_train]
    idx_val = all_idx[num_train: num_train + num_val]
    idx_test = all_idx[num_train + num_val:]

    return adj, feat, labels, idx_train, idx_val, idx_test, ano_labels, str_ano_labels, attr_ano_labels


def adj_to_dgl_graph(adj):
    """Convert adjacency matrix to DGL format."""
    # Note: these calls target older releases (networkx < 3.0, DGL < 0.5).
    nx_graph = nx.from_scipy_sparse_matrix(adj)
    dgl_graph = dgl.DGLGraph(nx_graph)
    return dgl_graph


def generate_rwr_subgraph(dgl_graph, subgraph_size):
    """Sample a fixed-size subgraph around every node via random walk with restart."""
    all_idx = list(range(dgl_graph.number_of_nodes()))
    reduced_size = subgraph_size - 1
    traces = dgl.contrib.sampling.random_walk_with_restart(dgl_graph, all_idx, restart_prob=1, max_nodes_per_seed=subgraph_size * 2)
    subv = []
    for i, trace in enumerate(traces):
        subv.append(torch.unique(torch.cat(trace), sorted=False).tolist())
        retry_time = 0
        # Re-walk with a lower restart probability until enough distinct nodes
        # are collected; after too many retries, pad by repetition.
        while len(subv[i]) < reduced_size:
            cur_trace = dgl.contrib.sampling.random_walk_with_restart(dgl_graph, [i], restart_prob=0.9, max_nodes_per_seed=subgraph_size * 4)
            subv[i] = torch.unique(torch.cat(cur_trace[0]), sorted=False).tolist()
            retry_time += 1
            if (len(subv[i]) <= reduced_size) and (retry_time > 10):
                subv[i] = (subv[i] * reduced_size)
        subv[i] = subv[i][:reduced_size]
        # The target node itself is always appended last.
        subv[i].append(i)
    return subv
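

# Usage sketch (illustrative; mirrors run.py, assuming an adjacency matrix
# `adj` as returned by load_mat):
#
#   dgl_graph = adj_to_dgl_graph(adj)
#   subgraphs = generate_rwr_subgraph(dgl_graph, subgraph_size=4)
#   # Each entry holds subgraph_size node ids; the target node is last:
#   assert all(sub[-1] == i for i, sub in enumerate(subgraphs))
--------------------------------------------------------------------------------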