├── README.md
├── add_dp_noise
│   ├── main.py
│   ├── model
│   │   ├── ADtransformer.py
│   │   ├── Client.py
│   │   └── Server.py
│   ├── par.py
│   └── utils
│       ├── Update.py
│       ├── gaussian_noise.py
│       └── load_data.py
├── data
│   └── arrhythmia_normalization.csv
├── main.py
├── model
│   ├── ADtransformer.py
│   ├── Client.py
│   └── Server.py
├── par.py
└── util
    └── load_data.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Anomaly Detection via Federated Transformer
## Dataset
We conduct the experiments on four datasets: NSL-KDD, Spambase, Shuttle, and Arrhythmia. The details of the datasets are as follows:
![](https://codimd.xixiaoyao.cn/uploads/upload_80e50175c49182f9935ce969de97356e.png)
To reflect real-world settings, we set the number of labeled anomalies to 30 in NSL-KDD, Spambase, and Shuttle, and to 15 in Arrhythmia.
## Framework
We propose the following framework:
![](https://codimd.xixiaoyao.cn/uploads/upload_45e7974b68f16c42a1e92e8702a48260.png)
## How to run
To run the experiment on NSL-KDD, use the following command line:
```bash
python main.py -d nslkdd_normalization.csv -c 3 -f 1 -d_data 122 -heads 2 -r 0.0005 -e 1000 -d_feature 64
```
- `-d` sets the dataset you want to use
- `-c` sets the number of clients
- `-f` sets the fraction of clients selected per round
- `-d_data` sets the dimension of the original data
- `-d_feature` sets the dimension of the features uploaded by the clients
- `-heads` sets the number of heads in the multi-head attention blocks
- `-r` sets the ratio of labeled anomalies in the training dataset
- `-e` sets the number of training epochs
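A run on Arrhythmia would look similar. This is only a sketch: `-d_data 274` assumes the normalized Arrhythmia file keeps the 274 attributes of the common ODDS version, and the `-r 0.05` value is a placeholder that should be tuned so that roughly 15 anomalies end up labeled; adjust both to your copy of the data:
```bash
python main.py -d arrhythmia_normalization.csv -c 3 -f 1 -d_data 274 -heads 2 -r 0.05 -e 1000 -d_feature 64
```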
--------------------------------------------------------------------------------
/add_dp_noise/main.py:
--------------------------------------------------------------------------------
from par import Parser
from utils import load_data, gaussian_noise, Update
from model import Server, Client
import torch
import json
import random
from sklearn.metrics import average_precision_score, roc_auc_score
from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise


# evaluation
def aucPerformance(labels, mse):
    roc_auc = roc_auc_score(labels, mse)
    ap = average_precision_score(labels, mse)
    print("AUC-ROC: %.4f, AUC-PR: %.4f" % (roc_auc, ap))
    pr_performance.append(ap)
    roc_performance.append(roc_auc)
    return roc_auc, ap


if __name__ == '__main__':
    pr_performance = []
    roc_performance = []
    device = torch.device("cuda")
    arg = Parser().parse()
    loader = load_data.Loader(arg)
    client_data = loader.run()
    server = Server.Server(arg, device)
    client = []
    for i in range(arg.client):
        client.append(Client.Client(arg, device))

    for epoch in range(arg.epoch):
        gradients = []
        gradients_server = []
        for i in range(arg.client):
            embedding_features = client[i].train_client(client_data[i])
            embedding_features = torch.stack(embedding_features)
            # Differentially private variant, kept for reference:
            # sigma = compute_noise(1, 0.01, 4.0, 1 * 150, 1e-3, 1e-5)
            #     # (n, batch_size, target_epsilon, epochs, delta, noise_lbd)
            # embedding_features += gaussian_noise.gaussian_noise(
            #     embedding_features.shape, 32, sigma, device=device) / len(embedding_features)
            scores = server.train_server(embedding_features)
            # in our setting, every client holds the same amount of local data
            gradient, gradient_server = Update.get_gradients(arg, client, server, scores, i)
            gradients.append(gradient)
            gradients_server.append(gradient_server)

        global_grads = Update.agg_grads(arg, gradients)
        server_grads = Update.agg_grads(arg, gradients_server)
        for i in range(arg.client):
            client[i].model_update(global_grads)
        server.model_update(server_grads)
        print(epoch)

        # test on a randomly chosen client
        rand_client_num = random.randint(0, arg.client - 1)
        embedding_features = client[rand_client_num].train_client(loader.test[rand_client_num])
        scores = client[rand_client_num].get_test_labels_scores(server.train_server(embedding_features))
        aucPerformance(loader.test_label, scores.cpu().detach().numpy())
        if epoch % 10 == 0:
            with open('pr.txt', 'w') as f:
                json.dump(pr_performance, f)
            with open('roc.txt', 'w') as f:
                json.dump(roc_performance, f)
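The differential-privacy path above is left commented out. A minimal sketch of re-enabling it, with the sensitivity bound 32 and the `compute_noise` budget arguments copied verbatim from the commented lines (they are illustrative, not independently tuned):
```python
# Inside the per-client loop, after torch.stack(embedding_features):
# (n, batch_size, target_epsilon, epochs, delta, noise_lbd)
sigma = compute_noise(1, 0.01, 4.0, 1 * 150, 1e-3, 1e-5)
embedding_features = embedding_features + gaussian_noise.gaussian_noise(
    embedding_features.shape, 32, sigma, device=device) / len(embedding_features)
```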
--------------------------------------------------------------------------------
/add_dp_noise/model/ADtransformer.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from torch.nn import LayerNorm, MultiheadAttention, ModuleList, init
from torch.nn import functional as F
import copy

def _get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    raise ValueError("activation should be relu/gelu, not {}".format(activation))

def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])

class TransformerEncoderLayer(nn.Module):

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src):
        # self-attention block with residual connection and layer norm
        src2 = self.self_attn(src, src, src)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # feedforward block with residual connection and layer norm
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

class TransformerEncoder(nn.Module):

    def __init__(self, d_model, d_feature, n_heads, num_layers, norm=None):
        super(TransformerEncoder, self).__init__()
        self.encoder_layer = TransformerEncoderLayer(d_model, n_heads)
        self.layers = _get_clones(self.encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        # projects the encoded representation down to the uploaded feature size
        self.linear = nn.Linear(d_model, d_feature)
        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                init.xavier_uniform_(p)

    def forward(self, src):
        output = src
        for mod in self.layers:
            output = mod(output)
        if self.norm is not None:
            output = self.norm(output)
        output = self.linear(output)
        return output


# stand-alone anomaly scorer; the server defines its own single-layer scorer in model/Server.py
class MLP(nn.Module):
    def __init__(self, dim, out_dim):
        super(MLP, self).__init__()
        self.dim = dim
        self.out_dim = out_dim
        self.mlp = nn.Sequential(
            nn.Linear(self.dim, 64),
            nn.Sigmoid(),
            nn.Linear(64, 16),
            nn.Sigmoid(),
            nn.Linear(16, self.out_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.mlp(x)
        return x
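A quick shape check for the encoder above (a sketch, run alongside the class definitions; the sizes mirror the NSL-KDD command in the README, and the input layout follows `Client.train_client`, which feeds a `(1, batch_size, d_data)` tensor):
```python
import torch

enc = TransformerEncoder(d_model=122, d_feature=64, n_heads=2, num_layers=1)
x = torch.randn(1, 32, 122)   # (seq_len=1, batch_size=32, d_data=122)
out = enc(x)
print(out.shape)              # torch.Size([1, 32, 64])
```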
--------------------------------------------------------------------------------
/add_dp_noise/model/Client.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch
from torch.nn import LayerNorm, MultiheadAttention, ModuleList, init
from torch.nn import functional as F
import copy
# debugging aid for backward errors; slows training and can be removed
torch.autograd.set_detect_anomaly(True)

def _get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    raise ValueError("activation should be relu/gelu, not {}".format(activation))

def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])


class TransformerEncoderLayer(nn.Module):

    def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0.1, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src):
        src2 = self.self_attn(src, src, src)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

class TransformerEncoder(nn.Module):

    def __init__(self, d_model, d_feature, n_heads, num_layers, norm=None):
        super(TransformerEncoder, self).__init__()
        self.encoder_layer = TransformerEncoderLayer(d_model, n_heads)
        self.layers = _get_clones(self.encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.linear = nn.Linear(d_model, d_feature)
        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                init.xavier_uniform_(p)

    def forward(self, src):
        output = src
        for mod in self.layers:
            output = mod(output)
        output = self.linear(output)
        return output

class Client():
    def __init__(self, arg, device):
        self.arg = arg
        self.device = device
        # one local transformer encoder per client
        self.encoder = TransformerEncoder(self.arg.d_data, self.arg.d_feature, self.arg.heads, 1)
        self.encoder.to(self.device)
        self.criterion = nn.BCELoss()
        self.labels = []
        self.optimizer_tf = torch.optim.Adam(self.encoder.parameters(), lr=1e-4, weight_decay=0.1)  # weight_decay=0.2

    def train_client(self, train):
        # reset the label cache so repeated calls (training epochs, test passes)
        # do not accumulate stale labels
        self.labels = []
        features = []
        for idx, (x, y) in enumerate(train):
            batch_size = x.shape[0]
            x = x.to(self.device)
            x = x.to(torch.float32)
            y = y.to(self.device)
            y = y.to(torch.float32)
            self.labels.append(y)
            # MultiheadAttention expects (seq_len, batch, embed_dim)
            x = x.reshape(1, batch_size, self.arg.d_data)
            feature = self.encoder(x)
            feature = feature.reshape(batch_size, self.arg.d_feature)
            features.append(feature)
        return features

    def calculate_loss(self, scores):
        score = torch.cat(scores)
        label = torch.cat(self.labels)
        loss = self.criterion(score, label)
        return loss

    def get_test_labels_scores(self, scores):
        return torch.cat(scores)

    def get_gradients(self, scores):
        # unused by add_dp_noise/main.py, which goes through utils/Update.py instead
        loss = self.calculate_loss(scores)
        # detach a copy for reporting; deepcopy fails on non-leaf tensors
        self.loss = loss.detach().clone()
        loss.backward()
        grads = {'named_grads': {}}
        for name, param in self.encoder.named_parameters():
            grads['named_grads'][name] = param.grad
        return grads, self.loss

    def model_update(self, grads):
        self.encoder.train()
        self.optimizer_tf.zero_grad()
        for k, v in self.encoder.named_parameters():
            v.grad = grads[k]
        self.optimizer_tf.step()
--------------------------------------------------------------------------------
/add_dp_noise/model/Server.py:
--------------------------------------------------------------------------------
import random
import torch.nn as nn
import torch

# anomaly scorer
class MLP(nn.Module):
    def __init__(self, dim, out_dim):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(dim, out_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.mlp(x)
        return x

class Server():
    def __init__(self, arg, device):
        self.arg = arg
        self.device = device
        self.client_list = self.choose_client()
        self.model = MLP(self.arg.d_feature, 1)
        self.model.to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        self.pr_performance = []
        self.roc_performance = []

    def choose_client(self):
        # sample distinct clients; random.randint could draw the same client twice
        return random.sample(range(self.arg.client), int(self.arg.client * self.arg.frac))

    def train_server(self, features):
        # score each per-batch feature tensor uploaded by a client
        scores = []
        for i in range(len(features)):
            feature = features[i]
            batch_size = feature.shape[0]
            feature = feature.reshape(batch_size, self.arg.d_feature)
            score = self.model(feature)
            score = score.view(batch_size)
            scores.append(score)
        return scores

    def model_update(self, grads):
        self.model.train()
        self.optimizer.zero_grad()
        for k, v in self.model.named_parameters():
            v.grad = grads[k]
        self.optimizer.step()
--------------------------------------------------------------------------------
/add_dp_noise/par.py:
--------------------------------------------------------------------------------
import argparse
# adjust to your local data directory
DATASET_PATH = r'/home/nx/msy/AD/DTransformer/data/'


class Parser:

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.set_arguments()

    def set_arguments(self):
        self.parser.add_argument('-path', type=str, default=DATASET_PATH)
        self.parser.add_argument('-d', '--dataset', type=str)
        self.parser.add_argument('-c', '--client', type=int)
        self.parser.add_argument('-f', '--frac', type=float, help='fraction of clients per round')
        self.parser.add_argument('-b', '--batch_size', default=32, type=int)
        self.parser.add_argument('-d_data', type=int)
        self.parser.add_argument('-d_feature', type=int, default=16)
        self.parser.add_argument('-heads', type=int)
        self.parser.add_argument('-r', '--radio', type=float, help='ratio of labeled anomalies in the training set')
        self.parser.add_argument('-e', '--epoch', type=int)

    def parse(self):
        args, unparsed = self.parser.parse_known_args()
        if len(unparsed) != 0:
            raise SystemExit('Unknown argument: {}'.format(unparsed))
        return args
--------------------------------------------------------------------------------
/add_dp_noise/utils/Update.py:
--------------------------------------------------------------------------------
import torch

def get_gradients(args, client, server, scores, idx):
    # gradients are computed for the client that produced `scores`:
    # concatenate its per-batch scores, match them with its own labels,
    # and backpropagate once through both the client encoder and the server scorer
    score = torch.cat(scores)
    label = torch.cat(client[idx].labels)
    loss = client[idx].criterion(score, label)
    client[idx].optimizer_tf.zero_grad()
    server.optimizer.zero_grad()
    loss.backward()
    grads = {'named_grads': {}}
    for name, param in client[idx].encoder.named_parameters():
        grads['named_grads'][name] = param.grad
    grad_server = {'named_grads': {}}
    for name, param in server.model.named_parameters():
        grad_server['named_grads'][name] = param.grad
    return grads, grad_server

def agg_grads(args, gradients):
    # sum the named gradients from every client, skipping parameters whose
    # gradient is None, then average by the number of clients
    total_grads = {}
    for info in gradients:
        for k, v in info['named_grads'].items():
            if v is None:
                total_grads.setdefault(k, None)
            elif k not in total_grads or total_grads[k] is None:
                # clone so the running sum does not mutate a client's .grad in place
                total_grads[k] = v.clone()
            else:
                total_grads[k] += v
    global_grads = {}
    for k, v in total_grads.items():
        global_grads[k] = torch.div(v, args.client) if v is not None else v
    return global_grads
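A minimal sanity check for `agg_grads` with hypothetical tensors (two clients, one parameter named `w`; the import path assumes you run from inside `add_dp_noise/`):
```python
import torch
from types import SimpleNamespace
from utils.Update import agg_grads

g1 = {'named_grads': {'w': torch.tensor(2.0)}}
g2 = {'named_grads': {'w': torch.tensor(4.0)}}
out = agg_grads(SimpleNamespace(client=2), [g1, g2])
print(out['w'])  # tensor(3.) -- the per-client average
```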
--------------------------------------------------------------------------------
/add_dp_noise/utils/gaussian_noise.py:
--------------------------------------------------------------------------------
import torch

def gaussian_noise(data_shape, s, sigma, device=None):
    """
    Gaussian noise for the CDP-FedAVG-LS algorithm.
    s is the sensitivity (clipping) bound and sigma the noise multiplier,
    so the noise is drawn from N(0, (s * sigma)^2).
    """
    return torch.normal(0, sigma * s, data_shape).to(device)
--------------------------------------------------------------------------------
/add_dp_noise/utils/load_data.py:
--------------------------------------------------------------------------------
import numpy as np
import csv
import random
import torch
from torch.utils.data import DataLoader, TensorDataset

class Loader():
    def __init__(self, arg):
        self.arg = arg
        self.client_dataset = []
        self.test = []
        self.test_label = np.array([])

    def dataLoading(self):
        path = self.arg.path + self.arg.dataset
        # loading data: the first d_data columns are features, the last column is the label
        x = []
        labels = []
        with open(path, 'r') as data_from:
            csv_reader = csv.reader(data_from)
            for i in csv_reader:
                x.append(i[0:self.arg.d_data])
                labels.append(i[self.arg.d_data])
        # np.array with dtype=float parses the string fields in one step
        x = np.array(x, dtype=float)
        labels = np.array(labels, dtype=float)
        return x, labels

    def split_anomaly(self, data, label):
        # 0 means normal data, anything else is an anomaly
        norm_tmp, anomaly_tmp = {'x': [], 'y': []}, {'x': [], 'y': []}
        for i in range(len(label)):
            if float(label[i]) == 0:
                norm_tmp['x'].append(data[i])
                norm_tmp['y'].append(label[i])
            else:
                anomaly_tmp['x'].append(data[i])
                anomaly_tmp['y'].append(label[i])
        return norm_tmp, anomaly_tmp

    def shuffle(self, a, b):
        # shuffle two lists in unison by reusing the same seed
        randnum = random.randint(0, 100)
        random.seed(randnum)
        random.shuffle(a)
        random.seed(randnum)
        random.shuffle(b)
        return np.array(a), np.array(b)

    def split(self, norm_data, anomaly_data):
        train, test, select_anomaly, select_noise, select_train = {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}
        norm_data['x'], norm_data['y'] = self.shuffle(norm_data['x'], norm_data['y'])
        anomaly_data['x'], anomaly_data['y'] = self.shuffle(anomaly_data['x'], anomaly_data['y'])

        length_norm = len(norm_data['y'])
        length_anomaly = len(anomaly_data['y'])
        batch_size_half = int(self.arg.batch_size / 2)
        batch_numbers = int(length_norm * 0.8 / batch_size_half)
        anomaly_train_number = int(length_norm * 0.8 * self.arg.radio) + 1
        noise_num = int(0.02 * length_norm * 0.8)

        # the first anomaly_train_number anomalies become the labeled anomaly set
        select_anomaly['x'], select_anomaly['y'] = anomaly_data['x'][:anomaly_train_number], anomaly_data['y'][:anomaly_train_number]
        # the next slice of anomalies is injected as contamination: the rows are
        # anomalous but deliberately carry normal (0) labels taken from norm_data
        select_noise['x'], select_noise['y'] = anomaly_data['x'][anomaly_train_number:noise_num], norm_data['y'][anomaly_train_number:noise_num]
        # mixing
        select_train['x'] = np.concatenate((norm_data['x'][:int(0.8 * length_norm)], select_noise['x']))
        select_train['y'] = np.concatenate((norm_data['y'][:int(0.8 * length_norm)], select_noise['y']))
        select_train['x'], select_train['y'] = self.shuffle(select_train['x'], select_train['y'])
        print('noise_num:', noise_num)
        print('anomaly_num:', anomaly_train_number)
        # build each batch half from unlabeled training rows and half from
        # labeled anomalies sampled with replacement (oversampling)
        num = 0
        for i in range(batch_numbers):
            for j in range(self.arg.batch_size):
                if j % 2 == 0:
                    train['x'].append(select_train['x'][num])
                    train['y'].append(select_train['y'][num])
                    num += 1
                else:
                    randnum = random.randint(0, anomaly_train_number - 1)
                    train['x'].append(select_anomaly['x'][randnum])
                    train['y'].append(select_anomaly['y'][randnum])
        train['x'], train['y'] = np.array(train['x']), np.array(train['y'])
        train['x'], train['y'] = torch.from_numpy(train['x']), torch.from_numpy(train['y'])
        # the remaining 20% of normal rows and anomalies form the test set
        test['x'] = np.concatenate((norm_data['x'][int(0.8 * length_norm):], anomaly_data['x'][int(0.8 * length_anomaly):]))
        test['y'] = np.concatenate((norm_data['y'][int(0.8 * length_norm):], anomaly_data['y'][int(0.8 * length_anomaly):]))
        self.test_label = test['y']
        test['x'], test['y'] = torch.from_numpy(test['x']), torch.from_numpy(test['y'])
        self.test.append(DataLoader(dataset=TensorDataset(test['x'], test['y']), batch_size=self.arg.batch_size, shuffle=False))
        return train

    def split_into_client(self, norm_data, anomaly_data):
        # give every client an equally sized slice of the normal and anomalous data
        length_norm_client = int(len(norm_data['y']) / self.arg.client)
        length_anomaly_client = int(len(anomaly_data['y']) / self.arg.client)
        for i in range(self.arg.client):
            norm = {'x': norm_data['x'][i * length_norm_client:(i + 1) * length_norm_client], 'y': norm_data['y'][i * length_norm_client:(i + 1) * length_norm_client]}
            anomaly = {'x': anomaly_data['x'][i * length_anomaly_client:(i + 1) * length_anomaly_client], 'y': anomaly_data['y'][i * length_anomaly_client:(i + 1) * length_anomaly_client]}
            self.client_dataset.append(self.split(norm, anomaly))

    def run(self):
        x, y = self.dataLoading()
        norm, anomaly = self.split_anomaly(x, y)
        self.split_into_client(norm, anomaly)
        for i in range(self.arg.client):
            self.client_dataset[i] = DataLoader(dataset=TensorDataset(self.client_dataset[i]['x'], self.client_dataset[i]['y']), batch_size=self.arg.batch_size, shuffle=False)
        return self.client_dataset
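To make the bookkeeping in `split` concrete, a worked example with hypothetical sizes (10,000 normal rows for one client, the default `-b 32`, and the README's `-r 0.0005`):
```python
length_norm, batch_size, radio = 10_000, 32, 0.0005
train_norm = int(0.8 * length_norm)                 # 8000 normal rows go to training
anomaly_train_number = int(train_norm * radio) + 1  # 5 anomalies get labels
noise_num = int(0.02 * train_norm)                  # 160; anomalies[5:160] (155 rows) become unlabeled noise
batch_numbers = int(train_norm / (batch_size / 2))  # 500 batches, half train rows, half labeled anomalies
print(anomaly_train_number, noise_num, batch_numbers)
```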
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from par import Parser
from util import load_data
from model import Server, Client
import torch
import random
from sklearn.metrics import average_precision_score, roc_auc_score
import json

# evaluation
def aucPerformance(labels, mse):
    roc_auc = roc_auc_score(labels, mse)
    ap = average_precision_score(labels, mse)
    print("AUC-ROC: %.4f, AUC-PR: %.4f" % (roc_auc, ap))
    pr_performance.append(ap)
    roc_performance.append(roc_auc)
    return roc_auc, ap


if __name__ == '__main__':
    pr_performance = []
    roc_performance = []
    device = torch.device("cuda")
    arg = Parser().parse()
    loader = load_data.Loader(arg)
    client_data = loader.run()
    server = Server.Server(arg, device)
    client = []
    for i in range(arg.client):
        client.append(Client.Client(arg, device))

    for epoch in range(arg.epoch):
        # train: accumulate the average of the per-client losses
        loss_global = torch.zeros(1, device=device)
        for i in range(arg.client):
            embedding_features = client[i].train_client(client_data[i])
            scores = server.train_server(embedding_features)
            # in our setting, every client holds the same amount of local data,
            # so a plain 1/K weighting gives a proper federated average
            loss_global = loss_global + torch.mul(client[i].calculate_loss(scores), 1 / arg.client)

        # update all models with the gradient of the shared global loss
        server.optimizer.zero_grad()
        for i in range(arg.client):
            client[i].optimizer_tf.zero_grad()
        loss_global.backward()
        for i in range(arg.client):
            client[i].optimizer_tf.step()
        server.optimizer.step()
        print(epoch, "loss:", loss_global.item())

        # test on a randomly chosen client
        rand_client_num = random.randint(0, arg.client - 1)
        embedding_features = client[rand_client_num].train_client(loader.test[rand_client_num])
        scores = client[rand_client_num].get_test_labels_scores(server.train_server(embedding_features))
        aucPerformance(loader.test_label, scores.cpu().detach().numpy())

        if epoch % 10 == 0:
            with open('pr.txt', 'w') as f:
                json.dump(pr_performance, f)
            with open('roc.txt', 'w') as f:
                json.dump(roc_performance, f)
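This variant backpropagates one averaged loss through every client's encoder, whereas `add_dp_noise/main.py` exchanges and averages explicit gradients; for equally sized clients the two are equivalent, since the gradient of the average loss is the average of the per-client gradients. A toy check with hypothetical tensors:
```python
import torch

w = torch.tensor(1.0, requires_grad=True)
losses = [w * 2.0, w * 4.0]                        # stand-ins for per-client losses
loss_global = sum(l / len(losses) for l in losses)
loss_global.backward()
print(w.grad)  # tensor(3.) == (2 + 4) / 2, the average of the per-client gradients
```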
--------------------------------------------------------------------------------
/model/ADtransformer.py:
--------------------------------------------------------------------------------
import torch.nn as nn
from torch.nn import LayerNorm, MultiheadAttention, ModuleList, init
from torch.nn import functional as F
import copy

def _get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    raise ValueError("activation should be relu/gelu, not {}".format(activation))

def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])

class TransformerEncoderLayer(nn.Module):

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src):
        # self-attention block with residual connection and layer norm
        src2 = self.self_attn(src, src, src)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        # feedforward block with residual connection and layer norm
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

class TransformerEncoder(nn.Module):

    def __init__(self, d_model, d_feature, n_heads, num_layers, norm=None):
        super(TransformerEncoder, self).__init__()
        self.encoder_layer = TransformerEncoderLayer(d_model, n_heads)
        self.layers = _get_clones(self.encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        # projects the encoded representation down to the uploaded feature size
        self.linear = nn.Linear(d_model, d_feature)
        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                init.xavier_uniform_(p)

    def forward(self, src):
        output = src
        for mod in self.layers:
            output = mod(output)
        if self.norm is not None:
            output = self.norm(output)
        output = self.linear(output)
        return output
# stand-alone anomaly scorer; the server defines its own single-layer scorer in model/Server.py
class MLP(nn.Module):
    def __init__(self, dim, out_dim):
        super(MLP, self).__init__()
        self.dim = dim
        self.out_dim = out_dim
        self.mlp = nn.Sequential(
            nn.Linear(self.dim, 64),
            nn.Sigmoid(),
            nn.Linear(64, 16),
            nn.Sigmoid(),
            nn.Linear(16, self.out_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.mlp(x)
        return x
--------------------------------------------------------------------------------
/model/Client.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch
from torch.nn import LayerNorm, MultiheadAttention, ModuleList, init
from torch.nn import functional as F
import copy
# debugging aid for backward errors; slows training and can be removed
torch.autograd.set_detect_anomaly(True)

def _get_activation_fn(activation):
    if activation == "relu":
        return F.relu
    elif activation == "gelu":
        return F.gelu
    raise ValueError("activation should be relu/gelu, not {}".format(activation))

def _get_clones(module, N):
    return ModuleList([copy.deepcopy(module) for i in range(N)])


class TransformerEncoderLayer(nn.Module):

    def __init__(self, d_model, nhead, dim_feedforward=256, dropout=0.1, activation="relu"):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of the feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = LayerNorm(d_model)
        self.norm2 = LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def forward(self, src):
        src2 = self.self_attn(src, src, src)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src

class TransformerEncoder(nn.Module):

    def __init__(self, d_model, d_feature, n_heads, num_layers, norm=None):
        super(TransformerEncoder, self).__init__()
        self.encoder_layer = TransformerEncoderLayer(d_model, n_heads)
        self.layers = _get_clones(self.encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm
        self.linear = nn.Linear(d_model, d_feature)
        self._reset_parameters()

    def _reset_parameters(self):
        for p in self.parameters():
            if p.dim() > 1:
                init.xavier_uniform_(p)

    def forward(self, src):
        output = src
        for mod in self.layers:
            output = mod(output)
        output = self.linear(output)
        return output

# this variant carries no gradient-exchange methods; main.py backpropagates
# the shared global loss through every client's encoder directly
class Client():
    def __init__(self, arg, device):
        self.arg = arg
        self.device = device
        self.encoder = TransformerEncoder(self.arg.d_data, self.arg.d_feature, self.arg.heads, 1)
        self.encoder.to(self.device)
        self.criterion = nn.BCELoss()
        self.labels = []
        self.optimizer_tf = torch.optim.Adam(self.encoder.parameters(), lr=1e-4, weight_decay=0.1)  # weight_decay=0.2

    def train_client(self, train):
        # reset the label cache so repeated calls (training epochs, test passes)
        # do not accumulate stale labels
        self.labels = []
        features = []
        for idx, (x, y) in enumerate(train):
            batch_size = x.shape[0]
            x = x.to(self.device)
            x = x.to(torch.float32)
            y = y.to(self.device)
            y = y.to(torch.float32)
            self.labels.append(y)
            # MultiheadAttention expects (seq_len, batch, embed_dim)
            x = x.reshape(1, batch_size, self.arg.d_data)
            feature = self.encoder(x)
            feature = feature.reshape(batch_size, self.arg.d_feature)
            features.append(feature)
        return features

    def calculate_loss(self, scores):
        score = torch.cat(scores)
        label = torch.cat(self.labels)
        loss = self.criterion(score, label)
        return loss

    def get_test_labels_scores(self, scores):
        return torch.cat(scores)
--------------------------------------------------------------------------------
/model/Server.py:
--------------------------------------------------------------------------------
import random
import torch.nn as nn
import torch

# anomaly scorer
class MLP(nn.Module):
    def __init__(self, dim, out_dim):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(dim, out_dim),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.mlp(x)
        return x

# the server is updated in main.py through the shared global loss,
# so this variant has no model_update method
class Server():
    def __init__(self, arg, device):
        self.arg = arg
        self.device = device
        self.client_list = self.choose_client()
        self.model = MLP(self.arg.d_feature, 1)
        self.model.to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
        self.pr_performance = []
        self.roc_performance = []

    def choose_client(self):
        # sample distinct clients; random.randint could draw the same client twice
        return random.sample(range(self.arg.client), int(self.arg.client * self.arg.frac))

    def train_server(self, features):
        # score each per-batch feature tensor uploaded by a client
        scores = []
        for i in range(len(features)):
            feature = features[i]
            batch_size = feature.shape[0]
            feature = feature.reshape(batch_size, self.arg.d_feature)
            score = self.model(feature)
            score = score.view(batch_size)
            scores.append(score)
        return scores
--------------------------------------------------------------------------------
/par.py:
--------------------------------------------------------------------------------
import argparse
# adjust to your local data directory
DATASET_PATH = r'F:\Code\Anomaly Detection\FedAnomaly\data\\'


class Parser:

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        self.set_arguments()

    def set_arguments(self):
        self.parser.add_argument('-path', type=str, default=DATASET_PATH)
        self.parser.add_argument('-d', '--dataset', type=str)
        self.parser.add_argument('-c', '--client', type=int)
        self.parser.add_argument('-f', '--frac', type=float, help='fraction of clients per round')
        self.parser.add_argument('-b', '--batch_size', default=32, type=int)
        self.parser.add_argument('-d_data', type=int)
        self.parser.add_argument('-d_feature', type=int, default=16)
        self.parser.add_argument('-heads', type=int)
        self.parser.add_argument('-r', '--radio', type=float, help='ratio of labeled anomalies in the training set')
        self.parser.add_argument('-e', '--epoch', type=int)

    def parse(self):
        args, unparsed = self.parser.parse_known_args()
        if len(unparsed) != 0:
            raise SystemExit('Unknown argument: {}'.format(unparsed))
        return args
--------------------------------------------------------------------------------
/util/load_data.py:
--------------------------------------------------------------------------------
import numpy as np
import csv
import random
import torch
from torch.utils.data import DataLoader, TensorDataset

class Loader():
    def __init__(self, arg):
        self.arg = arg
        self.client_dataset = []
        self.test = []
        self.test_label = np.array([])

    def dataLoading(self):
        path = self.arg.path + self.arg.dataset
        # loading data: the first d_data columns are features, the last column is the label
        x = []
        labels = []
        with open(path, 'r') as data_from:
            csv_reader = csv.reader(data_from)
            for i in csv_reader:
                x.append(i[0:self.arg.d_data])
                labels.append(i[self.arg.d_data])
        # np.array with dtype=float parses the string fields in one step
        x = np.array(x, dtype=float)
        labels = np.array(labels, dtype=float)
        return x, labels

    def split_anomaly(self, data, label):
        # 0 means normal data, anything else is an anomaly
        norm_tmp, anomaly_tmp = {'x': [], 'y': []}, {'x': [], 'y': []}
        for i in range(len(label)):
            if float(label[i]) == 0:
                norm_tmp['x'].append(data[i])
                norm_tmp['y'].append(label[i])
            else:
                anomaly_tmp['x'].append(data[i])
                anomaly_tmp['y'].append(label[i])
        return norm_tmp, anomaly_tmp

    def shuffle(self, a, b):
        # shuffle two lists in unison by reusing the same seed
        randnum = random.randint(0, 100)
        random.seed(randnum)
        random.shuffle(a)
        random.seed(randnum)
        random.shuffle(b)
        return np.array(a), np.array(b)

    def split(self, norm_data, anomaly_data):
        train, test, select_anomaly, select_noise, select_train = {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}, {'x': [], 'y': []}
        norm_data['x'], norm_data['y'] = self.shuffle(norm_data['x'], norm_data['y'])
        anomaly_data['x'], anomaly_data['y'] = self.shuffle(anomaly_data['x'], anomaly_data['y'])

        length_norm = len(norm_data['y'])
        length_anomaly = len(anomaly_data['y'])
        batch_size_half = int(self.arg.batch_size / 2)
        batch_numbers = int(length_norm * 0.8 / batch_size_half)
        anomaly_train_number = int(length_norm * 0.8 * self.arg.radio) + 1
        noise_num = int(0.02 * length_norm * 0.8)

        # the first anomaly_train_number anomalies become the labeled anomaly set
        select_anomaly['x'], select_anomaly['y'] = anomaly_data['x'][:anomaly_train_number], anomaly_data['y'][:anomaly_train_number]
        # the next slice of anomalies is injected as contamination: the rows are
        # anomalous but deliberately carry normal (0) labels taken from norm_data
        select_noise['x'], select_noise['y'] = anomaly_data['x'][anomaly_train_number:noise_num], norm_data['y'][anomaly_train_number:noise_num]
        # mixing
        select_train['x'] = np.concatenate((norm_data['x'][:int(0.8 * length_norm)], select_noise['x']))
        select_train['y'] = np.concatenate((norm_data['y'][:int(0.8 * length_norm)], select_noise['y']))
        select_train['x'], select_train['y'] = self.shuffle(select_train['x'], select_train['y'])
        print('noise_num:', noise_num)
        print('anomaly_num:', anomaly_train_number)
        # build each batch half from unlabeled training rows and half from
        # labeled anomalies sampled with replacement (oversampling)
        num = 0
        for i in range(batch_numbers):
            for j in range(self.arg.batch_size):
                if j % 2 == 0:
                    train['x'].append(select_train['x'][num])
                    train['y'].append(select_train['y'][num])
                    num += 1
                else:
                    randnum = random.randint(0, anomaly_train_number - 1)
                    train['x'].append(select_anomaly['x'][randnum])
                    train['y'].append(select_anomaly['y'][randnum])
        train['x'], train['y'] = np.array(train['x']), np.array(train['y'])
        train['x'], train['y'] = torch.from_numpy(train['x']), torch.from_numpy(train['y'])
        # the remaining 20% of normal rows and anomalies form the test set
        test['x'] = np.concatenate((norm_data['x'][int(0.8 * length_norm):], anomaly_data['x'][int(0.8 * length_anomaly):]))
        test['y'] = np.concatenate((norm_data['y'][int(0.8 * length_norm):], anomaly_data['y'][int(0.8 * length_anomaly):]))
        self.test_label = test['y']
        test['x'], test['y'] = torch.from_numpy(test['x']), torch.from_numpy(test['y'])
        self.test.append(DataLoader(dataset=TensorDataset(test['x'], test['y']), batch_size=self.arg.batch_size, shuffle=False))
        return train

    def split_into_client(self, norm_data, anomaly_data):
        # give every client an equally sized slice of the normal and anomalous data
        length_norm_client = int(len(norm_data['y']) / self.arg.client)
        length_anomaly_client = int(len(anomaly_data['y']) / self.arg.client)
        for i in range(self.arg.client):
            norm = {'x': norm_data['x'][i * length_norm_client:(i + 1) * length_norm_client], 'y': norm_data['y'][i * length_norm_client:(i + 1) * length_norm_client]}
            anomaly = {'x': anomaly_data['x'][i * length_anomaly_client:(i + 1) * length_anomaly_client], 'y': anomaly_data['y'][i * length_anomaly_client:(i + 1) * length_anomaly_client]}
            self.client_dataset.append(self.split(norm, anomaly))

    def run(self):
        x, y = self.dataLoading()
        norm, anomaly = self.split_anomaly(x, y)
        self.split_into_client(norm, anomaly)
        for i in range(self.arg.client):
            self.client_dataset[i] = DataLoader(dataset=TensorDataset(self.client_dataset[i]['x'], self.client_dataset[i]['y']), batch_size=self.arg.batch_size, shuffle=False)
        return self.client_dataset
--------------------------------------------------------------------------------