├── DPLAN.py
├── LICENSE
├── Readme.md
├── dran
│   ├── DRAN.py
│   └── DRAN_test.py
├── env.py
├── imgs
│   ├── general_arch.png
│   └── regr_training.png
├── main.py
├── preprocessing
│   └── preproc_unsw.py
└── util.py

/DPLAN.py:
--------------------------------------------------------------------------------
from collections import namedtuple, deque
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import os

from util import hyper, DQN_iforest, get_total_reward, test_model
from env import ADEnv

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

torch.manual_seed(42)
random.seed(42)
np.random.seed(42)


Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'state_index', 'next_state_index'))

class DPLAN():
    """
    DPLAN agent that encapsulates the training and testing of the DQN
    """
    def __init__(self, env: ADEnv, test_set, destination_path, device='cpu', double_dqn=True):
        """
        Initialize the DPLAN agent
        :param env: the environment
        :param test_set: the test set
        :param destination_path: the path where to save the model
        :param device: the device to use for training
        :param double_dqn: whether to use the double DQN variant
        """
        self.double_dqn = double_dqn
        self.test_set = test_set
        self.device = device
        self.env = env

        if not os.path.exists(destination_path):
            raise ValueError('destination path does not exist')

        self.destination_path = destination_path

        # tensor representation of the dataset, used in the intrinsic reward
        self.x_tensor = torch.tensor(env.x, dtype=torch.float32, device=device)

        # hyperparameters setup
        self.hidden_size = hyper['hidden_size']
        self.BATCH_SIZE = hyper['batch_size']
        self.GAMMA = hyper['gamma']
        self.EPS_START = hyper['eps_max']
        self.EPS_END = hyper['eps_min']
        self.EPS_DECAY = hyper['eps_decay']
        self.LR = hyper['learning_rate']
        self.momentum = hyper['momentum']
        self.min_squared_gradient = hyper['min_squared_gradient']
        self.num_episodes = hyper['n_episodes']
        self.num_warmup_steps = hyper['warmup_steps']//hyper['steps_per_episode']
        self.steps_per_episode = hyper['steps_per_episode']
        self.max_memory_size = hyper['max_memory']
        self.target_update = hyper['target_update']
        self.validation_frequency = hyper['validation_frequency']
        self.theta_update = hyper['theta_update']
        self.weight_decay = hyper['weight_decay']

        # n actions and n observations
        self.n_actions = env.action_space.n
        self.n_observations = env.n_feature

        # resetting the agent
        self.reset_nets()

        # resetting agent's memory
        self.reset_memory()

        # resetting counters
        self.reset_counters()


    def reset_memory(self):
        self.memory = ReplayMemory(self.max_memory_size)

    def reset_counters(self):
        # training counters and utils
        self.num_steps_done = 0
        self.episodes_total_reward = []
        self.pr_auc_history = []
        self.roc_auc_history = []
        self.best_pr = None

    def reset_nets(self):
        # net definition
        self.policy_net = DQN(self.n_observations, self.hidden_size, self.n_actions, device=self.device).to(self.device)
        # not sure if this works
        #self.policy_net._initialize_weights()
        self.target_net = DQN(self.n_observations,
self.hidden_size, self.n_actions, device = self.device).to(self.device) 100 | self.val_net = DQN(self.n_observations, self.hidden_size, self.n_actions, device = self.device).to(self.device) 101 | self.target_net.load_state_dict(self.policy_net.state_dict()) 102 | # set target net weights to 0 103 | with torch.no_grad(): 104 | for param in self.target_net.parameters(): 105 | param.zero_() 106 | 107 | # setting up the environment's DQN 108 | self.env.DQN = self.policy_net 109 | # setting up the environment's intrinsic reward as function of netwo rk's theta_e (i.e. the hidden layer) 110 | self.intrinsic_rewards = DQN_iforest(self.x_tensor, self.policy_net) 111 | 112 | # setting the rmsprop optimizer 113 | self.optimizer = optim.RMSprop( 114 | self.policy_net.parameters(), 115 | lr=self.LR, 116 | momentum = self.momentum, 117 | eps = self.min_squared_gradient, 118 | weight_decay = self.weight_decay 119 | ) 120 | 121 | def select_action(self,state,steps_done): 122 | """ 123 | Select an action using the epsilon-greedy policy 124 | :param state: the current state 125 | :param steps_done: the number of steps done 126 | :return: the action 127 | """ 128 | sample = random.random() 129 | eps_threshold = self.EPS_END + (self.EPS_START - self.EPS_END) * \ 130 | math.exp(-1. * steps_done / self.EPS_DECAY) 131 | steps_done += 1 132 | if sample > eps_threshold: 133 | with torch.no_grad(): 134 | # t.max(1) will return the largest column value of each row. 135 | # second column on max result is index of where max element was 136 | # found, so we pick action with the larger expected reward. 137 | return self.policy_net(state).max(1)[1].view(1, 1) 138 | else: 139 | return torch.tensor([[self.env.action_space.sample()]], device=self.device, dtype=torch.long) 140 | 141 | 142 | def optimize_model(self): 143 | """ 144 | Optimize the model using the replay memory 145 | """ 146 | if len(self.memory) < self.BATCH_SIZE: 147 | return 148 | transitions = self.memory.sample(self.BATCH_SIZE) 149 | 150 | # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for 151 | # detailed explanation). This converts batch-array of Transitions 152 | # to Transition of batch-arrays. 153 | batch = Transition(*zip(*transitions)) 154 | 155 | # Compute a mask of non-final states and concatenate the batch elements 156 | # (a final state would've been the one after which simulation ended) 157 | non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, 158 | batch.next_state)), device=self.device, dtype=torch.bool) 159 | non_final_next_states = torch.cat([s for s in batch.next_state 160 | if s is not None]) 161 | state_batch = torch.cat(batch.state) 162 | action_batch = torch.cat(batch.action) 163 | reward_batch = torch.cat(batch.reward) 164 | 165 | # Compute Q(s_t, a) - the model computes Q(s_t), then we select the 166 | # columns of actions taken. These are the actions which would've been taken 167 | # for each batch state according to policy_net 168 | state_action_values = self.policy_net(state_batch).gather(1, action_batch) 169 | 170 | # Compute V(s_{t+1}) for all next states. 171 | # Expected values of actions for non_final_next_states are computed based 172 | # on the "older" target_net; selecting their best reward with max(1)[0]. 173 | # This is merged based on the mask, such that we'll have either the expected 174 | # state value or 0 in case the state was final. 
        next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
        with torch.no_grad():
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        # Compute the expected Q values
        expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch

        # Compute Huber loss
        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        # In-place gradient clipping
        torch.nn.utils.clip_grad_value_(self.policy_net.parameters(), 100)
        self.optimizer.step()

    def warmup_steps(self):
        """
        Implement the warmup steps to fill the replay memory using random actions
        """
        for _ in range(self.num_warmup_steps):
            state = self.env.reset()
            obs_index = state
            state = torch.tensor(self.env.x[state,:], dtype=torch.float32, device=self.device).unsqueeze(0)
            for _ in range(self.steps_per_episode):
                action = np.random.randint(0,self.n_actions)
                observation, reward, _, _ = self.env.step(action)
                reward = get_total_reward(reward,self.intrinsic_rewards,obs_index)
                reward = torch.tensor([reward], device=self.device)
                # keep track of both the dataset index and the tensor of the new observation
                state_index = obs_index
                obs_index = observation
                observation = torch.tensor(self.env.x[observation,:], dtype=torch.float32, device=self.device).unsqueeze(0)
                next_state = observation
                # a Transition needs all six fields (including the two state indices), as in fit()
                self.memory.push(state, torch.tensor([[action]], device=self.device), next_state, reward, state_index, obs_index)
                state = next_state



    def fit(self, reset_nets=False):
        """
        Fit the model according to the dataset and hyperparameters. The policy network is
        periodically evaluated on the test set during training.
217 | :param reset_nets: whether to reset the networks 218 | """ 219 | 220 | # reset necessary variables 221 | self.reset_counters() 222 | self.reset_memory() 223 | if reset_nets: 224 | self.reset_nets() 225 | 226 | # perform warmup steps 227 | self.warmup_steps() 228 | 229 | 230 | for i_episode in range(self.num_episodes): 231 | # Initialize the environment and get it's state 232 | reward_history = [] 233 | state = self.env.reset() 234 | # mantain both the obervation as the dataset index and value 235 | state_index = state 236 | state = torch.tensor(self.env.x[state,:], dtype=torch.float32, device=self.device).unsqueeze(0) 237 | 238 | for t in range(self.steps_per_episode): 239 | self.num_steps_done += 1 240 | 241 | # select_action encapsulates the epsilon-greedy policy 242 | action = self.select_action(state,self.num_steps_done) 243 | 244 | observation, reward, _, _ = self.env.step(action.item()) 245 | #states.append((self.env.x[observation,:],action.item())) 246 | 247 | reward = get_total_reward(reward,self.intrinsic_rewards,state_index,write_rew=False) 248 | 249 | reward_history.append(reward) 250 | reward = torch.tensor([reward], dtype=torch.float32 ,device=self.device) 251 | obs_index = observation 252 | observation = torch.tensor(self.env.x[observation,:], dtype=torch.float32, device=self.device).unsqueeze(0) 253 | next_state = observation 254 | 255 | # Store the transition in memory 256 | self.memory.push(state, action, next_state, reward,state_index,obs_index) 257 | 258 | # Move to the next state 259 | state = next_state 260 | state_index = obs_index 261 | 262 | # Perform one step of the optimization (on the policy network) 263 | self.optimize_model() 264 | 265 | # update the target network 266 | if self.num_steps_done % self.target_update == 0: 267 | policy_net_state_dict = self.policy_net.state_dict() 268 | self.target_net.load_state_dict(policy_net_state_dict) 269 | # validation step 270 | if self.num_steps_done % self.validation_frequency == 0: 271 | auc, pr = test_model(self.test_set,self.policy_net) 272 | self.pr_auc_history.append(pr) 273 | self.roc_auc_history.append(auc) 274 | if self.num_steps_done % self.theta_update == 0: 275 | self.intrinsic_rewards = DQN_iforest(self.x_tensor, self.policy_net) 276 | 277 | # because the theta^e update is equal to the duration of the episode we can update the theta^e here 278 | self.episodes_total_reward.append(sum(reward_history)) 279 | 280 | # print the results at the end of the episode 281 | avg_reward = np.mean(reward_history) 282 | print('Episode: {} \t Steps: {} \t Average episode Reward: {}'.format(i_episode, t+1, avg_reward)) 283 | 284 | print('Complete') 285 | 286 | 287 | 288 | 289 | def save_model(self,model_name): 290 | """ 291 | Save the model 292 | :param model_name: name of the model 293 | """ 294 | file_path = os.path.join(self.destination_path,model_name) 295 | torch.save(self.val_net.state_dict(), file_path) 296 | 297 | def show_results(self): 298 | """ 299 | Show the results of the training 300 | """ 301 | 302 | # plot total reward, pr auc and roc auc history in subplots 303 | fig, axs = plt.subplots(3,1,figsize=(10,10)) 304 | axs[0].plot(self.episodes_total_reward) 305 | axs[0].set_title('Total reward per episode') 306 | axs[1].plot(self.pr_auc_history) 307 | axs[1].set_title('PR AUC per validation step') 308 | axs[2].plot(self.roc_auc_history) 309 | axs[2].set_title('ROC AUC per validation step') 310 | plt.show() 311 | 312 | 313 | def model_performance(self): 314 | """ 315 | Test the model 316 | :param on_test_set: 
whether to test on the test set or the validation set 317 | """ 318 | return test_model(self.test_set,self.policy_net) 319 | 320 | 321 | class ReplayMemory(object): 322 | """ 323 | Replay Memory implemented as a deque 324 | """ 325 | 326 | def __init__(self, capacity): 327 | self.memory = deque([], maxlen=capacity) 328 | 329 | def push(self, *args): 330 | """Save a transition""" 331 | self.memory.append(Transition(*args)) 332 | 333 | def sample(self, batch_size): 334 | return random.sample(self.memory, batch_size) 335 | 336 | def __len__(self): 337 | return len(self.memory) 338 | 339 | 340 | 341 | class DQN(nn.Module): 342 | """ 343 | Deep Q Network 344 | """ 345 | 346 | def __init__(self, n_observations,hidden_size, n_actions, device='cpu'): 347 | super(DQN, self).__init__() 348 | self.device = device 349 | self.latent = nn.Sequential( 350 | nn.Linear(n_observations,hidden_size), 351 | ) 352 | self.output_layer = nn.Linear(hidden_size,n_actions) 353 | 354 | def forward(self, x): 355 | if not isinstance(x,torch.Tensor): 356 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 357 | x = F.relu(self.latent(x)) 358 | return self.output_layer(x) 359 | 360 | 361 | def get_latent(self,x): 362 | """ 363 | Get the latent representation of the input using the latent layer 364 | """ 365 | self.eval() 366 | if not isinstance(x,torch.Tensor): 367 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 368 | 369 | with torch.no_grad(): 370 | latent_embs = F.relu(self.latent(x)) 371 | self.train() 372 | return latent_embs 373 | 374 | def predict_label(self,x): 375 | self.eval() 376 | """ 377 | Predict the label of the input as the argmax of the output layer 378 | """ 379 | if not isinstance(x,torch.Tensor): 380 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 381 | 382 | with torch.no_grad(): 383 | ret = torch.argmax(self.forward(x),axis = 1) 384 | self.train() 385 | return ret 386 | 387 | def _initialize_weights(self,): 388 | with torch.no_grad(): 389 | for m in self.modules(): 390 | if isinstance(m, nn.Linear): 391 | nn.init.normal_(m.weight, 0.0, 0.01) 392 | nn.init.constant_(m.bias, 0.0) 393 | 394 | def forward_latent(self,x): 395 | if not isinstance(x,torch.Tensor): 396 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 397 | latent = F.relu(self.latent(x)) 398 | out = self.output_layer(latent) 399 | return out,latent 400 | 401 | def get_latent_grad(self,x): 402 | if not isinstance(x,torch.Tensor): 403 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 404 | latent_embs = F.relu(self.latent(x)) 405 | return latent_embs 406 | 407 | 408 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Teodoro Sullazzo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
# DPLAN
## A PyTorch implementation
### Author: Teodoro Sullazzo

This repository contains an implementation of an anomaly detection method called DPLAN, which is based on the reinforcement learning framework. The method is described in the paper "Toward Deep Supervised Anomaly Detection: Reinforcement Learning from Partially Labeled Anomaly Data" by Pang et al. You can access the paper [here](https://arxiv.org/pdf/2009.06847.pdf).

There are some differences between the work presented here and the original work by Pang et al.:

- I introduced the set $D_n$, which contains known normal data.
- I used a different normalization for the Isolation Forest score.
- I changed how the extrinsic reward is computed.
- This implementation is based on the PyTorch framework.
- There are also some other minor changes.


The code in env.py is derived from code provided by the GitHub user lflfdxfn.


## UPDATE - 13/12/2023
Added preprocessing for the UNSW-NB15 dataset according to Pang et al.'s paper. It can be found in the preprocessing folder.


## DRAN

The repository also includes a new method based on DPLAN, called DRAN, that replaces the reinforcement learning component with a regression layer. A minimal usage sketch follows, and the method's workflow is illustrated in the figures below.
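The snippet below is a minimal sketch of how DRAN can be trained and evaluated, adapted from `dran/DRAN_test.py` (run it from inside the `dran/` folder so the import resolves). The CSV paths are placeholders, the run is kept on CPU, and the zero hypersphere center `c` is only a stand-in: for real experiments `c` should be precomputed as in Deep SAD.

```python
import pandas as pd
import torch
from DRAN import DRAN  # dran/DRAN.py

# config keys follow dran/DRAN_test.py
config = {
    'batch_size': 32,
    'lr': 1e-4,
    'sad_lr': 1e-3,
    'validation_step': 100,
    'update_step': 1,
}

# data preprocessed by preprocessing/preproc_unsw.py: features plus the label in the last column
train = pd.read_csv('path/to/Exploits_0.02_60.csv').values   # placeholder path
test = pd.read_csv('path/to/test_for_all.csv').values        # placeholder path

device = 'cpu'
# placeholder center of the Deep-SAD hypersphere in the 20-dimensional latent space
c = torch.zeros(20, device=device)

dran = DRAN(train_set=train.copy(), test_set=test.copy(), config=config, c=c, device=device)
dran.train(n_epochs=5, n_epochs_sad=20)
print(dran.test())  # (pr_auc, roc_auc) on the test set
```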
26 | 27 | ![General architecture](imgs/general_arch.png) 28 | 29 | ![Main network training](imgs/regr_training.png) 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /dran/DRAN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from sklearn.metrics import roc_auc_score, average_precision_score 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.ensemble import IsolationForest 7 | 8 | import pandas as pd 9 | import os 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | np.random.seed(42) 15 | torch.manual_seed(42) 16 | 17 | 18 | # deep sad hyperparameters 19 | eta = 0.2 20 | eps = 1e-6 21 | train_labels = None 22 | 23 | 24 | 25 | ### Neural Network definition ### 26 | 27 | 28 | class DQN(nn.Module): 29 | """ 30 | Deep Q Network 31 | """ 32 | def __init__(self, n_observations,hidden_size, n_actions, device='cpu'): 33 | super(DQN, self).__init__() 34 | self.device = device 35 | bias = True 36 | self.latent = nn.Sequential( 37 | nn.Linear(n_observations,hidden_size,bias=bias), 38 | ) 39 | self.output_layer = nn.Linear(hidden_size,n_actions,bias=bias) 40 | 41 | def forward(self, x): 42 | if not isinstance(x,torch.Tensor): 43 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 44 | x = F.relu(self.latent(x)) 45 | return self.output_layer(x) 46 | def get_latent(self,x): 47 | """ 48 | Get the latent representation of the input using the latent layer 49 | """ 50 | self.eval() 51 | if not isinstance(x,torch.Tensor): 52 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 53 | 54 | with torch.no_grad(): 55 | latent_embs = F.relu(self.latent(x)) 56 | self.train() 57 | return latent_embs 58 | def predict_label(self,x): 59 | self.eval() 60 | """ 61 | Predict the label of the input as the argmax of the output layer 62 | """ 63 | if not isinstance(x,torch.Tensor): 64 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 65 | 66 | with torch.no_grad(): 67 | ret = torch.argmax(self.forward(x),axis = 1) 68 | self.train() 69 | return ret 70 | 71 | def _initialize_weights(self,): 72 | with torch.no_grad(): 73 | for m in self.modules(): 74 | if isinstance(m, nn.Linear): 75 | nn.init.normal_(m.weight, 0.0, 0.01) 76 | nn.init.constant_(m.bias, 0.0) 77 | 78 | def forward_latent(self,x): 79 | if not isinstance(x,torch.Tensor): 80 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 81 | latent = F.relu(self.latent(x)) 82 | out = self.output_layer(latent) 83 | return out,latent 84 | 85 | def get_latent_grad(self,x): 86 | if not isinstance(x,torch.Tensor): 87 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 88 | latent_embs = F.relu(self.latent(x)) 89 | return latent_embs 90 | 91 | 92 | ### Utility functions ### 93 | 94 | def DQN_iforest(x, model): 95 | # iforest function on the penuli-layer space of DQN 96 | # get the output of penulti-layer 97 | latent_x=model.get_latent(x) 98 | latent_x=latent_x.cpu().detach().numpy() 99 | # calculate anomaly scores in the latent space 100 | iforest=IsolationForest().fit(latent_x) 101 | scores = -iforest.decision_function(latent_x) 102 | # normalize the scores 103 | norm_scores = (scores - scores.min()) / (scores.max() - scores.min()) 104 | #norm_scores = np.array([-1*s+0.5 for s in scores]) 105 | return norm_scores 106 | 107 | 108 | def distance_from_c(x,net,c): 109 | with torch.no_grad(): 110 | latent_x=net.get_latent(x) 111 | #dist = 
torch.abs(**2) 112 | dist = torch.sum((latent_x - c)**2, dim=1) 113 | dist = (dist - torch.min(dist)) / (torch.max(dist) - torch.min(dist)) 114 | dist = dist.cpu().detach().numpy() 115 | return dist 116 | 117 | 118 | def loss_sad(x,labels,c,eta,eps): 119 | labels = labels*(-1) 120 | dist = torch.sum((x - c) ** 2, dim=1) 121 | losses = torch.where(labels == 0, dist, eta * ((dist + eps) ** labels.float())) 122 | loss = torch.mean(losses) 123 | return loss 124 | 125 | def moving_average(x, w): 126 | return np.convolve(x, np.ones(w), 'valid') / w 127 | 128 | ### DRAN class ### 129 | 130 | class DRAN: 131 | def __init__(self,train_set,test_set,config,c,device='cpu'): 132 | """ 133 | c : the hypersphere's center according to Deep-SAD 134 | """ 135 | 136 | self.device = device 137 | 138 | self.x = train_set[:,:-1] 139 | self.x_tensor = torch.tensor(self.x,dtype=torch.float32,device=self.device) 140 | self.y = train_set[:,-1] 141 | self.y[self.y==2] = -1 142 | self.test_set = test_set 143 | 144 | self.relabeling_accuracy = [] 145 | self.changed = [] 146 | self.iter = -1 147 | 148 | self.model = DQN(self.x.shape[1],20,1).to(self.device) 149 | self.model._initialize_weights() 150 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config['lr']) #-3 for thyroid 151 | self.criterion = torch.nn.SmoothL1Loss() 152 | 153 | self.batch_size = config['batch_size'] 154 | self.validation_step = config['validation_step'] 155 | self.update_step = config['update_step'] 156 | 157 | self.validation_history = [] 158 | self.steps_per_epoch = None 159 | 160 | 161 | self.index_a = np.argwhere(self.y==1).reshape(-1) 162 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 163 | self.index_u = np.argwhere(self.y==0).reshape(-1) 164 | 165 | 166 | self.c = c 167 | 168 | self.sad_lr = config['sad_lr'] 169 | self.scores = None 170 | 171 | 172 | 173 | def train_sad(self,num_epochs=30,logs=True): 174 | sad_optimizer = torch.optim.Adam(self.model.latent.parameters(), lr=self.sad_lr, weight_decay=1e-6) 175 | y = torch.tensor(self.y, dtype=torch.float32, device=self.device) 176 | 177 | dataset = torch.utils.data.TensorDataset(self.x_tensor,y) 178 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True) 179 | 180 | for epoch in range(num_epochs): 181 | total_loss = 0 182 | for x,y in data_loader: 183 | sad_optimizer.zero_grad() 184 | latent = self.model.get_latent_grad(x) 185 | loss = loss_sad(latent,y,self.c,eta,eps) 186 | loss.backward() 187 | sad_optimizer.step() 188 | total_loss += loss.cpu().detach().item() 189 | loss_epoch = total_loss/len(data_loader) 190 | if logs and (epoch+1) % 5 == 0: 191 | print(f'epoch: {epoch} loss: {loss_epoch}') 192 | total_loss = 0 193 | 194 | 195 | def train(self,n_epochs=100,n_epochs_sad=10): 196 | # optional deep sad pretraining 197 | self.train_sad(n_epochs_sad) 198 | 199 | self.scores = distance_from_c(self.x_tensor,self.model,self.c) 200 | 201 | self.validation_history = [] 202 | n_steps = 0 203 | num_normals = int(self.batch_size*0.4) 204 | num_anomalies = int(self.batch_size*0.4) 205 | num_unlabeled = int(self.batch_size*0.2) # or 0.1? 
206 | 207 | for epoch in range(n_epochs): 208 | for _ in range(self.x.shape[0]//self.batch_size): 209 | idx_normal = np.random.choice(self.index_n,num_normals) 210 | idx_anomaly = np.random.choice(self.index_a,num_anomalies) 211 | idx_unlabeled = np.random.choice(self.index_u,num_unlabeled) #self.get_lowest_scores(num_unlabeled) 212 | x_normal = self.x[idx_normal] 213 | x_anomaly = self.x[idx_anomaly] 214 | x_unlabeled = self.x[idx_unlabeled] 215 | x_batch = np.concatenate((x_normal,x_anomaly,x_unlabeled),axis=0) 216 | 217 | y = np.concatenate(( 218 | self.y[idx_normal]+self.scores[idx_normal], 219 | self.y[idx_anomaly]+self.scores[idx_anomaly], 220 | np.full(num_unlabeled,-1)+self.scores[idx_unlabeled], 221 | )).reshape(-1,1) 222 | 223 | 224 | x_batch = torch.tensor(x_batch,dtype=torch.float32,device=self.device) 225 | y = torch.tensor(y,dtype=torch.float32,device=self.device) 226 | 227 | # forward pass 228 | y_pred = self.model(x_batch) 229 | # compute loss 230 | loss = self.criterion(y_pred,y) 231 | # backward pass 232 | self.optimizer.zero_grad() 233 | loss.backward() 234 | self.optimizer.step() 235 | n_steps += 1 236 | if (n_steps) % self.validation_step == 0: 237 | self.validation_history.append(self.test()) 238 | if epoch==0: 239 | self.steps_per_epoch = n_steps 240 | if (epoch+1) % 1 == 0: 241 | self.update_labels() 242 | self.plot_results() 243 | def plot_results(self): 244 | x_axis = np.arange(0,len(self.validation_history))*self.validation_step/self.steps_per_epoch 245 | plt.plot(x_axis,self.validation_history) 246 | plt.show() 247 | 248 | 249 | def update_labels(self): 250 | self.y[self.changed] = 0 251 | self.changed = [] 252 | self.iter+=1 253 | 254 | self.index_a = np.argwhere(self.y==1).reshape(-1) 255 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 256 | self.index_u = np.argwhere(self.y==0).reshape(-1) 257 | 258 | scores = self.model(self.x_tensor).cpu().detach().numpy().reshape(-1) 259 | arg_scores = np.argsort(scores) 260 | arg_scores = arg_scores[np.isin(arg_scores,self.index_u)] 261 | P = 100 262 | k = int(P+self.iter*P) # 100 for unsbs 263 | top_k = arg_scores[-k:] 264 | # add only if the score of the top k is distant enough from 0 265 | self.relabeling_accuracy = [] 266 | for i in top_k: 267 | if scores[i] >= 0.8: 268 | #if train_labels[i]!=3: 269 | # self.relabeling_accuracy.append(1) 270 | #else: self.relabeling_accuracy.append(0) 271 | 272 | self.y[i]= 1 273 | self.changed.append(i) 274 | 275 | 276 | #bottom_k = arg_scores[:k] 277 | 278 | #self.y[bottom_k] = -1 279 | 280 | # update indeces 281 | self.index_a = np.argwhere(self.y==1).reshape(-1) 282 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 283 | self.index_u = np.argwhere(self.y==0).reshape(-1) 284 | 285 | 286 | def test(self): 287 | dataset = self.test_set 288 | self.model.eval() 289 | with torch.no_grad(): 290 | test_X, test_y=dataset[:,:-1], dataset[:,-1] 291 | pred_y=self.model(test_X).cpu().detach().numpy() 292 | 293 | roc = roc_auc_score(test_y, pred_y) 294 | pr = average_precision_score(test_y, pred_y) 295 | 296 | self.model.train() 297 | return pr,roc 298 | -------------------------------------------------------------------------------- /dran/DRAN_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | from DRAN import DRAN 6 | 7 | 8 | BASE_PATH = '../DPLAN/data.nosync/preprocessed/UNSW-NB15/' 9 | TEST_PATH = os.path.join(BASE_PATH,'test_for_all.csv') 10 | 11 | # you can play 
with the config 12 | config = { 13 | 'batch_size': 32, 14 | 'lr': 1e-4, 15 | 'sad_lr': 1e-3, 16 | 'validation_step' : 100, 17 | 'update_step' : 1, 18 | } 19 | c = None # you need to pre compute the c 20 | 21 | 22 | if not os.path.exists('results_sad_no_score.csv'): 23 | with open('results_sad_no_score.csv','w') as f: 24 | f.write('version,dataset,subset,pr_mean,pr_std,roc_mean,roc_std\n') 25 | 26 | 27 | data_list = ['Analysis','Backdoors','DoS','Exploits','Fuzzers','Generic','Reconnaissance'] 28 | 29 | dran = None 30 | means=[] 31 | num_runs = 1 32 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 33 | for x in data_list: 34 | outs_pr = [] 35 | outs_roc = [] 36 | for i in range(num_runs): 37 | train_path = os.path.join(BASE_PATH,x+'_0.02_60.csv') 38 | test_path = os.path.join(BASE_PATH,'test_for_all.csv') 39 | train_labels = pd.read_csv(os.path.join(BASE_PATH,x+'_0.02_60.csv')).values 40 | X = pd.read_csv(train_path).values 41 | test = pd.read_csv(test_path).values 42 | dran = DRAN( 43 | train_set=X.copy(), 44 | test_set=test.copy(), 45 | config=config, 46 | device=device, 47 | c = c 48 | ) 49 | dran.train(n_epochs=5,n_epochs_sad=20) # play with the epochs 50 | 51 | out,roc,ms = dran.test_final() 52 | means.append(ms) 53 | outs_pr.append(out) 54 | outs_roc.append(roc) 55 | print(out,roc) 56 | print(np.unique(np.array(dran.relabeling_accuracy),return_counts=True)) 57 | print("-----------------------------") 58 | # print the mean of the results 59 | print(f'mean pr :{np.mean(outs_pr)}') 60 | print(f'mean roc: {np.mean(outs_roc)}') 61 | # print the std of the results 62 | print(f'std pr :{np.std(outs_pr)}') 63 | print(f'std roc: {np.std(outs_roc)}') 64 | # save to results.csv 65 | with open('results_sad_no_score.csv', 'a') as f: 66 | f.write(f'regression,UNSW-NB15,{x},{np.mean(outs_pr)},{np.std(outs_pr)},{np.mean(outs_roc)},{np.std(outs_roc)}\n') 67 | -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import time 3 | import numpy as np 4 | 5 | from gym import spaces 6 | 7 | class ADEnv(gym.Env): 8 | """ 9 | Customized environment for anomaly detection 10 | """ 11 | def __init__(self,dataset: np.ndarray,sampling_Du=1000,prob_au=0.5,label_normal=0,label_anomaly=1, name="default"): 12 | """ 13 | Initialize anomaly environment for DPLAN algorithm. 14 | :param dataset: Input dataset in the form of 2-D array. The Last column is the label. 15 | :param sampling_Du: Number of sampling on D_u for the generator g_u 16 | :param prob_au: Probability of performing g_a. 
17 | :param label_normal: label of normal instances 18 | :param label_anomaly: label of anomaly instances 19 | """ 20 | super().__init__() 21 | self.name=name 22 | 23 | # hyperparameters: 24 | self.num_S=sampling_Du 25 | self.normal=label_normal 26 | self.anomaly=label_anomaly 27 | self.prob=prob_au 28 | 29 | # Dataset infos: D_a and D_u 30 | self.m,self.n=dataset.shape 31 | self.n_feature=self.n-1 32 | self.n_samples=self.m 33 | self.x=dataset[:,:self.n_feature] 34 | self.y=dataset[:,self.n_feature] 35 | self.dataset=dataset 36 | self.index_u=np.where(self.y==self.normal)[0] 37 | self.index_a=np.where(self.y==self.anomaly)[0] 38 | self.index_n=np.where(self.y==2)[0] 39 | 40 | # observation space: 41 | self.observation_space=spaces.Discrete(self.m) 42 | 43 | # action space: 0 or 1 44 | self.action_space=spaces.Discrete(2) 45 | 46 | # initial state 47 | self.counts=None 48 | self.state=None 49 | self.DQN=None 50 | 51 | def generater_a(self, *args, **kwargs): 52 | # sampling function for D_a 53 | index=np.random.choice(self.index_a) 54 | 55 | return index 56 | 57 | def generater_n(self, *args, **kwargs): 58 | # sampling function for D_n 59 | index=np.random.choice(self.index_n) 60 | 61 | return index 62 | 63 | def generate_u(self,action,s_t): 64 | # sampling function for D_u 65 | S=np.random.choice(self.index_u,self.num_S) 66 | # calculate distance in the space of last hidden layer of DQN 67 | all_x=self.x[np.append(S,s_t)] 68 | 69 | all_dqn_s = self.DQN.get_latent(all_x) 70 | all_dqn_s = all_dqn_s.cpu().detach().numpy() 71 | dqn_s=all_dqn_s[:-1] 72 | dqn_st=all_dqn_s[-1] 73 | 74 | dist=np.linalg.norm(dqn_s-dqn_st,axis=1) 75 | 76 | if action==1: 77 | loc=np.argmin(dist) 78 | elif action==0: 79 | loc=np.argmax(dist) 80 | index=S[loc] 81 | 82 | return index 83 | 84 | def reward_h(self,action,s_t): 85 | # Anomaly-biased External Handcrafted Reward Function h 86 | if (action==1) & (s_t in self.index_a): 87 | return 1 88 | elif (action==0) & (s_t in self.index_n): 89 | return 1 90 | elif (action==0) & (s_t in self.index_u): 91 | return 0 92 | elif (action==1) & (s_t in self.index_u): 93 | return -0.5 94 | return -1 95 | 96 | def step(self,action): 97 | self.state = int(self.state) 98 | # store former state 99 | s_t=self.state 100 | # choose generator 101 | 102 | g=np.random.choice([self.generater_a, self.generate_u, self.generater_n],p=[0.4,0.2,0.4]) 103 | s_tp1=g(action,s_t) 104 | 105 | # change to the next state 106 | self.state=s_tp1 107 | self.state = int(self.state) 108 | self.counts+=1 109 | 110 | # calculate the reward 111 | reward=self.reward_h(action,s_t) 112 | 113 | # done: whether terminal or not 114 | done=False 115 | 116 | # info 117 | info={"State t":s_t, "Action t": action, "State t+1":s_tp1} 118 | 119 | return self.state, reward, done, info 120 | 121 | def reset(self): 122 | # reset the status of environment 123 | self.counts=0 124 | # the first observation is uniformly sampled from the D_u 125 | self.state=np.random.choice(self.index_u) 126 | 127 | return self.state -------------------------------------------------------------------------------- /imgs/general_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teo-sl/DPLAN_pytorch/4d1b12580963e8862ee81c0424ba2c861269fe2a/imgs/general_arch.png -------------------------------------------------------------------------------- /imgs/regr_training.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/teo-sl/DPLAN_pytorch/4d1b12580963e8862ee81c0424ba2c861269fe2a/imgs/regr_training.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from util import hyper, write_results
from env import ADEnv
from DPLAN import DPLAN
import torch
import os
import pandas as pd


BASE_PATH = './data.nosync/preprocessed'

subsets = {
    'UNSW-NB15' : ['Fuzzers','Analysis','Backdoors','DoS','Exploits','Generic','Reconnaissance'],
}
datasets = subsets.keys()
TEST_NAME = 'test_for_all.csv'
VALIDATION_NAME = 'validation_for_all.csv'
LABEL_NORMAL = 0
LABEL_ANOMALY = 1
CONTAMINATION_RATE = hyper['contamination_rate']
NUM_ANOMALY_KNOWS = hyper['num_anomaly_knows']
NUM_RUNS = hyper['runs']

MODELS_PATH = 'models/'
RESULTS_PATH = 'results'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results_filename = os.path.join(RESULTS_PATH, 'results.csv')
if not os.path.exists(MODELS_PATH):
    os.makedirs(MODELS_PATH)
if not os.path.exists(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)
with open(results_filename, 'w') as f:
    f.write('dataset,subset,pr_mean,pr_std,roc_mean,roc_std\n')


for dataset in datasets:
    test_path = os.path.join(BASE_PATH, dataset, TEST_NAME)
    test_set = pd.read_csv(test_path).values

    for subset in subsets[dataset]:
        data_path = os.path.join(BASE_PATH, dataset, subset)+f'_{CONTAMINATION_RATE}_{NUM_ANOMALY_KNOWS}.csv'
        training_set = pd.read_csv(data_path).values

        pr_auc_history = []
        roc_auc_history = []

        for i in range(NUM_RUNS):
            print(f'Running {dataset} {subset} {i}...')
            model_id = f'_{CONTAMINATION_RATE}_{NUM_ANOMALY_KNOWS}_run_{i}'

            env = ADEnv(
                dataset=training_set,
                sampling_Du=hyper['sampling_du'],
                prob_au=hyper['prob_au'],
                label_normal=LABEL_NORMAL,
                label_anomaly=LABEL_ANOMALY
            )

            dplan = DPLAN(
                env=env,
                test_set=test_set,
                destination_path=MODELS_PATH,
                device=device,
                double_dqn=False
            )
            dplan.fit(reset_nets=True)
            dplan.show_results()
            roc, pr = dplan.model_performance()
            print(f'Finished run {i} with pr: {pr} and auc-roc: {roc}...')
            pr_auc_history.append(pr)
            roc_auc_history.append(roc)

            destination_filename = subset+'_'+model_id+'.pth'
            dplan.save_model(destination_filename)
            print()
            print('--------------------------------------------------\n')

        print(f'Finished {dataset} {subset}...')
        print('--------------------------------------------------\n')
        write_results(pr_auc_history, roc_auc_history, dataset, subset, results_filename)

--------------------------------------------------------------------------------
/preprocessing/preproc_unsw.py:
--------------------------------------------------------------------------------
# IMPORT
import pandas as pd
import os
import glob
from sklearn.model_selection import train_test_split

# GLOBAL VARIABLES

DEST_PATH = 'path/to/dest'
DATA_PATH = 'path/to/data'
CONTAMINATION_RATE = 0.02
NUM_KNOWS = 60
D_U_SIZE = 1900


ANOMALIES = ['Generic', 'Exploits',
'Fuzzers', 'DoS', 'Reconnaissance', 'Analysis', 'Backdoors'] 17 | AT_DICT = { 18 | 'Normal' : 0, 19 | 'Generic' : 1, 20 | 'Exploits' : 2, 21 | 'Fuzzers' : 3, 22 | 'DoS' : 4, 23 | 'Reconnaissance' : 5, 24 | 'Analysis' : 6, 25 | 'Backdoors' : 7, 26 | } 27 | 28 | # PREPROCESSING 29 | 30 | # read the features file 31 | features_df = pd.read_csv(os.path.join(DATA_PATH,'NUSW-NB15_features.csv'),encoding='cp1252') 32 | 33 | # get features names 34 | features_name = features_df['Name'].tolist() 35 | 36 | 37 | # types management 38 | type_conversion = {} 39 | for x in features_df.values: 40 | type_conversion[x[1]] = x[2] 41 | 42 | for x in type_conversion.keys(): 43 | value = type_conversion[x] 44 | if value == 'nominal': 45 | dst_value = 'str' 46 | elif value == 'integer' or value == 'Timestamp': 47 | dst_value = 'int64' 48 | elif value == 'Float': 49 | dst_value = 'float64' 50 | elif value == 'Binary' or value == 'binary': 51 | dst_value = 'bool' 52 | type_conversion[x]=dst_value 53 | 54 | 55 | 56 | # read the data 57 | files = glob.glob(DATA_PATH+'/*_[1-4].csv') 58 | dfs = [] 59 | for f in files: 60 | dfs.append(pd.read_csv(f,names=features_name)) 61 | df = pd.concat(dfs, ignore_index=True) 62 | 63 | # drop the useless columns 64 | columns_to_drop = ['srcip', 'sport', 'dstip', 'dsport'] 65 | df = df.drop(columns_to_drop, axis=1) 66 | 67 | 68 | # trim every element in attack_cat column 69 | df['attack_cat'] = df['attack_cat'].str.strip() 70 | # rename Backdoor to Backdoors 71 | df['attack_cat'] = df['attack_cat'].replace('Backdoor','Backdoors') 72 | 73 | 74 | # get the unique values of attack_cat column || optional 75 | # types_of_attack = df['attack_cat'].unique() 76 | 77 | df.loc[df['Label'] == 0, 'attack_cat'] = 'Normal' 78 | 79 | 80 | # delete unused anomalies 81 | df_dst = df[df['attack_cat'] != 'Worms'] 82 | df_dst= df_dst[df_dst['attack_cat'] != 'Shellcode'] 83 | 84 | 85 | # this anomalies are overrepresented 86 | to_be_sampled = ['DoS','Exploits','Fuzzers','Generic','Reconnaissance','Normal'] 87 | df_sampled = [] 88 | for attack in to_be_sampled: 89 | if attack == 'Normal': 90 | df_sampled.append(df_dst[df_dst['attack_cat'] == attack].sample(n=93_000, random_state=42)) 91 | else: 92 | df_sampled.append(df_dst[df_dst['attack_cat'] == attack].sample(n=3000, random_state=42)) 93 | 94 | 95 | # get all the rows that are not in to_be_sampled list 96 | df_not_sampled = df_dst[~df_dst['attack_cat'].isin(to_be_sampled)] 97 | 98 | # concat all the sampled dataframes 99 | df_sampled = pd.concat(df_sampled, ignore_index=True) 100 | 101 | # concat the sampled and not sampled dataframes 102 | df_sampled = pd.concat([df_sampled, df_not_sampled], ignore_index=True) 103 | 104 | 105 | # convert to numeric values 106 | df_sampled['ct_ftp_cmd'] = pd.to_numeric(df_sampled['ct_ftp_cmd'],errors='coerce') 107 | 108 | # replace the missing values with the mean of the target column 109 | df_sampled['ct_ftp_cmd'] = df_sampled['ct_ftp_cmd'].fillna(df_sampled['ct_ftp_cmd'].mean()) 110 | df_sampled['ct_flw_http_mthd'] = df_sampled['ct_flw_http_mthd'].fillna(df_sampled['ct_flw_http_mthd'].mean()) 111 | df_sampled['is_ftp_login'] = df_sampled['is_ftp_login'].fillna(df_sampled['is_ftp_login'].mean()) 112 | 113 | 114 | # one hot encoding 115 | ohe_columns = ['proto','state','service'] 116 | df_ohe = pd.get_dummies(df_sampled, columns=ohe_columns) 117 | 118 | 119 | 120 | # convert the attack_cat column to numeric values using the AT_DICT 121 | df_ohe['attack_cat'] = df_ohe['attack_cat'].map(AT_DICT) 122 | 
df_ohe['attack_cat'] = df_ohe['attack_cat'].astype(int) 123 | 124 | # normalize the data between 0 and 1 except the attack_cat column 125 | for col in df_ohe.columns: 126 | if col != 'attack_cat': 127 | df_ohe[col] = (df_ohe[col] - df_ohe[col].min()) / (df_ohe[col].max() - df_ohe[col].min()) 128 | 129 | # CREATE THE DATASET 130 | 131 | 132 | df = df_ohe 133 | train, test = train_test_split(df, test_size=0.2, random_state=42) 134 | 135 | # use it for debug purpose 136 | train['Label'].value_counts() 137 | 138 | 139 | # TRAINING SET 140 | 141 | df = train 142 | 143 | df_normal = df[df['attack_cat'] == AT_DICT['Normal']] 144 | 145 | # Adjust according to your needs 146 | df_normal.loc[df_normal.sample(n=100, random_state=42).index, 'Label'] = 2 147 | num_for_each_attack =int((df.shape[0]*0.02))//7 148 | 149 | for attack_type in AT_DICT.keys(): 150 | 151 | if attack_type == 'Normal': 152 | continue 153 | 154 | name = '{}_{}_{}.csv'.format(attack_type, CONTAMINATION_RATE, NUM_KNOWS) 155 | file_name = os.path.join(DEST_PATH, name) 156 | 157 | df_attack_sample = df[df['attack_cat'] == AT_DICT[attack_type]].sample(n=NUM_KNOWS, random_state=42) 158 | # get all rows that are not Normal and not the attack type 159 | d_u = df[(df['attack_cat'] != AT_DICT['Normal'])] 160 | d_us = [] 161 | for sub_attack in AT_DICT.keys(): 162 | if sub_attack=='Normal': 163 | continue 164 | d_us_i = d_u[d_u['attack_cat'] == AT_DICT[sub_attack]].sample(n=num_for_each_attack, random_state=42) 165 | d_us.append(d_us_i) 166 | 167 | d_u = pd.concat(d_us, ignore_index=True) 168 | d_u['Label']=0.0 169 | 170 | df_attack = pd.concat([df_attack_sample, d_u, df_normal], ignore_index=True) 171 | print(df_attack['attack_cat'].value_counts()) 172 | # remove the attack_cat column 173 | df_attack = df_attack.drop(columns=['attack_cat']) 174 | print(df_attack['Label'].value_counts()) 175 | # move the Label column to the end 176 | cols = list(df_attack.columns.values) 177 | cols.pop(cols.index('Label')) 178 | df_attack = df_attack[cols+['Label']] 179 | # save the file 180 | df_attack.to_csv(file_name, index=False) 181 | 182 | # TEST SET 183 | 184 | # Adjust according to your needs 185 | num_for_each_attack = int((test.shape[0]*0.03)//7) 186 | df_test = test 187 | 188 | # take the normal data 189 | df_test_normal = df_test[df_test['Label'] == 0] 190 | # take the anomaly data 191 | df_test_anomaly = df_test[df_test['Label'] == 1] 192 | attack_sampled = [] 193 | for anomaly_type in ANOMALIES: 194 | df_test_anomaly_i = df_test_anomaly[df_test_anomaly['attack_cat'] == AT_DICT[anomaly_type]].sample(n=num_for_each_attack, random_state=42) 195 | attack_sampled.append(df_test_anomaly_i) 196 | 197 | df_test_anomaly = pd.concat(attack_sampled, ignore_index=True) 198 | 199 | df_test_final = pd.concat([df_test_normal, df_test_anomaly],ignore_index=True) 200 | 201 | print(df_test_final['Label'].value_counts()) 202 | 203 | # save attack_cat column to csv 204 | df_test_final['attack_cat'].to_csv(os.path.join(DEST_PATH, 'y_test_for_all.csv'), index=False) 205 | # drop the attack column 206 | df_test_final = df_test_final.drop(['attack_cat'], axis=1) 207 | # pt Label column at the end 208 | cols = list(df_test_final.columns.values) 209 | cols.pop(cols.index('Label')) 210 | df_attack = df_test_final[cols+['Label']] 211 | 212 | file_name = os.path.join(DEST_PATH, 'test_for_all.csv') 213 | 214 | df_attack.to_csv(file_name, index=False) 215 | 216 | 217 | -------------------------------------------------------------------------------- /util.py: 
-------------------------------------------------------------------------------- 1 | from sklearn.ensemble import IsolationForest 2 | import torch 3 | from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix,roc_curve 4 | from sklearn.metrics import PrecisionRecallDisplay 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | 9 | hyper = { 10 | 'base_save_path': 'models/', 11 | 'n_episodes': 6, 12 | 'steps_per_episode' : 2000, 13 | 'max_memory' : 100_000, 14 | 'eps_max' : 1, 15 | 'eps_min' : 0.1, 16 | 'eps_decay' : 10_000, 17 | 'hidden_size' : 20, 18 | 'learning_rate' : 0.25e-4, 19 | 'momentum' : 0.95, 20 | 'min_squared_gradient' : 0.01, 21 | 'warmup_steps' : 100, 22 | 'gamma' : 0.99, 23 | 'batch_size' : 64, 24 | 'target_update' : 5_000, 25 | 'theta_update' : 2_000, 26 | 'num_anomaly_knows' : 60, 27 | 'contamination_rate' : 0.02, 28 | 'runs' : 1, 29 | 'S_size' : 2000, 30 | 'sampling_du' : 1000, 31 | 'prob_au' : 0.4, 32 | 'validation_frequency' : 100, 33 | 'weight_decay' : 1e-3, 34 | } 35 | 36 | 37 | def DQN_iforest(x, model): 38 | # iforest function on the penuli-layer space of DQN 39 | 40 | # get the output of penulti-layer 41 | latent_x=model.get_latent(x) 42 | latent_x=latent_x.cpu().detach().numpy() 43 | # calculate anomaly scores in the latent space 44 | iforest=IsolationForest().fit(latent_x) 45 | scores = iforest.decision_function(latent_x) 46 | # normalize the scores 47 | norm_scores = np.array([-1*s+0.5 for s in scores]) 48 | return norm_scores 49 | 50 | def get_total_reward(reward_e,intrinsic_rewards,s_t,write_rew=False): 51 | reward_i = intrinsic_rewards[s_t] 52 | if write_rew: 53 | write_reward('./results/rewards.csv',reward_i,reward_e) 54 | return reward_e + reward_i 55 | 56 | def plot_roc_pr(test_set,policy_net): 57 | test_X, test_y=test_set[:,:-1], test_set[:,-1] 58 | pred_y=policy_net(test_X).detach().numpy()[:,1] 59 | fpr, tpr, _ = roc_curve(test_y, pred_y) 60 | plt.plot(fpr, tpr) 61 | plt.show() 62 | 63 | display = PrecisionRecallDisplay.from_predictions(test_y, pred_y, name="DQN") 64 | _ = display.ax_.set_title("2-class Precision-Recall curve") 65 | 66 | 67 | def test_model(test_set,policy_net): 68 | policy_net.eval() 69 | test_X, test_y=test_set[:,:-1], test_set[:,-1] 70 | pred_y=policy_net(test_X).detach().cpu().numpy()[:,1] 71 | 72 | roc = roc_auc_score(test_y, pred_y) 73 | pr = average_precision_score(test_y, pred_y) 74 | policy_net.train() 75 | return roc,pr 76 | 77 | def count_parameters(model): 78 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 79 | 80 | def write_results(pr_auc_history,roc_auc_history,dataset,subset,path): 81 | pr_auc_history = np.array(pr_auc_history) 82 | roc_auc_history = np.array(roc_auc_history) 83 | pr_mean = np.mean(pr_auc_history) 84 | auc_mean = np.mean(roc_auc_history) 85 | pr_std = np.std(pr_auc_history) 86 | auc_std = np.std(roc_auc_history) 87 | line = f'{dataset},{subset},{pr_mean},{pr_std},{auc_mean},{auc_std}\n' 88 | 89 | with open(path, 'a') as f: 90 | f.write(line) 91 | 92 | def write_reward(path,r_i,r_e): 93 | with open(path, 'a') as f: 94 | f.write(f'{r_i},{r_e},') 95 | 96 | 97 | --------------------------------------------------------------------------------