├── DPLAN.py
├── LICENSE
├── Readme.md
├── dran
│   ├── DRAN.py
│   └── DRAN_test.py
├── env.py
├── imgs
│   ├── general_arch.png
│   └── regr_training.png
├── main.py
├── preprocessing
│   └── preproc_unsw.py
└── util.py

/DPLAN.py:
--------------------------------------------------------------------------------
from collections import namedtuple, deque
import random
import math
import numpy as np
import matplotlib.pyplot as plt
import os

from util import hyper, DQN_iforest, get_total_reward, test_model
from env import ADEnv

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

torch.manual_seed(42)
random.seed(42)
np.random.seed(42)


Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'state_index', 'next_state_index'))

class DPLAN():
    """
    DPLAN agent that encapsulates the training and testing of the DQN
    """
    def __init__(self, env: ADEnv, test_set, destination_path, device='cpu', double_dqn=True):
        """
        Initialize the DPLAN agent
        :param env: the environment
        :param test_set: the test set
        :param destination_path: the path where to save the model
        :param device: the device to use for training
        :param double_dqn: whether to use the double DQN variant
        """
        self.double_dqn = double_dqn
        self.test_set = test_set
        self.device = device
        self.env = env

        if not os.path.exists(destination_path):
            raise ValueError('destination path does not exist')

        self.destination_path = destination_path

        # tensor representation of the dataset, used in the intrinsic reward
        self.x_tensor = torch.tensor(env.x, dtype=torch.float32, device=device)

        # hyperparameters setup
        self.hidden_size = hyper['hidden_size']
        self.BATCH_SIZE = hyper['batch_size']
        self.GAMMA = hyper['gamma']
        self.EPS_START = hyper['eps_max']
        self.EPS_END = hyper['eps_min']
        self.EPS_DECAY = hyper['eps_decay']
        self.LR = hyper['learning_rate']
        self.momentum = hyper['momentum']
        self.min_squared_gradient = hyper['min_squared_gradient']
        self.num_episodes = hyper['n_episodes']
        self.num_warmup_steps = hyper['warmup_steps']//hyper['steps_per_episode']
        self.steps_per_episode = hyper['steps_per_episode']
        self.max_memory_size = hyper['max_memory']
        self.target_update = hyper['target_update']
        self.validation_frequency = hyper['validation_frequency']
        self.theta_update = hyper['theta_update']
        self.weight_decay = hyper['weight_decay']

        # n actions and n observations
        self.n_actions = env.action_space.n
        self.n_observations = env.n_feature

        # resetting the agent
        self.reset_nets()

        # resetting agent's memory
        self.reset_memory()

        # resetting counters
        self.reset_counters()


    def reset_memory(self):
        self.memory = ReplayMemory(self.max_memory_size)

    def reset_counters(self):
        # training counters and utils
        self.num_steps_done = 0
        self.episodes_total_reward = []
        self.pr_auc_history = []
        self.roc_auc_history = []
        self.best_pr = None

    def reset_nets(self):
        # net definition
        self.policy_net = DQN(self.n_observations, self.hidden_size, self.n_actions, device=self.device).to(self.device)
        # not sure if this works
        #self.policy_net._initialize_weights()
        self.target_net = DQN(self.n_observations,
self.hidden_size, self.n_actions, device = self.device).to(self.device) 100 | self.val_net = DQN(self.n_observations, self.hidden_size, self.n_actions, device = self.device).to(self.device) 101 | self.target_net.load_state_dict(self.policy_net.state_dict()) 102 | # set target net weights to 0 103 | with torch.no_grad(): 104 | for param in self.target_net.parameters(): 105 | param.zero_() 106 | 107 | # setting up the environment's DQN 108 | self.env.DQN = self.policy_net 109 | # setting up the environment's intrinsic reward as function of netwo rk's theta_e (i.e. the hidden layer) 110 | self.intrinsic_rewards = DQN_iforest(self.x_tensor, self.policy_net) 111 | 112 | # setting the rmsprop optimizer 113 | self.optimizer = optim.RMSprop( 114 | self.policy_net.parameters(), 115 | lr=self.LR, 116 | momentum = self.momentum, 117 | eps = self.min_squared_gradient, 118 | weight_decay = self.weight_decay 119 | ) 120 | 121 | def select_action(self,state,steps_done): 122 | """ 123 | Select an action using the epsilon-greedy policy 124 | :param state: the current state 125 | :param steps_done: the number of steps done 126 | :return: the action 127 | """ 128 | sample = random.random() 129 | eps_threshold = self.EPS_END + (self.EPS_START - self.EPS_END) * \ 130 | math.exp(-1. * steps_done / self.EPS_DECAY) 131 | steps_done += 1 132 | if sample > eps_threshold: 133 | with torch.no_grad(): 134 | # t.max(1) will return the largest column value of each row. 135 | # second column on max result is index of where max element was 136 | # found, so we pick action with the larger expected reward. 137 | return self.policy_net(state).max(1)[1].view(1, 1) 138 | else: 139 | return torch.tensor([[self.env.action_space.sample()]], device=self.device, dtype=torch.long) 140 | 141 | 142 | def optimize_model(self): 143 | """ 144 | Optimize the model using the replay memory 145 | """ 146 | if len(self.memory) < self.BATCH_SIZE: 147 | return 148 | transitions = self.memory.sample(self.BATCH_SIZE) 149 | 150 | # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for 151 | # detailed explanation). This converts batch-array of Transitions 152 | # to Transition of batch-arrays. 153 | batch = Transition(*zip(*transitions)) 154 | 155 | # Compute a mask of non-final states and concatenate the batch elements 156 | # (a final state would've been the one after which simulation ended) 157 | non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, 158 | batch.next_state)), device=self.device, dtype=torch.bool) 159 | non_final_next_states = torch.cat([s for s in batch.next_state 160 | if s is not None]) 161 | state_batch = torch.cat(batch.state) 162 | action_batch = torch.cat(batch.action) 163 | reward_batch = torch.cat(batch.reward) 164 | 165 | # Compute Q(s_t, a) - the model computes Q(s_t), then we select the 166 | # columns of actions taken. These are the actions which would've been taken 167 | # for each batch state according to policy_net 168 | state_action_values = self.policy_net(state_batch).gather(1, action_batch) 169 | 170 | # Compute V(s_{t+1}) for all next states. 171 | # Expected values of actions for non_final_next_states are computed based 172 | # on the "older" target_net; selecting their best reward with max(1)[0]. 173 | # This is merged based on the mask, such that we'll have either the expected 174 | # state value or 0 in case the state was final. 
        next_state_values = torch.zeros(self.BATCH_SIZE, device=self.device)
        with torch.no_grad():
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0]
        # Compute the expected Q values
        expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch

        # Compute Huber loss
        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        # In-place gradient clipping
        torch.nn.utils.clip_grad_value_(self.policy_net.parameters(), 100)
        self.optimizer.step()

    def warmup_steps(self):
        """
        Implement the warmup steps to fill the replay memory using random actions
        """
        for _ in range(self.num_warmup_steps):
            state = self.env.reset()
            obs_index = state
            state = torch.tensor(self.env.x[state,:], dtype=torch.float32, device=self.device).unsqueeze(0)
            for _ in range(self.steps_per_episode):
                action = np.random.randint(0,self.n_actions)
                observation, reward, _, _ = self.env.step(action)
                reward = get_total_reward(reward,self.intrinsic_rewards,obs_index)
                reward = torch.tensor([reward], device=self.device)
                # keep track of both the dataset index and the tensor of the new observation
                state_index = obs_index
                obs_index = observation
                observation = torch.tensor(self.env.x[observation,:], dtype=torch.float32, device=self.device).unsqueeze(0)
                next_state = observation
                # a Transition needs all six fields (including the two state indices), as in fit()
                self.memory.push(state, torch.tensor([[action]], device=self.device), next_state, reward, state_index, obs_index)
                state = next_state



    def fit(self, reset_nets=False):
        """
        Fit the model according to the dataset and hyperparameters. The policy network is
        periodically evaluated on the test set during training.
217 | :param reset_nets: whether to reset the networks 218 | """ 219 | 220 | # reset necessary variables 221 | self.reset_counters() 222 | self.reset_memory() 223 | if reset_nets: 224 | self.reset_nets() 225 | 226 | # perform warmup steps 227 | self.warmup_steps() 228 | 229 | 230 | for i_episode in range(self.num_episodes): 231 | # Initialize the environment and get it's state 232 | reward_history = [] 233 | state = self.env.reset() 234 | # mantain both the obervation as the dataset index and value 235 | state_index = state 236 | state = torch.tensor(self.env.x[state,:], dtype=torch.float32, device=self.device).unsqueeze(0) 237 | 238 | for t in range(self.steps_per_episode): 239 | self.num_steps_done += 1 240 | 241 | # select_action encapsulates the epsilon-greedy policy 242 | action = self.select_action(state,self.num_steps_done) 243 | 244 | observation, reward, _, _ = self.env.step(action.item()) 245 | #states.append((self.env.x[observation,:],action.item())) 246 | 247 | reward = get_total_reward(reward,self.intrinsic_rewards,state_index,write_rew=False) 248 | 249 | reward_history.append(reward) 250 | reward = torch.tensor([reward], dtype=torch.float32 ,device=self.device) 251 | obs_index = observation 252 | observation = torch.tensor(self.env.x[observation,:], dtype=torch.float32, device=self.device).unsqueeze(0) 253 | next_state = observation 254 | 255 | # Store the transition in memory 256 | self.memory.push(state, action, next_state, reward,state_index,obs_index) 257 | 258 | # Move to the next state 259 | state = next_state 260 | state_index = obs_index 261 | 262 | # Perform one step of the optimization (on the policy network) 263 | self.optimize_model() 264 | 265 | # update the target network 266 | if self.num_steps_done % self.target_update == 0: 267 | policy_net_state_dict = self.policy_net.state_dict() 268 | self.target_net.load_state_dict(policy_net_state_dict) 269 | # validation step 270 | if self.num_steps_done % self.validation_frequency == 0: 271 | auc, pr = test_model(self.test_set,self.policy_net) 272 | self.pr_auc_history.append(pr) 273 | self.roc_auc_history.append(auc) 274 | if self.num_steps_done % self.theta_update == 0: 275 | self.intrinsic_rewards = DQN_iforest(self.x_tensor, self.policy_net) 276 | 277 | # because the theta^e update is equal to the duration of the episode we can update the theta^e here 278 | self.episodes_total_reward.append(sum(reward_history)) 279 | 280 | # print the results at the end of the episode 281 | avg_reward = np.mean(reward_history) 282 | print('Episode: {} \t Steps: {} \t Average episode Reward: {}'.format(i_episode, t+1, avg_reward)) 283 | 284 | print('Complete') 285 | 286 | 287 | 288 | 289 | def save_model(self,model_name): 290 | """ 291 | Save the model 292 | :param model_name: name of the model 293 | """ 294 | file_path = os.path.join(self.destination_path,model_name) 295 | torch.save(self.val_net.state_dict(), file_path) 296 | 297 | def show_results(self): 298 | """ 299 | Show the results of the training 300 | """ 301 | 302 | # plot total reward, pr auc and roc auc history in subplots 303 | fig, axs = plt.subplots(3,1,figsize=(10,10)) 304 | axs[0].plot(self.episodes_total_reward) 305 | axs[0].set_title('Total reward per episode') 306 | axs[1].plot(self.pr_auc_history) 307 | axs[1].set_title('PR AUC per validation step') 308 | axs[2].plot(self.roc_auc_history) 309 | axs[2].set_title('ROC AUC per validation step') 310 | plt.show() 311 | 312 | 313 | def model_performance(self): 314 | """ 315 | Test the model 316 | :param on_test_set: 
whether to test on the test set or the validation set 317 | """ 318 | return test_model(self.test_set,self.policy_net) 319 | 320 | 321 | class ReplayMemory(object): 322 | """ 323 | Replay Memory implemented as a deque 324 | """ 325 | 326 | def __init__(self, capacity): 327 | self.memory = deque([], maxlen=capacity) 328 | 329 | def push(self, *args): 330 | """Save a transition""" 331 | self.memory.append(Transition(*args)) 332 | 333 | def sample(self, batch_size): 334 | return random.sample(self.memory, batch_size) 335 | 336 | def __len__(self): 337 | return len(self.memory) 338 | 339 | 340 | 341 | class DQN(nn.Module): 342 | """ 343 | Deep Q Network 344 | """ 345 | 346 | def __init__(self, n_observations,hidden_size, n_actions, device='cpu'): 347 | super(DQN, self).__init__() 348 | self.device = device 349 | self.latent = nn.Sequential( 350 | nn.Linear(n_observations,hidden_size), 351 | ) 352 | self.output_layer = nn.Linear(hidden_size,n_actions) 353 | 354 | def forward(self, x): 355 | if not isinstance(x,torch.Tensor): 356 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 357 | x = F.relu(self.latent(x)) 358 | return self.output_layer(x) 359 | 360 | 361 | def get_latent(self,x): 362 | """ 363 | Get the latent representation of the input using the latent layer 364 | """ 365 | self.eval() 366 | if not isinstance(x,torch.Tensor): 367 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 368 | 369 | with torch.no_grad(): 370 | latent_embs = F.relu(self.latent(x)) 371 | self.train() 372 | return latent_embs 373 | 374 | def predict_label(self,x): 375 | self.eval() 376 | """ 377 | Predict the label of the input as the argmax of the output layer 378 | """ 379 | if not isinstance(x,torch.Tensor): 380 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 381 | 382 | with torch.no_grad(): 383 | ret = torch.argmax(self.forward(x),axis = 1) 384 | self.train() 385 | return ret 386 | 387 | def _initialize_weights(self,): 388 | with torch.no_grad(): 389 | for m in self.modules(): 390 | if isinstance(m, nn.Linear): 391 | nn.init.normal_(m.weight, 0.0, 0.01) 392 | nn.init.constant_(m.bias, 0.0) 393 | 394 | def forward_latent(self,x): 395 | if not isinstance(x,torch.Tensor): 396 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 397 | latent = F.relu(self.latent(x)) 398 | out = self.output_layer(latent) 399 | return out,latent 400 | 401 | def get_latent_grad(self,x): 402 | if not isinstance(x,torch.Tensor): 403 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 404 | latent_embs = F.relu(self.latent(x)) 405 | return latent_embs 406 | 407 | 408 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Teodoro Sullazzo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
# DPLAN
## A PyTorch implementation
### Author: Teodoro Sullazzo

This repository contains an implementation of an anomaly detection method called DPLAN, which is based on the reinforcement learning framework. The method is described in the paper "Toward Deep Supervised Anomaly Detection: Reinforcement Learning from Partially Labeled Anomaly Data" by Pang et al. You can access the paper [here](https://arxiv.org/pdf/2009.06847.pdf).

There are some differences between the work presented here and the original work by Pang et al.:

- I introduced the set $D_n$, which contains known normal data.
- I used a different normalization for the Isolation Forest score.
- I changed how the extrinsic reward is computed.
- This implementation is based on the PyTorch framework.
- There are also some other minor changes.


The code in env.py is derived from code provided by the GitHub user lflfdxfn.


## UPDATE - 13/12/2023
Added preprocessing for the UNSW-NB15 dataset according to Pang et al.'s paper. It can be found in the preprocessing folder.


## DRAN

The repository also includes a new method based on DPLAN, called DRAN, that replaces the reinforcement learning component with a regression layer. A minimal usage sketch follows, and the method's workflow is illustrated in the figures below.
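The snippet below is a minimal sketch of how DRAN can be trained and evaluated, adapted from `dran/DRAN_test.py` (run it from inside the `dran/` folder so the import resolves). The CSV paths are placeholders, the run is kept on CPU, and the zero hypersphere center `c` is only a stand-in: for real experiments `c` should be precomputed as in Deep SAD.

```python
import pandas as pd
import torch
from DRAN import DRAN  # dran/DRAN.py

# config keys follow dran/DRAN_test.py
config = {
    'batch_size': 32,
    'lr': 1e-4,
    'sad_lr': 1e-3,
    'validation_step': 100,
    'update_step': 1,
}

# data preprocessed by preprocessing/preproc_unsw.py: features plus the label in the last column
train = pd.read_csv('path/to/Exploits_0.02_60.csv').values   # placeholder path
test = pd.read_csv('path/to/test_for_all.csv').values        # placeholder path

device = 'cpu'
# placeholder center of the Deep-SAD hypersphere in the 20-dimensional latent space
c = torch.zeros(20, device=device)

dran = DRAN(train_set=train.copy(), test_set=test.copy(), config=config, c=c, device=device)
dran.train(n_epochs=5, n_epochs_sad=20)
print(dran.test())  # (pr_auc, roc_auc) on the test set
```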
26 | 27 | ![General architecture](imgs/general_arch.png) 28 | 29 | ![Main network training](imgs/regr_training.png) 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /dran/DRAN.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from sklearn.metrics import roc_auc_score, average_precision_score 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.ensemble import IsolationForest 7 | 8 | import pandas as pd 9 | import os 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | np.random.seed(42) 15 | torch.manual_seed(42) 16 | 17 | 18 | # deep sad hyperparameters 19 | eta = 0.2 20 | eps = 1e-6 21 | train_labels = None 22 | 23 | 24 | 25 | ### Neural Network definition ### 26 | 27 | 28 | class DQN(nn.Module): 29 | """ 30 | Deep Q Network 31 | """ 32 | def __init__(self, n_observations,hidden_size, n_actions, device='cpu'): 33 | super(DQN, self).__init__() 34 | self.device = device 35 | bias = True 36 | self.latent = nn.Sequential( 37 | nn.Linear(n_observations,hidden_size,bias=bias), 38 | ) 39 | self.output_layer = nn.Linear(hidden_size,n_actions,bias=bias) 40 | 41 | def forward(self, x): 42 | if not isinstance(x,torch.Tensor): 43 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 44 | x = F.relu(self.latent(x)) 45 | return self.output_layer(x) 46 | def get_latent(self,x): 47 | """ 48 | Get the latent representation of the input using the latent layer 49 | """ 50 | self.eval() 51 | if not isinstance(x,torch.Tensor): 52 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 53 | 54 | with torch.no_grad(): 55 | latent_embs = F.relu(self.latent(x)) 56 | self.train() 57 | return latent_embs 58 | def predict_label(self,x): 59 | self.eval() 60 | """ 61 | Predict the label of the input as the argmax of the output layer 62 | """ 63 | if not isinstance(x,torch.Tensor): 64 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 65 | 66 | with torch.no_grad(): 67 | ret = torch.argmax(self.forward(x),axis = 1) 68 | self.train() 69 | return ret 70 | 71 | def _initialize_weights(self,): 72 | with torch.no_grad(): 73 | for m in self.modules(): 74 | if isinstance(m, nn.Linear): 75 | nn.init.normal_(m.weight, 0.0, 0.01) 76 | nn.init.constant_(m.bias, 0.0) 77 | 78 | def forward_latent(self,x): 79 | if not isinstance(x,torch.Tensor): 80 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 81 | latent = F.relu(self.latent(x)) 82 | out = self.output_layer(latent) 83 | return out,latent 84 | 85 | def get_latent_grad(self,x): 86 | if not isinstance(x,torch.Tensor): 87 | x = torch.as_tensor(x, dtype=torch.float32,device=self.device) 88 | latent_embs = F.relu(self.latent(x)) 89 | return latent_embs 90 | 91 | 92 | ### Utility functions ### 93 | 94 | def DQN_iforest(x, model): 95 | # iforest function on the penuli-layer space of DQN 96 | # get the output of penulti-layer 97 | latent_x=model.get_latent(x) 98 | latent_x=latent_x.cpu().detach().numpy() 99 | # calculate anomaly scores in the latent space 100 | iforest=IsolationForest().fit(latent_x) 101 | scores = -iforest.decision_function(latent_x) 102 | # normalize the scores 103 | norm_scores = (scores - scores.min()) / (scores.max() - scores.min()) 104 | #norm_scores = np.array([-1*s+0.5 for s in scores]) 105 | return norm_scores 106 | 107 | 108 | def distance_from_c(x,net,c): 109 | with torch.no_grad(): 110 | latent_x=net.get_latent(x) 111 | #dist = 
torch.abs(**2) 112 | dist = torch.sum((latent_x - c)**2, dim=1) 113 | dist = (dist - torch.min(dist)) / (torch.max(dist) - torch.min(dist)) 114 | dist = dist.cpu().detach().numpy() 115 | return dist 116 | 117 | 118 | def loss_sad(x,labels,c,eta,eps): 119 | labels = labels*(-1) 120 | dist = torch.sum((x - c) ** 2, dim=1) 121 | losses = torch.where(labels == 0, dist, eta * ((dist + eps) ** labels.float())) 122 | loss = torch.mean(losses) 123 | return loss 124 | 125 | def moving_average(x, w): 126 | return np.convolve(x, np.ones(w), 'valid') / w 127 | 128 | ### DRAN class ### 129 | 130 | class DRAN: 131 | def __init__(self,train_set,test_set,config,c,device='cpu'): 132 | """ 133 | c : the hypersphere's center according to Deep-SAD 134 | """ 135 | 136 | self.device = device 137 | 138 | self.x = train_set[:,:-1] 139 | self.x_tensor = torch.tensor(self.x,dtype=torch.float32,device=self.device) 140 | self.y = train_set[:,-1] 141 | self.y[self.y==2] = -1 142 | self.test_set = test_set 143 | 144 | self.relabeling_accuracy = [] 145 | self.changed = [] 146 | self.iter = -1 147 | 148 | self.model = DQN(self.x.shape[1],20,1).to(self.device) 149 | self.model._initialize_weights() 150 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config['lr']) #-3 for thyroid 151 | self.criterion = torch.nn.SmoothL1Loss() 152 | 153 | self.batch_size = config['batch_size'] 154 | self.validation_step = config['validation_step'] 155 | self.update_step = config['update_step'] 156 | 157 | self.validation_history = [] 158 | self.steps_per_epoch = None 159 | 160 | 161 | self.index_a = np.argwhere(self.y==1).reshape(-1) 162 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 163 | self.index_u = np.argwhere(self.y==0).reshape(-1) 164 | 165 | 166 | self.c = c 167 | 168 | self.sad_lr = config['sad_lr'] 169 | self.scores = None 170 | 171 | 172 | 173 | def train_sad(self,num_epochs=30,logs=True): 174 | sad_optimizer = torch.optim.Adam(self.model.latent.parameters(), lr=self.sad_lr, weight_decay=1e-6) 175 | y = torch.tensor(self.y, dtype=torch.float32, device=self.device) 176 | 177 | dataset = torch.utils.data.TensorDataset(self.x_tensor,y) 178 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True) 179 | 180 | for epoch in range(num_epochs): 181 | total_loss = 0 182 | for x,y in data_loader: 183 | sad_optimizer.zero_grad() 184 | latent = self.model.get_latent_grad(x) 185 | loss = loss_sad(latent,y,self.c,eta,eps) 186 | loss.backward() 187 | sad_optimizer.step() 188 | total_loss += loss.cpu().detach().item() 189 | loss_epoch = total_loss/len(data_loader) 190 | if logs and (epoch+1) % 5 == 0: 191 | print(f'epoch: {epoch} loss: {loss_epoch}') 192 | total_loss = 0 193 | 194 | 195 | def train(self,n_epochs=100,n_epochs_sad=10): 196 | # optional deep sad pretraining 197 | self.train_sad(n_epochs_sad) 198 | 199 | self.scores = distance_from_c(self.x_tensor,self.model,self.c) 200 | 201 | self.validation_history = [] 202 | n_steps = 0 203 | num_normals = int(self.batch_size*0.4) 204 | num_anomalies = int(self.batch_size*0.4) 205 | num_unlabeled = int(self.batch_size*0.2) # or 0.1? 
206 | 207 | for epoch in range(n_epochs): 208 | for _ in range(self.x.shape[0]//self.batch_size): 209 | idx_normal = np.random.choice(self.index_n,num_normals) 210 | idx_anomaly = np.random.choice(self.index_a,num_anomalies) 211 | idx_unlabeled = np.random.choice(self.index_u,num_unlabeled) #self.get_lowest_scores(num_unlabeled) 212 | x_normal = self.x[idx_normal] 213 | x_anomaly = self.x[idx_anomaly] 214 | x_unlabeled = self.x[idx_unlabeled] 215 | x_batch = np.concatenate((x_normal,x_anomaly,x_unlabeled),axis=0) 216 | 217 | y = np.concatenate(( 218 | self.y[idx_normal]+self.scores[idx_normal], 219 | self.y[idx_anomaly]+self.scores[idx_anomaly], 220 | np.full(num_unlabeled,-1)+self.scores[idx_unlabeled], 221 | )).reshape(-1,1) 222 | 223 | 224 | x_batch = torch.tensor(x_batch,dtype=torch.float32,device=self.device) 225 | y = torch.tensor(y,dtype=torch.float32,device=self.device) 226 | 227 | # forward pass 228 | y_pred = self.model(x_batch) 229 | # compute loss 230 | loss = self.criterion(y_pred,y) 231 | # backward pass 232 | self.optimizer.zero_grad() 233 | loss.backward() 234 | self.optimizer.step() 235 | n_steps += 1 236 | if (n_steps) % self.validation_step == 0: 237 | self.validation_history.append(self.test()) 238 | if epoch==0: 239 | self.steps_per_epoch = n_steps 240 | if (epoch+1) % 1 == 0: 241 | self.update_labels() 242 | self.plot_results() 243 | def plot_results(self): 244 | x_axis = np.arange(0,len(self.validation_history))*self.validation_step/self.steps_per_epoch 245 | plt.plot(x_axis,self.validation_history) 246 | plt.show() 247 | 248 | 249 | def update_labels(self): 250 | self.y[self.changed] = 0 251 | self.changed = [] 252 | self.iter+=1 253 | 254 | self.index_a = np.argwhere(self.y==1).reshape(-1) 255 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 256 | self.index_u = np.argwhere(self.y==0).reshape(-1) 257 | 258 | scores = self.model(self.x_tensor).cpu().detach().numpy().reshape(-1) 259 | arg_scores = np.argsort(scores) 260 | arg_scores = arg_scores[np.isin(arg_scores,self.index_u)] 261 | P = 100 262 | k = int(P+self.iter*P) # 100 for unsbs 263 | top_k = arg_scores[-k:] 264 | # add only if the score of the top k is distant enough from 0 265 | self.relabeling_accuracy = [] 266 | for i in top_k: 267 | if scores[i] >= 0.8: 268 | #if train_labels[i]!=3: 269 | # self.relabeling_accuracy.append(1) 270 | #else: self.relabeling_accuracy.append(0) 271 | 272 | self.y[i]= 1 273 | self.changed.append(i) 274 | 275 | 276 | #bottom_k = arg_scores[:k] 277 | 278 | #self.y[bottom_k] = -1 279 | 280 | # update indeces 281 | self.index_a = np.argwhere(self.y==1).reshape(-1) 282 | self.index_n = np.argwhere(self.y==-1).reshape(-1) 283 | self.index_u = np.argwhere(self.y==0).reshape(-1) 284 | 285 | 286 | def test(self): 287 | dataset = self.test_set 288 | self.model.eval() 289 | with torch.no_grad(): 290 | test_X, test_y=dataset[:,:-1], dataset[:,-1] 291 | pred_y=self.model(test_X).cpu().detach().numpy() 292 | 293 | roc = roc_auc_score(test_y, pred_y) 294 | pr = average_precision_score(test_y, pred_y) 295 | 296 | self.model.train() 297 | return pr,roc 298 | -------------------------------------------------------------------------------- /dran/DRAN_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | from DRAN import DRAN 6 | 7 | 8 | BASE_PATH = '../DPLAN/data.nosync/preprocessed/UNSW-NB15/' 9 | TEST_PATH = os.path.join(BASE_PATH,'test_for_all.csv') 10 | 11 | # you can play 
with the config 12 | config = { 13 | 'batch_size': 32, 14 | 'lr': 1e-4, 15 | 'sad_lr': 1e-3, 16 | 'validation_step' : 100, 17 | 'update_step' : 1, 18 | } 19 | c = None # you need to pre compute the c 20 | 21 | 22 | if not os.path.exists('results_sad_no_score.csv'): 23 | with open('results_sad_no_score.csv','w') as f: 24 | f.write('version,dataset,subset,pr_mean,pr_std,roc_mean,roc_std\n') 25 | 26 | 27 | data_list = ['Analysis','Backdoors','DoS','Exploits','Fuzzers','Generic','Reconnaissance'] 28 | 29 | dran = None 30 | means=[] 31 | num_runs = 1 32 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 33 | for x in data_list: 34 | outs_pr = [] 35 | outs_roc = [] 36 | for i in range(num_runs): 37 | train_path = os.path.join(BASE_PATH,x+'_0.02_60.csv') 38 | test_path = os.path.join(BASE_PATH,'test_for_all.csv') 39 | train_labels = pd.read_csv(os.path.join(BASE_PATH,x+'_0.02_60.csv')).values 40 | X = pd.read_csv(train_path).values 41 | test = pd.read_csv(test_path).values 42 | dran = DRAN( 43 | train_set=X.copy(), 44 | test_set=test.copy(), 45 | config=config, 46 | device=device, 47 | c = c 48 | ) 49 | dran.train(n_epochs=5,n_epochs_sad=20) # play with the epochs 50 | 51 | out,roc,ms = dran.test_final() 52 | means.append(ms) 53 | outs_pr.append(out) 54 | outs_roc.append(roc) 55 | print(out,roc) 56 | print(np.unique(np.array(dran.relabeling_accuracy),return_counts=True)) 57 | print("-----------------------------") 58 | # print the mean of the results 59 | print(f'mean pr :{np.mean(outs_pr)}') 60 | print(f'mean roc: {np.mean(outs_roc)}') 61 | # print the std of the results 62 | print(f'std pr :{np.std(outs_pr)}') 63 | print(f'std roc: {np.std(outs_roc)}') 64 | # save to results.csv 65 | with open('results_sad_no_score.csv', 'a') as f: 66 | f.write(f'regression,UNSW-NB15,{x},{np.mean(outs_pr)},{np.std(outs_pr)},{np.mean(outs_roc)},{np.std(outs_roc)}\n') 67 | -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import time 3 | import numpy as np 4 | 5 | from gym import spaces 6 | 7 | class ADEnv(gym.Env): 8 | """ 9 | Customized environment for anomaly detection 10 | """ 11 | def __init__(self,dataset: np.ndarray,sampling_Du=1000,prob_au=0.5,label_normal=0,label_anomaly=1, name="default"): 12 | """ 13 | Initialize anomaly environment for DPLAN algorithm. 14 | :param dataset: Input dataset in the form of 2-D array. The Last column is the label. 15 | :param sampling_Du: Number of sampling on D_u for the generator g_u 16 | :param prob_au: Probability of performing g_a. 
17 | :param label_normal: label of normal instances 18 | :param label_anomaly: label of anomaly instances 19 | """ 20 | super().__init__() 21 | self.name=name 22 | 23 | # hyperparameters: 24 | self.num_S=sampling_Du 25 | self.normal=label_normal 26 | self.anomaly=label_anomaly 27 | self.prob=prob_au 28 | 29 | # Dataset infos: D_a and D_u 30 | self.m,self.n=dataset.shape 31 | self.n_feature=self.n-1 32 | self.n_samples=self.m 33 | self.x=dataset[:,:self.n_feature] 34 | self.y=dataset[:,self.n_feature] 35 | self.dataset=dataset 36 | self.index_u=np.where(self.y==self.normal)[0] 37 | self.index_a=np.where(self.y==self.anomaly)[0] 38 | self.index_n=np.where(self.y==2)[0] 39 | 40 | # observation space: 41 | self.observation_space=spaces.Discrete(self.m) 42 | 43 | # action space: 0 or 1 44 | self.action_space=spaces.Discrete(2) 45 | 46 | # initial state 47 | self.counts=None 48 | self.state=None 49 | self.DQN=None 50 | 51 | def generater_a(self, *args, **kwargs): 52 | # sampling function for D_a 53 | index=np.random.choice(self.index_a) 54 | 55 | return index 56 | 57 | def generater_n(self, *args, **kwargs): 58 | # sampling function for D_n 59 | index=np.random.choice(self.index_n) 60 | 61 | return index 62 | 63 | def generate_u(self,action,s_t): 64 | # sampling function for D_u 65 | S=np.random.choice(self.index_u,self.num_S) 66 | # calculate distance in the space of last hidden layer of DQN 67 | all_x=self.x[np.append(S,s_t)] 68 | 69 | all_dqn_s = self.DQN.get_latent(all_x) 70 | all_dqn_s = all_dqn_s.cpu().detach().numpy() 71 | dqn_s=all_dqn_s[:-1] 72 | dqn_st=all_dqn_s[-1] 73 | 74 | dist=np.linalg.norm(dqn_s-dqn_st,axis=1) 75 | 76 | if action==1: 77 | loc=np.argmin(dist) 78 | elif action==0: 79 | loc=np.argmax(dist) 80 | index=S[loc] 81 | 82 | return index 83 | 84 | def reward_h(self,action,s_t): 85 | # Anomaly-biased External Handcrafted Reward Function h 86 | if (action==1) & (s_t in self.index_a): 87 | return 1 88 | elif (action==0) & (s_t in self.index_n): 89 | return 1 90 | elif (action==0) & (s_t in self.index_u): 91 | return 0 92 | elif (action==1) & (s_t in self.index_u): 93 | return -0.5 94 | return -1 95 | 96 | def step(self,action): 97 | self.state = int(self.state) 98 | # store former state 99 | s_t=self.state 100 | # choose generator 101 | 102 | g=np.random.choice([self.generater_a, self.generate_u, self.generater_n],p=[0.4,0.2,0.4]) 103 | s_tp1=g(action,s_t) 104 | 105 | # change to the next state 106 | self.state=s_tp1 107 | self.state = int(self.state) 108 | self.counts+=1 109 | 110 | # calculate the reward 111 | reward=self.reward_h(action,s_t) 112 | 113 | # done: whether terminal or not 114 | done=False 115 | 116 | # info 117 | info={"State t":s_t, "Action t": action, "State t+1":s_tp1} 118 | 119 | return self.state, reward, done, info 120 | 121 | def reset(self): 122 | # reset the status of environment 123 | self.counts=0 124 | # the first observation is uniformly sampled from the D_u 125 | self.state=np.random.choice(self.index_u) 126 | 127 | return self.state -------------------------------------------------------------------------------- /imgs/general_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/teo-sl/DPLAN_pytorch/4d1b12580963e8862ee81c0424ba2c861269fe2a/imgs/general_arch.png -------------------------------------------------------------------------------- /imgs/regr_training.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/teo-sl/DPLAN_pytorch/4d1b12580963e8862ee81c0424ba2c861269fe2a/imgs/regr_training.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
from util import hyper, write_results
from env import ADEnv
from DPLAN import DPLAN
import torch
import os
import pandas as pd


BASE_PATH = './data.nosync/preprocessed'

subsets = {
    'UNSW-NB15' : ['Fuzzers','Analysis','Backdoors','DoS','Exploits','Generic','Reconnaissance'],
}
datasets = subsets.keys()
TEST_NAME = 'test_for_all.csv'
VALIDATION_NAME = 'validation_for_all.csv'
LABEL_NORMAL = 0
LABEL_ANOMALY = 1
CONTAMINATION_RATE = hyper['contamination_rate']
NUM_ANOMALY_KNOWS = hyper['num_anomaly_knows']
NUM_RUNS = hyper['runs']

MODELS_PATH = 'models/'
RESULTS_PATH = 'results'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
results_filename = os.path.join(RESULTS_PATH, 'results.csv')
if not os.path.exists(MODELS_PATH):
    os.makedirs(MODELS_PATH)
if not os.path.exists(RESULTS_PATH):
    os.makedirs(RESULTS_PATH)
with open(results_filename, 'w') as f:
    f.write('dataset,subset,pr_mean,pr_std,roc_mean,roc_std\n')


for dataset in datasets:
    test_path = os.path.join(BASE_PATH, dataset, TEST_NAME)
    test_set = pd.read_csv(test_path).values

    for subset in subsets[dataset]:
        data_path = os.path.join(BASE_PATH, dataset, subset)+f'_{CONTAMINATION_RATE}_{NUM_ANOMALY_KNOWS}.csv'
        training_set = pd.read_csv(data_path).values

        pr_auc_history = []
        roc_auc_history = []

        for i in range(NUM_RUNS):
            print(f'Running {dataset} {subset} {i}...')
            model_id = f'_{CONTAMINATION_RATE}_{NUM_ANOMALY_KNOWS}_run_{i}'

            env = ADEnv(
                dataset=training_set,
                sampling_Du=hyper['sampling_du'],
                prob_au=hyper['prob_au'],
                label_normal=LABEL_NORMAL,
                label_anomaly=LABEL_ANOMALY
            )

            dplan = DPLAN(
                env=env,
                test_set=test_set,
                destination_path=MODELS_PATH,
                device=device,
                double_dqn=False
            )
            dplan.fit(reset_nets=True)
            dplan.show_results()
            roc, pr = dplan.model_performance()
            print(f'Finished run {i} with pr: {pr} and auc-roc: {roc}...')
            pr_auc_history.append(pr)
            roc_auc_history.append(roc)

            destination_filename = subset+'_'+model_id+'.pth'
            dplan.save_model(destination_filename)
            print()
            print('--------------------------------------------------\n')

        print(f'Finished {dataset} {subset}...')
        print('--------------------------------------------------\n')
        write_results(pr_auc_history, roc_auc_history, dataset, subset, results_filename)

--------------------------------------------------------------------------------
/preprocessing/preproc_unsw.py:
--------------------------------------------------------------------------------
# IMPORT
import pandas as pd
import os
import glob
from sklearn.model_selection import train_test_split

# GLOBAL VARIABLES

DEST_PATH = 'path/to/dest'
DATA_PATH = 'path/to/data'
CONTAMINATION_RATE = 0.02
NUM_KNOWS = 60
D_U_SIZE = 1900


ANOMALIES = ['Generic', 'Exploits',
'Fuzzers', 'DoS', 'Reconnaissance', 'Analysis', 'Backdoors'] 17 | AT_DICT = { 18 | 'Normal' : 0, 19 | 'Generic' : 1, 20 | 'Exploits' : 2, 21 | 'Fuzzers' : 3, 22 | 'DoS' : 4, 23 | 'Reconnaissance' : 5, 24 | 'Analysis' : 6, 25 | 'Backdoors' : 7, 26 | } 27 | 28 | # PREPROCESSING 29 | 30 | # read the features file 31 | features_df = pd.read_csv(os.path.join(DATA_PATH,'NUSW-NB15_features.csv'),encoding='cp1252') 32 | 33 | # get features names 34 | features_name = features_df['Name'].tolist() 35 | 36 | 37 | # types management 38 | type_conversion = {} 39 | for x in features_df.values: 40 | type_conversion[x[1]] = x[2] 41 | 42 | for x in type_conversion.keys(): 43 | value = type_conversion[x] 44 | if value == 'nominal': 45 | dst_value = 'str' 46 | elif value == 'integer' or value == 'Timestamp': 47 | dst_value = 'int64' 48 | elif value == 'Float': 49 | dst_value = 'float64' 50 | elif value == 'Binary' or value == 'binary': 51 | dst_value = 'bool' 52 | type_conversion[x]=dst_value 53 | 54 | 55 | 56 | # read the data 57 | files = glob.glob(DATA_PATH+'/*_[1-4].csv') 58 | dfs = [] 59 | for f in files: 60 | dfs.append(pd.read_csv(f,names=features_name)) 61 | df = pd.concat(dfs, ignore_index=True) 62 | 63 | # drop the useless columns 64 | columns_to_drop = ['srcip', 'sport', 'dstip', 'dsport'] 65 | df = df.drop(columns_to_drop, axis=1) 66 | 67 | 68 | # trim every element in attack_cat column 69 | df['attack_cat'] = df['attack_cat'].str.strip() 70 | # rename Backdoor to Backdoors 71 | df['attack_cat'] = df['attack_cat'].replace('Backdoor','Backdoors') 72 | 73 | 74 | # get the unique values of attack_cat column || optional 75 | # types_of_attack = df['attack_cat'].unique() 76 | 77 | df.loc[df['Label'] == 0, 'attack_cat'] = 'Normal' 78 | 79 | 80 | # delete unused anomalies 81 | df_dst = df[df['attack_cat'] != 'Worms'] 82 | df_dst= df_dst[df_dst['attack_cat'] != 'Shellcode'] 83 | 84 | 85 | # this anomalies are overrepresented 86 | to_be_sampled = ['DoS','Exploits','Fuzzers','Generic','Reconnaissance','Normal'] 87 | df_sampled = [] 88 | for attack in to_be_sampled: 89 | if attack == 'Normal': 90 | df_sampled.append(df_dst[df_dst['attack_cat'] == attack].sample(n=93_000, random_state=42)) 91 | else: 92 | df_sampled.append(df_dst[df_dst['attack_cat'] == attack].sample(n=3000, random_state=42)) 93 | 94 | 95 | # get all the rows that are not in to_be_sampled list 96 | df_not_sampled = df_dst[~df_dst['attack_cat'].isin(to_be_sampled)] 97 | 98 | # concat all the sampled dataframes 99 | df_sampled = pd.concat(df_sampled, ignore_index=True) 100 | 101 | # concat the sampled and not sampled dataframes 102 | df_sampled = pd.concat([df_sampled, df_not_sampled], ignore_index=True) 103 | 104 | 105 | # convert to numeric values 106 | df_sampled['ct_ftp_cmd'] = pd.to_numeric(df_sampled['ct_ftp_cmd'],errors='coerce') 107 | 108 | # replace the missing values with the mean of the target column 109 | df_sampled['ct_ftp_cmd'] = df_sampled['ct_ftp_cmd'].fillna(df_sampled['ct_ftp_cmd'].mean()) 110 | df_sampled['ct_flw_http_mthd'] = df_sampled['ct_flw_http_mthd'].fillna(df_sampled['ct_flw_http_mthd'].mean()) 111 | df_sampled['is_ftp_login'] = df_sampled['is_ftp_login'].fillna(df_sampled['is_ftp_login'].mean()) 112 | 113 | 114 | # one hot encoding 115 | ohe_columns = ['proto','state','service'] 116 | df_ohe = pd.get_dummies(df_sampled, columns=ohe_columns) 117 | 118 | 119 | 120 | # convert the attack_cat column to numeric values using the AT_DICT 121 | df_ohe['attack_cat'] = df_ohe['attack_cat'].map(AT_DICT) 122 | 
df_ohe['attack_cat'] = df_ohe['attack_cat'].astype(int) 123 | 124 | # normalize the data between 0 and 1 except the attack_cat column 125 | for col in df_ohe.columns: 126 | if col != 'attack_cat': 127 | df_ohe[col] = (df_ohe[col] - df_ohe[col].min()) / (df_ohe[col].max() - df_ohe[col].min()) 128 | 129 | # CREATE THE DATASET 130 | 131 | 132 | df = df_ohe 133 | train, test = train_test_split(df, test_size=0.2, random_state=42) 134 | 135 | # use it for debug purpose 136 | train['Label'].value_counts() 137 | 138 | 139 | # TRAINING SET 140 | 141 | df = train 142 | 143 | df_normal = df[df['attack_cat'] == AT_DICT['Normal']] 144 | 145 | # Adjust according to your needs 146 | df_normal.loc[df_normal.sample(n=100, random_state=42).index, 'Label'] = 2 147 | num_for_each_attack =int((df.shape[0]*0.02))//7 148 | 149 | for attack_type in AT_DICT.keys(): 150 | 151 | if attack_type == 'Normal': 152 | continue 153 | 154 | name = '{}_{}_{}.csv'.format(attack_type, CONTAMINATION_RATE, NUM_KNOWS) 155 | file_name = os.path.join(DEST_PATH, name) 156 | 157 | df_attack_sample = df[df['attack_cat'] == AT_DICT[attack_type]].sample(n=NUM_KNOWS, random_state=42) 158 | # get all rows that are not Normal and not the attack type 159 | d_u = df[(df['attack_cat'] != AT_DICT['Normal'])] 160 | d_us = [] 161 | for sub_attack in AT_DICT.keys(): 162 | if sub_attack=='Normal': 163 | continue 164 | d_us_i = d_u[d_u['attack_cat'] == AT_DICT[sub_attack]].sample(n=num_for_each_attack, random_state=42) 165 | d_us.append(d_us_i) 166 | 167 | d_u = pd.concat(d_us, ignore_index=True) 168 | d_u['Label']=0.0 169 | 170 | df_attack = pd.concat([df_attack_sample, d_u, df_normal], ignore_index=True) 171 | print(df_attack['attack_cat'].value_counts()) 172 | # remove the attack_cat column 173 | df_attack = df_attack.drop(columns=['attack_cat']) 174 | print(df_attack['Label'].value_counts()) 175 | # move the Label column to the end 176 | cols = list(df_attack.columns.values) 177 | cols.pop(cols.index('Label')) 178 | df_attack = df_attack[cols+['Label']] 179 | # save the file 180 | df_attack.to_csv(file_name, index=False) 181 | 182 | # TEST SET 183 | 184 | # Adjust according to your needs 185 | num_for_each_attack = int((test.shape[0]*0.03)//7) 186 | df_test = test 187 | 188 | # take the normal data 189 | df_test_normal = df_test[df_test['Label'] == 0] 190 | # take the anomaly data 191 | df_test_anomaly = df_test[df_test['Label'] == 1] 192 | attack_sampled = [] 193 | for anomaly_type in ANOMALIES: 194 | df_test_anomaly_i = df_test_anomaly[df_test_anomaly['attack_cat'] == AT_DICT[anomaly_type]].sample(n=num_for_each_attack, random_state=42) 195 | attack_sampled.append(df_test_anomaly_i) 196 | 197 | df_test_anomaly = pd.concat(attack_sampled, ignore_index=True) 198 | 199 | df_test_final = pd.concat([df_test_normal, df_test_anomaly],ignore_index=True) 200 | 201 | print(df_test_final['Label'].value_counts()) 202 | 203 | # save attack_cat column to csv 204 | df_test_final['attack_cat'].to_csv(os.path.join(DEST_PATH, 'y_test_for_all.csv'), index=False) 205 | # drop the attack column 206 | df_test_final = df_test_final.drop(['attack_cat'], axis=1) 207 | # pt Label column at the end 208 | cols = list(df_test_final.columns.values) 209 | cols.pop(cols.index('Label')) 210 | df_attack = df_test_final[cols+['Label']] 211 | 212 | file_name = os.path.join(DEST_PATH, 'test_for_all.csv') 213 | 214 | df_attack.to_csv(file_name, index=False) 215 | 216 | 217 | -------------------------------------------------------------------------------- /util.py: 
-------------------------------------------------------------------------------- 1 | from sklearn.ensemble import IsolationForest 2 | import torch 3 | from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix,roc_curve 4 | from sklearn.metrics import PrecisionRecallDisplay 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | 9 | hyper = { 10 | 'base_save_path': 'models/', 11 | 'n_episodes': 6, 12 | 'steps_per_episode' : 2000, 13 | 'max_memory' : 100_000, 14 | 'eps_max' : 1, 15 | 'eps_min' : 0.1, 16 | 'eps_decay' : 10_000, 17 | 'hidden_size' : 20, 18 | 'learning_rate' : 0.25e-4, 19 | 'momentum' : 0.95, 20 | 'min_squared_gradient' : 0.01, 21 | 'warmup_steps' : 100, 22 | 'gamma' : 0.99, 23 | 'batch_size' : 64, 24 | 'target_update' : 5_000, 25 | 'theta_update' : 2_000, 26 | 'num_anomaly_knows' : 60, 27 | 'contamination_rate' : 0.02, 28 | 'runs' : 1, 29 | 'S_size' : 2000, 30 | 'sampling_du' : 1000, 31 | 'prob_au' : 0.4, 32 | 'validation_frequency' : 100, 33 | 'weight_decay' : 1e-3, 34 | } 35 | 36 | 37 | def DQN_iforest(x, model): 38 | # iforest function on the penuli-layer space of DQN 39 | 40 | # get the output of penulti-layer 41 | latent_x=model.get_latent(x) 42 | latent_x=latent_x.cpu().detach().numpy() 43 | # calculate anomaly scores in the latent space 44 | iforest=IsolationForest().fit(latent_x) 45 | scores = iforest.decision_function(latent_x) 46 | # normalize the scores 47 | norm_scores = np.array([-1*s+0.5 for s in scores]) 48 | return norm_scores 49 | 50 | def get_total_reward(reward_e,intrinsic_rewards,s_t,write_rew=False): 51 | reward_i = intrinsic_rewards[s_t] 52 | if write_rew: 53 | write_reward('./results/rewards.csv',reward_i,reward_e) 54 | return reward_e + reward_i 55 | 56 | def plot_roc_pr(test_set,policy_net): 57 | test_X, test_y=test_set[:,:-1], test_set[:,-1] 58 | pred_y=policy_net(test_X).detach().numpy()[:,1] 59 | fpr, tpr, _ = roc_curve(test_y, pred_y) 60 | plt.plot(fpr, tpr) 61 | plt.show() 62 | 63 | display = PrecisionRecallDisplay.from_predictions(test_y, pred_y, name="DQN") 64 | _ = display.ax_.set_title("2-class Precision-Recall curve") 65 | 66 | 67 | def test_model(test_set,policy_net): 68 | policy_net.eval() 69 | test_X, test_y=test_set[:,:-1], test_set[:,-1] 70 | pred_y=policy_net(test_X).detach().cpu().numpy()[:,1] 71 | 72 | roc = roc_auc_score(test_y, pred_y) 73 | pr = average_precision_score(test_y, pred_y) 74 | policy_net.train() 75 | return roc,pr 76 | 77 | def count_parameters(model): 78 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 79 | 80 | def write_results(pr_auc_history,roc_auc_history,dataset,subset,path): 81 | pr_auc_history = np.array(pr_auc_history) 82 | roc_auc_history = np.array(roc_auc_history) 83 | pr_mean = np.mean(pr_auc_history) 84 | auc_mean = np.mean(roc_auc_history) 85 | pr_std = np.std(pr_auc_history) 86 | auc_std = np.std(roc_auc_history) 87 | line = f'{dataset},{subset},{pr_mean},{pr_std},{auc_mean},{auc_std}\n' 88 | 89 | with open(path, 'a') as f: 90 | f.write(line) 91 | 92 | def write_reward(path,r_i,r_e): 93 | with open(path, 'a') as f: 94 | f.write(f'{r_i},{r_e},') 95 | 96 | 97 | --------------------------------------------------------------------------------