├── requirements.txt
├── result
│   └── RandomForest.model
├── source
│   ├── train.py
│   ├── agent.py
│   ├── action_value.py
│   ├── detector.py
│   ├── env.py
│   ├── utils.py
│   └── start.py
└── README.md

/requirements.txt:
--------------------------------------------------------------------------------
1 | chainerrl
2 | numpy
3 | scikit-learn
4 | matplotlib
--------------------------------------------------------------------------------
/result/RandomForest.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JohnsonGeng/malicious_traffic/HEAD/result/RandomForest.model
--------------------------------------------------------------------------------
/source/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : train.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:23
9 | @Discription :
10 | 
11 | Main script: train and evaluate the detector on the selected feature subset
12 | 
13 | '''
14 | 
15 | # import lib
16 | from detector import Detector
17 | from utils import *
18 | 
19 | # NSL-KDD feature subsets selected by the reinforcement-learning agent
20 | RF_feature = [1, 2, 5, 22, 24, 25, 27, 35]
21 | DT_feature = [2, 4, 5, 24, 27, 30, 31, 33, 35, 36]
22 | 
23 | if __name__ == '__main__':
24 | 
25 |     # Load the data restricted to the selected features
26 |     dl = DataLoader(feature=RF_feature)
27 |     data, label = dl.load_data('KDDTrain+.csv')
28 |     # Preprocess the data (L2 normalisation, see utils.data_preprocessing)
29 |     data = data_preprocessing(data)
30 |     # Build a Random Forest detector
31 |     detector = Detector('RandomForest')
32 |     # Train it and collect the evaluation metrics
33 |     result = detector.train_and_test(data, label)
34 |     print('Training results:')
35 |     print(result)
36 |     # Save the trained model
37 |     detector.save_model()
38 | 
39 |     # Evaluate on the harder KDDTest-21 split
40 |     dl_evaluate = DataLoader(feature=RF_feature)
41 |     test_data, test_label = dl_evaluate.load_data('KDDTest-21.csv')
42 |     test_data = data_preprocessing(test_data)
43 |     eva_result = detector.test(test_data, test_label)
44 |     print('Test results:')
45 |     print(eva_result)
46 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### Dataset
2 | 
3 | The NSL-KDD dataset is used. It is stored in the data/detection folder as CSV files.
4 | 
5 | It contains the following four files:
6 | 
7 |     KDDTrain+.csv ———— full training set, normally used for training
8 |     KDDTrain+_20Percent.csv ———— 20% subset of the training set
9 |     KDDTest+.csv ———— test set, normally used for testing
10 |     KDDTest-21.csv ———— harder test set
11 | 
12 | 
13 | #### File structure (source folder)
14 | 
15 |     train: trains a detector directly on a fixed feature subset
16 |     utils: utility module for data loading, preprocessing and plotting
17 |     start: entry point of the reinforcement-learning search for feature/model combinations
18 |     detector: detector class implementing training, testing, saving and loading
19 |     env: environment from which the agent obtains its reward
20 |     agent: agent class used to build the DQN
21 |     action_value: action-value (Q value) handling used when selecting actions
22 | 
23 | 
24 | #### Input and output
25 | 
26 | Input: data and labels
27 | 
28 |     obtained via utils.DataLoader(feature list)
29 | 
30 |     DataLoader.data    training data restricted to the features in the list
31 |     DataLoader.label   labels
32 | 
33 | Output: a dictionary of the model's detection results on the traffic
34 | 
35 |     obtained via detector.Detector('learning algorithm')
36 | 
37 |     Detector.train_and_test(data, labels)   train and test
38 |     Detector.test(data, labels)             evaluate the model
39 | 
40 | 
41 | #### Best-performing model found by the reinforcement-learning search
42 | 
43 | Random Forest + 8 features (1, 2, 5, 22, 24, 25, 27, 35)
44 | 
45 | 
46 | 
47 | #### Other well-performing models found by the search, for reference
48 | 
49 |     Decision Tree + 10 features (39, 15, 4, 1, 21, 31, 34, 11, 37, 38)
50 |     KNN + 10 features (5, 24, 31, 2, 20, 4, 25, 11, 3, 22)
51 |     Bagging + 8 features (2, 29, 31, 22, 3, 4, 1, 12)
52 |     GBDT + 9 features (4, 3, 8, 1, 2, 26, 9, 11, 33)
--------------------------------------------------------------------------------
/source/agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : agent.py
6 | @Author : 
你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 14:39 9 | @Discription : 10 | 11 | 实现智能体的一些操作 12 | 13 | ''' 14 | 15 | # import lib 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | from __future__ import unicode_literals 20 | 21 | from chainer import cuda 22 | from future import standard_library 23 | 24 | standard_library.install_aliases() 25 | 26 | import chainer 27 | 28 | from chainerrl.agents import double_dqn 29 | 30 | 31 | # 继承chainer的DDQN 32 | class MyDoubleDQN(double_dqn.DoubleDQN): 33 | 34 | def act(self, state, action_list): 35 | with chainer.using_config('train', False): 36 | with chainer.no_backprop_mode(): 37 | action_value = self.model( 38 | self.batch_states([state], self.xp, self.phi)) 39 | 40 | # 设置当前状态的state,保证在action_value选取动作的时候考虑一下目前已经选了的state 41 | # 此处不能直接写action_value.load_current_state(state) 42 | # 应该使用self.batch_states,保证在CPU和GPU中都能使用 43 | action_value.load_current_action( 44 | action_list 45 | ) 46 | q = float(action_value.max.data) 47 | action = cuda.to_cpu(action_value.greedy_actions_with_state.data)[0] 48 | 49 | # Update stats 50 | self.average_q *= self.average_q_decay 51 | self.average_q += (1 - self.average_q_decay) * q 52 | 53 | # paper2的返回值 54 | # return action, action_value.q_values.data.astype(np.float) 55 | # chanierrl的返回 56 | return action 57 | 58 | def act_and_train(self, state, reward, action_list): 59 | 60 | with chainer.using_config('train', False): 61 | with chainer.no_backprop_mode(): 62 | action_value = self.model( 63 | self.batch_states([state], self.xp, self.phi)) 64 | 65 | # 设置当前状态的state,保证在action_value选取动作的时候考虑一下目前已经选了的state 66 | # 此处不能直接写action_value.load_current_state(state) 67 | # 应该使用self.batch_states,保证在CPU和GPU中都能使用 68 | action_value.load_current_action( 69 | action_list 70 | ) 71 | q = float(action_value.max.data) 72 | greedy_action = cuda.to_cpu(action_value.greedy_actions_with_state.data)[ 73 | 0] 74 | 75 | # Update stats 76 | self.average_q *= self.average_q_decay 77 | self.average_q += (1 - self.average_q_decay) * q 78 | 79 | 80 | action = self.explorer.select_action( 81 | self.t, lambda: greedy_action, action_value=action_value) 82 | self.t += 1 83 | 84 | 85 | # 更新目标网络 86 | if self.t % self.target_update_interval == 0: 87 | self.sync_target_network() 88 | 89 | if self.last_state is not None: 90 | assert self.last_action is not None 91 | # 向经验回放池中放入数据 92 | self.replay_buffer.append( 93 | state=self.last_state, 94 | action=self.last_action, 95 | reward=reward, 96 | next_state=state, 97 | next_action=action, 98 | is_state_terminal=False) 99 | 100 | self.last_state = state 101 | self.last_action = action 102 | 103 | self.replay_updater.update_if_necessary(self.t) 104 | 105 | # paper2的返回 106 | # return self.last_action, action_value.q_values.data.astype(np.float), greedy_action 107 | # chainerrl的返回 108 | return self.last_action -------------------------------------------------------------------------------- /source/action_value.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | 5 | @File : action_value.py 6 | @Author : 你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 15:07 9 | @Discription : 10 | 11 | 12 | ''' 13 | 14 | # import lib 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 
| from builtins import * # NOQA 21 | 22 | from chainerrl.action_value import ActionValue 23 | from future import standard_library 24 | 25 | standard_library.install_aliases() 26 | 27 | from cached_property import cached_property 28 | import chainer 29 | from chainer import cuda 30 | from chainer import functions as F 31 | import numpy as np 32 | 33 | 34 | class DiscreteActionValue(ActionValue): 35 | """Q-function output for discrete action space. 36 | Args: 37 | q_values (ndarray or chainer.Variable): 38 | Array of Q values whose shape is (batchsize, n_actions) 39 | """ 40 | 41 | def __init__(self, q_values, q_values_formatter=lambda x: x): 42 | assert isinstance(q_values, chainer.Variable) 43 | # self.xp = cuda.get_array_module(q_values.data) 44 | self.q_values = q_values 45 | self.n_actions = q_values.data.shape[1] 46 | self.q_values_formatter = q_values_formatter 47 | 48 | @cached_property 49 | def greedy_actions(self): 50 | return chainer.Variable( 51 | self.q_values.data.argmax(axis=1).astype(np.int32)) 52 | 53 | @cached_property 54 | def greedy_actions_with_state(self): 55 | data = self.q_values.data.astype(np.float) 56 | # print("data: ", data, len(data)) 57 | # print("state: ", self.state, len(self.state)) 58 | while True: 59 | action = np.argmax(data, axis=1)[0] 60 | print("action:", action) 61 | # 设置规则降低q_value,防止盯着一个动作选,降低10000即可。(前面降低了1,还是有点问题,变成10000试试) 62 | if action in self.action_list: 63 | 64 | print("Action chosen: {}, Data : {}, Reset Q value!!!!!!!!!!".format(action, data)) 65 | # data[0][action] /= 2 66 | data[0][action] = -10000 67 | else: 68 | break 69 | # if action == len(self.state): 70 | # action = -1 71 | 72 | # print("q is {}, action is {}".format(data, action)) 73 | 74 | return chainer.Variable(np.array([action]).astype(np.int32)) 75 | # return chainer.Variable(np.array([-1]).astype(np.int32)) 76 | # print(self.q_values.data.argmax(axis=1).astype(np.int32)) 77 | # return chainer.Variable( 78 | # self.q_values.data.argmax(axis=1).astype(np.int32)) 79 | 80 | @cached_property 81 | def max(self): 82 | with chainer.force_backprop_mode(): 83 | return F.select_item(self.q_values, self.greedy_actions) 84 | 85 | def sample_epsilon_greedy_actions(self, epsilon): 86 | assert self.q_values.data.shape[0] == 1, \ 87 | "This method doesn't support batch computation" 88 | if np.random.random() < epsilon: 89 | return chainer.Variable( 90 | self.xp.asarray([np.random.randint(0, self.n_actions)], 91 | dtype=np.int32)) 92 | else: 93 | return self.greedy_actions 94 | 95 | def evaluate_actions(self, actions): 96 | return F.select_item(self.q_values, actions) 97 | 98 | def compute_advantage(self, actions): 99 | return self.evaluate_actions(actions) - self.max 100 | 101 | def compute_double_advantage(self, actions, argmax_actions): 102 | return (self.evaluate_actions(actions) - 103 | self.evaluate_actions(argmax_actions)) 104 | 105 | def compute_expectation(self, beta): 106 | return F.sum(F.softmax(beta * self.q_values) * self.q_values, axis=1) 107 | 108 | def load_current_action(self, action_list): 109 | self.action_list = action_list 110 | 111 | def __repr__(self): 112 | return 'DiscreteActionValue greedy_actions:{} q_values:{}'.format( 113 | self.greedy_actions.data, 114 | self.q_values_formatter(self.q_values.data)) 115 | 116 | @property 117 | def params(self): 118 | return (self.q_values,) 119 | 120 | def __getitem__(self, i): 121 | return DiscreteActionValue( 122 | self.q_values[i], q_values_formatter=self.q_values_formatter) 
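
A short usage sketch of the masked greedy selection above (editor's illustration, not part of the repository; it assumes chainer and numpy are installed and that this file is importable as action_value). Actions already present in action_list have their Q value forced down to -10000, so greedy_actions_with_state returns the best action that has not been chosen yet:

import numpy as np
import chainer
from action_value import DiscreteActionValue

q = chainer.Variable(np.array([[0.9, 0.5, 0.8, 0.1]], dtype=np.float32))
av = DiscreteActionValue(q)
av.load_current_action([0, 2])       # actions 0 and 2 were already selected
print(av.greedy_actions_with_state)  # -> variable([1]), the best remaining action
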
--------------------------------------------------------------------------------
/source/detector.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : detector.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:28
9 | @Discription :
10 | 
11 | Detector trained with the algorithm chosen by the reinforcement-learning search
12 | 
13 | '''
14 | 
15 | # import lib
16 | from sklearn import metrics
17 | from sklearn.tree import DecisionTreeClassifier
18 | from sklearn.ensemble import RandomForestClassifier
19 | from sklearn.model_selection import train_test_split
20 | from sklearn.naive_bayes import GaussianNB
21 | from sklearn.neighbors import KNeighborsClassifier
22 | from sklearn.neural_network import MLPClassifier
23 | from sklearn.ensemble import AdaBoostClassifier
24 | from sklearn.ensemble import GradientBoostingClassifier
25 | from sklearn.ensemble import BaggingClassifier
26 | from sklearn.svm import SVC
27 | import joblib
28 | import time
29 | import os
30 | 
31 | 
32 | # Where trained models are stored
33 | model_path = '../result/'
34 | 
35 | 
36 | # Algorithm pool: maps an algorithm name to the corresponding scikit-learn model
37 | Algorithm_POOL = {
38 |     'RandomForest': RandomForestClassifier(random_state=0, n_estimators=50),
39 |     'KNN': KNeighborsClassifier(),
40 |     'NB': GaussianNB(),
41 |     'DT': DecisionTreeClassifier(),
42 |     'MLP': MLPClassifier(hidden_layer_sizes=(32, 16), solver='adam', alpha=1e-5),
43 |     'Ada': AdaBoostClassifier(n_estimators=100),
44 |     'BAGGING': BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, max_features=0.5),
45 |     # SVM converges poorly because the dimensionality is large
46 |     'SVM': SVC(kernel='rbf', probability=True, gamma='auto', max_iter=1000),
47 |     'GBDT': GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
48 |                                        max_depth=1, random_state=0)
49 | }
50 | 
51 | 
52 | 
53 | 
54 | # Detector class
55 | class Detector():
56 |     def __init__(self, algorithm):
57 | 
58 |         self.algorithm = algorithm
59 |         self.detector = Algorithm_POOL[self.algorithm]
60 | 
61 | 
62 | 
63 |     # Train the detector on the given data and labels
64 |     def train_and_test(self, data, label):
65 | 
66 |         # result dict holding the classification metrics
67 |         result = {}
68 | 
69 |         # Split into training and test sets
70 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=0)
71 | 
72 |         # Training
73 |         train_start = time.time()
74 |         self.detector.fit(x_train, y_train)
75 |         train_end = time.time()
76 |         train_time = train_end - train_start
77 |         sample_number = len(x_test)
78 | 
79 |         # Testing
80 |         detect_start = time.time()
81 |         y_predict = self.detector.predict(x_test)
82 |         detect_end = time.time()
83 |         detect_time = detect_end - detect_start
84 | 
85 |         # Confusion matrix; labels are sorted, so row/column 0 is normal ('0') and row/column 1 is attack ('1')
86 |         cm = metrics.confusion_matrix(y_test, y_predict)
87 |         TN = cm[0][0]
88 |         FP = cm[0][1]
89 |         FN = cm[1][0]
90 |         TP = cm[1][1]
91 | 
92 |         # Accuracy
93 |         accuracy = metrics.accuracy_score(y_test, y_predict)
94 |         # Precision
95 |         precision = metrics.precision_score(y_test, y_predict, pos_label='1', average='binary')
96 |         # Recall
97 |         recall = metrics.recall_score(y_test, y_predict, pos_label='1', average='binary')
98 |         # F1 Score
99 |         f1_score = metrics.f1_score(y_test, y_predict, pos_label='1', average='binary')
100 |         # False alarm rate
101 |         false_alarm_rate = FP / (FP + TN)
102 |         # Miss alarm rate
103 |         miss_alarm_rate = FN / (TP + FN)
104 | 
105 |         result['Accuracy'] = accuracy
106 |         result['Precision'] = precision
107 |         result['Recall'] = recall
108 |         result['F1 Score'] = f1_score
109 |         result['False Alarm Rate'] = false_alarm_rate
110 |         result['Miss Alarm Rate'] = miss_alarm_rate
111 |         result['Train Time'] = train_time
112 |         result['Detect Time For Per Sample'] = detect_time/sample_number
113 | 
114 |         return result
115 | 
116 | 
117 | 
118 |     # Evaluate the trained model on held-out data
119 |     def test(self, data, label):
120 | 
121 |         # result dict holding the classification metrics
122 |         result = {}
123 |         # (placeholder for extracted features)
124 | 
125 |         detect_start = time.time()
126 |         predict = self.detector.predict(data)
127 |         detect_end = time.time()
128 |         detect_time = detect_end - detect_start
129 | 
130 |         # Confusion matrix; labels are sorted, so row/column 0 is normal ('0') and row/column 1 is attack ('1')
131 |         cm = metrics.confusion_matrix(label, predict)
132 |         TN = cm[0][0]
133 |         FP = cm[0][1]
134 |         FN = cm[1][0]
135 |         TP = cm[1][1]
136 | 
137 |         # Accuracy
138 |         accuracy = metrics.accuracy_score(label, predict)
139 |         # Precision
140 |         precision = metrics.precision_score(label, predict, pos_label='1', average='binary')
141 |         # Recall
142 |         recall = metrics.recall_score(label, predict, pos_label='1', average='binary')
143 |         # F1 Score
144 |         f1_score = metrics.f1_score(label, predict, pos_label='1', average='binary')
145 |         # False alarm rate
146 |         false_alarm_rate = FP / (FP + TN)
147 |         # Miss alarm rate
148 |         miss_alarm_rate = FN / (TP + FN)
149 | 
150 | 
151 |         result['Accuracy'] = accuracy
152 |         result['Precision'] = precision
153 |         result['Recall'] = recall
154 |         result['F1 Score'] = f1_score
155 |         result['False Alarm Rate'] = false_alarm_rate
156 |         result['Miss Alarm Rate'] = miss_alarm_rate
157 |         result['Detect Time For Per Sample'] = detect_time / len(label)
158 | 
159 |         return result
160 | 
161 | 
162 |     # Save the model
163 |     def save_model(self):
164 |         # Remove an existing model with the same name first
165 |         if os.path.exists(model_path+self.algorithm+'.model'):
166 |             os.remove(model_path+self.algorithm+'.model')
167 | 
168 |         joblib.dump(self.detector, model_path+self.algorithm+'.model')
169 | 
170 | 
171 |     # Load a previously saved model
172 |     def load_model(self):
173 |         self.detector = joblib.load(model_path+self.algorithm+'.model')
174 | 
175 | 
--------------------------------------------------------------------------------
/source/env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : env.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-15 14:38
9 | @Discription :
10 | 
11 | 
12 | '''
13 | 
14 | # import lib
15 | import random
16 | 
17 | import numpy as np
18 | 
19 | 
20 | # The last action in the action space is the terminate action
21 | 
22 | # Custom environment
23 | class MyEnv:
24 |     def __init__(self, state_size, action_size, max, detector, dataloader):
25 |         self.state_size = state_size
26 |         self.action_size = action_size
27 |         self.max = max  # select at most `max` features; terminate once the limit is reached
28 |         self.detector = detector
29 |         self.dataloader = dataloader
30 |         self.reward_dict = {}
31 |         self.average = self._average_training_data()
32 |         self.reset()
33 | 
34 |     def random_action(self):
35 |         while True:
36 |             action = random.randint(0, self.action_size - 1)
37 |             if action in self.state_index:
38 |                 continue
39 |             else:
40 |                 break
41 |         return action
42 | 
43 | 
44 |     def step(self, action_index):
45 |         self.state_index.add(action_index)
46 |         if len(self.state_index) == self.max:  # the selection limit has been reached
47 |             self.done = True
48 | 
49 |         # reward defaults to 0
50 |         # if current_count>self.max:
51 |         #     reward = self.max - current_count
52 |         # else:
53 |         reward, detect_result = self.get_reward()
54 |         # reward = random.random()*100
55 |         return self.get_one_hot(detect_result), reward, detect_result, self.done
56 | 
57 |     def reset(self):
58 |         self.done = False
59 |         self.state_index = set()
60 |         # accuracy, precision and recall from the previous step
61 |         self.pre_accuracy = 0
62 |         self.pre_precision = 0
63 |         self.pre_recall = 0
64 |         self.current_result = {}
65 | 
66 |         return 
self.get_one_hot(self.current_result) 67 | 68 | def get_reward(self): 69 | temp = [str(x) for x in self.state_index] 70 | temp = '.'.join(temp) 71 | if temp in self.reward_dict.keys(): 72 | item = self.reward_dict.get(temp) 73 | self.pre_accuracy = item[1]['Accuracy'] 74 | self.pre_precision = item[1]['Precision'] 75 | self.pre_recall = item[1]['Recall'] 76 | return item[0], item[1] 77 | else: 78 | # 获得分类结果的字典 79 | detect_result = self.detector.train_and_test(self.dataloader.data, self.dataloader.label) 80 | # for element in reward.values(): 81 | # result += 0.2*element 82 | 83 | accuracy = detect_result['Accuracy'] 84 | precision = detect_result['Precision'] 85 | recall = detect_result['Recall'] 86 | # time = classify_result['Test Time For Per Sample'] 87 | 88 | # 方案1:仅考虑Accuracy 89 | # reward = r_a = accuracy 90 | # 方案2:考虑Accuracy、Precision、Recall 91 | # reward = r_a * 0.4 + r_p *0.3 + r_r * 0.3 92 | # 方案3:考虑Accuracy、Precision、Recall、Time 93 | # reward = r_a * 0.4 + r_p * 0.2 + r_r * 0.2 + r_t * 0.2 94 | 95 | 96 | # 准确率 97 | # 增加了一个feature反而减小了 98 | if self.pre_accuracy > accuracy: 99 | r_a = -1 100 | # 准确率增大 101 | else: 102 | # if accuracy < 0.80: 103 | # r_a = 0 104 | # elif accuracy < 0.95: 105 | # r_a = 0.5 106 | # else: 107 | # r_a = 1 108 | r_a = accuracy 109 | 110 | # 检测率 111 | # 增加了一个feature反而减小了 112 | # if self.pre_precision > precision: 113 | # r_p = -2 114 | # # 检测率增大 115 | # else: 116 | # if precision < 0.80: 117 | # r_p = 0 118 | # elif precision < 0.95: 119 | # r_p = 0.5 120 | # else: 121 | # r_p = 1 122 | 123 | # 召回率 124 | # 增加了一个feature反而减小了 125 | # if self.pre_recall > recall: 126 | # r_r = -2 127 | # # 召回率增大 128 | # else: 129 | # if recall < 0.80: 130 | # r_r = 0 131 | # elif recall < 0.95: 132 | # r_r = 0.5 133 | # else: 134 | # r_r = 1 135 | 136 | # 训练时间,如果比平均时间还短,那么奖励值为0(暂时先不使用) 137 | # if time > 5.43e-5: 138 | # r_t = 0 139 | # elif time > 1.00e-5: 140 | # r_t = 0.5 141 | # else: 142 | # r_t = 1 143 | 144 | # 方案1 145 | reward = r_a 146 | 147 | # 方案2 148 | # reward = r_a * 0.4 + r_p * 0.3 + r_r * 0.3 149 | 150 | # 方案3 151 | # reward = r_a * 0.5 + r_p * 0.2 + r_r * 0.2 + r_t * 0.1 152 | 153 | 154 | self.add_dict(reward, detect_result) 155 | self.pre_accuracy = accuracy 156 | self.pre_precision = precision 157 | self.pre_recall = recall 158 | 159 | return reward, detect_result 160 | 161 | # key:选取的哪些特征, 形如[1,3,5..] 
value: (reward, classification-result dict)
162 |     def add_dict(self, reward, classify_result):
163 |         temp = [str(x) for x in self.state_index]
164 |         temp = '.'.join(temp)
165 |         self.reward_dict[temp] = [reward, classify_result]
166 | 
167 |     def get_one_hot(self, current_result):
168 |         # Option 1: plain 0/1 one-hot encoding
169 |         # state = [1 if i in self.state_index else 0 for i in range(41)]
170 |         # Option 2: feature averages placed in the one-hot positions
171 |         one_hot_state = [1 if i in self.state_index else 0 for i in range(self.state_size)]
172 |         state = [self.average[i] if one_hot_state[i] > 0 else 0 for i in range(len(one_hot_state))]
173 | 
174 |         # Option 3 (the one actually used): averages of the selected features, zero-padded to length 10
175 |         state = []
176 |         for i in self.state_index:
177 |             state.append(self.average[i])
178 | 
179 |         for i in range(10 - len(self.state_index)):
180 |             state.append(0)
181 | 
182 |         count = len(self.state_index)
183 |         accuracy = current_result.get('Accuracy', 0)
184 |         precision = current_result.get('Precision', 0)
185 |         recall = current_result.get('Recall', 0)
186 |         f1_score = current_result.get('F1 Score', 0)
187 |         false_alarm_rate = current_result.get('False Alarm Rate', 0)
188 |         miss_alarm_rate = current_result.get('Miss Alarm Rate', 0)
189 |         time_per_sample = current_result.get('Detect Time For Per Sample', 0)
190 | 
191 |         # state.append(count)
192 |         # state.append(accuracy/100)
193 |         # state.append(precision/100)
194 |         # state.append(recall/100)
195 |         # state.append(f1_score/100)
196 |         # state.append(false_alarm_rate)
197 |         # state.append(miss_alarm_rate)
198 |         # state.append(time_per_sample)
199 | 
200 |         return np.array(state)
201 |         # return np.array(one_hot_state)
202 | 
203 |     def _average_training_data(self):
204 |         data = self.dataloader.data
205 |         average = [0 for _ in range(41)]
206 |         for line in data:
207 |             for i in range(len(line)):
208 |                 average[i] += line[i]
209 | 
210 |         return [item / len(data) for item in average]
--------------------------------------------------------------------------------
/source/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : utils.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:08
9 | @Discription :
10 | 
11 | Utility module: data loading, preprocessing and plotting helpers
12 | 
13 | '''
14 | 
15 | 
16 | # import lib
17 | import csv
18 | import os
19 | from sklearn import preprocessing
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | from pylab import mpl
23 | mpl.rcParams['font.sans-serif'] = ['SimHei']
24 | 
25 | 
26 | data_path = '../data/detection/'
27 | result_path = '../result/'
28 | 
29 | # Lookup tables used to convert the symbolic protocol/service/flag fields to numeric indices
30 | protocol_type = ['tcp', 'udp', 'icmp']
31 | service = ['aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard', 'domain', 'domain_u', 'echo',
32 |            'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp', 'ftp_data', 'gopher', 'harvest', 'hostnames', 'http',
33 |            'http_2784', 'http_443', 'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link',
34 |            'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat', 'nnsp', 'nntp', 'ntp_u',
35 |            'other', 'pm_dump', 'pop_2', 'pop_3', 'printer', 'private', 'red_i', 'remote_job', 'rje', 'shell',
36 |            'smtp', 'sql_net', 'ssh', 'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i',
37 |            'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50']
38 | flag = ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH']
39 | 
40 | 
41 | # Data loader: reads data and labels and hands them to the detector
42 | class DataLoader():
43 |     def __init__(self, feature=None):
44 |         self.data = []
45 |         self.label = []
46 |         self.feature = feature
47 | 
48 |     def load_data(self, filename):
49 |         with open(data_path+filename) as f:
50 |             for row in f:
51 |                 row = row.split(',')
52 |                 self.data.append(row[:-1])
53 |                 # strip the trailing newline
54 |                 temp = row[-1].replace('\n', '')
55 |                 # collect the label
56 |                 self.label.append(temp)
57 | 
58 |         # rows restricted to the selected features
59 |         x = []
60 | 
61 |         if self.feature is None:
62 |             x = self.data
63 |         else:
64 |             # pull out the selected feature columns
65 |             feature = set(self.feature)
66 |             for data_row in self.data:
67 |                 new_data_row = []
68 |                 for i in feature:
69 |                     new_data_row.append(data_row[i])
70 |                 x.append(new_data_row)
71 | 
72 |         return x, self.label
73 | 
74 | 
75 | # Read a raw NSL-KDD csv; original=True handles the layout with an extra difficulty column at the end
76 | def load_csv(filename, original=False):
77 |     data = []
78 |     label = []
79 |     if original:
80 |         index = 2
81 |     else:
82 |         index = 1
83 |     with open(filename) as csvfile:
84 |         for row in csvfile:
85 |             row = row.split(',')
86 |             # convert protocol_type to its index
87 |             row[1] = str(protocol_type.index(row[1]))
88 |             # convert service to its index
89 |             row[2] = str(service.index(row[2]))
90 |             # convert flag to its index
91 |             row[3] = str(flag.index(row[3]))
92 |             # convert the str fields to float
93 |             for i in range(len(row)-index):
94 |                 row[i] = float(row[i])
95 |             # convert the label to 0 (normal) / 1 (attack)
96 |             row[-index] = row[-index].replace('\n', '')
97 |             if row[-index] == 'normal':
98 |                 label.append(0)
99 |             else:
100 |                 label.append(1)
101 |             data.append(row[:-index])
102 | 
103 |     return data, label
104 | 
105 | 
106 | # Save data and labels to a csv file
107 | def save_csv(path, data, label):
108 | 
109 |     if os.path.exists(path):
110 |         os.remove(path)
111 | 
112 |     with open(path, 'w', newline='') as csvfile:
113 |         writer = csv.writer(csvfile)
114 |         for i in range(len(data)):
115 |             data[i].append(label[i])
116 |             writer.writerow(data[i])
117 | 
118 |     print("Save data successfully to: {}".format(path))
119 | 
120 | 
121 | # Load data and labels from a converted csv file
122 | def load_data(path):
123 | 
124 |     data = []
125 |     label = []
126 | 
127 |     with open(path) as f:
128 |         for row in f:
129 |             row = row.split(',')
130 |             data.append(row[:-1])
131 |             # strip the trailing newline
132 |             temp = row[-1].replace('\n', '')
133 |             label.append(temp)
134 | 
135 |     return data, label
136 | 
137 | 
138 | # Data preprocessing
139 | def data_preprocessing(data):
140 | 
141 |     # Min-max scaling
142 |     # min_max_scaler = preprocessing.MinMaxScaler()
143 |     # temp_data = min_max_scaler.fit_transform(data)
144 |     # L2 normalisation; in practice this works a bit better than min-max here
145 |     preprocessed_data = preprocessing.normalize(data, norm='l2')
146 | 
147 |     return preprocessed_data
148 | 
149 | 
150 | # Plot a line chart
151 | def draw_line(x, y, metric):
152 |     plt.xticks(x)
153 |     plt.plot(x, y['DT'], color='deepskyblue', marker='o', ls='-', label='DT')
154 |     plt.plot(x, y['RF'], color='lavender', marker='o', ls='-', label='RF')
155 |     plt.plot(x, y['KNN'], color='wheat', marker='o', ls='-', label='KNN')
156 |     plt.plot(x, y['NB'], color='plum', marker='o', ls='-', label='NB')
157 |     plt.plot(x, y['MLP'], color='teal', marker='o', ls='-', label='MLP')
158 |     plt.plot(x, y['Ada'], color='red', marker='o', ls='-', label='Ada')
159 |     plt.plot(x, y['Bagging'], color='grey', marker='o', ls='-', label='Bagging')
160 |     plt.plot(x, y['GBDT'], color='orangered', marker='o', ls='-', label='GBDT')
161 |     plt.xlabel("Feature used during model training")
162 |     plt.ylabel("Accuracy(%)")
163 |     plt.legend(loc='best', ncol=2)
164 |     plt.savefig(metric+".png")
165 | 
166 | 
167 | # Plot a bar chart
168 | def draw_bar(data_list1):
169 |     bar_label_list = ['ARC', 'CS', 'CP', 'CG', 'MR', 'DU']
170 |     bar_locs = np.arange(6)
171 |     bar_width = 0.40
172 |     # x-axis tick labels
173 |     xtick_labels = [bar_label_list[i] for i in range(6)]
174 | 
175 |     # draw the figure with matplotlib pyplot
176 
| plt.figure() 177 | # 柱状图 178 | rect1 = plt.bar(bar_locs, data_list1, width=bar_width, color='orange', alpha=0.7, xtick_labels=bar_label_list) 179 | # rect2 = plt.bar(bar_locs + bar_width, data_list2, width=bar_width, color='orange', alpha=0.7, label='攻击后') 180 | # 显示数值 181 | for rect in rect1: 182 | plt.text(rect.get_x(), rect.get_height(), rect.get_height(), ha='right', va='bottom', fontsize=12) 183 | 184 | plt.ylabel = '次数' 185 | plt.ylim(0, 2000) 186 | plt.title('智能体所作出的修改动作统计') 187 | plt.tight_layout() 188 | plt.savefig('statistics_bar.png',dpi=1000) 189 | plt.show() 190 | 191 | 192 | 193 | def my_drawline(x, y, metric): 194 | plt.plot(x, y, c='deepskyblue') 195 | # plt.tick_params(axis='both', which='major', labelsize=14) 196 | plt.xlabel('Epoch', fontsize=10) 197 | plt.ylabel('Reward', fontsize=10) 198 | plt.xlim(0, 1000) 199 | plt.ylim(-1, 2) 200 | plt.savefig(metric+".png") 201 | plt.show() 202 | 203 | 204 | 205 | if __name__ == '__main__': 206 | # 折线图绘制 207 | # x = [6, 7, 8, 9, 10] 208 | # y = {'DT':[87.43, 87.65, 87.93, 88.11, 87.98], 209 | # 'RF':[86.93, 86.80, 87.19, 87.76, 88.06], 210 | # 'KNN':[85.24, 84.89, 85.26, 86.02, 86.13], 211 | # 'NB':[72.30, 73.28, 73.84, 73.94, 74.21], 212 | # 'MLP':[79.20, 79.13, 79.94, 80.13, 80.67], 213 | # 'Ada':[83.11, 83.78, 84.31, 84.52, 84.89], 214 | # 'Bagging':[81.20, 81.33, 81.84, 82.40, 83.06], 215 | # 'GBDT':[84.56, 84.77, 85.03, 85.43, 85.50]} 216 | # draw_line(x, y, 'test') 217 | pass -------------------------------------------------------------------------------- /source/start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | 5 | @File : start.py 6 | @Author : 你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 15:11 9 | @Discription : 10 | 11 | 强化学习智能体训练主函数 12 | 13 | ''' 14 | 15 | # import lib 16 | import argparse 17 | import time 18 | import chainer 19 | import chainer.functions as F 20 | import chainer.links as L 21 | import numpy as np 22 | from chainer import optimizers 23 | from chainerrl import explorers 24 | import action_value as ActionValue 25 | from agent import MyDoubleDQN 26 | from chainerrl.replay_buffers import prioritized 27 | import utils 28 | from detector import Detector 29 | import env as Env 30 | 31 | 32 | # 命令行参数 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--maxf', type=int, default=10) 35 | parser.add_argument('--gpu', type=int, default=-1) 36 | parser.add_argument('--cls', type=str, default='RandomForest') 37 | args = parser.parse_args() 38 | 39 | 40 | # 超参数 41 | minibatch_size = 8 42 | replay_start_size = 20 43 | update_interval = 5 44 | state_size = 10 # 可观察的状态数 45 | action_size = 41 # 可选的特征数 46 | feature_max_count = args.maxf # 选取的特征数目大于该值时,reward为0,用于当特征数目在该范围内时,成功率最多可以到达多少 47 | MAX_EPISODE = 1000 # 48 | net_layers = [32, 64] # 一个包含两个隐藏层的神经网络 49 | 50 | result_file = './result/result-{}-{}-{}.txt'.format(args.cls, time.strftime('%Y%m%d%H%M'),args.maxf) 51 | 52 | # 每一轮逻辑如下 53 | # 1. 初始化环境,定义S和A两个list,用来保存过程中的state和action。进入循环,直到当前这一轮完成(done == True) 54 | # 2. 在每一步里,首先选择一个action,此处先用简单的act()代替 55 | # 3. 接着env接收这个action,返回新的state,done和reward,当done==False时,reward=0,当done==True时,reward为模型的准确率 56 | # 4. 
如果done==True,那么应该把当前的S、A和reward送到replay buffer里(replay也应该在此时进行),往replay buffer里添加时, 57 | # 每一对state和action都有一个reward,这个reward应该和env返回的reward(也就是该模型的acc)和count有关。 58 | 59 | # 用这个逻辑替代原来的my_train的逻辑,只需要把agent加入即可,agent应该是不需要修改的 60 | 61 | 62 | 63 | def main(): 64 | 65 | # 保存训练中每一轮的回报 66 | train_reward = [] 67 | # 保存评估中每一轮的回报 68 | evaluate_reward = [] 69 | # 保存训练中每一轮的分类准确率 70 | train_accuracy = [] 71 | # 保存评估中每一轮的分类准确率 72 | evaluate_accuracy = [] 73 | # 用来保存效果最优的特征集合 74 | feature_list_train = [] 75 | feature_list_evaluate = [] 76 | 77 | train_precision = [] 78 | train_recall = [] 79 | 80 | 81 | 82 | class QFunction(chainer.Chain): 83 | def __init__(self, obs_size, n_actions, n_hidden_channels=None): 84 | super(QFunction, self).__init__() 85 | if n_hidden_channels is None: 86 | n_hidden_channels = net_layers 87 | net = [] 88 | inpdim = obs_size 89 | for i, n_hid in enumerate(n_hidden_channels): 90 | net += [('l{}'.format(i), L.Linear(inpdim, n_hid))] 91 | net += [('norm{}'.format(i), L.BatchNormalization(n_hid))] 92 | net += [('_act{}'.format(i), F.relu)] 93 | net += [('_dropout{}'.format(i), F.dropout)] 94 | inpdim = n_hid 95 | 96 | net += [('output', L.Linear(inpdim, n_actions))] 97 | 98 | with self.init_scope(): 99 | for n in net: 100 | if not n[0].startswith('_'): 101 | setattr(self, n[0], n[1]) 102 | 103 | self.forward = net 104 | 105 | def __call__(self, x, test=False): 106 | """ 107 | Args: 108 | x (ndarray or chainer.Variable): An observation 109 | test (bool): a flag indicating whether it is in test mode 110 | """ 111 | for n, f in self.forward: 112 | if not n.startswith('_'): 113 | x = getattr(self, n)(x) 114 | elif n.startswith('_dropout'): 115 | x = f(x, 0.1) 116 | else: 117 | x = f(x) 118 | 119 | return ActionValue.DiscreteActionValue(x) 120 | 121 | def evaluate(env, agent, current): 122 | for i in range(1): 123 | print("evaluate episode: {}".format(current)) 124 | state = env.reset() 125 | terminal = False 126 | action_list = [] 127 | 128 | while not terminal: 129 | action = agent.act(state, action_list) 130 | if action not in action_list: 131 | action_list.append(action) 132 | state, reward, classify_result, terminal = env.step(action) 133 | 134 | if terminal or len(action_list) > 10: 135 | if len(action_list) > 10: 136 | terminal = True 137 | 138 | with open(result_file, 'a+') as f: 139 | f.write( 140 | "--------------------------------------------------------------------------------------------------\n" 141 | "Evaluate episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 142 | "-------------------------------------------------------------------------------------------------\n" 143 | .format(current, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list) 144 | ) 145 | print( 146 | "--------------------------------------------------------------------------------------------------\n" 147 | "Evaluate episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 148 | "-------------------------------------------------------------------------------------------------\n" 149 | .format(current, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list) 150 | ) 151 | # 加入本轮次评估的回报,后面可能需要 152 | evaluate_reward.append(reward) 153 | # 每次评估添加本次评估准确率 154 | evaluate_accuracy.append(classify_result['Accuracy']) 155 | # 同时添加对应特征 156 | feature_list_evaluate.append(action_list) 157 | 158 | 159 | 160 | def train_agent(env, agent): 161 | 
for episode in range(MAX_EPISODE): 162 | state = env.reset() 163 | terminal = False 164 | reward = 0 165 | t = 0 166 | action_list = [] 167 | while not terminal: 168 | t += 1 169 | action = agent.act_and_train( 170 | state, reward, action_list) # 此处action是否合法(即不能重复选取同一个指标)由agent判断。env默认得到的action合法。 171 | if action not in action_list: 172 | action_list.append(action) 173 | state, reward, classify_result, terminal = env.step(action) 174 | print("Episode:{}, t:{}, Action:{}, Accuracy = {}, FAR = {}, MAR = {}, Reward = {}".format(episode, t, action_list, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], reward)) 175 | 176 | if terminal: 177 | with open(result_file, 'a+') as f: 178 | f.write("Train episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 179 | .format(episode, reward , classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list)) 180 | print("Train episode:{}, Reward = {}, Accuracy = {} ,FAR = {}, MAR = {}, Action = {}\n" 181 | .format(episode, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list)) 182 | 183 | agent.stop_episode() 184 | # 加入轮次训练的回报,后面可能需要 185 | train_reward.append(reward) 186 | # 加入本轮次训练的准确率 187 | train_accuracy.append(classify_result['Accuracy']) 188 | train_precision.append(classify_result['Precision']) 189 | train_recall.append(classify_result['Recall']) 190 | # 本轮次对应所选的特征 191 | feature_list_train.append(action_list) 192 | if (episode + 1) % 10 == 0 and episode != 0: 193 | evaluate(env, agent, (episode + 1) / 10) 194 | 195 | def create_agent(env): 196 | state_size = env.state_size 197 | action_size = env.action_size 198 | q_func = QFunction(state_size, action_size) 199 | 200 | start_epsilon = 1 201 | end_epsilon = 0.3 202 | decay_steps = 10 203 | explorer = explorers.LinearDecayEpsilonGreedy( 204 | start_epsilon, end_epsilon, decay_steps, 205 | env.random_action) 206 | 207 | opt = optimizers.Adam() 208 | opt.setup(q_func) 209 | 210 | 211 | 212 | # 优先经验回放 213 | rbuf = prioritized.PrioritizedReplayBuffer() 214 | 215 | phi = lambda x: x.astype(np.float32, copy=False) 216 | 217 | 218 | # 自己的DDQN 219 | agent = MyDoubleDQN(q_func, 220 | opt, 221 | rbuf, 222 | gamma=0.99, 223 | explorer=explorer, 224 | replay_start_size=replay_start_size, 225 | target_update_interval=10, # target q网络多久和q网络同步 226 | update_interval=update_interval, 227 | phi=phi, 228 | minibatch_size=minibatch_size, 229 | gpu=args.gpu, # 设置是否使用gpu 230 | episodic_update_len=16) 231 | 232 | return agent 233 | 234 | 235 | def train(): 236 | env = Env.MyEnv(state_size, action_size, 237 | feature_max_count, 238 | Detector('RandomForest'), 239 | utils.DataLoader()) 240 | agent = create_agent(env) 241 | train_agent(env, agent) 242 | 243 | # evaluate(env, agent) 244 | 245 | return env, agent 246 | 247 | train() 248 | 249 | # 用于计算本次训练中最大的准确率,及所选的特征以及训练过程中的平均准确率 250 | max_train_accuracy = max(train_accuracy) 251 | max_evaluate_accuracy = max(evaluate_accuracy) 252 | max_train_reward = max(train_reward) 253 | max_evaluate_reward = max(evaluate_reward) 254 | # 取索引 255 | max_train_accuracy_index = train_accuracy.index(max_train_accuracy) 256 | max_evaluate_accuracy_index = evaluate_accuracy.index(max_evaluate_accuracy) 257 | max_train_reward_index = train_reward.index(max_train_reward) 258 | max_evaluate_reward_index = evaluate_reward.index(max_evaluate_reward) 259 | # 找特征 260 | best_train_accuracy_feature = 
feature_list_train[max_train_accuracy_index] 261 | best_evaluate_accuracy_feature = feature_list_evaluate[max_evaluate_accuracy_index] 262 | best_train_reward_feature = feature_list_train[max_train_reward_index] 263 | best_evaluate_reward_feature = feature_list_train[max_evaluate_reward_index] 264 | 265 | # 统计训练过程中reward的平均值以及变化趋势 266 | average_reward = 0 267 | for i in range(len(train_reward) - 1): 268 | average_reward = average_reward + train_reward[i] 269 | average_reward = average_reward / len(train_reward) 270 | 271 | # 写入文件训练过程统计结果 272 | with open(result_file, 'a+') as f: 273 | f.write("Train reward:{}\n".format(train_reward)) 274 | # 打印accuracy,用于绘图 275 | f.write("Train precision:{}\n".format(train_precision)) 276 | # 打印recall,用于绘图 277 | f.write("Train recall:{}\n".format(train_recall)) 278 | f.write("The max accuracy of the train:{}, the feature selected are:{}.\n".format(max_train_accuracy, 279 | best_train_accuracy_feature)) 280 | f.write("The max accuracy of the evaluate:{}, the feature selected are:{}.\n".format(max_evaluate_accuracy, 281 | best_evaluate_accuracy_feature)) 282 | f.write("The max reward of the train:{}, the feature selected are:{}\n".format(max_train_reward, 283 | best_train_reward_feature)) 284 | f.write("The max reward of the evaluate:{}, the feature selected are:{}\n".format(max_evaluate_reward, 285 | best_evaluate_reward_feature)) 286 | f.write("The average reward of this train:{}.\n".format(average_reward)) 287 | 288 | 289 | 290 | if __name__ == '__main__': 291 | 292 | start_time = time.time() 293 | main() 294 | elapsed = time.time() - start_time 295 | print("Time: {}".format(elapsed)) 296 | # 训练时间 297 | with open(result_file, 'a+') as f: 298 | 299 | f.write("Training time:{} seconds".format(elapsed)) 300 | --------------------------------------------------------------------------------
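
A short end-to-end usage sketch (editor's addition, not part of the repository): it mirrors train.py, loads the RandomForest.model shipped under result/ and evaluates it on the harder KDDTest-21 split. It assumes the converted NSL-KDD CSV files are present under data/detection/ and that the script is run from the source/ directory.

from detector import Detector
from utils import DataLoader, data_preprocessing

RF_feature = [1, 2, 5, 22, 24, 25, 27, 35]

# Load the test split restricted to the features found by the RL search
dl = DataLoader(feature=RF_feature)
data, label = dl.load_data('KDDTest-21.csv')
data = data_preprocessing(data)

# Reuse the saved model instead of retraining
detector = Detector('RandomForest')
detector.load_model()               # reads ../result/RandomForest.model
print(detector.test(data, label))   # dict with Accuracy, Precision, Recall, ...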