├── requirements.txt
├── result
│   └── RandomForest.model
├── source
│   ├── train.py
│   ├── agent.py
│   ├── action_value.py
│   ├── detector.py
│   ├── env.py
│   ├── utils.py
│   └── start.py
└── README.md

/requirements.txt:
--------------------------------------------------------------------------------
1 | chainerrl
2 | numpy
3 | scikit-learn
4 | matplotlib
--------------------------------------------------------------------------------
/result/RandomForest.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JohnsonGeng/malicious_traffic/HEAD/result/RandomForest.model
--------------------------------------------------------------------------------
/source/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : train.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:23
9 | @Discription :
10 | 
11 | Main script: train and evaluate the detector on the selected feature subset
12 | 
13 | '''
14 | 
15 | # import lib
16 | from detector import Detector
17 | from utils import *
18 | 
19 | # NSL-KDD feature subsets selected by the reinforcement-learning agent
20 | RF_feature = [1, 2, 5, 22, 24, 25, 27, 35]
21 | DT_feature = [2, 4, 5, 24, 27, 30, 31, 33, 35, 36]
22 | 
23 | if __name__ == '__main__':
24 | 
25 |     # Load the data restricted to the selected features
26 |     dl = DataLoader(feature=RF_feature)
27 |     data, label = dl.load_data('KDDTrain+.csv')
28 |     # Preprocess the data (L2 normalisation, see utils.data_preprocessing)
29 |     data = data_preprocessing(data)
30 |     # Build a Random Forest detector
31 |     detector = Detector('RandomForest')
32 |     # Train it and collect the evaluation metrics
33 |     result = detector.train_and_test(data, label)
34 |     print('Training results:')
35 |     print(result)
36 |     # Save the trained model
37 |     detector.save_model()
38 | 
39 |     # Evaluate on the harder KDDTest-21 split
40 |     dl_evaluate = DataLoader(feature=RF_feature)
41 |     test_data, test_label = dl_evaluate.load_data('KDDTest-21.csv')
42 |     test_data = data_preprocessing(test_data)
43 |     eva_result = detector.test(test_data, test_label)
44 |     print('Test results:')
45 |     print(eva_result)
46 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### Dataset
2 | 
3 | The NSL-KDD dataset is used. It is stored in the data/detection folder as CSV files.
4 | 
5 | It contains the following four files:
6 | 
7 |     KDDTrain+.csv ———— full training set, normally used for training
8 |     KDDTrain+_20Percent.csv ———— 20% subset of the training set
9 |     KDDTest+.csv ———— test set, normally used for testing
10 |     KDDTest-21.csv ———— harder test set
11 | 
12 | 
13 | #### File structure (source folder)
14 | 
15 |     train: trains a detector directly on a fixed feature subset
16 |     utils: utility module for data loading, preprocessing and plotting
17 |     start: entry point of the reinforcement-learning search for feature/model combinations
18 |     detector: detector class implementing training, testing, saving and loading
19 |     env: environment from which the agent obtains its reward
20 |     agent: agent class used to build the DQN
21 |     action_value: action-value (Q value) handling used when selecting actions
22 | 
23 | 
24 | #### Input and output
25 | 
26 | Input: data and labels
27 | 
28 |     obtained via utils.DataLoader(feature list)
29 | 
30 |     DataLoader.data    training data restricted to the features in the list
31 |     DataLoader.label   labels
32 | 
33 | Output: a dictionary of the model's detection results on the traffic
34 | 
35 |     obtained via detector.Detector('learning algorithm')
36 | 
37 |     Detector.train_and_test(data, labels)   train and test
38 |     Detector.test(data, labels)             evaluate the model
39 | 
40 | 
41 | #### Best-performing model found by the reinforcement-learning search
42 | 
43 | Random Forest + 8 features (1, 2, 5, 22, 24, 25, 27, 35)
44 | 
45 | 
46 | 
47 | #### Other well-performing models found by the search, for reference
48 | 
49 |     Decision Tree + 10 features (39, 15, 4, 1, 21, 31, 34, 11, 37, 38)
50 |     KNN + 10 features (5, 24, 31, 2, 20, 4, 25, 11, 3, 22)
51 |     Bagging + 8 features (2, 29, 31, 22, 3, 4, 1, 12)
52 |     GBDT + 9 features (4, 3, 8, 1, 2, 26, 9, 11, 33)
--------------------------------------------------------------------------------
/source/agent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : agent.py
6 | @Author : 
你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 14:39 9 | @Discription : 10 | 11 | 实现智能体的一些操作 12 | 13 | ''' 14 | 15 | # import lib 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | from __future__ import unicode_literals 20 | 21 | from chainer import cuda 22 | from future import standard_library 23 | 24 | standard_library.install_aliases() 25 | 26 | import chainer 27 | 28 | from chainerrl.agents import double_dqn 29 | 30 | 31 | # 继承chainer的DDQN 32 | class MyDoubleDQN(double_dqn.DoubleDQN): 33 | 34 | def act(self, state, action_list): 35 | with chainer.using_config('train', False): 36 | with chainer.no_backprop_mode(): 37 | action_value = self.model( 38 | self.batch_states([state], self.xp, self.phi)) 39 | 40 | # 设置当前状态的state,保证在action_value选取动作的时候考虑一下目前已经选了的state 41 | # 此处不能直接写action_value.load_current_state(state) 42 | # 应该使用self.batch_states,保证在CPU和GPU中都能使用 43 | action_value.load_current_action( 44 | action_list 45 | ) 46 | q = float(action_value.max.data) 47 | action = cuda.to_cpu(action_value.greedy_actions_with_state.data)[0] 48 | 49 | # Update stats 50 | self.average_q *= self.average_q_decay 51 | self.average_q += (1 - self.average_q_decay) * q 52 | 53 | # paper2的返回值 54 | # return action, action_value.q_values.data.astype(np.float) 55 | # chanierrl的返回 56 | return action 57 | 58 | def act_and_train(self, state, reward, action_list): 59 | 60 | with chainer.using_config('train', False): 61 | with chainer.no_backprop_mode(): 62 | action_value = self.model( 63 | self.batch_states([state], self.xp, self.phi)) 64 | 65 | # 设置当前状态的state,保证在action_value选取动作的时候考虑一下目前已经选了的state 66 | # 此处不能直接写action_value.load_current_state(state) 67 | # 应该使用self.batch_states,保证在CPU和GPU中都能使用 68 | action_value.load_current_action( 69 | action_list 70 | ) 71 | q = float(action_value.max.data) 72 | greedy_action = cuda.to_cpu(action_value.greedy_actions_with_state.data)[ 73 | 0] 74 | 75 | # Update stats 76 | self.average_q *= self.average_q_decay 77 | self.average_q += (1 - self.average_q_decay) * q 78 | 79 | 80 | action = self.explorer.select_action( 81 | self.t, lambda: greedy_action, action_value=action_value) 82 | self.t += 1 83 | 84 | 85 | # 更新目标网络 86 | if self.t % self.target_update_interval == 0: 87 | self.sync_target_network() 88 | 89 | if self.last_state is not None: 90 | assert self.last_action is not None 91 | # 向经验回放池中放入数据 92 | self.replay_buffer.append( 93 | state=self.last_state, 94 | action=self.last_action, 95 | reward=reward, 96 | next_state=state, 97 | next_action=action, 98 | is_state_terminal=False) 99 | 100 | self.last_state = state 101 | self.last_action = action 102 | 103 | self.replay_updater.update_if_necessary(self.t) 104 | 105 | # paper2的返回 106 | # return self.last_action, action_value.q_values.data.astype(np.float), greedy_action 107 | # chainerrl的返回 108 | return self.last_action -------------------------------------------------------------------------------- /source/action_value.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | 5 | @File : action_value.py 6 | @Author : 你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 15:07 9 | @Discription : 10 | 11 | 12 | ''' 13 | 14 | # import lib 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 
| from builtins import * # NOQA 21 | 22 | from chainerrl.action_value import ActionValue 23 | from future import standard_library 24 | 25 | standard_library.install_aliases() 26 | 27 | from cached_property import cached_property 28 | import chainer 29 | from chainer import cuda 30 | from chainer import functions as F 31 | import numpy as np 32 | 33 | 34 | class DiscreteActionValue(ActionValue): 35 | """Q-function output for discrete action space. 36 | Args: 37 | q_values (ndarray or chainer.Variable): 38 | Array of Q values whose shape is (batchsize, n_actions) 39 | """ 40 | 41 | def __init__(self, q_values, q_values_formatter=lambda x: x): 42 | assert isinstance(q_values, chainer.Variable) 43 | # self.xp = cuda.get_array_module(q_values.data) 44 | self.q_values = q_values 45 | self.n_actions = q_values.data.shape[1] 46 | self.q_values_formatter = q_values_formatter 47 | 48 | @cached_property 49 | def greedy_actions(self): 50 | return chainer.Variable( 51 | self.q_values.data.argmax(axis=1).astype(np.int32)) 52 | 53 | @cached_property 54 | def greedy_actions_with_state(self): 55 | data = self.q_values.data.astype(np.float) 56 | # print("data: ", data, len(data)) 57 | # print("state: ", self.state, len(self.state)) 58 | while True: 59 | action = np.argmax(data, axis=1)[0] 60 | print("action:", action) 61 | # 设置规则降低q_value,防止盯着一个动作选,降低10000即可。(前面降低了1,还是有点问题,变成10000试试) 62 | if action in self.action_list: 63 | 64 | print("Action chosen: {}, Data : {}, Reset Q value!!!!!!!!!!".format(action, data)) 65 | # data[0][action] /= 2 66 | data[0][action] = -10000 67 | else: 68 | break 69 | # if action == len(self.state): 70 | # action = -1 71 | 72 | # print("q is {}, action is {}".format(data, action)) 73 | 74 | return chainer.Variable(np.array([action]).astype(np.int32)) 75 | # return chainer.Variable(np.array([-1]).astype(np.int32)) 76 | # print(self.q_values.data.argmax(axis=1).astype(np.int32)) 77 | # return chainer.Variable( 78 | # self.q_values.data.argmax(axis=1).astype(np.int32)) 79 | 80 | @cached_property 81 | def max(self): 82 | with chainer.force_backprop_mode(): 83 | return F.select_item(self.q_values, self.greedy_actions) 84 | 85 | def sample_epsilon_greedy_actions(self, epsilon): 86 | assert self.q_values.data.shape[0] == 1, \ 87 | "This method doesn't support batch computation" 88 | if np.random.random() < epsilon: 89 | return chainer.Variable( 90 | self.xp.asarray([np.random.randint(0, self.n_actions)], 91 | dtype=np.int32)) 92 | else: 93 | return self.greedy_actions 94 | 95 | def evaluate_actions(self, actions): 96 | return F.select_item(self.q_values, actions) 97 | 98 | def compute_advantage(self, actions): 99 | return self.evaluate_actions(actions) - self.max 100 | 101 | def compute_double_advantage(self, actions, argmax_actions): 102 | return (self.evaluate_actions(actions) - 103 | self.evaluate_actions(argmax_actions)) 104 | 105 | def compute_expectation(self, beta): 106 | return F.sum(F.softmax(beta * self.q_values) * self.q_values, axis=1) 107 | 108 | def load_current_action(self, action_list): 109 | self.action_list = action_list 110 | 111 | def __repr__(self): 112 | return 'DiscreteActionValue greedy_actions:{} q_values:{}'.format( 113 | self.greedy_actions.data, 114 | self.q_values_formatter(self.q_values.data)) 115 | 116 | @property 117 | def params(self): 118 | return (self.q_values,) 119 | 120 | def __getitem__(self, i): 121 | return DiscreteActionValue( 122 | self.q_values[i], q_values_formatter=self.q_values_formatter) 
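
A short usage sketch of the masked greedy selection above (editor's illustration, not part of the repository; it assumes chainer and numpy are installed and that this file is importable as action_value). Actions already present in action_list have their Q value forced down to -10000, so greedy_actions_with_state returns the best action that has not been chosen yet:

import numpy as np
import chainer
from action_value import DiscreteActionValue

q = chainer.Variable(np.array([[0.9, 0.5, 0.8, 0.1]], dtype=np.float32))
av = DiscreteActionValue(q)
av.load_current_action([0, 2])       # actions 0 and 2 were already selected
print(av.greedy_actions_with_state)  # -> variable([1]), the best remaining action
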
--------------------------------------------------------------------------------
/source/detector.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : detector.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:28
9 | @Discription :
10 | 
11 | Detector trained with the algorithm chosen by the reinforcement-learning search
12 | 
13 | '''
14 | 
15 | # import lib
16 | from sklearn import metrics
17 | from sklearn.tree import DecisionTreeClassifier
18 | from sklearn.ensemble import RandomForestClassifier
19 | from sklearn.model_selection import train_test_split
20 | from sklearn.naive_bayes import GaussianNB
21 | from sklearn.neighbors import KNeighborsClassifier
22 | from sklearn.neural_network import MLPClassifier
23 | from sklearn.ensemble import AdaBoostClassifier
24 | from sklearn.ensemble import GradientBoostingClassifier
25 | from sklearn.ensemble import BaggingClassifier
26 | from sklearn.svm import SVC
27 | import joblib
28 | import time
29 | import os
30 | 
31 | 
32 | # Where trained models are stored
33 | model_path = '../result/'
34 | 
35 | 
36 | # Algorithm pool: maps an algorithm name to the corresponding scikit-learn model
37 | Algorithm_POOL = {
38 |     'RandomForest': RandomForestClassifier(random_state=0, n_estimators=50),
39 |     'KNN': KNeighborsClassifier(),
40 |     'NB': GaussianNB(),
41 |     'DT': DecisionTreeClassifier(),
42 |     'MLP': MLPClassifier(hidden_layer_sizes=(32, 16), solver='adam', alpha=1e-5),
43 |     'Ada': AdaBoostClassifier(n_estimators=100),
44 |     'BAGGING': BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, max_features=0.5),
45 |     # SVM converges poorly because the dimensionality is large
46 |     'SVM': SVC(kernel='rbf', probability=True, gamma='auto', max_iter=1000),
47 |     'GBDT': GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
48 |                                        max_depth=1, random_state=0)
49 | }
50 | 
51 | 
52 | 
53 | 
54 | # Detector class
55 | class Detector():
56 |     def __init__(self, algorithm):
57 | 
58 |         self.algorithm = algorithm
59 |         self.detector = Algorithm_POOL[self.algorithm]
60 | 
61 | 
62 | 
63 |     # Train the detector on the given data and labels
64 |     def train_and_test(self, data, label):
65 | 
66 |         # result dict holding the classification metrics
67 |         result = {}
68 | 
69 |         # Split into training and test sets
70 |         x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2, random_state=0)
71 | 
72 |         # Training
73 |         train_start = time.time()
74 |         self.detector.fit(x_train, y_train)
75 |         train_end = time.time()
76 |         train_time = train_end - train_start
77 |         sample_number = len(x_test)
78 | 
79 |         # Testing
80 |         detect_start = time.time()
81 |         y_predict = self.detector.predict(x_test)
82 |         detect_end = time.time()
83 |         detect_time = detect_end - detect_start
84 | 
85 |         # Confusion matrix; labels are sorted, so row/column 0 is normal ('0') and row/column 1 is attack ('1')
86 |         cm = metrics.confusion_matrix(y_test, y_predict)
87 |         TN = cm[0][0]
88 |         FP = cm[0][1]
89 |         FN = cm[1][0]
90 |         TP = cm[1][1]
91 | 
92 |         # Accuracy
93 |         accuracy = metrics.accuracy_score(y_test, y_predict)
94 |         # Precision
95 |         precision = metrics.precision_score(y_test, y_predict, pos_label='1', average='binary')
96 |         # Recall
97 |         recall = metrics.recall_score(y_test, y_predict, pos_label='1', average='binary')
98 |         # F1 Score
99 |         f1_score = metrics.f1_score(y_test, y_predict, pos_label='1', average='binary')
100 |         # False alarm rate
101 |         false_alarm_rate = FP / (FP + TN)
102 |         # Miss alarm rate
103 |         miss_alarm_rate = FN / (TP + FN)
104 | 
105 |         result['Accuracy'] = accuracy
106 |         result['Precision'] = precision
107 |         result['Recall'] = recall
108 |         result['F1 Score'] = f1_score
109 |         result['False Alarm Rate'] = false_alarm_rate
110 |         result['Miss Alarm Rate'] = miss_alarm_rate
111 |         result['Train Time'] = train_time
112 |         result['Detect Time For Per Sample'] = detect_time/sample_number
113 | 
114 |         return result
115 | 
116 | 
117 | 
118 |     # Evaluate the trained model on held-out data
119 |     def test(self, data, label):
120 | 
121 |         # result dict holding the classification metrics
122 |         result = {}
123 |         # (placeholder for extracted features)
124 | 
125 |         detect_start = time.time()
126 |         predict = self.detector.predict(data)
127 |         detect_end = time.time()
128 |         detect_time = detect_end - detect_start
129 | 
130 |         # Confusion matrix; labels are sorted, so row/column 0 is normal ('0') and row/column 1 is attack ('1')
131 |         cm = metrics.confusion_matrix(label, predict)
132 |         TN = cm[0][0]
133 |         FP = cm[0][1]
134 |         FN = cm[1][0]
135 |         TP = cm[1][1]
136 | 
137 |         # Accuracy
138 |         accuracy = metrics.accuracy_score(label, predict)
139 |         # Precision
140 |         precision = metrics.precision_score(label, predict, pos_label='1', average='binary')
141 |         # Recall
142 |         recall = metrics.recall_score(label, predict, pos_label='1', average='binary')
143 |         # F1 Score
144 |         f1_score = metrics.f1_score(label, predict, pos_label='1', average='binary')
145 |         # False alarm rate
146 |         false_alarm_rate = FP / (FP + TN)
147 |         # Miss alarm rate
148 |         miss_alarm_rate = FN / (TP + FN)
149 | 
150 | 
151 |         result['Accuracy'] = accuracy
152 |         result['Precision'] = precision
153 |         result['Recall'] = recall
154 |         result['F1 Score'] = f1_score
155 |         result['False Alarm Rate'] = false_alarm_rate
156 |         result['Miss Alarm Rate'] = miss_alarm_rate
157 |         result['Detect Time For Per Sample'] = detect_time / len(label)
158 | 
159 |         return result
160 | 
161 | 
162 |     # Save the model
163 |     def save_model(self):
164 |         # Remove an existing model with the same name first
165 |         if os.path.exists(model_path+self.algorithm+'.model'):
166 |             os.remove(model_path+self.algorithm+'.model')
167 | 
168 |         joblib.dump(self.detector, model_path+self.algorithm+'.model')
169 | 
170 | 
171 |     # Load a previously saved model
172 |     def load_model(self):
173 |         self.detector = joblib.load(model_path+self.algorithm+'.model')
174 | 
175 | 
--------------------------------------------------------------------------------
/source/env.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : env.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-15 14:38
9 | @Discription :
10 | 
11 | 
12 | '''
13 | 
14 | # import lib
15 | import random
16 | 
17 | import numpy as np
18 | 
19 | 
20 | # The last action in the action space is the terminate action
21 | 
22 | # Custom environment
23 | class MyEnv:
24 |     def __init__(self, state_size, action_size, max, detector, dataloader):
25 |         self.state_size = state_size
26 |         self.action_size = action_size
27 |         self.max = max  # select at most `max` features; terminate once the limit is reached
28 |         self.detector = detector
29 |         self.dataloader = dataloader
30 |         self.reward_dict = {}
31 |         self.average = self._average_training_data()
32 |         self.reset()
33 | 
34 |     def random_action(self):
35 |         while True:
36 |             action = random.randint(0, self.action_size - 1)
37 |             if action in self.state_index:
38 |                 continue
39 |             else:
40 |                 break
41 |         return action
42 | 
43 | 
44 |     def step(self, action_index):
45 |         self.state_index.add(action_index)
46 |         if len(self.state_index) == self.max:  # the selection limit has been reached
47 |             self.done = True
48 | 
49 |         # reward defaults to 0
50 |         # if current_count>self.max:
51 |         #     reward = self.max - current_count
52 |         # else:
53 |         reward, detect_result = self.get_reward()
54 |         # reward = random.random()*100
55 |         return self.get_one_hot(detect_result), reward, detect_result, self.done
56 | 
57 |     def reset(self):
58 |         self.done = False
59 |         self.state_index = set()
60 |         # accuracy, precision and recall from the previous step
61 |         self.pre_accuracy = 0
62 |         self.pre_precision = 0
63 |         self.pre_recall = 0
64 |         self.current_result = {}
65 | 
66 |         return 
self.get_one_hot(self.current_result) 67 | 68 | def get_reward(self): 69 | temp = [str(x) for x in self.state_index] 70 | temp = '.'.join(temp) 71 | if temp in self.reward_dict.keys(): 72 | item = self.reward_dict.get(temp) 73 | self.pre_accuracy = item[1]['Accuracy'] 74 | self.pre_precision = item[1]['Precision'] 75 | self.pre_recall = item[1]['Recall'] 76 | return item[0], item[1] 77 | else: 78 | # 获得分类结果的字典 79 | detect_result = self.detector.train_and_test(self.dataloader.data, self.dataloader.label) 80 | # for element in reward.values(): 81 | # result += 0.2*element 82 | 83 | accuracy = detect_result['Accuracy'] 84 | precision = detect_result['Precision'] 85 | recall = detect_result['Recall'] 86 | # time = classify_result['Test Time For Per Sample'] 87 | 88 | # 方案1:仅考虑Accuracy 89 | # reward = r_a = accuracy 90 | # 方案2:考虑Accuracy、Precision、Recall 91 | # reward = r_a * 0.4 + r_p *0.3 + r_r * 0.3 92 | # 方案3:考虑Accuracy、Precision、Recall、Time 93 | # reward = r_a * 0.4 + r_p * 0.2 + r_r * 0.2 + r_t * 0.2 94 | 95 | 96 | # 准确率 97 | # 增加了一个feature反而减小了 98 | if self.pre_accuracy > accuracy: 99 | r_a = -1 100 | # 准确率增大 101 | else: 102 | # if accuracy < 0.80: 103 | # r_a = 0 104 | # elif accuracy < 0.95: 105 | # r_a = 0.5 106 | # else: 107 | # r_a = 1 108 | r_a = accuracy 109 | 110 | # 检测率 111 | # 增加了一个feature反而减小了 112 | # if self.pre_precision > precision: 113 | # r_p = -2 114 | # # 检测率增大 115 | # else: 116 | # if precision < 0.80: 117 | # r_p = 0 118 | # elif precision < 0.95: 119 | # r_p = 0.5 120 | # else: 121 | # r_p = 1 122 | 123 | # 召回率 124 | # 增加了一个feature反而减小了 125 | # if self.pre_recall > recall: 126 | # r_r = -2 127 | # # 召回率增大 128 | # else: 129 | # if recall < 0.80: 130 | # r_r = 0 131 | # elif recall < 0.95: 132 | # r_r = 0.5 133 | # else: 134 | # r_r = 1 135 | 136 | # 训练时间,如果比平均时间还短,那么奖励值为0(暂时先不使用) 137 | # if time > 5.43e-5: 138 | # r_t = 0 139 | # elif time > 1.00e-5: 140 | # r_t = 0.5 141 | # else: 142 | # r_t = 1 143 | 144 | # 方案1 145 | reward = r_a 146 | 147 | # 方案2 148 | # reward = r_a * 0.4 + r_p * 0.3 + r_r * 0.3 149 | 150 | # 方案3 151 | # reward = r_a * 0.5 + r_p * 0.2 + r_r * 0.2 + r_t * 0.1 152 | 153 | 154 | self.add_dict(reward, detect_result) 155 | self.pre_accuracy = accuracy 156 | self.pre_precision = precision 157 | self.pre_recall = recall 158 | 159 | return reward, detect_result 160 | 161 | # key:选取的哪些特征, 形如[1,3,5..] 
value: (reward, classification-result dict)
162 |     def add_dict(self, reward, classify_result):
163 |         temp = [str(x) for x in self.state_index]
164 |         temp = '.'.join(temp)
165 |         self.reward_dict[temp] = [reward, classify_result]
166 | 
167 |     def get_one_hot(self, current_result):
168 |         # Option 1: plain 0/1 one-hot encoding
169 |         # state = [1 if i in self.state_index else 0 for i in range(41)]
170 |         # Option 2: feature averages placed in the one-hot positions
171 |         one_hot_state = [1 if i in self.state_index else 0 for i in range(self.state_size)]
172 |         state = [self.average[i] if one_hot_state[i] > 0 else 0 for i in range(len(one_hot_state))]
173 | 
174 |         # Option 3 (the one actually used): averages of the selected features, zero-padded to length 10
175 |         state = []
176 |         for i in self.state_index:
177 |             state.append(self.average[i])
178 | 
179 |         for i in range(10 - len(self.state_index)):
180 |             state.append(0)
181 | 
182 |         count = len(self.state_index)
183 |         accuracy = current_result.get('Accuracy', 0)
184 |         precision = current_result.get('Precision', 0)
185 |         recall = current_result.get('Recall', 0)
186 |         f1_score = current_result.get('F1 Score', 0)
187 |         false_alarm_rate = current_result.get('False Alarm Rate', 0)
188 |         miss_alarm_rate = current_result.get('Miss Alarm Rate', 0)
189 |         time_per_sample = current_result.get('Detect Time For Per Sample', 0)
190 | 
191 |         # state.append(count)
192 |         # state.append(accuracy/100)
193 |         # state.append(precision/100)
194 |         # state.append(recall/100)
195 |         # state.append(f1_score/100)
196 |         # state.append(false_alarm_rate)
197 |         # state.append(miss_alarm_rate)
198 |         # state.append(time_per_sample)
199 | 
200 |         return np.array(state)
201 |         # return np.array(one_hot_state)
202 | 
203 |     def _average_training_data(self):
204 |         data = self.dataloader.data
205 |         average = [0 for _ in range(41)]
206 |         for line in data:
207 |             for i in range(len(line)):
208 |                 average[i] += line[i]
209 | 
210 |         return [item / len(data) for item in average]
--------------------------------------------------------------------------------
/source/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | '''
4 | 
5 | @File : utils.py
6 | @Author : 你牙上有辣子
7 | @Contact : johnsogunn23@gmail.com
8 | @Create Time : 2021-10-14 16:08
9 | @Discription :
10 | 
11 | Utility module: data loading, preprocessing and plotting helpers
12 | 
13 | '''
14 | 
15 | 
16 | # import lib
17 | import csv
18 | import os
19 | from sklearn import preprocessing
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | from pylab import mpl
23 | mpl.rcParams['font.sans-serif'] = ['SimHei']
24 | 
25 | 
26 | data_path = '../data/detection/'
27 | result_path = '../result/'
28 | 
29 | # Lookup tables used to convert the symbolic protocol/service/flag fields to numeric indices
30 | protocol_type = ['tcp', 'udp', 'icmp']
31 | service = ['aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard', 'domain', 'domain_u', 'echo',
32 |            'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp', 'ftp_data', 'gopher', 'harvest', 'hostnames', 'http',
33 |            'http_2784', 'http_443', 'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link',
34 |            'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat', 'nnsp', 'nntp', 'ntp_u',
35 |            'other', 'pm_dump', 'pop_2', 'pop_3', 'printer', 'private', 'red_i', 'remote_job', 'rje', 'shell',
36 |            'smtp', 'sql_net', 'ssh', 'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i',
37 |            'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50']
38 | flag = ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH']
39 | 
40 | 
41 | # Data loader: reads data and labels and hands them to the detector
42 | class DataLoader():
43 |     def __init__(self, feature=None):
44 |         self.data = []
45 |         self.label = []
46 |         self.feature = feature
47 | 
48 |     def load_data(self, filename):
49 |         with open(data_path+filename) as f:
50 |             for row in f:
51 |                 row = row.split(',')
52 |                 self.data.append(row[:-1])
53 |                 # strip the trailing newline
54 |                 temp = row[-1].replace('\n', '')
55 |                 # collect the label
56 |                 self.label.append(temp)
57 | 
58 |         # rows restricted to the selected features
59 |         x = []
60 | 
61 |         if self.feature is None:
62 |             x = self.data
63 |         else:
64 |             # pull out the selected feature columns
65 |             feature = set(self.feature)
66 |             for data_row in self.data:
67 |                 new_data_row = []
68 |                 for i in feature:
69 |                     new_data_row.append(data_row[i])
70 |                 x.append(new_data_row)
71 | 
72 |         return x, self.label
73 | 
74 | 
75 | # Read a raw NSL-KDD csv; original=True handles the layout with an extra difficulty column at the end
76 | def load_csv(filename, original=False):
77 |     data = []
78 |     label = []
79 |     if original:
80 |         index = 2
81 |     else:
82 |         index = 1
83 |     with open(filename) as csvfile:
84 |         for row in csvfile:
85 |             row = row.split(',')
86 |             # convert protocol_type to its index
87 |             row[1] = str(protocol_type.index(row[1]))
88 |             # convert service to its index
89 |             row[2] = str(service.index(row[2]))
90 |             # convert flag to its index
91 |             row[3] = str(flag.index(row[3]))
92 |             # convert the str fields to float
93 |             for i in range(len(row)-index):
94 |                 row[i] = float(row[i])
95 |             # convert the label to 0 (normal) / 1 (attack)
96 |             row[-index] = row[-index].replace('\n', '')
97 |             if row[-index] == 'normal':
98 |                 label.append(0)
99 |             else:
100 |                 label.append(1)
101 |             data.append(row[:-index])
102 | 
103 |     return data, label
104 | 
105 | 
106 | # Save data and labels to a csv file
107 | def save_csv(path, data, label):
108 | 
109 |     if os.path.exists(path):
110 |         os.remove(path)
111 | 
112 |     with open(path, 'w', newline='') as csvfile:
113 |         writer = csv.writer(csvfile)
114 |         for i in range(len(data)):
115 |             data[i].append(label[i])
116 |             writer.writerow(data[i])
117 | 
118 |     print("Save data successfully to: {}".format(path))
119 | 
120 | 
121 | # Load data and labels from a converted csv file
122 | def load_data(path):
123 | 
124 |     data = []
125 |     label = []
126 | 
127 |     with open(path) as f:
128 |         for row in f:
129 |             row = row.split(',')
130 |             data.append(row[:-1])
131 |             # strip the trailing newline
132 |             temp = row[-1].replace('\n', '')
133 |             label.append(temp)
134 | 
135 |     return data, label
136 | 
137 | 
138 | # Data preprocessing
139 | def data_preprocessing(data):
140 | 
141 |     # Min-max scaling
142 |     # min_max_scaler = preprocessing.MinMaxScaler()
143 |     # temp_data = min_max_scaler.fit_transform(data)
144 |     # L2 normalisation; in practice this works a bit better than min-max here
145 |     preprocessed_data = preprocessing.normalize(data, norm='l2')
146 | 
147 |     return preprocessed_data
148 | 
149 | 
150 | # Plot a line chart
151 | def draw_line(x, y, metric):
152 |     plt.xticks(x)
153 |     plt.plot(x, y['DT'], color='deepskyblue', marker='o', ls='-', label='DT')
154 |     plt.plot(x, y['RF'], color='lavender', marker='o', ls='-', label='RF')
155 |     plt.plot(x, y['KNN'], color='wheat', marker='o', ls='-', label='KNN')
156 |     plt.plot(x, y['NB'], color='plum', marker='o', ls='-', label='NB')
157 |     plt.plot(x, y['MLP'], color='teal', marker='o', ls='-', label='MLP')
158 |     plt.plot(x, y['Ada'], color='red', marker='o', ls='-', label='Ada')
159 |     plt.plot(x, y['Bagging'], color='grey', marker='o', ls='-', label='Bagging')
160 |     plt.plot(x, y['GBDT'], color='orangered', marker='o', ls='-', label='GBDT')
161 |     plt.xlabel("Feature used during model training")
162 |     plt.ylabel("Accuracy(%)")
163 |     plt.legend(loc='best', ncol=2)
164 |     plt.savefig(metric+".png")
165 | 
166 | 
167 | # Plot a bar chart
168 | def draw_bar(data_list1):
169 |     bar_label_list = ['ARC', 'CS', 'CP', 'CG', 'MR', 'DU']
170 |     bar_locs = np.arange(6)
171 |     bar_width = 0.40
172 |     # x-axis tick labels
173 |     xtick_labels = [bar_label_list[i] for i in range(6)]
174 | 
175 |     # draw the figure with matplotlib pyplot
176 
| plt.figure() 177 | # 柱状图 178 | rect1 = plt.bar(bar_locs, data_list1, width=bar_width, color='orange', alpha=0.7, xtick_labels=bar_label_list) 179 | # rect2 = plt.bar(bar_locs + bar_width, data_list2, width=bar_width, color='orange', alpha=0.7, label='攻击后') 180 | # 显示数值 181 | for rect in rect1: 182 | plt.text(rect.get_x(), rect.get_height(), rect.get_height(), ha='right', va='bottom', fontsize=12) 183 | 184 | plt.ylabel = '次数' 185 | plt.ylim(0, 2000) 186 | plt.title('智能体所作出的修改动作统计') 187 | plt.tight_layout() 188 | plt.savefig('statistics_bar.png',dpi=1000) 189 | plt.show() 190 | 191 | 192 | 193 | def my_drawline(x, y, metric): 194 | plt.plot(x, y, c='deepskyblue') 195 | # plt.tick_params(axis='both', which='major', labelsize=14) 196 | plt.xlabel('Epoch', fontsize=10) 197 | plt.ylabel('Reward', fontsize=10) 198 | plt.xlim(0, 1000) 199 | plt.ylim(-1, 2) 200 | plt.savefig(metric+".png") 201 | plt.show() 202 | 203 | 204 | 205 | if __name__ == '__main__': 206 | # 折线图绘制 207 | # x = [6, 7, 8, 9, 10] 208 | # y = {'DT':[87.43, 87.65, 87.93, 88.11, 87.98], 209 | # 'RF':[86.93, 86.80, 87.19, 87.76, 88.06], 210 | # 'KNN':[85.24, 84.89, 85.26, 86.02, 86.13], 211 | # 'NB':[72.30, 73.28, 73.84, 73.94, 74.21], 212 | # 'MLP':[79.20, 79.13, 79.94, 80.13, 80.67], 213 | # 'Ada':[83.11, 83.78, 84.31, 84.52, 84.89], 214 | # 'Bagging':[81.20, 81.33, 81.84, 82.40, 83.06], 215 | # 'GBDT':[84.56, 84.77, 85.03, 85.43, 85.50]} 216 | # draw_line(x, y, 'test') 217 | pass -------------------------------------------------------------------------------- /source/start.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | 5 | @File : start.py 6 | @Author : 你牙上有辣子 7 | @Contact : johnsogunn23@gmail.com 8 | @Create Time : 2021-10-15 15:11 9 | @Discription : 10 | 11 | 强化学习智能体训练主函数 12 | 13 | ''' 14 | 15 | # import lib 16 | import argparse 17 | import time 18 | import chainer 19 | import chainer.functions as F 20 | import chainer.links as L 21 | import numpy as np 22 | from chainer import optimizers 23 | from chainerrl import explorers 24 | import action_value as ActionValue 25 | from agent import MyDoubleDQN 26 | from chainerrl.replay_buffers import prioritized 27 | import utils 28 | from detector import Detector 29 | import env as Env 30 | 31 | 32 | # 命令行参数 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--maxf', type=int, default=10) 35 | parser.add_argument('--gpu', type=int, default=-1) 36 | parser.add_argument('--cls', type=str, default='RandomForest') 37 | args = parser.parse_args() 38 | 39 | 40 | # 超参数 41 | minibatch_size = 8 42 | replay_start_size = 20 43 | update_interval = 5 44 | state_size = 10 # 可观察的状态数 45 | action_size = 41 # 可选的特征数 46 | feature_max_count = args.maxf # 选取的特征数目大于该值时,reward为0,用于当特征数目在该范围内时,成功率最多可以到达多少 47 | MAX_EPISODE = 1000 # 48 | net_layers = [32, 64] # 一个包含两个隐藏层的神经网络 49 | 50 | result_file = './result/result-{}-{}-{}.txt'.format(args.cls, time.strftime('%Y%m%d%H%M'),args.maxf) 51 | 52 | # 每一轮逻辑如下 53 | # 1. 初始化环境,定义S和A两个list,用来保存过程中的state和action。进入循环,直到当前这一轮完成(done == True) 54 | # 2. 在每一步里,首先选择一个action,此处先用简单的act()代替 55 | # 3. 接着env接收这个action,返回新的state,done和reward,当done==False时,reward=0,当done==True时,reward为模型的准确率 56 | # 4. 
如果done==True,那么应该把当前的S、A和reward送到replay buffer里(replay也应该在此时进行),往replay buffer里添加时, 57 | # 每一对state和action都有一个reward,这个reward应该和env返回的reward(也就是该模型的acc)和count有关。 58 | 59 | # 用这个逻辑替代原来的my_train的逻辑,只需要把agent加入即可,agent应该是不需要修改的 60 | 61 | 62 | 63 | def main(): 64 | 65 | # 保存训练中每一轮的回报 66 | train_reward = [] 67 | # 保存评估中每一轮的回报 68 | evaluate_reward = [] 69 | # 保存训练中每一轮的分类准确率 70 | train_accuracy = [] 71 | # 保存评估中每一轮的分类准确率 72 | evaluate_accuracy = [] 73 | # 用来保存效果最优的特征集合 74 | feature_list_train = [] 75 | feature_list_evaluate = [] 76 | 77 | train_precision = [] 78 | train_recall = [] 79 | 80 | 81 | 82 | class QFunction(chainer.Chain): 83 | def __init__(self, obs_size, n_actions, n_hidden_channels=None): 84 | super(QFunction, self).__init__() 85 | if n_hidden_channels is None: 86 | n_hidden_channels = net_layers 87 | net = [] 88 | inpdim = obs_size 89 | for i, n_hid in enumerate(n_hidden_channels): 90 | net += [('l{}'.format(i), L.Linear(inpdim, n_hid))] 91 | net += [('norm{}'.format(i), L.BatchNormalization(n_hid))] 92 | net += [('_act{}'.format(i), F.relu)] 93 | net += [('_dropout{}'.format(i), F.dropout)] 94 | inpdim = n_hid 95 | 96 | net += [('output', L.Linear(inpdim, n_actions))] 97 | 98 | with self.init_scope(): 99 | for n in net: 100 | if not n[0].startswith('_'): 101 | setattr(self, n[0], n[1]) 102 | 103 | self.forward = net 104 | 105 | def __call__(self, x, test=False): 106 | """ 107 | Args: 108 | x (ndarray or chainer.Variable): An observation 109 | test (bool): a flag indicating whether it is in test mode 110 | """ 111 | for n, f in self.forward: 112 | if not n.startswith('_'): 113 | x = getattr(self, n)(x) 114 | elif n.startswith('_dropout'): 115 | x = f(x, 0.1) 116 | else: 117 | x = f(x) 118 | 119 | return ActionValue.DiscreteActionValue(x) 120 | 121 | def evaluate(env, agent, current): 122 | for i in range(1): 123 | print("evaluate episode: {}".format(current)) 124 | state = env.reset() 125 | terminal = False 126 | action_list = [] 127 | 128 | while not terminal: 129 | action = agent.act(state, action_list) 130 | if action not in action_list: 131 | action_list.append(action) 132 | state, reward, classify_result, terminal = env.step(action) 133 | 134 | if terminal or len(action_list) > 10: 135 | if len(action_list) > 10: 136 | terminal = True 137 | 138 | with open(result_file, 'a+') as f: 139 | f.write( 140 | "--------------------------------------------------------------------------------------------------\n" 141 | "Evaluate episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 142 | "-------------------------------------------------------------------------------------------------\n" 143 | .format(current, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list) 144 | ) 145 | print( 146 | "--------------------------------------------------------------------------------------------------\n" 147 | "Evaluate episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 148 | "-------------------------------------------------------------------------------------------------\n" 149 | .format(current, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list) 150 | ) 151 | # 加入本轮次评估的回报,后面可能需要 152 | evaluate_reward.append(reward) 153 | # 每次评估添加本次评估准确率 154 | evaluate_accuracy.append(classify_result['Accuracy']) 155 | # 同时添加对应特征 156 | feature_list_evaluate.append(action_list) 157 | 158 | 159 | 160 | def train_agent(env, agent): 161 | 
for episode in range(MAX_EPISODE): 162 | state = env.reset() 163 | terminal = False 164 | reward = 0 165 | t = 0 166 | action_list = [] 167 | while not terminal: 168 | t += 1 169 | action = agent.act_and_train( 170 | state, reward, action_list) # 此处action是否合法(即不能重复选取同一个指标)由agent判断。env默认得到的action合法。 171 | if action not in action_list: 172 | action_list.append(action) 173 | state, reward, classify_result, terminal = env.step(action) 174 | print("Episode:{}, t:{}, Action:{}, Accuracy = {}, FAR = {}, MAR = {}, Reward = {}".format(episode, t, action_list, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], reward)) 175 | 176 | if terminal: 177 | with open(result_file, 'a+') as f: 178 | f.write("Train episode:{}, Reward = {}, Accuracy = {}, FAR = {}, MAR = {}, Action = {}\n" 179 | .format(episode, reward , classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list)) 180 | print("Train episode:{}, Reward = {}, Accuracy = {} ,FAR = {}, MAR = {}, Action = {}\n" 181 | .format(episode, reward, classify_result['Accuracy'], classify_result['False Alarm Rate'], classify_result['Miss Alarm Rate'], action_list)) 182 | 183 | agent.stop_episode() 184 | # 加入轮次训练的回报,后面可能需要 185 | train_reward.append(reward) 186 | # 加入本轮次训练的准确率 187 | train_accuracy.append(classify_result['Accuracy']) 188 | train_precision.append(classify_result['Precision']) 189 | train_recall.append(classify_result['Recall']) 190 | # 本轮次对应所选的特征 191 | feature_list_train.append(action_list) 192 | if (episode + 1) % 10 == 0 and episode != 0: 193 | evaluate(env, agent, (episode + 1) / 10) 194 | 195 | def create_agent(env): 196 | state_size = env.state_size 197 | action_size = env.action_size 198 | q_func = QFunction(state_size, action_size) 199 | 200 | start_epsilon = 1 201 | end_epsilon = 0.3 202 | decay_steps = 10 203 | explorer = explorers.LinearDecayEpsilonGreedy( 204 | start_epsilon, end_epsilon, decay_steps, 205 | env.random_action) 206 | 207 | opt = optimizers.Adam() 208 | opt.setup(q_func) 209 | 210 | 211 | 212 | # 优先经验回放 213 | rbuf = prioritized.PrioritizedReplayBuffer() 214 | 215 | phi = lambda x: x.astype(np.float32, copy=False) 216 | 217 | 218 | # 自己的DDQN 219 | agent = MyDoubleDQN(q_func, 220 | opt, 221 | rbuf, 222 | gamma=0.99, 223 | explorer=explorer, 224 | replay_start_size=replay_start_size, 225 | target_update_interval=10, # target q网络多久和q网络同步 226 | update_interval=update_interval, 227 | phi=phi, 228 | minibatch_size=minibatch_size, 229 | gpu=args.gpu, # 设置是否使用gpu 230 | episodic_update_len=16) 231 | 232 | return agent 233 | 234 | 235 | def train(): 236 | env = Env.MyEnv(state_size, action_size, 237 | feature_max_count, 238 | Detector('RandomForest'), 239 | utils.DataLoader()) 240 | agent = create_agent(env) 241 | train_agent(env, agent) 242 | 243 | # evaluate(env, agent) 244 | 245 | return env, agent 246 | 247 | train() 248 | 249 | # 用于计算本次训练中最大的准确率,及所选的特征以及训练过程中的平均准确率 250 | max_train_accuracy = max(train_accuracy) 251 | max_evaluate_accuracy = max(evaluate_accuracy) 252 | max_train_reward = max(train_reward) 253 | max_evaluate_reward = max(evaluate_reward) 254 | # 取索引 255 | max_train_accuracy_index = train_accuracy.index(max_train_accuracy) 256 | max_evaluate_accuracy_index = evaluate_accuracy.index(max_evaluate_accuracy) 257 | max_train_reward_index = train_reward.index(max_train_reward) 258 | max_evaluate_reward_index = evaluate_reward.index(max_evaluate_reward) 259 | # 找特征 260 | best_train_accuracy_feature = 
feature_list_train[max_train_accuracy_index] 261 | best_evaluate_accuracy_feature = feature_list_evaluate[max_evaluate_accuracy_index] 262 | best_train_reward_feature = feature_list_train[max_train_reward_index] 263 | best_evaluate_reward_feature = feature_list_train[max_evaluate_reward_index] 264 | 265 | # 统计训练过程中reward的平均值以及变化趋势 266 | average_reward = 0 267 | for i in range(len(train_reward) - 1): 268 | average_reward = average_reward + train_reward[i] 269 | average_reward = average_reward / len(train_reward) 270 | 271 | # 写入文件训练过程统计结果 272 | with open(result_file, 'a+') as f: 273 | f.write("Train reward:{}\n".format(train_reward)) 274 | # 打印accuracy,用于绘图 275 | f.write("Train precision:{}\n".format(train_precision)) 276 | # 打印recall,用于绘图 277 | f.write("Train recall:{}\n".format(train_recall)) 278 | f.write("The max accuracy of the train:{}, the feature selected are:{}.\n".format(max_train_accuracy, 279 | best_train_accuracy_feature)) 280 | f.write("The max accuracy of the evaluate:{}, the feature selected are:{}.\n".format(max_evaluate_accuracy, 281 | best_evaluate_accuracy_feature)) 282 | f.write("The max reward of the train:{}, the feature selected are:{}\n".format(max_train_reward, 283 | best_train_reward_feature)) 284 | f.write("The max reward of the evaluate:{}, the feature selected are:{}\n".format(max_evaluate_reward, 285 | best_evaluate_reward_feature)) 286 | f.write("The average reward of this train:{}.\n".format(average_reward)) 287 | 288 | 289 | 290 | if __name__ == '__main__': 291 | 292 | start_time = time.time() 293 | main() 294 | elapsed = time.time() - start_time 295 | print("Time: {}".format(elapsed)) 296 | # 训练时间 297 | with open(result_file, 'a+') as f: 298 | 299 | f.write("Training time:{} seconds".format(elapsed)) 300 | --------------------------------------------------------------------------------
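
A short end-to-end usage sketch (editor's addition, not part of the repository): it mirrors train.py, loads the RandomForest.model shipped under result/ and evaluates it on the harder KDDTest-21 split. It assumes the converted NSL-KDD CSV files are present under data/detection/ and that the script is run from the source/ directory.

from detector import Detector
from utils import DataLoader, data_preprocessing

RF_feature = [1, 2, 5, 22, 24, 25, 27, 35]

# Load the test split restricted to the features found by the RL search
dl = DataLoader(feature=RF_feature)
data, label = dl.load_data('KDDTest-21.csv')
data = data_preprocessing(data)

# Reuse the saved model instead of retraining
detector = Detector('RandomForest')
detector.load_model()               # reads ../result/RandomForest.model
print(detector.test(data, label))   # dict with Accuracy, Precision, Recall, ...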