├── README.md
├── url_svm.py
├── html_svm.py
└── README.html


/README.md:
--------------------------------------------------------------------------------
 1 | [TOC]
 2 | # 基于启发式特征的钓鱼网站检测系统
 3 | # __Author__: 沂水寒城
 4 | 
 5 | ## 背景
 6 | 钓鱼网站的检测问题可以看作一个二分类问题，因此可以按照机器学习的流程和方法来进行分类。
 7 | 
 8 | ## 基于html的启发式钓鱼网站检测
 9 | 基于对数据的大量统计并综合已有的方法，提取29维特征，之后使用SVM模型进行训练和测试。
10 | 
11 | ## 基于url的启发式钓鱼网站检测
12 | 基于url的特征设计的轻量级分类模型，同样使用SVM分类模型。
13 | 
14 | ## 实验方案
15 | 实验采用的方法是结合两个分类模型的结果，综合决策页面最终所属的类别。
16 | 
17 | ## 实验数据
18 | 由于爬取得到的html文件较大、不易上传，这里不提供html数据。url数据可以在我的malicious_web_page_detection_based_on_url项目中找到，数据是可以通用的。malicious_web_page_detection_based_on_url着重使用sklearn中的机器学习模型进行测试，而本项目主要集中在特征工程方面，两者合在一起就是一个完整的机器学习实践流程。


--------------------------------------------------------------------------------
/url_svm.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/python
  2 | #-*-coding:utf-8-*-
  3 | 
  4 | 
  5 | '''
  6 | __Author__:沂水寒城
  7 | 功能：基于url的特征设计轻量级的仿冒网站检测工具
  8 | '''
  9 | 
 10 | 
 11 | import codecs
 12 | import sys
 13 | import re
 14 | from urllib2 import urlparse
 15 | 
 16 | 
 17 | 
 18 | RED_KEYWORDS = ["account", "admin", "administrator",
 19 |                 "auth", "bank", "client", "confirm", "email", "host",
 20 |                 "password", "pay", "private", "safe", "secure", "security",
 21 |                 "sign", "user", "validation", "verification", "icbc"]
 22 | PATH_KEYWORDS = ["www", "net", "com", "cn"]
 23 | 
 24 | 
 25 | 
 26 | def geturlat(url):
 27 |     '''
 28 |     判断URL中是否含有@，？，-,_等符号
 29 |     '''
 30 |     re_script = re.compile(r'@|-|_|\?|~')
 31 |     return 1 if re_script.search(url) else 0
 32 | 
 33 | def get_has_ip(url):
 34 |     '''
 35 |     判断url中是否包含ip，包含返回1，不包含返回0
 36 |     '''
 37 |     compile_rule_ip = re.compile(r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])')
 38 |     ip_list=re.findall(compile_rule_ip, url)
 39 |     if ip_list:
 40 |         return 1
 41 |     else:
 42 |         return 0
 43 | 
 44 | 
 45 | def geturldot(url):
 46 |     '''
 47 |     判断URL中.的个数
 48 |     '''
 49 |     dotnum = 0
 50 |     for u in url:
 51 |         if u == '.':
 52 |             dotnum += 1
 53 |     return dotnum
 54 | 
 55 | 
 56 | def get_url_length(url):
 57 |     '''
 58 |     获得URL的总长度
 59 |     '''
 60 |     return len(url)
 61 | 
 62 | 
 63 | def get_url_number_length(url):
 64 |     '''
 65 |     获得URL的最长的数字串长度
 66 |     '''
 67 |     result = 0
 68 |     match = re.findall(r"\d+", url)
 69 |     if match:
 70 |         match.sort(key=lambda x: len(x), reverse=True)
 71 |         result = len(match[0])
 72 |     return result
 73 | 
 74 | 
 75 | def get_red_keyword(url):
 76 |     '''
 77 |     判断URL中是否包含敏感词汇
 78 |     '''
 79 |     url = url.lower()
 80 |     for key in RED_KEYWORDS:
 81 |         if url.find(key) != -1:
 82 |             return 1
 83 |     return 0
 84 | 
 85 | 
 86 | def get_path_key(url):
 87 |     '''
 88 |     判断URL的路径中是否包含敏感词汇 
 89 |     '''
 90 |     url=url.replace('//', '')
 91 |     if '/' in url:
 92 |         url_path_list=url.split('/')[1:]
 93 |         if url_path_list:
 94 |             for key in PATH_KEYWORDS:
 95 |                 if url_path_list.lower().find(key)!=-1:
 96 |                     return 1
 97 |     return 0
 98 |     # url_parse = urlparse(url)
 99 |     # path = url_parse.path
100 |     # if path:
101 |     #     for key in PATH_KEYWORDS:
102 |     #         if path.lower().find(key) != -1:
103 |     #             return 1
104 |     # return 0
105 | 
106 | 
def get_url_vector(url):
    '''
    Build the heuristic feature vector for a single URL.

    url: the URL string to analyse.

    Returns a list with seven entries, in this order: has-IP flag,
    special-character flag, number of dots, URL length, longest digit run,
    sensitive-keyword flag, path-keyword flag.
    '''
    extractors = (
        get_has_ip,
        geturlat,
        geturldot,
        get_url_length,
        get_url_number_length,
        get_red_keyword,
        get_path_key,
    )
    return [extract(url) for extract in extractors]
121 | 
if __name__ == '__main__':
    # Smoke test: print the feature vector of one sample URL.
    sample_url = 'http://www.baidu.com'
    print(get_url_vector(sample_url))
    # Output recorded by the original author:
    # [0, 0, 2, 20, 0, 0, 0]

    # The commented-out code below was used to store the feature data.
134 |     # white_urlresult = open('result/white_urlresult.txt', 'wb')
135 |     # #存储白名单数据
136 |     # white_urlresult.write('url_sign, url_dot, url_length, url_number_length, red_keyword, label')
137 |     # white_urlresult.write('\n')
138 |     # urlfile = open('dataset/white.txt', 'r')
139 |     # urlfile_list = urlfile.readlines()
140 |     # for one_url in urlfile_list:
141 |     #     heuristic_vector = get_url_vector(one_url.strip())
142 |     #     heuristic_url_str = [str(vector) for vector in heuristic_vector]
143 |     #     #fw.write(url + ',Y,1,' + ','.join(heuristic_vector_str) + '\n')
144 |     #     white_urlresult.write(','.join(heuristic_url_str) + ',' + '0' + '\n')
145 | 
146 |     # #存储黑名单数据
147 |     # black_urlresult = open('black_urlresult.txt', 'wb')
148 |     # id = 1
149 |     # black_urlresult.write('id, url_sign, url_dot, url_length, url_number_length, red_keyword, label')
150 |     # black_urlresult.write('\n')
151 |     # urlfile = open('dataset/all.txt', 'r')
152 |     # urlfile_list = urlfile.readlines()
153 |     # url_list_len = len(urlfile_list)
154 |     # i = 0
155 |     # for one1_url in urlfile_list:
156 |     #     heuristic_vector = get_url_vector(one1_url.strip())
157 |     #     heuristic_url_str = [str(vector) for vector in heuristic_vector]
158 |     #     #fw.write(url + ',Y,1,' + ','.join(heuristic_vector_str) + '\n')
159 |     #     i += 1
160 |     #     if i == url_list_len:
161 |     #         black_urlresult.write(str(id) + ',' + ','.join(heuristic_url_str) + ',' + '1' )
162 |     #         id += 1
163 |     #     else:
164 |     #         black_urlresult.write(str(id) + ',' + ','.join(heuristic_url_str) + ',' + '1' + '\n')
165 |     #         id += 1
166 |     
167 | 


--------------------------------------------------------------------------------
/html_svm.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/python
  2 | #-*-coding:utf-8-*-
  3 | 
  4 | '''
  5 | __Author__:沂水寒城
  6 | 功能：基于html的特征检测仿冒网站
  7 | '''
  8 | 
  9 | 
 10 | import re
 11 | import urllib2
 12 | from lxml import etree
 13 | from bs4 import BeautifulSoup
 14 | 
 15 | 
 16 | def get_html_length(Html):
 17 |     '''
 18 |     获取Html的长度
 19 |     '''
 20 |     return len(Html)
 21 | 
 22 | 
 23 | def get_div_num(Html):
 24 |     '''
 25 |     获得html中div标签的数量
 26 |     '''
 27 |     match = re.findall(r"<div.*?>(.*?)</div>", Html)
 28 |     if len(match) > 13:
 29 |         return 1
 30 |     else:
 31 |         return 0
 32 | 
 33 | 
 34 | def get_embed_num(Html):
 35 |     '''
 36 |     获得HTML中的<embed>标签数量(特征大多为0)
 37 |     '''
 38 |     match = re.compile(r'<embed[^>]+/>|<embed[^>]+[^/>]></embed>')
 39 |     embed_list = re.findall(match, Html)
 40 |     return len(embed_list)
 41 | 
 42 | 
 43 | def get_iframe_num(Html):
 44 |     '''
 45 |     获得html中的<iframe>标签的数量(特征大多为0)
 46 |     '''
 47 |     match = re.compile(r'<iframe[^>]+/>|<iframe[^>]+[^/>]></iframe>')
 48 |     iframe_list = re.findall(match, Html)
 49 |     return len(iframe_list)
 50 | 
 51 | 
 52 | def get_applet_num(Html):
 53 |     '''
 54 |     获得html中的<applet>标签的数量(特征大多为0)
 55 |     '''
 56 |     match = re.compile(r'<applet[\s\S]+/applet>')
 57 |     applet_list = re.findall(match, Html)
 58 |     return len(applet_list)
 59 | 
 60 | 
 61 | def get_frame_num(Html):
 62 |     '''
 63 |     获得html中的<frame>标签的数量(特征大多为0)
 64 |     '''
 65 |     match = re.compile(r'<frame[^>]+/>|<frame[^>]+[^/>]></frame>')
 66 |     frame_list = re.findall(match, Html)
 67 |     return len(frame_list)
 68 | 
 69 | 
 70 | def get_form_get_num(Html):
 71 |     '''
 72 |     提取form中的get方法数量(特征大多为0)
 73 |     '''
 74 |     match = re.compile(r'<form[\s\S]+method="get"[\s\S]+/form>')
 75 |     get_list = re.findall(match, Html)
 76 |     return len(get_list)
 77 | 
 78 | 
 79 | def get_form_post_num(Html):
 80 |     '''
 81 |     提取form中的post方法数量(特征大多为0和1)
 82 |     '''
 83 |     match = re.compile(r'<form[\s\S]+method="post"[\s\S]+/form>')
 84 |     post_list = re.findall(match, Html)
 85 |     if len(post_list) > 1:
 86 |         return 1
 87 |     else:
 88 |         return 0
 89 | 
 90 | 
 91 | def get_js_long(Html):
 92 |     '''
 93 |     计算网页中的script内容长度(特征大多为0)
 94 |     '''
 95 |     script_num = 0
 96 |     # 抽取script包含的内容
 97 |     re_alert = re.compile(r">\s*alert[\S\s]*</script>")
 98 |     alert_content = re_alert.findall(Html)
 99 |     # 计算script中内容的字符长度
100 |     if alert_content != []:
101 |         for once_alert_content in alert_content:
102 |             script_num += len(once_alert_content)
103 |     if script_num > 0:
104 |         return 1
105 |     else:
106 |         return 0
107 | 
108 | 
def get_divonClick_num(Html):
    '''
    Return how many <div> tags carry an unquoted onClick=window.open(...)
    handler.

    The dot and the opening parenthesis are escaped so that they are matched
    literally.  Previously "." matched any character and "()" was an empty
    group, so text such as "onClick=windowsopen" was counted as well.
    Only the opening parenthesis is required, so calls with arguments match.
    '''
    handler_pattern = re.compile(r'<div\s[\S]*onClick=window\.open\(')
    return len(handler_pattern.findall(Html))
116 | 
117 | 
def get_headtitlebody_num(Html):
    '''
    Report whether the HTML contains <head>, <title> and <body> elements.

    Returns a list of three 0/1 flags in the order [head, title, body].
    <head> and <title> must appear without attributes and with a closing
    tag; <body may carry attributes but must be followed later by "body>".
    '''
    tag_patterns = (
        r'<head>[\s\S]*</head>',
        r'<title>[\s\S]*</title>',
        r'<body[\s\S]*body>',
    )
    # Every match necessarily contains its own opening tag, so the presence
    # of a match is all that decides each flag.
    return [1 if re.search(pattern, Html) else 0 for pattern in tag_patterns]
154 | 
155 | 
def get_input_num(Html):
    '''
    Flag pages with many <input> tags.

    Returns 1 when the text "<input" occurs more than 5 times in the HTML,
    otherwise 0.
    '''
    occurrences = Html.count('<input')
    return 1 if occurrences > 5 else 0
170 | 
171 | 
def get_form_num(Html):
    '''
    Flag pages that contain at least one complete <form ...>...</form>.

    Returns 1 when an opening "<form" is followed later by "</form>",
    otherwise 0.
    '''
    # A match always ends with "</form>", so one match is enough to make the
    # closing-tag count positive.
    has_form = re.search(r'<form[\s\S]*</form>', Html) is not None
    return 1 if has_form else 0
191 | 
192 | 
def get_SetIntervel_num(html):
    '''
    Flag pages that call setInterval(...).

    The search is case-insensitive.  Returns 1 when "setInterval(" is
    followed somewhere later by ")", otherwise 0.
    '''
    found = re.search(r'setInterval\([\s\S]*\)', html, re.I)
    return 0 if found is None else 1
203 | 
204 | 
def get_SetTimeout_num(html):
    '''
    Flag pages that call setTimeout(...).

    The search is case-insensitive.  Returns 1 when "setTimeout(" is
    followed somewhere later by ")", otherwise 0.
    '''
    timeout_pattern = re.compile(r'setTimeout\([\s\S]*\)', re.I)
    if timeout_pattern.search(html):
        return 1
    return 0
215 | 
216 | 
def get_onload_num(html):
    '''
    Flag pages that call onload(...).

    The search is case-insensitive.  The argument part now accepts any
    number of characters, like the sibling detectors; the earlier pattern
    lacked the "*" quantifier and only matched calls with exactly one
    character between the parentheses.

    Returns 1 when "onload(" is followed somewhere later by ")", else 0.
    '''
    onload_pattern = re.compile(r'onload\([\s\S]*\)', re.I)
    return 1 if onload_pattern.search(html) else 0
227 | 
228 | 
def get_onerror_num(html):
    '''
    Return the number of onerror(...) calls in the HTML.

    The search is case-insensitive and non-greedy, so each call up to its
    first ")" is counted separately.  The earlier greedy pattern ran from the
    first "onerror(" to the last ")" in the document and could therefore
    never report more than 1.
    '''
    onerror_calls = re.findall(r'onerror\([\s\S]*?\)', html, re.I)
    return len(onerror_calls)
236 | 
237 | 
def get_js_functions_num(html):
    '''
    Return how often the word "function" appears inside <script> blocks.

    Only attribute-less <script>...</script> blocks are inspected, each one
    matched non-greedily so text between two blocks is not counted.  Returns
    0 when the page has no such block.

    The earlier version read an undefined local ``script`` (its assignment
    had been disabled to avoid an IndexError on pages without scripts) and
    raised NameError on every call.
    '''
    script_blocks = re.findall(r'<script>[\s\S]*?</script>', html)
    return sum(block.count('function') for block in script_blocks)
250 | 
251 | 
def get_script_functions_num(html):
    '''
    Return how often the word "function" appears inside <script> blocks.

    Only attribute-less <script>...</script> blocks are inspected, each one
    matched non-greedily so text between two blocks is not counted.  Returns
    0 when the page has no such block.

    The earlier version read an undefined local ``script`` (its assignment
    had been disabled to avoid an IndexError on pages without scripts) and
    raised NameError on every call.
    '''
    total = 0
    for script_block in re.finditer(r'<script>[\s\S]*?</script>', html):
        total += script_block.group().count('function')
    return total
264 | 
265 | 
def get_pop_num(html):
    '''
    Flag pages that call pop(...).

    The search is case-insensitive.  Returns 1 when "pop(" is followed
    somewhere later by ")", otherwise 0.
    '''
    found = re.search(r'pop\([\s\S]*\)', html, re.I)
    return 0 if found is None else 1
276 | 
277 | 
def get_exec_num(html):
    '''
    Flag pages that call exec(...).

    The search is case-insensitive.  Returns 1 when "exec(" is followed
    somewhere later by ")", otherwise 0.
    '''
    exec_pattern = re.compile(r'exec\([\s\S]*\)', re.I)
    if exec_pattern.search(html):
        return 1
    return 0
288 | 
289 | 
def get_Dispatchevent_num(html):
    '''
    Return the number of dispatchEvent(...) calls in the HTML.

    The search is case-insensitive and non-greedy, so each call up to its
    first ")" is counted separately.  The earlier greedy pattern ran from the
    first call to the last ")" in the document and could therefore never
    report more than 1.
    '''
    dispatch_calls = re.findall(r'dispatchevent\([\s\S]*?\)', html, re.I)
    return len(dispatch_calls)
298 | 
299 | 
def get_Eval_num(html):
    '''
    Flag pages that call eval(...).

    The search is case-insensitive.  Returns 1 when "eval(" is followed
    somewhere later by ")", otherwise 0.
    '''
    found = re.search(r'Eval\([\s\S]*\)', html, re.I)
    return 1 if found is not None else 0
310 | 
311 | 
def get_attachevent_num(html):
    '''
    Flag pages that call attachEvent(...).

    The search is case-insensitive.  Returns 1 when "attachEvent(" is
    followed somewhere later by ")", otherwise 0.
    '''
    attach_pattern = re.compile(r'attachevent\([\s\S]*\)', re.I)
    return int(attach_pattern.search(html) is not None)
322 | 
323 | 
def get_formcharcode_num(html):
    '''
    Return the number of fromCharCode(...) calls in the HTML.

    The JavaScript method is String.fromCharCode; the earlier pattern
    spelled it "formcharcode" and so never matched real code.  The search is
    case-insensitive and non-greedy, so each call up to its first ")" is
    counted separately.
    '''
    char_code_calls = re.findall(r'fromcharcode\([\s\S]*?\)', html, re.I)
    return len(char_code_calls)
332 | 
333 | 
def extract_html_feature(html_path):
    '''
    Load an HTML file and extract its title keywords, body keywords and ICP.

    According to the original author's description, the three results are
    strings (title nouns after word segmentation, body-text keywords, and
    the ICP number), with keywords separated by '/'.

    NOTE(review): utf8_open_file, get_title_cut, get_html_keyword and
    get_ICP are neither defined nor imported in this file, so calling this
    function raises NameError unless they are supplied elsewhere -- confirm
    where they are meant to come from.

    Returns the tuple (title_keyword, text_keyword, ICP).
    '''
    html = utf8_open_file(html_path)
    return get_title_cut(html), get_html_keyword(html), get_ICP(html)
345 |     
346 | 
def get_html_vector(Html):
    '''
    Build the 29-entry heuristic feature vector for one HTML document.

    Html: the raw HTML text of the page.

    Returns a list of integers.  The layout is unchanged: the form GET/POST
    features appear three times (positions 6-7, 19-20 and 27-28), but they
    are now computed once and reused instead of re-scanning the document.
    The list is empty only if BeautifulSoup were to return None.
    '''
    heuristic_vector = []
    soup = BeautifulSoup(Html, "lxml")
    # Identity comparison is the idiomatic None test (PEP 8).
    if soup is not None:
        form_get = get_form_get_num(Html)
        form_post = get_form_post_num(Html)
        heuristic_vector = [
            get_html_length(Html),
            get_div_num(Html),
            get_embed_num(Html),
            get_iframe_num(Html),
            get_applet_num(Html),
            get_frame_num(Html),
            form_get,
            form_post,
            get_js_long(Html),
            get_divonClick_num(Html),
        ]
        # Contributes three flags: head, title, body.
        heuristic_vector.extend(get_headtitlebody_num(Html))
        heuristic_vector.extend([
            get_input_num(Html),
            get_form_num(Html),
            get_SetIntervel_num(Html),
            get_SetTimeout_num(Html),
            get_onload_num(Html),
            get_onerror_num(Html),
            form_get,
            form_post,
            get_pop_num(Html),
            get_exec_num(Html),
            get_Dispatchevent_num(Html),
            get_Eval_num(Html),
            get_attachevent_num(Html),
            get_formcharcode_num(Html),
            form_get,
            form_post,
        ])
    return heuristic_vector
379 | 
if __name__ == '__main__':
    # Smoke test: download one page and print its feature vector.
    html_baidu = urllib2.urlopen('http://www.baidu.com').read()
    print(get_html_vector(html_baidu))
    # Output recorded by the original author:
    # [111666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
388 | 


--------------------------------------------------------------------------------
/README.html:
--------------------------------------------------------------------------------
   1 | <!DOCTYPE html><html><head><meta charset="utf-8"><style>body {
   2 |   max-width: 980px;
   3 |   border: 1px solid #ddd;
   4 |   outline: 1300px solid #fff;
   5 |   margin: 16px auto;
   6 | }
   7 | 
   8 | body .markdown-body
   9 | {
  10 |   padding: 45px;
  11 | }
  12 | 
  13 | @font-face {
  14 |   font-family: fontawesome-mini;
  15 |   src: url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAzUABAAAAAAFNgAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABGRlRNAAABbAAAABwAAAAcZMzaOEdERUYAAAGIAAAAHQAAACAAOQAET1MvMgAAAagAAAA+AAAAYHqhde9jbWFwAAAB6AAAAFIAAAFa4azkLWN2dCAAAAI8AAAAKAAAACgFgwioZnBnbQAAAmQAAAGxAAACZVO0L6dnYXNwAAAEGAAAAAgAAAAIAAAAEGdseWYAAAQgAAAFDgAACMz7eroHaGVhZAAACTAAAAAwAAAANgWEOEloaGVhAAAJYAAAAB0AAAAkDGEGa2htdHgAAAmAAAAAEwAAADBEgAAQbG9jYQAACZQAAAAaAAAAGgsICJBtYXhwAAAJsAAAACAAAAAgASgBD25hbWUAAAnQAAACZwAABOD4no+3cG9zdAAADDgAAABsAAAAmF+yXM9wcmVwAAAMpAAAAC4AAAAusPIrFAAAAAEAAAAAyYlvMQAAAADLVHQgAAAAAM/u9uZ4nGNgZGBg4ANiCQYQYGJgBEJuIGYB8xgABMMAPgAAAHicY2Bm42OcwMDKwMLSw2LMwMDQBqGZihmiwHycoKCyqJjB4YPDh4NsDP+BfNb3DIuAFCOSEgUGRgAKDgt4AAB4nGNgYGBmgGAZBkYGEAgB8hjBfBYGCyDNxcDBwMTA9MHhQ9SHrA8H//9nYACyQyFs/sP86/kX8HtB9UIBIxsDXICRCUgwMaACRoZhDwA3fxKSAAAAAAHyAHABJQB/AIEAdAFGAOsBIwC/ALgAxACGAGYAugBNACcA/wCIeJxdUbtOW0EQ3Q0PA4HE2CA52hSzmZDGe6EFCcTVjWJkO4XlCGk3cpGLcQEfQIFEDdqvGaChpEibBiEXSHxCPiESM2uIojQ7O7NzzpkzS8qRqnfpa89T5ySQwt0GzTb9Tki1swD3pOvrjYy0gwdabGb0ynX7/gsGm9GUO2oA5T1vKQ8ZTTuBWrSn/tH8Cob7/B/zOxi0NNP01DoJ6SEE5ptxS4PvGc26yw/6gtXhYjAwpJim4i4/plL+tzTnasuwtZHRvIMzEfnJNEBTa20Emv7UIdXzcRRLkMumsTaYmLL+JBPBhcl0VVO1zPjawV2ys+hggyrNgQfYw1Z5DB4ODyYU0rckyiwNEfZiq8QIEZMcCjnl3Mn+pED5SBLGvElKO+OGtQbGkdfAoDZPs/88m01tbx3C+FkcwXe/GUs6+MiG2hgRYjtiKYAJREJGVfmGGs+9LAbkUvvPQJSA5fGPf50ItO7YRDyXtXUOMVYIen7b3PLLirtWuc6LQndvqmqo0inN+17OvscDnh4Lw0FjwZvP+/5Kgfo8LK40aA4EQ3o3ev+iteqIq7wXPrIn07+xWgAAAAABAAH//wAPeJyFlctvG1UUh+/12DPN1B7P3JnYjj2Ox4/MuDHxJH5N3UdaEUQLqBIkfQQioJWQ6AMEQkIqsPGCPwA1otuWSmTBhjtps2ADWbJg3EpIXbGouqSbCraJw7kzNo2dRN1cnXN1ZvT7zuuiMEI7ncizyA0URofRBJpCdbQuIFShYY+GZRrxMDVtih5TwQPHtXDFFSIKoWIbuREBjLH27Ny4MsbVx+uOJThavebgVrNRLAiYx06rXsvhxLgWx9xpfHdrs/ekc2Pl2cpPCVEITQpwbj8VQhfXSq2m+Wxqaq2D73Kne5e3NjHqQNj3CRYlJlgUl/jRNP+2Gs2pNYRQiOnmUaQDqm30KqKiTTWPWjboxnTWpvgxjXo0KrtZXAHt7hwIz0YVcj88JnKlJKi3NPAwLyDwZudSmJSMMJFDYaOkaol6XtESx3Gt1VTytdZJ3DCLeaVhVnCBH1fycHTxFXwPX+l2e3d6H/TufGGmMTLTnbSJUdo00zuBswMO/nl3YLeL/wnu9/limCuD3vC54h5NBVz6Li
414AI8Vx3iiosKcQXUbrvhFFiYb++HN4DaF4XzFW0fIN4XDWJ3a3XQoq9V8WiyRmdsatV9xUcHims1JloH0YUa090G3Tro3mC6c01f+YwCPquINr1PTaCP6rVTOOmf0GE2dBc7zWIhji3/5MchSuBHgDbU99RMWt3YUNMZMJmx92YP6NsHx/5/M1yvInpnkIOM3Z8fA3JQ2lW1RFC1KaBPDFXNAHYYvGy73aYZZZ3HifbeuiVZCpwA3oQBs0wGPYJbJfg60xrKEbKiNtTe1adwrpBRwlAuQ3q3VRaX0QmQ9a49BTSCuF1MLfQ6+tinOubRBZuWPNoMevGMT+V41KitO1is3D/tpMcq1JHZqDHGs8DoYGDkxJgKjHROeTCmhZvzPm9pod+ltKm4PN7Dyvvldlpsg8D+4AUJZ3F/JBstZz7cbFRxsaAGV6yX/dkcycWf8eS3QlQea+YLjdm3yrOnrhFpUyKVvFE4lpv4bO3Svx/6F/4xmiDu/RT5iI++lko18mY1oX+5UGKR6kmVjM/Zb76yfHtxy+h/SyQ0lLdpdKy/lWB6szatetQJ8nZ80A2Qt6ift6gJeavU3BO4gtxs/KCtNPVibCtYCWY3SIlSBPKXZALXiIR9oZeJ1AuMyxLpHIy/yO7vSiSE+kZvk0ihJ30HgHfzZtEMmvV58x6dtqns0XTAW7Vdm4HJ04OCp/crOO7rd9SGxQAE/mVA9xRN+kVSMRFF6S9JFGUtthkjBA5tFCWc2l4V43Ex9GmUP3SI37Jjmir9KqlaDJ4S4JB3vuM/jzyH1+8MuoZ+QGzfnvPoJb96cZlWjMcKLfgDwB7E634JTY+asjsPzS5CiVnEWY+KsrsIN5rn3mAPjqmQBxGjcGKB9f9ZxY3mYC2L85CJ2FXIxKKyHk+dg0FHbuEc7D5NzWUX32WxFcWNGRAbvwSx0RmIXVDuYySafluQBmzA/ssqJAMLnli+WIC90Gw4lm85wcp0qjArEDPJJV/sSx4P9ungTpgMw5gVC1XO4uULq0s3v1rqLi0vX/z65vlH50f8T/RHmSPTk5xxWBWOluMT6WiOy+tdvWxlV/XQb3o3c6Ssr+r6I708GsX9/nzp1tKFh0s3v7m4vAy/Hnb/KMOvc1wump6Il48K6mGDy02X9Yd65pa+nQIjk76lWxCkG8NBCP0HQS9IpAAAeJxjYGRgYGBhcCrq214Qz2/zlUGenQEEzr/77oug/zewFbB+AHI5GJhAogBwKQ0qeJxjYGRgYH3/P46BgZ0BBNgKGBgZUAEPAE/7At0AAAB4nGNngAB2IGYjhBsYBAAIYADVAAAAAAAAAAAAAFwAyAEeAaACCgKmAx4DggRmAAAAAQAAAAwAagAEAAAAAAACAAEAAgAWAAABAAChAAAAAHiclZI7bxQxFIWPd/JkUYQChEhIyAVKgdBMskm1QkKrRETpQiLRUczueB/K7HhlOxttg8LvoKPgP9DxFxANDR0tHRWi4NjrPIBEgh1p/dm+vufcawNYFWsQmP6e4jSyQB2fI9cwj++RE9wTjyPP4LYoI89iWbyLPIe6+Bh5Hs9rryMv4GbtW+RF3EhuRa7jbrIbeQkPkjdUETOLnL0Kip4FVvAhco1RXyMnSPEz8gzWxE7kWTwUp5HnsCLeR57HW/El8gJWa58iL+JO7UfkOh4l9yMv4UnyEtvQGGECgwF66MNBooF1bGCL1ELB/TYU+ZBRlvsKQ44Se6jQ4a7hef+fh72Crv25kp+8lNWGmeKoOI5jJLb1aGIGvb6TjfWNLdkqdFvJw4l1amjlXtXRZqRN7lSRylZZyhBqpVFWmTEXgWfUrpi/hZOQXdOd4rKuXOtEWT3k5IArPRzTUU5tHKjecZkTpnVbNOnt6jzN8240GD4xtikvZW56043rPMg/dS+dlOceXoR+WPbJ55Dsekq1lJpnypsMUsYOdCW30o103Ytu/lvh+5RWFLfBjm9/N8hJntPhvx92rnoE/kyHdG
asGy754kw36vsVf/lFeBi+0COu+cfgQr42G3CRpeLoZ53gmfe3X6rcKt5oVxnptHR9JS8ehVUd5wvvahN2uqxOOpMXapibI5k7Zwbt4xBSaTfoKBufhAnO/uqNcfK8OTs0OQ6l7JIqFjDhYj5WcjevCnI/1DDiI8j4ndWb/5YzDZWh79yomWXeXj7Nnw70/2TIeFPTrlSh89k1ObOSRVZWZfgF0r/zJQB4nG2JUQuCQBCEd07TTg36fb2IyBaLd3vWaUh/vmSJnvpgmG8YcmS8X3Shf3R7QA4OBUocUKHGER5NNbOOEvwc1txnuWkTRb/aPjimJ5vXabI+3VfOiyS15UWvyezM2xiGOPyuMohOH8O8JiO4Af+FsAGNAEuwCFBYsQEBjlmxRgYrWCGwEFlLsBRSWCGwgFkdsAYrXFhZsBQrAAA=) format('woff');
  16 | }
  17 | 
  18 | @font-face {
  19 |   font-family: octicons-anchor;
  20 |   src: url(data:font/woff;charset=utf-8;base64,d09GRgABAAAAAAYcAA0AAAAACjQAAQAAAAAAAAAAAAAAAAAAAAAAAAAAAABGRlRNAAABMAAAABwAAAAca8vGTk9TLzIAAAFMAAAARAAAAFZG1VHVY21hcAAAAZAAAAA+AAABQgAP9AdjdnQgAAAB0AAAAAQAAAAEACICiGdhc3AAAAHUAAAACAAAAAj//wADZ2x5ZgAAAdwAAADRAAABEKyikaNoZWFkAAACsAAAAC0AAAA2AtXoA2hoZWEAAALgAAAAHAAAACQHngNFaG10eAAAAvwAAAAQAAAAEAwAACJsb2NhAAADDAAAAAoAAAAKALIAVG1heHAAAAMYAAAAHwAAACABEAB2bmFtZQAAAzgAAALBAAAFu3I9x/Nwb3N0AAAF/AAAAB0AAAAvaoFvbwAAAAEAAAAAzBdyYwAAAADP2IQvAAAAAM/bz7t4nGNgZGFgnMDAysDB1Ml0hoGBoR9CM75mMGLkYGBgYmBlZsAKAtJcUxgcPsR8iGF2+O/AEMPsznAYKMwIkgMA5REMOXicY2BgYGaAYBkGRgYQsAHyGMF8FgYFIM0ChED+h5j//yEk/3KoSgZGNgYYk4GRCUgwMaACRoZhDwCs7QgGAAAAIgKIAAAAAf//AAJ4nHWMMQrCQBBF/0zWrCCIKUQsTDCL2EXMohYGSSmorScInsRGL2DOYJe0Ntp7BK+gJ1BxF1stZvjz/v8DRghQzEc4kIgKwiAppcA9LtzKLSkdNhKFY3HF4lK69ExKslx7Xa+vPRVS43G98vG1DnkDMIBUgFN0MDXflU8tbaZOUkXUH0+U27RoRpOIyCKjbMCVejwypzJJG4jIwb43rfl6wbwanocrJm9XFYfskuVC5K/TPyczNU7b84CXcbxks1Un6H6tLH9vf2LRnn8Ax7A5WQAAAHicY2BkYGAA4teL1+yI57f5ysDNwgAC529f0kOmWRiYVgEpDgYmEA8AUzEKsQAAAHicY2BkYGB2+O/AEMPCAAJAkpEBFbAAADgKAe0EAAAiAAAAAAQAAAAEAAAAAAAAKgAqACoAiAAAeJxjYGRgYGBhsGFgYgABEMkFhAwM/xn0QAIAD6YBhwB4nI1Ty07cMBS9QwKlQapQW3VXySvEqDCZGbGaHULiIQ1FKgjWMxknMfLEke2A+IJu+wntrt/QbVf9gG75jK577Lg8K1qQPCfnnnt8fX1NRC/pmjrk/zprC+8D7tBy9DHgBXoWfQ44Av8t4Bj4Z8CLtBL9CniJluPXASf0Lm4CXqFX8Q84dOLnMB17N4c7tBo1AS/Qi+hTwBH4rwHHwN8DXqQ30XXAS7QaLwSc0Gn8NuAVWou/gFmnjLrEaEh9GmDdDGgL3B4JsrRPDU2hTOiMSuJUIdKQQayiAth69r6akSSFqIJuA19TrzCIaY8sIoxyrNIrL//pw7A2iMygkX5vDj+G+kuoLdX4GlGK/8Lnlz6/h9MpmoO9rafrz7ILXEHHaAx95s9lsI7AHNMBWEZHULnfAXwG9/ZqdzLI08iuwRloXE8kfhXYAvE23+23DU3t626rbs8/8adv+9DWknsHp3E17oCf+Z48rvEQNZ78paYM38qfk3v/u3l3u3GXN2Dmvmvpf1Srwk3pB/VSsp512bA/GG5i2WJ7wu430yQ5K3nFGiOqgtmSB5pJVSizwaacmUZzZhXLlZTq8qGGFY2YcSkqbth6aW1tRmlaCFs2016m5qn36SbJrqosG4uMV4aP2PHBmB3tjtmgN2izkGQyLWprekbIntJFing32a5rKWCN/SdSoga45EJykyQ7asZvHQ8PTm6cslIpwyeyjbVltNikc2HTR7YKh9LBl9DADC0U/jLcBZDKrMhUBfQBvXRzLtFtjU9eNHKin0x5InTqb8lNpfKv1s1xHzTXRqgKzek/mb7nB8RZTCDhGEX3kK/8Q75AmUM/eLkfA+
0Hi908Kx4eNsMgudg5GLdRD7a84npi+YxNr5i5KIbW5izXas7cHXIMAau1OueZhfj+cOcP3P8MNIWLyYOBuxL6DRylJ4cAAAB4nGNgYoAALjDJyIAOWMCiTIxMLDmZedkABtIBygAAAA==) format('woff');
  21 | }
  22 | 
  23 | .markdown-body {
  24 |   font-family: sans-serif;
  25 |   -ms-text-size-adjust: 100%;
  26 |   -webkit-text-size-adjust: 100%;
  27 |   color: #333333;
  28 |   overflow: hidden;
  29 |   font-family: "Helvetica Neue", Helvetica, "Segoe UI", Arial, freesans, sans-serif;
  30 |   font-size: 16px;
  31 |   line-height: 1.6;
  32 |   word-wrap: break-word;
  33 | }
  34 | 
  35 | .markdown-body a {
  36 |   background: transparent;
  37 | }
  38 | 
  39 | .markdown-body a:active,
  40 | .markdown-body a:hover {
  41 |   outline: 0;
  42 | }
  43 | 
  44 | .markdown-body b,
  45 | .markdown-body strong {
  46 |   font-weight: bold;
  47 | }
  48 | 
  49 | .markdown-body mark {
  50 |   background: #ff0;
  51 |   color: #000;
  52 |   font-style: italic;
  53 |   font-weight: bold;
  54 | }
  55 | 
  56 | .markdown-body sub,
  57 | .markdown-body sup {
  58 |   font-size: 75%;
  59 |   line-height: 0;
  60 |   position: relative;
  61 |   vertical-align: baseline;
  62 | }
  63 | .markdown-body sup {
  64 |   top: -0.5em;
  65 | }
  66 | .markdown-body sub {
  67 |   bottom: -0.25em;
  68 | }
  69 | 
  70 | .markdown-body h1 {
  71 |   font-size: 2em;
  72 |   margin: 0.67em 0;
  73 | }
  74 | 
  75 | .markdown-body img {
  76 |   border: 0;
  77 | }
  78 | 
  79 | .markdown-body hr {
  80 |   -moz-box-sizing: content-box;
  81 |   box-sizing: content-box;
  82 |   height: 0;
  83 | }
  84 | 
  85 | .markdown-body pre {
  86 |   overflow: auto;
  87 | }
  88 | 
  89 | .markdown-body code,
  90 | .markdown-body kbd,
  91 | .markdown-body pre,
  92 | .markdown-body samp {
  93 |   font-family: monospace, monospace;
  94 |   font-size: 1em;
  95 | }
  96 | 
  97 | .markdown-body input {
  98 |   color: inherit;
  99 |   font: inherit;
 100 |   margin: 0;
 101 | }
 102 | 
 103 | .markdown-body html input[disabled] {
 104 |   cursor: default;
 105 | }
 106 | 
 107 | .markdown-body input {
 108 |   line-height: normal;
 109 | }
 110 | 
 111 | .markdown-body input[type="checkbox"] {
 112 |   box-sizing: border-box;
 113 |   padding: 0;
 114 | }
 115 | 
 116 | .markdown-body table {
 117 |   border-collapse: collapse;
 118 |   border-spacing: 0;
 119 | }
 120 | 
 121 | .markdown-body td,
 122 | .markdown-body th {
 123 |   padding: 0;
 124 | }
 125 | 
 126 | .markdown-body .codehilitetable {
 127 |   border: 0;
 128 |   border-spacing: 0;
 129 | }
 130 | 
 131 | .markdown-body .codehilitetable tr {
 132 |   border: 0;
 133 | }
 134 | 
 135 | .markdown-body .codehilitetable pre,
 136 | .markdown-body .codehilitetable div.codehilite {
 137 |   margin: 0;
 138 | }
 139 | 
 140 | .markdown-body .linenos,
 141 | .markdown-body .code,
 142 | .markdown-body .codehilitetable td {
 143 |   border: 0;
 144 |   padding: 0;
 145 | }
 146 | 
 147 | .markdown-body td:not(.linenos) .linenodiv {
 148 |   padding: 0 !important;
 149 | }
 150 | 
 151 | .markdown-body .code {
 152 |   width: 100%;
 153 | }
 154 | 
 155 | .markdown-body .linenos div pre,
 156 | .markdown-body .linenodiv pre,
 157 | .markdown-body .linenodiv {
 158 |   border: 0;
 159 |   -webkit-border-radius: 0;
 160 |   -moz-border-radius: 0;
 161 |   border-radius: 0;
 162 |   -webkit-border-top-left-radius: 3px;
 163 |   -webkit-border-bottom-left-radius: 3px;
 164 |   -moz-border-radius-topleft: 3px;
 165 |   -moz-border-radius-bottomleft: 3px;
 166 |   border-top-left-radius: 3px;
 167 |   border-bottom-left-radius: 3px;
 168 | }
 169 | 
 170 | .markdown-body .code div pre,
 171 | .markdown-body .code div {
 172 |   border: 0;
 173 |   -webkit-border-radius: 0;
 174 |   -moz-border-radius: 0;
 175 |   border-radius: 0;
 176 |   -webkit-border-top-right-radius: 3px;
 177 |   -webkit-border-bottom-right-radius: 3px;
 178 |   -moz-border-radius-topright: 3px;
 179 |   -moz-border-radius-bottomright: 3px;
 180 |   border-top-right-radius: 3px;
 181 |   border-bottom-right-radius: 3px;
 182 | }
 183 | 
 184 | .markdown-body * {
 185 |   -moz-box-sizing: border-box;
 186 |   box-sizing: border-box;
 187 | }
 188 | 
 189 | .markdown-body input {
 190 |   font: 13px Helvetica, arial, freesans, clean, sans-serif, "Segoe UI Emoji", "Segoe UI Symbol";
 191 |   line-height: 1.4;
 192 | }
 193 | 
 194 | .markdown-body a {
 195 |   color: #4183c4;
 196 |   text-decoration: none;
 197 | }
 198 | 
 199 | .markdown-body a:hover,
 200 | .markdown-body a:focus,
 201 | .markdown-body a:active {
 202 |   text-decoration: underline;
 203 | }
 204 | 
 205 | .markdown-body hr {
 206 |   height: 0;
 207 |   margin: 15px 0;
 208 |   overflow: hidden;
 209 |   background: transparent;
 210 |   border: 0;
 211 |   border-bottom: 1px solid #ddd;
 212 | }
 213 | 
 214 | .markdown-body hr:before,
 215 | .markdown-body hr:after {
 216 |   display: table;
 217 |   content: " ";
 218 | }
 219 | 
 220 | .markdown-body hr:after {
 221 |   clear: both;
 222 | }
 223 | 
 224 | .markdown-body h1,
 225 | .markdown-body h2,
 226 | .markdown-body h3,
 227 | .markdown-body h4,
 228 | .markdown-body h5,
 229 | .markdown-body h6 {
 230 |   margin-top: 15px;
 231 |   margin-bottom: 15px;
 232 |   line-height: 1.1;
 233 | }
 234 | 
 235 | .markdown-body h1 {
 236 |   font-size: 30px;
 237 | }
 238 | 
 239 | .markdown-body h2 {
 240 |   font-size: 21px;
 241 | }
 242 | 
 243 | .markdown-body h3 {
 244 |   font-size: 16px;
 245 | }
 246 | 
 247 | .markdown-body h4 {
 248 |   font-size: 14px;
 249 | }
 250 | 
 251 | .markdown-body h5 {
 252 |   font-size: 12px;
 253 | }
 254 | 
 255 | .markdown-body h6 {
 256 |   font-size: 11px;
 257 | }
 258 | 
 259 | .markdown-body blockquote {
 260 |   margin: 0;
 261 | }
 262 | 
 263 | .markdown-body ul,
 264 | .markdown-body ol {
 265 |   padding: 0;
 266 |   margin-top: 0;
 267 |   margin-bottom: 0;
 268 | }
 269 | 
 270 | .markdown-body ol ol,
 271 | .markdown-body ul ol {
 272 |   list-style-type: lower-roman;
 273 | }
 274 | 
 275 | .markdown-body ul ul ol,
 276 | .markdown-body ul ol ol,
 277 | .markdown-body ol ul ol,
 278 | .markdown-body ol ol ol {
 279 |   list-style-type: lower-alpha;
 280 | }
 281 | 
 282 | .markdown-body dd {
 283 |   margin-left: 0;
 284 | }
 285 | 
 286 | .markdown-body code,
 287 | .markdown-body pre,
 288 | .markdown-body samp {
 289 |   font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
 290 |   font-size: 12px;
 291 | }
 292 | 
 293 | .markdown-body pre {
 294 |   margin-top: 0;
 295 |   margin-bottom: 0;
 296 | }
 297 | 
 298 | .markdown-body kbd {
 299 |   background-color: #e7e7e7;
 300 |   background-image: -moz-linear-gradient(#fefefe, #e7e7e7);
 301 |   background-image: -webkit-linear-gradient(#fefefe, #e7e7e7);
 302 |   background-image: linear-gradient(#fefefe, #e7e7e7);
 303 |   background-repeat: repeat-x;
 304 |   border-radius: 2px;
 305 |   border: 1px solid #cfcfcf;
 306 |   color: #000;
 307 |   padding: 3px 5px;
 308 |   font: 11px Consolas, "Liberation Mono", Menlo, Courier, monospace;
 309 |   line-height: 10px; /* must come after the font shorthand, which resets line-height to normal */
 310 |   display: inline-block;
 311 | }
 312 | 
 313 | .markdown-body>*:first-child {
 314 |   margin-top: 0 !important;
 315 | }
 316 | 
 317 | .markdown-body>*:last-child {
 318 |   margin-bottom: 0 !important;
 319 | }
 320 | 
 321 | .markdown-body .headeranchor-link {
 322 |   position: absolute;
 323 |   top: 0;
 324 |   bottom: 0;
 325 |   left: 0;
 326 |   display: block;
 327 |   padding-right: 6px;
 328 |   padding-left: 30px;
 329 |   margin-left: -30px;
 330 | }
 331 | 
 332 | .markdown-body .headeranchor-link:focus {
 333 |   outline: none;
 334 | }
 335 | 
 336 | .markdown-body h1,
 337 | .markdown-body h2,
 338 | .markdown-body h3,
 339 | .markdown-body h4,
 340 | .markdown-body h5,
 341 | .markdown-body h6 {
 342 |   position: relative;
 343 |   margin-top: 1em;
 344 |   margin-bottom: 16px;
 345 |   font-weight: bold;
 346 |   line-height: 1.4;
 347 | }
 348 | 
 349 | .markdown-body h1 .headeranchor,
 350 | .markdown-body h2 .headeranchor,
 351 | .markdown-body h3 .headeranchor,
 352 | .markdown-body h4 .headeranchor,
 353 | .markdown-body h5 .headeranchor,
 354 | .markdown-body h6 .headeranchor {
 355 |   display: none;
 356 |   color: #000;
 357 |   vertical-align: middle;
 358 | }
 359 | 
 360 | .markdown-body h1:hover .headeranchor-link,
 361 | .markdown-body h2:hover .headeranchor-link,
 362 | .markdown-body h3:hover .headeranchor-link,
 363 | .markdown-body h4:hover .headeranchor-link,
 364 | .markdown-body h5:hover .headeranchor-link,
 365 | .markdown-body h6:hover .headeranchor-link {
 366 |   height: 1em;
 367 |   padding-left: 8px;
 368 |   margin-left: -30px;
 369 |   line-height: 1;
 370 |   text-decoration: none;
 371 | }
 372 | 
 373 | .markdown-body h1:hover .headeranchor-link .headeranchor,
 374 | .markdown-body h2:hover .headeranchor-link .headeranchor,
 375 | .markdown-body h3:hover .headeranchor-link .headeranchor,
 376 | .markdown-body h4:hover .headeranchor-link .headeranchor,
 377 | .markdown-body h5:hover .headeranchor-link .headeranchor,
 378 | .markdown-body h6:hover .headeranchor-link .headeranchor {
 379 |   display: inline-block;
 380 | }
 381 | 
 382 | .markdown-body h1 {
 383 |   padding-bottom: 0.3em;
 384 |   font-size: 2.25em;
 385 |   line-height: 1.2;
 386 |   border-bottom: 1px solid #eee;
 387 | }
 388 | 
 389 | .markdown-body h2 {
 390 |   padding-bottom: 0.3em;
 391 |   font-size: 1.75em;
 392 |   line-height: 1.225;
 393 |   border-bottom: 1px solid #eee;
 394 | }
 395 | 
 396 | .markdown-body h3 {
 397 |   font-size: 1.5em;
 398 |   line-height: 1.43;
 399 | }
 400 | 
 401 | .markdown-body h4 {
 402 |   font-size: 1.25em;
 403 | }
 404 | 
 405 | .markdown-body h5 {
 406 |   font-size: 1em;
 407 | }
 408 | 
 409 | .markdown-body h6 {
 410 |   font-size: 1em;
 411 |   color: #777;
 412 | }
 413 | 
 414 | .markdown-body p,
 415 | .markdown-body blockquote,
 416 | .markdown-body ul,
 417 | .markdown-body ol,
 418 | .markdown-body dl,
 419 | .markdown-body table,
 420 | .markdown-body pre,
 421 | .markdown-body .admonition {
 422 |   margin-top: 0;
 423 |   margin-bottom: 16px;
 424 | }
 425 | 
 426 | .markdown-body hr {
 427 |   height: 4px;
 428 |   padding: 0;
 429 |   margin: 16px 0;
 430 |   background-color: #e7e7e7;
 431 |   border: 0 none;
 432 | }
 433 | 
 434 | .markdown-body ul,
 435 | .markdown-body ol {
 436 |   padding-left: 2em;
 437 | }
 438 | 
 439 | .markdown-body ul ul,
 440 | .markdown-body ul ol,
 441 | .markdown-body ol ol,
 442 | .markdown-body ol ul {
 443 |   margin-top: 0;
 444 |   margin-bottom: 0;
 445 | }
 446 | 
 447 | .markdown-body li>p {
 448 |   margin-top: 16px;
 449 | }
 450 | 
 451 | .markdown-body dl {
 452 |   padding: 0;
 453 | }
 454 | 
 455 | .markdown-body dl dt {
 456 |   padding: 0;
 457 |   margin-top: 16px;
 458 |   font-size: 1em;
 459 |   font-style: italic;
 460 |   font-weight: bold;
 461 | }
 462 | 
 463 | .markdown-body dl dd {
 464 |   padding: 0 16px;
 465 |   margin-bottom: 16px;
 466 | }
 467 | 
 468 | .markdown-body blockquote {
 469 |   padding: 0 15px;
 470 |   color: #777;
 471 |   border-left: 4px solid #ddd;
 472 | }
 473 | 
 474 | .markdown-body blockquote>:first-child {
 475 |   margin-top: 0;
 476 | }
 477 | 
 478 | .markdown-body blockquote>:last-child {
 479 |   margin-bottom: 0;
 480 | }
 481 | 
 482 | .markdown-body table {
 483 |   display: block;
 484 |   width: 100%;
 485 |   overflow: auto;
 486 |   word-break: normal;
 487 |   word-break: keep-all;
 488 | }
 489 | 
 490 | .markdown-body table th {
 491 |   font-weight: bold;
 492 | }
 493 | 
 494 | .markdown-body table th,
 495 | .markdown-body table td {
 496 |   padding: 6px 13px;
 497 |   border: 1px solid #ddd;
 498 | }
 499 | 
 500 | .markdown-body table tr {
 501 |   background-color: #fff;
 502 |   border-top: 1px solid #ccc;
 503 | }
 504 | 
 505 | .markdown-body table tr:nth-child(2n) {
 506 |   background-color: #f8f8f8;
 507 | }
 508 | 
 509 | .markdown-body img {
 510 |   max-width: 100%;
 511 |   -moz-box-sizing: border-box;
 512 |   box-sizing: border-box;
 513 | }
 514 | 
 515 | .markdown-body code,
 516 | .markdown-body samp {
 517 |   padding: 0;
 518 |   padding-top: 0.2em;
 519 |   padding-bottom: 0.2em;
 520 |   margin: 0;
 521 |   font-size: 85%;
 522 |   background-color: rgba(0,0,0,0.04);
 523 |   border-radius: 3px;
 524 | }
 525 | 
 526 | .markdown-body code:before,
 527 | .markdown-body code:after {
 528 |   letter-spacing: -0.2em;
 529 |   content: "\00a0";
 530 | }
 531 | 
 532 | .markdown-body pre>code {
 533 |   padding: 0;
 534 |   margin: 0;
 535 |   font-size: 100%;
 536 |   word-break: normal;
 537 |   white-space: pre;
 538 |   background: transparent;
 539 |   border: 0;
 540 | }
 541 | 
 542 | .markdown-body .codehilite {
 543 |   margin-bottom: 16px;
 544 | }
 545 | 
 546 | .markdown-body .codehilite pre,
 547 | .markdown-body pre {
 548 |   padding: 16px;
 549 |   overflow: auto;
 550 |   font-size: 85%;
 551 |   line-height: 1.45;
 552 |   background-color: #f7f7f7;
 553 |   border-radius: 3px;
 554 | }
 555 | 
 556 | .markdown-body .codehilite pre {
 557 |   margin-bottom: 0;
 558 |   word-break: normal;
 559 | }
 560 | 
 561 | .markdown-body pre {
 562 |   word-wrap: normal;
 563 | }
 564 | 
 565 | .markdown-body pre code {
 566 |   display: inline;
 567 |   max-width: initial;
 568 |   padding: 0;
 569 |   margin: 0;
 570 |   overflow: initial;
 571 |   line-height: inherit;
 572 |   word-wrap: normal;
 573 |   background-color: transparent;
 574 |   border: 0;
 575 | }
 576 | 
 577 | .markdown-body pre code:before,
 578 | .markdown-body pre code:after {
 579 |   content: normal;
 580 | }
 581 | 
 582 | /* Admonition */
 583 | .markdown-body .admonition {
 584 |   -webkit-border-radius: 3px;
 585 |   -moz-border-radius: 3px;
 586 |   position: relative;
 587 |   border-radius: 3px;
 588 |   border: 1px solid #e0e0e0;
 589 |   border-left: 6px solid #333;
 590 |   padding: 10px 10px 10px 30px;
 591 | }
 592 | 
 593 | .markdown-body .admonition table {
 594 |   color: #333;
 595 | }
 596 | 
 597 | .markdown-body .admonition p {
 598 |   padding: 0;
 599 | }
 600 | 
 601 | .markdown-body .admonition-title {
 602 |   font-weight: bold;
 603 |   margin: 0;
 604 | }
 605 | 
 606 | .markdown-body .admonition>.admonition-title {
 607 |   color: #333;
 608 | }
 609 | 
 610 | .markdown-body .attention>.admonition-title {
 611 |   color: #a6d796;
 612 | }
 613 | 
 614 | .markdown-body .caution>.admonition-title {
 615 |   color: #d7a796;
 616 | }
 617 | 
 618 | .markdown-body .hint>.admonition-title {
 619 |   color: #96c6d7;
 620 | }
 621 | 
 622 | .markdown-body .danger>.admonition-title {
 623 |   color: #c25f77;
 624 | }
 625 | 
 626 | .markdown-body .question>.admonition-title {
 627 |   color: #96a6d7;
 628 | }
 629 | 
 630 | .markdown-body .note>.admonition-title {
 631 |   color: #d7c896;
 632 | }
 633 | 
 634 | .markdown-body .admonition:before,
 635 | .markdown-body .attention:before,
 636 | .markdown-body .caution:before,
 637 | .markdown-body .hint:before,
 638 | .markdown-body .danger:before,
 639 | .markdown-body .question:before,
 640 | .markdown-body .note:before {
 641 |   font: normal normal 16px fontawesome-mini;
 642 |   -moz-osx-font-smoothing: grayscale;
 643 |   -webkit-user-select: none;
 644 |   -moz-user-select: none;
 645 |   -ms-user-select: none;
 646 |   user-select: none;
 647 |   line-height: 1.5;
 648 |   color: #333;
 649 |   position: absolute;
 650 |   left: 0;
 651 |   top: 0;
 652 |   padding-top: 10px;
 653 |   padding-left: 10px;
 654 | }
 655 | 
 656 | .markdown-body .admonition:before {
 657 |   content: "\f056\00a0";
 658 |   color: #333; /* hex colors need the leading '#'; a bare 333 is an invalid value and is ignored */
 659 | }
 660 | 
 661 | .markdown-body .attention:before {
 662 |   content: "\f058\00a0";
 663 |   color: #a6d796;
 664 | }
 665 | 
 666 | .markdown-body .caution:before {
 667 |   content: "\f06a\00a0";
 668 |   color: #d7a796;
 669 | }
 670 | 
 671 | .markdown-body .hint:before {
 672 |   content: "\f05a\00a0";
 673 |   color: #96c6d7;
 674 | }
 675 | 
 676 | .markdown-body .danger:before {
 677 |   content: "\f057\00a0";
 678 |   color: #c25f77;
 679 | }
 680 | 
 681 | .markdown-body .question:before {
 682 |   content: "\f059\00a0";
 683 |   color: #96a6d7;
 684 | }
 685 | 
 686 | .markdown-body .note:before {
 687 |   content: "\f040\00a0";
 688 |   color: #d7c896;
 689 | }
 690 | 
 691 | .markdown-body .admonition::after {
 692 |   content: normal;
 693 | }
 694 | 
 695 | .markdown-body .attention {
 696 |   border-left: 6px solid #a6d796;
 697 | }
 698 | 
 699 | .markdown-body .caution {
 700 |   border-left: 6px solid #d7a796;
 701 | }
 702 | 
 703 | .markdown-body .hint {
 704 |   border-left: 6px solid #96c6d7;
 705 | }
 706 | 
 707 | .markdown-body .danger {
 708 |   border-left: 6px solid #c25f77;
 709 | }
 710 | 
 711 | .markdown-body .question {
 712 |   border-left: 6px solid #96a6d7;
 713 | }
 714 | 
 715 | .markdown-body .note {
 716 |   border-left: 6px solid #d7c896;
 717 | }
 718 | 
 719 | .markdown-body .admonition>*:first-child {
 720 |   margin-top: 0 !important;
 721 | }
 722 | 
 723 | .markdown-body .admonition>*:last-child {
 724 |   margin-bottom: 0 !important;
 725 | }
 726 | 
 727 | /* progress bar*/
 728 | .markdown-body .progress {
 729 |   display: block;
 730 |   width: 300px;
 731 |   margin: 10px 0;
 732 |   height: 24px;
 733 |   -webkit-border-radius: 3px;
 734 |   -moz-border-radius: 3px;
 735 |   border-radius: 3px;
 736 |   background-color: #ededed;
 737 |   position: relative;
 738 |   box-shadow: inset -1px 1px 3px rgba(0, 0, 0, .1);
 739 | }
 740 | 
 741 | .markdown-body .progress-label {
 742 |   position: absolute;
 743 |   text-align: center;
 744 |   font-weight: bold;
 745 |   width: 100%; margin: 0;
 746 |   line-height: 24px;
 747 |   color: #333;
 748 |   text-shadow: 1px 1px 0 #fefefe, -1px -1px 0 #fefefe, -1px 1px 0 #fefefe, 1px -1px 0 #fefefe, 0 1px 0 #fefefe, 0 -1px 0 #fefefe, 1px 0 0 #fefefe, -1px 0 0 #fefefe, 1px 1px 2px #000;
 749 |   -webkit-font-smoothing: antialiased !important;
 750 |   white-space: nowrap;
 751 |   overflow: hidden;
 752 | }
 753 | 
 754 | .markdown-body .progress-bar {
 755 |   height: 24px;
 756 |   float: left;
 757 |   -webkit-border-radius: 3px;
 758 |   -moz-border-radius: 3px;
 759 |   border-radius: 3px;
 760 |   background-color: #96c6d7;
 761 |   box-shadow: inset 0 1px 0 rgba(255, 255, 255, .5), inset 0 -1px 0 rgba(0, 0, 0, .1);
 762 |   background-size: 30px 30px;
 763 |   background-image: -webkit-linear-gradient(
 764 |     135deg, rgba(255, 255, 255, .4) 27%,
 765 |     transparent 27%,
 766 |     transparent 52%, rgba(255, 255, 255, .4) 52%,
 767 |     rgba(255, 255, 255, .4) 77%,
 768 |     transparent 77%, transparent
 769 |   );
 770 |   background-image: -moz-linear-gradient(
 771 |     135deg,
 772 |     rgba(255, 255, 255, .4) 27%, transparent 27%,
 773 |     transparent 52%, rgba(255, 255, 255, .4) 52%,
 774 |     rgba(255, 255, 255, .4) 77%, transparent 77%,
 775 |     transparent
 776 |   );
 777 |   background-image: -ms-linear-gradient(
 778 |     135deg,
 779 |     rgba(255, 255, 255, .4) 27%, transparent 27%,
 780 |     transparent 52%, rgba(255, 255, 255, .4) 52%,
 781 |     rgba(255, 255, 255, .4) 77%, transparent 77%,
 782 |     transparent
 783 |   );
 784 |   background-image: -o-linear-gradient(
 785 |     135deg,
 786 |     rgba(255, 255, 255, .4) 27%, transparent 27%,
 787 |     transparent 52%, rgba(255, 255, 255, .4) 52%,
 788 |     rgba(255, 255, 255, .4) 77%, transparent 77%,
 789 |     transparent
 790 |   );
 791 |   background-image: linear-gradient(
 792 |     135deg,
 793 |     rgba(255, 255, 255, .4) 27%, transparent 27%,
 794 |     transparent 52%, rgba(255, 255, 255, .4) 52%,
 795 |     rgba(255, 255, 255, .4) 77%, transparent 77%,
 796 |     transparent
 797 |   );
 798 | }
 799 | 
 800 | .markdown-body .progress-100plus .progress-bar {
 801 |   background-color: #a6d796;
 802 | }
 803 | 
 804 | .markdown-body .progress-80plus .progress-bar {
 805 |   background-color: #c6d796;
 806 | }
 807 | 
 808 | .markdown-body .progress-60plus .progress-bar {
 809 |   background-color: #d7c896;
 810 | }
 811 | 
 812 | .markdown-body .progress-40plus .progress-bar {
 813 |   background-color: #d7a796;
 814 | }
 815 | 
 816 | .markdown-body .progress-20plus .progress-bar {
 817 |   background-color: #d796a6;
 818 | }
 819 | 
 820 | .markdown-body .progress-0plus .progress-bar {
 821 |   background-color: #c25f77;
 822 | }
 823 | 
 824 | .markdown-body .candystripe-animate .progress-bar{
 825 |   -webkit-animation: animate-stripes 3s linear infinite;
 826 |   -moz-animation: animate-stripes 3s linear infinite;
 827 |   animation: animate-stripes 3s linear infinite;
 828 | }
 829 | 
 830 | @-webkit-keyframes animate-stripes {
 831 |   0% {
 832 |     background-position: 0 0;
 833 |   }
 834 | 
 835 |   100% {
 836 |     background-position: 60px 0;
 837 |   }
 838 | }
 839 | 
 840 | @-moz-keyframes animate-stripes {
 841 |   0% {
 842 |     background-position: 0 0;
 843 |   }
 844 | 
 845 |   100% {
 846 |     background-position: 60px 0;
 847 |   }
 848 | }
 849 | 
 850 | @keyframes animate-stripes {
 851 |   0% {
 852 |     background-position: 0 0;
 853 |   }
 854 | 
 855 |   100% {
 856 |     background-position: 60px 0;
 857 |   }
 858 | }
 859 | 
 860 | .markdown-body .gloss .progress-bar {
 861 |   box-shadow:
 862 |     inset 0 4px 12px rgba(255, 255, 255, .7),
 863 |     inset 0 -12px 0 rgba(0, 0, 0, .05);
 864 | }
 865 | 
 866 | /* Multimarkdown Critic Blocks */
 867 | .markdown-body .critic_mark {
 868 |   background: #ff0;
 869 | }
 870 | 
 871 | .markdown-body .critic_delete {
 872 |   color: #c82829;
 873 |   text-decoration: line-through;
 874 | }
 875 | 
 876 | .markdown-body .critic_insert {
 877 |   color: #718c00 ;
 878 |   text-decoration: underline;
 879 | }
 880 | 
 881 | .markdown-body .critic_comment {
 882 |   color: #8e908c;
 883 |   font-style: italic;
 884 | }
 885 | 
 886 | .markdown-body .headeranchor {
 887 |   font: normal normal 16px octicons-anchor;
 888 |   line-height: 1;
 889 |   display: inline-block;
 890 |   text-decoration: none;
 891 |   -webkit-font-smoothing: antialiased;
 892 |   -moz-osx-font-smoothing: grayscale;
 893 |   -webkit-user-select: none;
 894 |   -moz-user-select: none;
 895 |   -ms-user-select: none;
 896 |   user-select: none;
 897 | }
 898 | 
 899 | .headeranchor:before {
 900 |   content: '\f05c';
 901 | }
 902 | 
 903 | .markdown-body .task-list-item {
 904 |   list-style-type: none;
 905 | }
 906 | 
 907 | .markdown-body .task-list-item+.task-list-item {
 908 |   margin-top: 3px;
 909 | }
 910 | 
 911 | .markdown-body .task-list-item input {
 912 |   margin: 0 4px 0.25em -20px;
 913 |   vertical-align: middle;
 914 | }
 915 | 
 916 | /* Media */
 917 | @media only screen and (min-width: 480px) {
 918 |   .markdown-body {
 919 |     font-size:14px;
 920 |   }
 921 | }
 922 | 
 923 | @media only screen and (min-width: 768px) {
 924 |   .markdown-body {
 925 |     font-size:16px;
 926 |   }
 927 | }
 928 | 
 929 | @media print {
 930 |   .markdown-body * {
 931 |     background: transparent !important;
 932 |     color: black !important;
 933 |     filter:none !important;
 934 |     -ms-filter: none !important;
 935 |   }
 936 | 
 937 |   .markdown-body {
 938 |     font-size:12pt;
 939 |     max-width:100%;
 940 |     outline:none;
 941 |     border: 0;
 942 |   }
 943 | 
 944 |   .markdown-body a,
 945 |   .markdown-body a:visited {
 946 |     text-decoration: underline;
 947 |   }
 948 | 
 949 |   .markdown-body .headeranchor-link {
 950 |     display: none;
 951 |   }
 952 | 
 953 |   .markdown-body a[href]:after {
 954 |     content: " (" attr(href) ")";
 955 |   }
 956 | 
 957 |   .markdown-body abbr[title]:after {
 958 |     content: " (" attr(title) ")";
 959 |   }
 960 | 
 961 |   .markdown-body .ir a:after,
 962 |   .markdown-body a[href^="javascript:"]:after,
 963 |   .markdown-body a[href^="#"]:after {
 964 |     content: "";
 965 |   }
 966 | 
 967 |   .markdown-body pre {
 968 |     white-space: pre;
 969 |     white-space: pre-wrap;
 970 |     word-wrap: break-word;
 971 |   }
 972 | 
 973 |   .markdown-body pre,
 974 |   .markdown-body blockquote {
 975 |     border: 1px solid #999;
 976 |     padding-right: 1em;
 977 |     page-break-inside: avoid;
 978 |   }
 979 | 
 980 |   .markdown-body .progress,
 981 |   .markdown-body .progress-bar {
 982 |     -moz-box-shadow: none;
 983 |     -webkit-box-shadow: none;
 984 |     box-shadow: none;
 985 |   }
 986 | 
 987 |   .markdown-body .progress {
 988 |     border: 1px solid #ddd;
 989 |   }
 990 | 
 991 |   .markdown-body .progress-bar {
 992 |     height: 22px;
 993 |     border-right: 1px solid #ddd;
 994 |   }
 995 | 
 996 |   .markdown-body tr,
 997 |   .markdown-body img {
 998 |     page-break-inside: avoid;
 999 |   }
1000 | 
1001 |   .markdown-body img {
1002 |     max-width: 100% !important;
1003 |   }
1004 | 
1005 |   .markdown-body p,
1006 |   .markdown-body h2,
1007 |   .markdown-body h3 {
1008 |     orphans: 3;
1009 |     widows: 3;
1010 |   }
1011 | 
1012 |   .markdown-body h2,
1013 |   .markdown-body h3 {
1014 |     page-break-after: avoid;
1015 |   }
1016 | }
1017 | </style><title>README</title></head><body><article class="markdown-body"><div class="toc">
1018 | <ul>
1019 | <li><a href="#_1">基于启发式特征的钓鱼网站检测系统</a></li>
1020 | <li><a href="#author">Author:沂水寒城</a><ul>
1021 | <li><a href="#_2">背景</a></li>
1022 | <li><a href="#html">基于html的启发式钓鱼网站检测</a></li>
1023 | <li><a href="#url">基于url的启发式钓鱼网站检测</a></li>
1024 | <li><a href="#_3">实验方案</a></li>
1025 | <li><a href="#_4">实验数据</a></li>
1026 | </ul>
1027 | </li>
1028 | </ul>
1029 | </div>
1030 | <h1 id="_1"><a name="user-content-_1" href="#_1" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>基于启发式特征的钓鱼网站检测系统</h1>
1031 | <h1 id="author"><strong>Author</strong>:沂水寒城</h1>
1032 | <h2 id="_2"><a name="user-content-_2" href="#_2" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>背景</h2>
1033 | <p>钓鱼网站的检测问题可以看做是一个二分类的问题，因此可以基于机器学习的流程和方法来做分类</p>
1034 | <h2 id="html"><a name="user-content-html" href="#html" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>基于html的启发式钓鱼网站检测</h2>
1035 | <p>基于数据的大量统计以及综合已有的方法提取29维特征，之后使用SVM模型来训练和测试</p>
1036 | <h2 id="url"><a name="user-content-url" href="#url" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>基于url的启发式钓鱼网站检测</h2>
1037 | <p>基于url的特征设计的轻量级的分类模型，同样是基于SVM分类模型</p>
1038 | <h2 id="_3"><a name="user-content-_3" href="#_3" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>实验方案</h2>
1039 | <p>实验采用的方法是结合两个分类模型的结果综合决策最终页面的所属类别</p>
1040 | <h2 id="_4"><a name="user-content-_4" href="#_4" class="headeranchor-link" aria-hidden="true"><span class="headeranchor"></span></a>实验数据</h2>
1041 | <p>由于爬取得到的html文件较大不易上传就不提供数据了，url的数据可以在我的malicious_web_page_detection_based_on_url里面找到，数据是可以通用的，在malicious_web_page_detection_based_on_url里面着重是使用sklearn中的机器学习模型来测试，而这个工作主要是集中在特征工程方面，这样合在一起就是一个完整的机器学习实践的流程了。</p></article></body></html>


--------------------------------------------------------------------------------