├── README.md
├── requirements.txt
└── simplescan.py


/README.md:
--------------------------------------------------------------------------------
 1 | simplescan (alpha-0.1)
 2 | ==
 3 | 
 4 | Installation
 5 | ----
 6 | 
 7 | Download simplescan by cloning the [Git](https://github.com/RickGray/simplescan) repository:
 8 | 
 9 |     git clone http://github.com/RickGray/simplescan.git simplescan
10 | 
11 | Then use `pip install -r requirements.txt` to install the third modules.
12 | 
13 | Usage
14 | ----
15 | 
16 | To get a list of basic options use:
17 | 
18 |     python simplescan.py -h
19 | 
20 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | PySocks


--------------------------------------------------------------------------------
/simplescan.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # __buildin__ modules
  5 | import os
  6 | import re
  7 | import sys
  8 | import time
  9 | import socket
 10 | import random
 11 | import string
 12 | import difflib
 13 | import urlparse
 14 | import platform
 15 | import argparse
 16 | 
 17 | from multiprocessing.dummy import Pool as ThreadPool
 18 | 
 19 | # thirdparty modules
 20 | import socks
 21 | import requests
 22 | 
 23 | 
 24 | BANNER = r'''
 25 |    _____ _                 __    _____
 26 |   / ___/(_)___ ___  ____  / /__ / ___/_________ _____
 27 |   \__ \/ / __ `__ \/ __ \/ / _ \\__ \/ ___/ __ `/ __ \
 28 |  ___/ / / / / / / / /_/ / /  __/__/ / /__/ /_/ / / / /
 29 | /____/_/_/ /_/ /_/ .___/_/\___/____/\___/\__,_/_/ /_/
 30 |                 /_/
 31 |                                         (alpha-0.1)
 32 |    Author: RickGray@0xFA-Team
 33 |            Croxy@0xFA-Team             |
 34 |    Create: 2015-10-19                  |
 35 |    Update: 2015-11-02                  |
 36 | _______________________________________|
 37 | '''
 38 | 
 39 | 
 40 | _OPTIONS_HELP_ = {
 41 |     'URL': 'Target URL (e.g. "http://www.example.com/)',
 42 |     'URLFILE': 'Scan multiple targets given in a textual file',
 43 |     'WORDFILE': 'Load wordlist from a wordfile (e.g. "wordlist.txt")',
 44 |     'WORDFILEDIR': 'Load wordlist from a directory (e.g. "wordlist/")',
 45 |     'PROXY': 'Usa a proxy to connect to the target URL',
 46 |     'AGENT': 'HTTP User-Agent header value',
 47 |     'COOKIE': 'HTTP Cookie header value',
 48 |     'TIMEOUT': 'Seconds to wait before timeout connection (default 10)',
 49 |     'MODE': ('The mode will be use, "quick" mode use HEAD method to scan, '
 50 |              '"smart" mode will filter some pages with 404 page template '
 51 |              'which builed when begging (default: "quick")'),
 52 |     'THREADS': 'Max number of concurrent HTTP(s) requests (default 10)',
 53 | }
 54 | 
 55 | 
 56 | def parse_commond():
 57 |     """ 解析终端命令并返回其解析结果 """
 58 |     parse = argparse.ArgumentParser()
 59 | 
 60 |     target = parse.add_argument_group('target')
 61 |     target.add_argument('-u', dest='URL',
 62 |                         type=str, help=_OPTIONS_HELP_['URL'])
 63 |     target.add_argument('-f', dest='URLFILE',
 64 |                         type=str, help=_OPTIONS_HELP_['URLFILE'])
 65 | 
 66 |     wordfile = parse.add_argument_group('wordfile')
 67 |     wordfile.add_argument('-w', dest='WORDFILE',
 68 |                           type=str, help=_OPTIONS_HELP_['WORDFILE'])
 69 |     wordfile.add_argument('-d', dest='WORDFILEDIR',
 70 |                           type=str, help=_OPTIONS_HELP_['WORDFILEDIR'])
 71 | 
 72 |     request = parse.add_argument_group('request')
 73 |     request.add_argument('--proxy', dest='PROXY',
 74 |                          type=str, help=_OPTIONS_HELP_['PROXY'])
 75 |     request.add_argument('--user-agent', dest='AGENT',
 76 |                          type=str, help=_OPTIONS_HELP_['AGENT'])
 77 |     request.add_argument('--cookie', dest='COOKIE',
 78 |                          type=str, help=_OPTIONS_HELP_['COOKIE'])
 79 |     request.add_argument('--timeout', dest='TIMEOUT',
 80 |                          type=int, default=10, help=_OPTIONS_HELP_['TIMEOUT'])
 81 | 
 82 |     optimization = parse.add_argument_group('optimization')
 83 |     optimization.add_argument('--mode', dest='MODE',
 84 |                               type=str, choices=['quick', 'smart'], default='quick',
 85 |                               help=_OPTIONS_HELP_['MODE'])
 86 |     optimization.add_argument('--threads', dest='THREADS',
 87 |                               type=int, default=10, help=_OPTIONS_HELP_['THREADS'])
 88 | 
 89 |     return parse.parse_args()
 90 | 
 91 | 
 92 | def cprint(val, color):
 93 |     """ *nix下终端着色输出 """
 94 |     colorcodes = {'bold': {True: '\x1b[1m', False: '\x1b[22m'},
 95 |                   'cyan': {True: '\x1b[36m', False: '\x1b[39m'},
 96 |                   'blue': {True: '\x1b[34m', False: '\x1b[39m'},
 97 |                   'red': {True: '\x1b[31m', False: '\x1b[39m'},
 98 |                   'magenta': {True: '\x1b[35m', False: '\x1b[39m'},
 99 |                   'green': {True: '\x1b[32m', False: '\x1b[39m'},
100 |                   'yellow': {True: '\x1b[33m', False: '\x1b[39m'},
101 |                   'underline': {True: '\x1b[4m', False: '\x1b[24m'}}
102 |     colors = (platform.system() != 'Windows')
103 |     if colors:
104 |         sys.stdout.write(colorcodes[color][True] + val + colorcodes[color][False] + '\n')
105 |     else:
106 |         sys.stdout.write(val)
107 | 
108 | 
109 | def get_random_string(length=16):
110 |     """ 随机生成指定长度由大小写字母和数字构成的字符串 """
111 |     choices = string.letters + string.digits
112 |     return ''.join([random.choice(choices) for _ in range(int(length))])
113 | 
114 | 
115 | def build_random_path():
116 |     """ 随机生成由大小写字母和数字构成的路径 """
117 |     random_string = get_random_string(random.randint(5, 10))
118 |     ext_choices = ['.html', '.php', '.asp', '.htm', '.jpeg', '.png', '.zip']
119 |     random_path = random_string
120 |     while True:
121 |         # 随机构建子路径，当 random.choice([True, False]) 为 False 时退出循环
122 |         if not random.choice([True, False]):
123 |             random_path += random.choice(ext_choices)
124 |             break
125 |         else:
126 |             random_string = get_random_string(random.randint(5, 10))
127 |             random_path += '/' + random_string
128 | 
129 |     return random_path
130 | 
131 | 
132 | def patch_url(url):
133 |     """ 修复不标准URL """
134 |     res = urlparse.urlparse(url)
135 |     if not res.scheme:
136 |         url = 'http://' + url
137 | 
138 |     return url
139 | 
140 | 
141 | def build_not_found_template(url, headers=None):
142 |     """ 获取扫描URL基本路径，构建基于当前目录的404页面模板 """
143 |     base_url = urlparse.urljoin(url, './')
144 | 
145 |     pre_responses = []
146 |     for _ in range(6):
147 |         # 随机生成路径，相继访问得到页面内容，
148 |         # 对成功返回的结果进行比较得到404页面模板
149 |         random_path = build_random_path()
150 |         random_url = urlparse.urljoin(base_url, random_path)
151 |         try:
152 |             response = requests.get(random_url, headers=headers)
153 |         except requests.exceptions.RequestException, ex:
154 |             err = 'failed to access %s, ' % random_url
155 |             err += str(ex)
156 |             print err
157 |             continue
158 |         pre_responses.append(response)
159 | 
160 |     if len(pre_responses) < 2:
161 |         # 由于随机获取到的页面内容数量太少不能进行404页面模板提取操作
162 |         return None
163 | 
164 |     ratios = []
165 |     pre_content = pre_responses[0].content
166 |     for response in pre_responses[1:]:
167 |         cur_content = response.content
168 |         ratio = difflib.SequenceMatcher(None, pre_content, cur_content).quick_ratio()
169 |         ratios.append(ratio)
170 |         pre_content = cur_content
171 | 
172 |     average = float(sum(ratios)) / len(ratios)
173 |     if average > 0.9:
174 |         print 'succeed to build %s 404 page template' % url
175 | 
176 |     return random.choice(pre_responses).content
177 | 
178 | 
179 | # def check_url(url, err_content):
180 | def check_url(opt):
181 |     """ 请求指定URL地址，返回其状态值，根据 err_content 来过滤404页面 """
182 |     url, err_content, headers = opt[0], opt[1], opt[2]
183 |     try:
184 |         response = requests.get(url, stream=True, headers=headers)
185 |     except requests.exceptions.RequestException, ex:
186 |         err = 'failed to access %s, ' % url
187 |         err += str(ex)
188 |         return None
189 | 
190 |     if err_content:
191 |         content = response.content
192 |         ratio = difflib.SequenceMatcher(None, content, err_content).quick_ratio()
193 |         if ratio > 0.9:
194 |             # print 'fetched similar page or others'
195 |             return None
196 | 
197 |     status_code = response.status_code
198 |     m = re.search(r'<title>(?P<title>.*)</title>', response.content)
199 |     page_title = m.group('title') if m else ''
200 |     sys.stdout.write('[{0}], [{1}], [{2}]\n'.format(status_code, page_title, url))
201 |     return status_code, page_title
202 | 
203 | 
204 | def set_request_proxy(proxy):
205 |     """ 设置请求代理 """
206 |     res = urlparse.urlparse(proxy)
207 |     mode = None
208 |     if res.scheme == 'socks4':
209 |         mode = socks.SOCKS4
210 |     elif res.scheme == 'socks5':
211 |         mode = socks.SOCKS5
212 |     elif res.scheme == 'http':
213 |         mode = socks.HTTP
214 |     else:
215 |         print 'unknown proxy type'
216 | 
217 |     if mode:
218 |         host = res.netloc.split(':')[0]
219 |         port = int(res.netloc.split(':')[1])
220 |         socks.set_default_proxy(mode, host, port)
221 |         socks.socket = socks.socksocket
222 |         print 'proxy %s using' % proxy
223 | 
224 | 
225 | def build_extended_wordlist(t_url):
226 |     """ 根据待扫描域名和URL生成基本扫描字典 """
227 |     wordlist = []
228 | 
229 |     return wordlist
230 | 
231 | 
232 | def get_request_headers(args):
233 |     """ 获取命令参数构造每次请求的自定义头部 """
234 |     headers = {}
235 |     if args.AGENT:
236 |         headers['User-Agent'] = args.AGENT
237 |     if args.COOKIE:
238 |         headers['Cookie'] = args.COOKIE
239 | 
240 |     return headers
241 | 
242 | 
243 | def process_with_url(url, args):
244 |     """ 单一目标扫描处理 """
245 |     t_url = patch_url(url)
246 |     headers = get_request_headers(args)
247 |     if not args.WORDFILE and not args.WORDFILEDIR:
248 |         print 'wordfile or wordfile dir required'
249 |         sys.exit()
250 | 
251 |     try:
252 |         w_fd = open(args.WORDFILE, 'r')
253 |     except IOError, ex:
254 |         err = 'unable to load wordfile, ("%s")' % str(ex)
255 |         print err
256 |         sys.exit()
257 | 
258 |     # 获取 404页面模板
259 |     if args.MODE == 'smart':
260 |         err_content = build_not_found_template(t_url, headers)
261 |     else:
262 |         err_content = None
263 | 
264 |     # TODO 需要改进多线程参数冗余 - err_content
265 |     # 在生成带扫描URL时，默认以当前目录为扫描路径
266 |     l = [(urlparse.urljoin(t_url, _.strip().lstrip('/')), err_content, headers)
267 |          for _ in build_extended_wordlist(t_url)]
268 |     l += [(urlparse.urljoin(t_url, _.strip().lstrip('/')), err_content, headers)
269 |           for _ in w_fd.readlines()]
270 | 
271 |     # 初始化线程池
272 |     # TODO 需要定义线程函数回调 callback，用于收集结果给外部程序使用
273 |     pool = ThreadPool(args.THREADS)
274 |     pool.map(check_url, l)
275 | 
276 | 
277 | def process_with_url_file(url_file, args):
278 |     """ 目标文件扫描处理 """
279 |     try:
280 |         u_fd = open(url_file, 'r')
281 |     except IOError, ex:
282 |         err = 'unable to load url file, ("%s")' % str(ex)
283 |         print err
284 |         sys.exit()
285 | 
286 |     while True:
287 |         try:
288 |             url = u_fd.next().strip()
289 |         except StopIteration, ex:
290 |             print 'no more url found, ("%s")' % str(ex)
291 |             break
292 | 
293 |         # TODO 定义输出接口数据格式
294 |         process_with_url(url, args)
295 | 
296 | 
297 | def run(args):
298 |     print BANNER
299 |     # TODO 全局中断信号处理，用于多线程运行时立即退出程序
300 | 
301 |     if args.PROXY:
302 |         set_request_proxy(args.PROXY)
303 | 
304 |     if args.TIMEOUT:
305 |         socket.setdefaulttimeout(args.TIMEOUT)
306 |         print 'set request time out to %ds' % args.TIMEOUT
307 | 
308 |     if not args.URL and not args.URLFILE:
309 |         print 'url or url file required'
310 |         sys.exit()
311 | 
312 |     if args.URL:
313 |         return process_with_url(args.URL, args)
314 |     if args.URLFILE:
315 |         return process_with_url_file(args.URLFILE, args)
316 | 
317 | 
318 | if __name__ == '__main__':
319 |     run(parse_commond())
320 | 


--------------------------------------------------------------------------------