├── README.md ├── requirements.txt └── simplescan.py /README.md: -------------------------------------------------------------------------------- 1 | simplescan (alpha-0.1) 2 | == 3 | 4 | Installation 5 | ---- 6 | 7 | Download simplescan by cloning the [Git](https://github.com/RickGray/simplescan) repository: 8 | 9 | git clone http://github.com/RickGray/simplescan.git simplescan 10 | 11 | Then use `pip install -r requirements.txt` to install the third modules. 12 | 13 | Usage 14 | ---- 15 | 16 | To get a list of basic options use: 17 | 18 | python simplescan.py -h 19 | 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | PySocks -------------------------------------------------------------------------------- /simplescan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # __buildin__ modules 5 | import os 6 | import re 7 | import sys 8 | import time 9 | import socket 10 | import random 11 | import string 12 | import difflib 13 | import urlparse 14 | import platform 15 | import argparse 16 | 17 | from multiprocessing.dummy import Pool as ThreadPool 18 | 19 | # thirdparty modules 20 | import socks 21 | import requests 22 | 23 | 24 | BANNER = r''' 25 | _____ _ __ _____ 26 | / ___/(_)___ ___ ____ / /__ / ___/_________ _____ 27 | \__ \/ / __ `__ \/ __ \/ / _ \\__ \/ ___/ __ `/ __ \ 28 | ___/ / / / / / / / /_/ / / __/__/ / /__/ /_/ / / / / 29 | /____/_/_/ /_/ /_/ .___/_/\___/____/\___/\__,_/_/ /_/ 30 | /_/ 31 | (alpha-0.1) 32 | Author: RickGray@0xFA-Team 33 | Croxy@0xFA-Team | 34 | Create: 2015-10-19 | 35 | Update: 2015-11-02 | 36 | _______________________________________| 37 | ''' 38 | 39 | 40 | _OPTIONS_HELP_ = { 41 | 'URL': 'Target URL (e.g. "http://www.example.com/)', 42 | 'URLFILE': 'Scan multiple targets given in a textual file', 43 | 'WORDFILE': 'Load wordlist from a wordfile (e.g. "wordlist.txt")', 44 | 'WORDFILEDIR': 'Load wordlist from a directory (e.g. "wordlist/")', 45 | 'PROXY': 'Usa a proxy to connect to the target URL', 46 | 'AGENT': 'HTTP User-Agent header value', 47 | 'COOKIE': 'HTTP Cookie header value', 48 | 'TIMEOUT': 'Seconds to wait before timeout connection (default 10)', 49 | 'MODE': ('The mode will be use, "quick" mode use HEAD method to scan, ' 50 | '"smart" mode will filter some pages with 404 page template ' 51 | 'which builed when begging (default: "quick")'), 52 | 'THREADS': 'Max number of concurrent HTTP(s) requests (default 10)', 53 | } 54 | 55 | 56 | def parse_commond(): 57 | """ 解析终端命令并返回其解析结果 """ 58 | parse = argparse.ArgumentParser() 59 | 60 | target = parse.add_argument_group('target') 61 | target.add_argument('-u', dest='URL', 62 | type=str, help=_OPTIONS_HELP_['URL']) 63 | target.add_argument('-f', dest='URLFILE', 64 | type=str, help=_OPTIONS_HELP_['URLFILE']) 65 | 66 | wordfile = parse.add_argument_group('wordfile') 67 | wordfile.add_argument('-w', dest='WORDFILE', 68 | type=str, help=_OPTIONS_HELP_['WORDFILE']) 69 | wordfile.add_argument('-d', dest='WORDFILEDIR', 70 | type=str, help=_OPTIONS_HELP_['WORDFILEDIR']) 71 | 72 | request = parse.add_argument_group('request') 73 | request.add_argument('--proxy', dest='PROXY', 74 | type=str, help=_OPTIONS_HELP_['PROXY']) 75 | request.add_argument('--user-agent', dest='AGENT', 76 | type=str, help=_OPTIONS_HELP_['AGENT']) 77 | request.add_argument('--cookie', dest='COOKIE', 78 | type=str, help=_OPTIONS_HELP_['COOKIE']) 79 | request.add_argument('--timeout', dest='TIMEOUT', 80 | type=int, default=10, help=_OPTIONS_HELP_['TIMEOUT']) 81 | 82 | optimization = parse.add_argument_group('optimization') 83 | optimization.add_argument('--mode', dest='MODE', 84 | type=str, choices=['quick', 'smart'], default='quick', 85 | help=_OPTIONS_HELP_['MODE']) 86 | optimization.add_argument('--threads', dest='THREADS', 87 | type=int, default=10, help=_OPTIONS_HELP_['THREADS']) 88 | 89 | return parse.parse_args() 90 | 91 | 92 | def cprint(val, color): 93 | """ *nix下终端着色输出 """ 94 | colorcodes = {'bold': {True: '\x1b[1m', False: '\x1b[22m'}, 95 | 'cyan': {True: '\x1b[36m', False: '\x1b[39m'}, 96 | 'blue': {True: '\x1b[34m', False: '\x1b[39m'}, 97 | 'red': {True: '\x1b[31m', False: '\x1b[39m'}, 98 | 'magenta': {True: '\x1b[35m', False: '\x1b[39m'}, 99 | 'green': {True: '\x1b[32m', False: '\x1b[39m'}, 100 | 'yellow': {True: '\x1b[33m', False: '\x1b[39m'}, 101 | 'underline': {True: '\x1b[4m', False: '\x1b[24m'}} 102 | colors = (platform.system() != 'Windows') 103 | if colors: 104 | sys.stdout.write(colorcodes[color][True] + val + colorcodes[color][False] + '\n') 105 | else: 106 | sys.stdout.write(val) 107 | 108 | 109 | def get_random_string(length=16): 110 | """ 随机生成指定长度由大小写字母和数字构成的字符串 """ 111 | choices = string.letters + string.digits 112 | return ''.join([random.choice(choices) for _ in range(int(length))]) 113 | 114 | 115 | def build_random_path(): 116 | """ 随机生成由大小写字母和数字构成的路径 """ 117 | random_string = get_random_string(random.randint(5, 10)) 118 | ext_choices = ['.html', '.php', '.asp', '.htm', '.jpeg', '.png', '.zip'] 119 | random_path = random_string 120 | while True: 121 | # 随机构建子路径,当 random.choice([True, False]) 为 False 时退出循环 122 | if not random.choice([True, False]): 123 | random_path += random.choice(ext_choices) 124 | break 125 | else: 126 | random_string = get_random_string(random.randint(5, 10)) 127 | random_path += '/' + random_string 128 | 129 | return random_path 130 | 131 | 132 | def patch_url(url): 133 | """ 修复不标准URL """ 134 | res = urlparse.urlparse(url) 135 | if not res.scheme: 136 | url = 'http://' + url 137 | 138 | return url 139 | 140 | 141 | def build_not_found_template(url, headers=None): 142 | """ 获取扫描URL基本路径,构建基于当前目录的404页面模板 """ 143 | base_url = urlparse.urljoin(url, './') 144 | 145 | pre_responses = [] 146 | for _ in range(6): 147 | # 随机生成路径,相继访问得到页面内容, 148 | # 对成功返回的结果进行比较得到404页面模板 149 | random_path = build_random_path() 150 | random_url = urlparse.urljoin(base_url, random_path) 151 | try: 152 | response = requests.get(random_url, headers=headers) 153 | except requests.exceptions.RequestException, ex: 154 | err = 'failed to access %s, ' % random_url 155 | err += str(ex) 156 | print err 157 | continue 158 | pre_responses.append(response) 159 | 160 | if len(pre_responses) < 2: 161 | # 由于随机获取到的页面内容数量太少不能进行404页面模板提取操作 162 | return None 163 | 164 | ratios = [] 165 | pre_content = pre_responses[0].content 166 | for response in pre_responses[1:]: 167 | cur_content = response.content 168 | ratio = difflib.SequenceMatcher(None, pre_content, cur_content).quick_ratio() 169 | ratios.append(ratio) 170 | pre_content = cur_content 171 | 172 | average = float(sum(ratios)) / len(ratios) 173 | if average > 0.9: 174 | print 'succeed to build %s 404 page template' % url 175 | 176 | return random.choice(pre_responses).content 177 | 178 | 179 | # def check_url(url, err_content): 180 | def check_url(opt): 181 | """ 请求指定URL地址,返回其状态值,根据 err_content 来过滤404页面 """ 182 | url, err_content, headers = opt[0], opt[1], opt[2] 183 | try: 184 | response = requests.get(url, stream=True, headers=headers) 185 | except requests.exceptions.RequestException, ex: 186 | err = 'failed to access %s, ' % url 187 | err += str(ex) 188 | return None 189 | 190 | if err_content: 191 | content = response.content 192 | ratio = difflib.SequenceMatcher(None, content, err_content).quick_ratio() 193 | if ratio > 0.9: 194 | # print 'fetched similar page or others' 195 | return None 196 | 197 | status_code = response.status_code 198 | m = re.search(r'(?P<title>.*)', response.content) 199 | page_title = m.group('title') if m else '' 200 | sys.stdout.write('[{0}], [{1}], [{2}]\n'.format(status_code, page_title, url)) 201 | return status_code, page_title 202 | 203 | 204 | def set_request_proxy(proxy): 205 | """ 设置请求代理 """ 206 | res = urlparse.urlparse(proxy) 207 | mode = None 208 | if res.scheme == 'socks4': 209 | mode = socks.SOCKS4 210 | elif res.scheme == 'socks5': 211 | mode = socks.SOCKS5 212 | elif res.scheme == 'http': 213 | mode = socks.HTTP 214 | else: 215 | print 'unknown proxy type' 216 | 217 | if mode: 218 | host = res.netloc.split(':')[0] 219 | port = int(res.netloc.split(':')[1]) 220 | socks.set_default_proxy(mode, host, port) 221 | socks.socket = socks.socksocket 222 | print 'proxy %s using' % proxy 223 | 224 | 225 | def build_extended_wordlist(t_url): 226 | """ 根据待扫描域名和URL生成基本扫描字典 """ 227 | wordlist = [] 228 | 229 | return wordlist 230 | 231 | 232 | def get_request_headers(args): 233 | """ 获取命令参数构造每次请求的自定义头部 """ 234 | headers = {} 235 | if args.AGENT: 236 | headers['User-Agent'] = args.AGENT 237 | if args.COOKIE: 238 | headers['Cookie'] = args.COOKIE 239 | 240 | return headers 241 | 242 | 243 | def process_with_url(url, args): 244 | """ 单一目标扫描处理 """ 245 | t_url = patch_url(url) 246 | headers = get_request_headers(args) 247 | if not args.WORDFILE and not args.WORDFILEDIR: 248 | print 'wordfile or wordfile dir required' 249 | sys.exit() 250 | 251 | try: 252 | w_fd = open(args.WORDFILE, 'r') 253 | except IOError, ex: 254 | err = 'unable to load wordfile, ("%s")' % str(ex) 255 | print err 256 | sys.exit() 257 | 258 | # 获取 404页面模板 259 | if args.MODE == 'smart': 260 | err_content = build_not_found_template(t_url, headers) 261 | else: 262 | err_content = None 263 | 264 | # TODO 需要改进多线程参数冗余 - err_content 265 | # 在生成带扫描URL时,默认以当前目录为扫描路径 266 | l = [(urlparse.urljoin(t_url, _.strip().lstrip('/')), err_content, headers) 267 | for _ in build_extended_wordlist(t_url)] 268 | l += [(urlparse.urljoin(t_url, _.strip().lstrip('/')), err_content, headers) 269 | for _ in w_fd.readlines()] 270 | 271 | # 初始化线程池 272 | # TODO 需要定义线程函数回调 callback,用于收集结果给外部程序使用 273 | pool = ThreadPool(args.THREADS) 274 | pool.map(check_url, l) 275 | 276 | 277 | def process_with_url_file(url_file, args): 278 | """ 目标文件扫描处理 """ 279 | try: 280 | u_fd = open(url_file, 'r') 281 | except IOError, ex: 282 | err = 'unable to load url file, ("%s")' % str(ex) 283 | print err 284 | sys.exit() 285 | 286 | while True: 287 | try: 288 | url = u_fd.next().strip() 289 | except StopIteration, ex: 290 | print 'no more url found, ("%s")' % str(ex) 291 | break 292 | 293 | # TODO 定义输出接口数据格式 294 | process_with_url(url, args) 295 | 296 | 297 | def run(args): 298 | print BANNER 299 | # TODO 全局中断信号处理,用于多线程运行时立即退出程序 300 | 301 | if args.PROXY: 302 | set_request_proxy(args.PROXY) 303 | 304 | if args.TIMEOUT: 305 | socket.setdefaulttimeout(args.TIMEOUT) 306 | print 'set request time out to %ds' % args.TIMEOUT 307 | 308 | if not args.URL and not args.URLFILE: 309 | print 'url or url file required' 310 | sys.exit() 311 | 312 | if args.URL: 313 | return process_with_url(args.URL, args) 314 | if args.URLFILE: 315 | return process_with_url_file(args.URLFILE, args) 316 | 317 | 318 | if __name__ == '__main__': 319 | run(parse_commond()) 320 | --------------------------------------------------------------------------------