├── .gitignore ├── README.md ├── SitePathScan.py ├── dict └── default.txt └── www_bodkin_ren.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | **/.DS_Store 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 3 | **SitePathScan** 是一个扫描路径的脚本工具,基于 Python 语言,总体结构较简单,Tag v1.0参照王松师傅之前所写的 [webdirscan](https://github.com/Strikersb/webdirscan) 实现了多线程;Tag v2.0受b0uya师傅帮助,了解了Python3下的asyncio协程高并发,配合aiohttp比之前快了三倍有余,不考虑服务器压力影响下,可以与御剑这类扫描工具抗衡。 4 | 5 | 由于 Windows 下拥有众多优秀目录扫描工具,且速度很快;而对于 Mac/Linux 环境, Python 脚本最合适不过。 6 | 7 | # 安装 8 | 9 | 基于 Python 3.5 及以上版本 10 | 11 | 获取源代码: 12 | 13 | ```shell 14 | git clone https://github.com/L4oZu1/SitePathScan.git 15 | ``` 16 | 17 | 或者直接按URL下载: 18 | 19 | ```shell 20 | https://github.com/L4oZu1/SitePathScan/archive/master.zip 21 | ``` 22 | 23 | 仅需要安装`aiohttp`模块: 24 | 25 | ```shell 26 | pip3 install aiohttp 27 | ``` 28 | 29 | # 步骤 30 | 31 | 查看帮助文档:`python3 SitePathScan.py -h` 32 | 33 | ```shell 34 | python3 SitePathScan.py -h 35 | ____ _ _ ____ _ _ ____ 36 | / ___|(_) |_ ___| _ \ __ _| |_| |__ / ___| ___ __ _ _ ___ 37 | \___ \| | __/ _ \ |_) / _` | __| '_ /\___ \ / __/ _` | '_ / 38 | ___) | | || __/ __/ (_| | |_| | | |___) | (_| (_| | | | | 39 | |____/|_|\__\___|_| \__,_|\__|_| |_|____/ \___\__,_|_| |_| 40 | 41 | usage: SitePathScan.py [-h] [-d SCANDICT] [-o SCANOUTPUT] [-t COROUTINENUM] 42 | website 43 | 44 | This script uses the aiohttp library's head() method to determine the status 45 | word. 
46 | 47 | positional arguments: 48 | website The website that needs to be scanned 49 | 50 | optional arguments: 51 | -h, --help show this help message and exit 52 | -d SCANDICT, --dict SCANDICT 53 | Dictionary for scanning 54 | -o SCANOUTPUT, --output SCANOUTPUT 55 | Results saved files 56 | -t COROUTINENUM, --thread COROUTINENUM 57 | Number of coroutine running the program 58 | ``` 59 | 60 | 基本扫描格式: 61 | 62 | ```shell 63 | python3 SitePathScan.py https://www.bodkin.ren 64 | python3 SitePathScan.py https://www.bodkin.ren -d dict/dict.txt -t 200 -o dir.txt 65 | ``` 66 | 67 | 默认目录字典文件为 `dict/default.txt`,里面只放了10条目录用于测试我的博客,识别率准确: 68 | 69 | ```shell 70 | * SitePathScan ready to start. 71 | * Current target: https://www.bodkin.ren 72 | * Total Dictionary: 10 73 | [ 301 ] https://www.bodkin.ren/wp-content/themes/mylife-wp 74 | [ 200 ] https://www.bodkin.ren/wp-content/themes/mylife-wp/screenshot.png 75 | [ 301 ] https://www.bodkin.ren/index.php 76 | [ 403 ] https://www.bodkin.ren/wp-content/themes/mylife-wp/1.txt 77 | [ 200 ] https://www.bodkin.ren/tools/root.tar 78 | [ 200 ] https://www.bodkin.ren/web.config 79 | [ 200 ] https://www.bodkin.ren/wp-login.php 80 | * End of scan. 81 | ``` 82 | 83 | 包含200(压缩文件),301,403页面,由于脚本简单,可自定义修改。 84 | 85 | # Note 86 | 87 | - 默认扫描结果保存路径为当前目录; 88 | - 默认协程数为50,当字典较小时,协程数不宜过大;当字典较大时,可以将协程数调大,200-1000都行,看硬件配置; 89 | - 没个字典说个求? ☺,Github一搜一堆,猪猪侠师傅的字典一搜一堆,想要精简的自己搜集吧; 90 | - 本脚本适合学习,(v1.0实际情况还是需要开个虚拟机用御剑)现在不需要了... 91 | 92 | # ToDo 93 | 94 | - 放弃自带queue,考虑 asyncio 中的队列方法 95 | 96 | | Timeline | Method | Tag | 97 | | :-------: | :----------------: | :--: | 98 | | 2017.8.9 | threading+requests | v1.0 | 99 | | 2017.8.10 | asyncio+aiohttp | v2.0 | 100 | 101 | -------------------------------------------------------------------------------- /SitePathScan.py: -------------------------------------------------------------------------------- 1 | #! 
# -*- coding:utf-8 -*-
"""SitePathScan: probe a web site for paths listed in a dictionary file.

Every dictionary entry is appended to the target site and requested with an
HTTP HEAD. Responses whose status is 200, 301 or 403 are printed and appended
to the output file.
"""

import queue
import asyncio
import argparse
import re

try:
    from aiohttp import ClientSession
    from aiohttp import TCPConnector
except ImportError:
    # Defer the failure to run() so that a missing dependency produces an
    # installation hint instead of a bare traceback at import time.
    ClientSession = None
    TCPConnector = None


# Raw string: the ASCII art is full of backslashes that are not escapes.
BANNER = r"""
 ____  _ _       ____       _   _     ____
/ ___|(_) |_ ___|  _ \ __ _| |_| |__ / ___|  ___ __ _ _ ___
\___ \| | __/ _ \ |_) / _` | __| '_ /\___ \ / __/ _` | '_ /
 ___) | | ||  __/  __/ (_| | |_| | | |___) | (_| (_| | | | |
|____/|_|\__\___|_|   \__,_|\__|_| |_|____/ \___\__,_|_| |_|
"""[1:]


class SitePathScan(object):
    """Scan one site for the paths listed in a dictionary file.

    Args:
        scanSite: Target site; ``http://`` is assumed when no scheme is given.
        scanDict: Path of the dictionary file, one path per line.
        scanOutput: Result file. A falsy value (the CLI default is ``0``)
            selects a name derived from the target site.
        coroutineNum: Maximum number of requests in flight (at least 1).

    Raises:
        ValueError: If ``coroutineNum`` is smaller than 1.
        SystemExit: If the dictionary contains no usable entry.
    """

    # Status codes that are reported as hits.
    REPORT_CODES = (200, 301, 403)
    # Per-request timeout in seconds.
    REQUEST_TIMEOUT = 10
    HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
        'Referer': 'https://www.baidu.com',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
    }

    def __init__(self, scanSite, scanDict, scanOutput, coroutineNum):
        if coroutineNum < 1:
            # A semaphore of 0 would block every request forever.
            raise ValueError('coroutineNum must be at least 1')
        print('* SitePathScan ready to start.')
        site = scanSite if '://' in scanSite else 'http://%s' % scanSite
        # Drop trailing slashes so paths can be joined with exactly one '/'.
        self.scanSite = site.rstrip('/')
        print('* Current target:', self.scanSite)
        self.scanDict = scanDict
        self.scanOutput = scanOutput if scanOutput else self._default_output_name()
        self.loadDict(self.scanDict)
        self.coroutineNum = coroutineNum
        # The loop and the semaphore are created when the scan starts, so
        # they always belong to the loop that actually runs the requests.
        self.loop = None
        self.sema = None
        self.tasks = []
        self.flag = 0    # number of scheduled requests
        self.errors = 0  # number of requests that raised

    def _default_output_name(self):
        """Return a file name derived from the target host and path."""
        target = self.scanSite.split('://', 1)[-1]
        # '.', ':' and '/' (among others) are not safe in a file name.
        return re.sub(r'[^\w-]', '_', target) + '.txt'

    def _build_url(self, path):
        """Join the target site and a dictionary entry with a single slash."""
        return self.scanSite + '/' + path.lstrip('/')

    def loadDict(self, dict_list):
        """Load the non-empty lines of ``dict_list`` into the work queue.

        Raises:
            SystemExit: If the file yields no entry.
        """
        self.q = queue.Queue()
        with open(dict_list) as f:
            for line in f:
                entry = line.strip()
                if entry:  # a blank line would only re-scan the site root
                    self.q.put(entry)
        self.data = self.q.qsize()
        if self.data > 0:
            print('* Total Dictionary:', self.data)
        else:
            print('* Empty dictionary: %s' % dict_list)
            raise SystemExit(1)

    def writeOutput(self, result):
        """Append one result line to the output file."""
        with open(self.scanOutput, 'a', encoding='utf-8') as f:
            f.write(result + '\n')

    async def scan(self, url, session=None):
        """Send a HEAD request to ``url`` and record it when it is a hit.

        Args:
            url: Full URL to probe.
            session: Optional shared aiohttp session. When omitted, a
                temporary session is opened for this single request.
        """
        if session is None:
            async with ClientSession() as own_session:
                await self._probe(own_session, url)
        else:
            await self._probe(session, url)

    async def _probe(self, session, url):
        """Issue the request under the concurrency limit and report hits."""
        if self.sema is None:
            self.sema = asyncio.Semaphore(self.coroutineNum)
        try:
            async with self.sema:
                async with session.head(url, headers=self.HEADERS,
                                        timeout=self.REQUEST_TIMEOUT) as resp:
                    code = resp.status
        except asyncio.CancelledError:
            # Never swallow cancellation (it is an Exception before 3.8).
            raise
        except Exception:
            # Unreachable or misbehaving paths are expected during a scan;
            # count them so run() can report the total.
            self.errors += 1
            return
        if code in self.REPORT_CODES:
            line = '[ %i ] %s' % (code, url)
            print(line)
            try:
                self.writeOutput(line)
            except OSError as e:
                print('* Warning: cannot write to %s: %s' % (self.scanOutput, e))

    async def _scan_all(self):
        """Schedule one request per queued path and wait for all of them."""
        self.sema = asyncio.Semaphore(self.coroutineNum)
        self.tasks = []
        # One session for the whole scan; let its connection pool match the
        # requested concurrency instead of aiohttp's default limit.
        connector = TCPConnector(limit=self.coroutineNum)
        async with ClientSession(connector=connector) as session:
            while not self.q.empty():
                url = self._build_url(self.q.get_nowait())
                self.tasks.append(self.loop.create_task(self.scan(url, session)))
                self.flag += 1
            if self.tasks:
                await asyncio.wait(self.tasks)

    def run(self):
        """Run the whole scan on a fresh event loop and close the loop."""
        if ClientSession is None:
            print('* aiohttp is not installed, run: pip3 install aiohttp')
            raise SystemExit(1)
        self.loop = asyncio.new_event_loop()
        try:
            self.loop.run_until_complete(self._scan_all())
        except asyncio.CancelledError:
            print('* Warning:CancelledError.')
        finally:
            self.loop.close()
        if self.errors:
            print('* Requests failed: %d' % self.errors)


def main():
    """Parse the command line and run one scan."""
    print(BANNER)
    parser = argparse.ArgumentParser(description="This script uses the aiohttp library's head() method to determine the status word.")
    # Positional argument
    parser.add_argument("website", type=str, help="The website that needs to be scanned")
    # Optional arguments
    parser.add_argument('-d', '--dict', dest="scanDict", help="Dictionary for scanning", type=str, default="dict/default.txt")
    parser.add_argument('-o', '--output', dest="scanOutput", help="Results saved files", type=str, default=0)
    parser.add_argument('-t', '--thread', dest="coroutineNum", help="Number of coroutine running the program", type=int, default=50)
    args = parser.parse_args()
    if args.coroutineNum < 1:
        parser.error('the number of coroutines must be at least 1')
    scan = SitePathScan(args.website, args.scanDict, args.scanOutput, args.coroutineNum)
    scan.run()
    print("* End of scan.")


if __name__ == '__main__':
    main()