├── README.md
├── dict
│   └── suffix.txt
├── filesensor.py
├── lib
│   ├── __init__.py
│   ├── cmdparse.py
│   ├── common.py
│   ├── data.py
│   ├── datatype.py
│   └── envcheck.py
├── output
│   └── DO_NOT_DELETE_THIS_FOLDER
├── requirements.txt
└── scrapy_project
    ├── __init__.py
    ├── crawl.py
    ├── settings.py
    └── spiders
        ├── __init__.py
        └── filesensor.py

/README.md:
--------------------------------------------------------------------------------
# FileSensor

**Crawler-based dynamic sensitive file detection tool**

![banner](http://static.cdxy.me/Screenshot-banner-filesensor.png)

Feature
-------
* Generate fuzzing vectors from crawl results
  **(input) http://localhost/ -> (crawl) http://localhost/test.php -> (detect) http://localhost/.test.php.swp**

* Scrapy framework
  Stable crawler and customizable HTTP requests.

* Custom 404 filter
  Use a regular expression to filter out user-defined 404 pages (those that return status code 200).

Requirement
-----------
* Python 3.x
* pip

Install
-------
1. `git clone https://github.com/Xyntax/FileSensor`
2. `cd FileSensor`
3. `pip3 install -r requirements.txt`

* [Scrapy official installation guide](http://scrapy.readthedocs.io/en/latest/intro/install.html)

Usage
-----
```
FileSensor ver0.2 by
https://github.com/Xyntax/FileSensor

Usage:
  filesensor.py URL [--404 REGEX] [-o]
  filesensor.py (-h | --help)

Example:
  python3 filesensor.py https://www.cdxy.me --404 "404 File not Found!"

Options:
  -o           save results in ./output folder
  --404 REGEX  filter out custom 404 pages with regex
  -h --help    show this help message

```


Links
-----

* [Bug tracking](https://github.com/Xyntax/FileSensor/issues)
* Contact

--------------------------------------------------------------------------------
/dict/suffix.txt:
--------------------------------------------------------------------------------
{FULL}~
{FULL}-
{FULL}_
{NAME}~.{EXT}
{NAME}-.{EXT}
{NAME}_.{EXT}
{FULL}0
{FULL}1
{FULL}2
{FULL}3
{NAME}0.{EXT}
{NAME}1.{EXT}
{NAME}2.{EXT}
{NAME}3.{EXT}
{FULL}_0
{FULL}_1
{FULL}_2
{FULL}_3
{NAME}(1).{EXT}
{NAME}(2).{EXT}
{NAME}(3).{EXT}
{FULL}__
{FULL}_bak
{FULL}.bak
{FULL}.bak~
{NAME}.bak.{EXT}
{NAME}_bak.{EXT}
{FULL}.source
{FULL}_source
{NAME}.source.{EXT}
{NAME}_source.{EXT}
{FULL}.zip
{FULL}.rar
{FULL}.tar.gz
{FULL}.tar.xz
{FULL}.7z
{FULL}_old
{FULL}.old
{NAME}_old.{EXT}
{NAME}.old.{EXT}
{FULL}_new
{FULL}.new
{NAME}_new.{EXT}
{NAME}.new.{EXT}
{FULL}.swo
{FULL}.swp
{FULL}.save
{FULL}_save
{NAME}_save.{EXT}
{NAME}.save.{EXT}
.{FULL}.swp
.{FULL}.un~

--------------------------------------------------------------------------------
/filesensor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

from lib import envcheck  # check environment at start
from lib.common import init_options, final_message
from scrapy_project.crawl import run_spider

init_options()
run_spider()
final_message()
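
Editor's note: the README's "fuzzing vectors" feature boils down to template substitution over `dict/suffix.txt`. The project's real implementation is `gen_urls()` in `lib/common.py`; the sketch below is a stand-alone illustration of the idea, with only a handful of templates and illustrative variable names.

```python
# Illustrative sketch only -- the real logic lives in lib/common.py (gen_urls).
templates = ['{FULL}~', '{FULL}.bak', '{NAME}_old.{EXT}', '.{FULL}.swp']  # a few entries from dict/suffix.txt

crawled = 'http://localhost/test.php'
path, _, filename = crawled.rpartition('/')   # 'http://localhost', '/', 'test.php'
name, _, ext = filename.rpartition('.')       # 'test', '.', 'php'

candidates = [path + '/' + t.replace('{FULL}', filename)
                            .replace('{NAME}', name)
                            .replace('{EXT}', ext)
              for t in templates]
print(candidates)
# ['http://localhost/test.php~', 'http://localhost/test.php.bak',
#  'http://localhost/test_old.php', 'http://localhost/.test.php.swp']
```

Each crawled page therefore fans out into a batch of candidate backup/temporary file URLs, which the spider then requests and reports on.
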
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/lib/cmdparse.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

"""
FileSensor ver0.2 by
https://github.com/Xyntax/FileSensor

Usage:
  filesensor.py URL [--404 REGEX] [-o]
  filesensor.py (-h | --help)

Example:
  python3 filesensor.py https://www.cdxy.me --404 "404 File not Found!"

Options:
  -o           save results in ./output folder
  --404 REGEX  filter out custom 404 pages with regex
  -h --help    show this help message

"""

from docopt import docopt


def get_arguments():
    return docopt(__doc__)
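
A note for readers new to docopt: the module above derives the whole CLI from its own docstring. A hedged sketch of what `get_arguments()` returns for the README's example command (the exact dict formatting may differ slightly):

```python
# Sketch: feed docopt the docstring above with an explicit argv, the same
# arguments filesensor.py would receive from the command line.
from docopt import docopt
from lib import cmdparse

args = docopt(cmdparse.__doc__,
              argv=['https://www.cdxy.me', '--404', '404 File not Found!'])
print(args)
# Roughly: {'--404': '404 File not Found!', '--help': False, '-o': False, 'URL': 'https://www.cdxy.me'}
```

`init_options()` in `lib/common.py` then reads these values with `args.get('URL')`, `args.get('--404')` and `args.get('-o')`.
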
--------------------------------------------------------------------------------
/lib/common.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import os
import hashlib
import random
import time
from urllib.parse import urlparse
from .cmdparse import get_arguments
from .data import spider_data, dict_data, paths, conf


def init_options():
    set_path()

    args = get_arguments()
    spider_data.start_urls = args.get('URL')
    spider_data.custom_404_regex = args.get('--404')
    spider_data.found = []
    spider_data.crawled = []
    conf.save_results = args.get('-o')

    load_dict_suffix()


def set_path():
    paths.root_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    paths.dict_path = os.path.join(paths.root_path, 'dict')
    paths.default_suffix_dict = os.path.join(paths.dict_path, 'suffix.txt')
    paths.output_path = os.path.join(paths.root_path, 'output')

    if not all(os.path.exists(p) for p in paths.values()):
        exit('[CRITICAL] Some folders or files are missing, '
             'please re-download the project from https://github.com/Xyntax/FileSensor/')


def load_dict_suffix():
    with open(paths.default_suffix_dict) as f:
        dict_data.url_suffix = set(f.read().split('\n')) - {'', '#'}


def gen_urls(base_url):
    def _split_filename(filename):
        full_filename = filename.rstrip('.')
        extension = full_filename.split('.')[-1]
        name = '.'.join(full_filename.split('.')[:-1])
        return name, extension

    url = base_url.split('?')[0].rstrip('/')
    if not urlparse(url).path:
        return []

    path = '/'.join(url.split('/')[:-1])
    filename = url.split('/')[-1]

    # Check whether the target uses routes instead of static files
    # (no '.' in the last path segment)
    isfile = '.' in filename

    if isfile:
        name, extension = _split_filename(filename)

    final_urls = []
    for each in dict_data.url_suffix:
        new_filename = path + '/' + each.replace('{FULL}', filename)
        if isfile:
            new_filename = new_filename.replace('{NAME}', name).replace('{EXT}', extension)
        else:
            if '{NAME}' in each or '{EXT}' in each:
                continue
        final_urls.append(new_filename.replace('..', '.'))

    return final_urls


def final_message():
    print('-' * 10)
    print('Crawled Page: %d' % len(spider_data.crawled))
    print('Sensitive File Found: %d' % len(spider_data.found))
    for each in spider_data.found:
        print(each)

    save_results()


def random_string():
    return hashlib.md5(str(random.uniform(1, 10)).encode('utf-8')).hexdigest()


def save_results():
    if not conf.save_results:
        return

    site = urlparse(spider_data.start_urls).netloc
    filepath = site if site else spider_data.start_urls.replace('/', '')
    filepath += time.strftime('-%Y%m%d-%H%M%S', time.localtime(time.time()))
    filepath = os.path.join(paths.output_path, filepath)

    try:
        with open(filepath, 'w') as f:
            f.write('\n'.join(spider_data.found))
    except Exception as e:
        exit(e)

    print('\nResults saved in %s' % filepath)
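
A quick way to see `gen_urls()` in action is an interactive session from the repository root. The counts below assume the stock `dict/suffix.txt` (52 templates, 32 of which use only `{FULL}`); this is a hedged example, not project output.

```python
# Example session; set_path()/load_dict_suffix() must run first so that
# dict_data.url_suffix is populated, exactly as init_options() would do.
from lib.common import set_path, load_dict_suffix, gen_urls

set_path()
load_dict_suffix()

print(len(gen_urls('http://localhost/test.php')))  # 52 -> every template applies to a static file
print(len(gen_urls('http://localhost/admin')))     # 32 -> {NAME}/{EXT} templates are skipped for routes
print(gen_urls('http://localhost/')))              # []  -> nothing to fuzz at the site root
```
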
--------------------------------------------------------------------------------
/lib/data.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

from .datatype import AttribDict

paths = AttribDict()
spider_data = AttribDict()
dict_data = AttribDict()
conf = AttribDict()

--------------------------------------------------------------------------------
/lib/datatype.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import copy
import types


class AttribDict(dict):
    """
    This class defines the project object, inheriting from the built-in
    dictionary type.

    >>> foo = AttribDict()
    >>> foo.bar = 1
    >>> foo.bar
    1
    """

    def __init__(self, indict=None, attribute=None):
        if indict is None:
            indict = {}

        # Set any attributes here - before initialisation
        # these remain as normal attributes
        self.attribute = attribute
        dict.__init__(self, indict)
        self.__initialised = True

        # After initialisation, setting attributes
        # is the same as setting an item

    def __getattr__(self, item):
        """
        Maps values to attributes
        Only called if there *is NOT* an attribute with this name
        """
        try:
            return self.__getitem__(item)
        except KeyError:
            raise Exception("unable to access item '%s'" % item)

    def __setattr__(self, item, value):
        """
        Maps attributes to values
        Only if we are initialised
        """
        # This test allows attributes to be set in the __init__ method
        if "_AttribDict__initialised" not in self.__dict__:
            return dict.__setattr__(self, item, value)

        # Any normal attributes are handled normally
        elif item in self.__dict__:
            dict.__setattr__(self, item, value)

        else:
            self.__setitem__(item, value)

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, dict):
        self.__dict__ = dict

    def __deepcopy__(self, memo):
        retVal = self.__class__()
        memo[id(self)] = retVal

        for attr in dir(self):
            if not attr.startswith('_'):
                value = getattr(self, attr)
                if not isinstance(value, (types.BuiltinFunctionType, types.FunctionType, types.MethodType)):
                    setattr(retVal, attr, copy.deepcopy(value, memo))

        for key, value in self.items():
            retVal.__setitem__(key, copy.deepcopy(value, memo))

        return retVal

--------------------------------------------------------------------------------
/lib/envcheck.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

"""
Use as 'import envcheck'
It has to be the first non-standard import, before the project enters its main() function
"""

import sys

PYVERSION = sys.version.split()[0]

if PYVERSION < "3":
    exit("[CRITICAL] incompatible Python version detected ('%s'). "
         "For successfully running this project you'll have to use version 3.x"
         % PYVERSION)

extensions = ("scrapy", "docopt")
try:
    for _ in extensions:
        __import__(_)
except ImportError:
    errMsg = "[CRITICAL] missing one or more requirements (%s). " % (", ".join("'%s'" % _ for _ in extensions))
    errMsg += "Please run \"pip3 install -r requirements.txt\"."
    exit(errMsg)

--------------------------------------------------------------------------------
/output/DO_NOT_DELETE_THIS_FOLDER:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyntax/FileSensor/20db79361e19cdd68b162058587ce5af0c2c5a18/output/DO_NOT_DELETE_THIS_FOLDER

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scrapy
docopt
twisted
lxml
parsel
w3lib
cryptography
pyopenssl

--------------------------------------------------------------------------------
/scrapy_project/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/scrapy_project/crawl.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import os
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings


def run_spider():
    os.environ['SCRAPY_SETTINGS_MODULE'] = 'scrapy_project.settings'
    settings = get_project_settings()
    runner = CrawlerRunner(settings)

    d = runner.crawl('filesensor')
    d.addBoth(lambda _: reactor.stop())
    reactor.run()  # the script will block here until the crawling is finished
--------------------------------------------------------------------------------
/scrapy_project/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Scrapy settings for scrapy_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'scrapy_project'

SPIDER_MODULES = ['scrapy_project.spiders']
NEWSPIDER_MODULE = 'scrapy_project.spiders'

LOG_ENABLED = True

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'scrapy_project (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#    'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'scrapy_project.middlewares.MyCustomSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'scrapy_project.middlewares.MyCustomDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    'scrapy_project.pipelines.SomePipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
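
Most of the file above is the stock Scrapy template; only `BOT_NAME`, `SPIDER_MODULES`, `NEWSPIDER_MODULE`, `LOG_ENABLED` and `ROBOTSTXT_OBEY` are set explicitly. If you need to slow the scanner down or identify it to the target, uncommenting a few of the template knobs is enough. The values below are arbitrary examples, not project defaults.

```python
# Example overrides for scrapy_project/settings.py (illustrative values only).
USER_AGENT = 'FileSensor (+https://github.com/Xyntax/FileSensor)'
DOWNLOAD_DELAY = 0.5                  # wait between requests to the same site
CONCURRENT_REQUESTS_PER_DOMAIN = 8    # cap parallel requests per domain
AUTOTHROTTLE_ENABLED = True           # adapt the delay to observed latency
```
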
--------------------------------------------------------------------------------
/scrapy_project/spiders/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/scrapy_project/spiders/filesensor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import scrapy
import re
from urllib.parse import urlparse
from lib.data import spider_data
from lib.common import gen_urls


class FileSensorSpider(scrapy.Spider):
    name = 'filesensor'
    handle_httpstatus_list = [301, 302, 204, 206, 403, 500]

    def __init__(self):
        super(FileSensorSpider, self).__init__()
        self.url = spider_data.start_urls
        print('[START] ' + self.url)
        if not self.url.startswith('http://') and not self.url.startswith('https://'):
            self.url = 'http://%s/' % self.url
        self.allowed_domains = [re.sub(r'^www\.', '', urlparse(self.url).hostname)]

    def start_requests(self):
        return [scrapy.Request(self.url, callback=self.parse, dont_filter=True)]

    def parse(self, response):
        spider_data.crawled.append(response.url)
        print('[%s]%s' % (response.status, response.url))

        # generate new urls with /dict/suffix.txt
        for new_url in gen_urls(response.url):
            # use a separate callback so generated URLs are not crawled recursively
            yield scrapy.Request(new_url, callback=self.vul_found)

        extracted_url = []
        try:
            # TODO handle this
            extracted_url.extend(response.xpath('//*/@href | //*/@src | //form/@action').extract())
        except:
            return

        # ignore empty links and bare in-page anchors ('#')
        extracted_url = set(extracted_url) - {'#', ''}

        # recursively crawl new links
        for url in extracted_url:
            next_url = response.urljoin(url)
            yield scrapy.Request(next_url, callback=self.parse)

    def vul_found(self, response):
        # filter out custom 404 pages (status code 200) using the [--404] option
        if spider_data.custom_404_regex and re.findall(spider_data.custom_404_regex, str(response.body)):
            return

        msg = '[%s]%s' % (response.status, response.url)
        spider_data.found.append(msg)
        print('[!]' + msg)
--------------------------------------------------------------------------------
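
The `--404` option ultimately drives the `re.findall()` check in `vul_found()` above. A self-contained illustration follows; note that `str(response.body)` is the repr of a bytes object in Python 3, so the regex is matched inside a `b'...'` string.

```python
# Stand-alone illustration of the custom 404 filter used in vul_found().
import re

custom_404_regex = r'404 File not Found!'

fake_404 = str(b'<html><body>404 File not Found!</body></html>')   # "b'<html>...'"
real_hit = str(b'<?php /* forgotten backup */ ?>')

print(bool(re.findall(custom_404_regex, fake_404)))  # True  -> dropped, looks like a custom 404 page
print(bool(re.findall(custom_404_regex, real_hit)))  # False -> kept and reported as a finding
```

Because the match runs on the bytes repr, a pattern containing non-ASCII text (for example a Chinese 404 banner) may need to target the escaped `\xNN` form rather than the literal characters.
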