├── README.md
├── dict
│   └── suffix.txt
├── filesensor.py
├── lib
│   ├── __init__.py
│   ├── cmdparse.py
│   ├── common.py
│   ├── data.py
│   ├── datatype.py
│   └── envcheck.py
├── output
│   └── DO_NOT_DELETE_THIS_FOLDER
├── requirements.txt
└── scrapy_project
    ├── __init__.py
    ├── crawl.py
    ├── settings.py
    └── spiders
        ├── __init__.py
        └── filesensor.py

/README.md:
--------------------------------------------------------------------------------
# FileSensor

**Crawler-based dynamic sensitive file detection tool**

![banner](http://static.cdxy.me/Screenshot-banner-filesensor.png)

Feature
-------
* Generate fuzzing vectors from crawl results
  **(input) http://localhost/ -> (crawl) http://localhost/test.php -> (detect) http://localhost/.test.php.swp**

* Scrapy framework
  Stable crawler and customizable HTTP requests.

* Custom 404 filter
  Use a regular expression to filter out user-defined 404 pages (those that return status code 200).

Requirement
-----------
* Python 3.x
* pip

Install
-------
1. `git clone https://github.com/Xyntax/FileSensor`
2. `cd FileSensor`
3. `pip3 install -r requirements.txt`

* [Scrapy official installation guide](http://scrapy.readthedocs.io/en/latest/intro/install.html)

Usage
-----
```
FileSensor ver0.2 by
https://github.com/Xyntax/FileSensor

Usage:
  filesensor.py URL [--404 REGEX] [-o]
  filesensor.py (-h | --help)

Example:
  python3 filesensor.py https://www.cdxy.me --404 "404 File not Found!"

Options:
  -o           save results in ./output folder
  --404 REGEX  filter out custom 404 pages with regex
  -h --help    show this help message

```


Links
-----

* [Bug tracking](https://github.com/Xyntax/FileSensor/issues)
* Contact

--------------------------------------------------------------------------------
/dict/suffix.txt:
--------------------------------------------------------------------------------
{FULL}~
{FULL}-
{FULL}_
{NAME}~.{EXT}
{NAME}-.{EXT}
{NAME}_.{EXT}
{FULL}0
{FULL}1
{FULL}2
{FULL}3
{NAME}0.{EXT}
{NAME}1.{EXT}
{NAME}2.{EXT}
{NAME}3.{EXT}
{FULL}_0
{FULL}_1
{FULL}_2
{FULL}_3
{NAME}(1).{EXT}
{NAME}(2).{EXT}
{NAME}(3).{EXT}
{FULL}__
{FULL}_bak
{FULL}.bak
{FULL}.bak~
{NAME}.bak.{EXT}
{NAME}_bak.{EXT}
{FULL}.source
{FULL}_source
{NAME}.source.{EXT}
{NAME}_source.{EXT}
{FULL}.zip
{FULL}.rar
{FULL}.tar.gz
{FULL}.tar.xz
{FULL}.7z
{FULL}_old
{FULL}.old
{NAME}_old.{EXT}
{NAME}.old.{EXT}
{FULL}_new
{FULL}.new
{NAME}_new.{EXT}
{NAME}.new.{EXT}
{FULL}.swo
{FULL}.swp
{FULL}.save
{FULL}_save
{NAME}_save.{EXT}
{NAME}.save.{EXT}
.{FULL}.swp
.{FULL}.un~

--------------------------------------------------------------------------------
/filesensor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

from lib import envcheck  # check environment at start
from lib.common import init_options, final_message
from scrapy_project.crawl import run_spider

init_options()
run_spider()
final_message()
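
Editor's note: the README's "fuzzing vectors" feature boils down to template substitution over `dict/suffix.txt`. The project's real implementation is `gen_urls()` in `lib/common.py`; the sketch below is a stand-alone illustration of the idea, with only a handful of templates and illustrative variable names.

```python
# Illustrative sketch only -- the real logic lives in lib/common.py (gen_urls).
templates = ['{FULL}~', '{FULL}.bak', '{NAME}_old.{EXT}', '.{FULL}.swp']  # a few entries from dict/suffix.txt

crawled = 'http://localhost/test.php'
path, _, filename = crawled.rpartition('/')   # 'http://localhost', '/', 'test.php'
name, _, ext = filename.rpartition('.')       # 'test', '.', 'php'

candidates = [path + '/' + t.replace('{FULL}', filename)
                            .replace('{NAME}', name)
                            .replace('{EXT}', ext)
              for t in templates]
print(candidates)
# ['http://localhost/test.php~', 'http://localhost/test.php.bak',
#  'http://localhost/test_old.php', 'http://localhost/.test.php.swp']
```

Each crawled page therefore fans out into a batch of candidate backup/temporary file URLs, which the spider then requests and reports on.
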
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/lib/cmdparse.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

"""
FileSensor ver0.2 by
https://github.com/Xyntax/FileSensor

Usage:
  filesensor.py URL [--404 REGEX] [-o]
  filesensor.py (-h | --help)

Example:
  python3 filesensor.py https://www.cdxy.me --404 "404 File not Found!"

Options:
  -o           save results in ./output folder
  --404 REGEX  filter out custom 404 pages with regex
  -h --help    show this help message

"""

from docopt import docopt


def get_arguments():
    return docopt(__doc__)
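
A note for readers new to docopt: the module above derives the whole CLI from its own docstring. A hedged sketch of what `get_arguments()` returns for the README's example command (the exact dict formatting may differ slightly):

```python
# Sketch: feed docopt the docstring above with an explicit argv, the same
# arguments filesensor.py would receive from the command line.
from docopt import docopt
from lib import cmdparse

args = docopt(cmdparse.__doc__,
              argv=['https://www.cdxy.me', '--404', '404 File not Found!'])
print(args)
# Roughly: {'--404': '404 File not Found!', '--help': False, '-o': False, 'URL': 'https://www.cdxy.me'}
```

`init_options()` in `lib/common.py` then reads these values with `args.get('URL')`, `args.get('--404')` and `args.get('-o')`.
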
--------------------------------------------------------------------------------
/lib/common.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import os
import hashlib
import random
import time
from urllib.parse import urlparse
from .cmdparse import get_arguments
from .data import spider_data, dict_data, paths, conf


def init_options():
    set_path()

    args = get_arguments()
    spider_data.start_urls = args.get('URL')
    spider_data.custom_404_regex = args.get('--404')
    spider_data.found = []
    spider_data.crawled = []
    conf.save_results = args.get('-o')

    load_dict_suffix()


def set_path():
    paths.root_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    paths.dict_path = os.path.join(paths.root_path, 'dict')
    paths.default_suffix_dict = os.path.join(paths.dict_path, 'suffix.txt')
    paths.output_path = os.path.join(paths.root_path, 'output')

    if not all(os.path.exists(p) for p in paths.values()):
        exit('[CRITICAL] Some folders or files are missing, '
             'please re-download the project from https://github.com/Xyntax/FileSensor/')


def load_dict_suffix():
    with open(paths.default_suffix_dict) as f:
        dict_data.url_suffix = set(f.read().split('\n')) - {'', '#'}


def gen_urls(base_url):
    def _split_filename(filename):
        full_filename = filename.rstrip('.')
        extension = full_filename.split('.')[-1]
        name = '.'.join(full_filename.split('.')[:-1])
        return name, extension

    url = base_url.split('?')[0].rstrip('/')
    if not urlparse(url).path:
        return []

    path = '/'.join(url.split('/')[:-1])
    filename = url.split('/')[-1]

    # Check whether the target uses routes instead of static files
    # (no '.' in the last path segment)
    isfile = '.' in filename

    if isfile:
        name, extension = _split_filename(filename)

    final_urls = []
    for each in dict_data.url_suffix:
        new_filename = path + '/' + each.replace('{FULL}', filename)
        if isfile:
            new_filename = new_filename.replace('{NAME}', name).replace('{EXT}', extension)
        else:
            if '{NAME}' in each or '{EXT}' in each:
                continue
        final_urls.append(new_filename.replace('..', '.'))

    return final_urls


def final_message():
    print('-' * 10)
    print('Crawled Page: %d' % len(spider_data.crawled))
    print('Sensitive File Found: %d' % len(spider_data.found))
    for each in spider_data.found:
        print(each)

    save_results()


def random_string():
    return hashlib.md5(str(random.uniform(1, 10)).encode('utf-8')).hexdigest()


def save_results():
    if not conf.save_results:
        return

    site = urlparse(spider_data.start_urls).netloc
    filepath = site if site else spider_data.start_urls.replace('/', '')
    filepath += time.strftime('-%Y%m%d-%H%M%S', time.localtime(time.time()))
    filepath = os.path.join(paths.output_path, filepath)

    try:
        with open(filepath, 'w') as f:
            f.write('\n'.join(spider_data.found))
    except Exception as e:
        exit(e)

    print('\nResults saved in %s' % filepath)
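
A quick way to see `gen_urls()` in action is an interactive session from the repository root. The counts below assume the stock `dict/suffix.txt` (52 templates, 32 of which use only `{FULL}`); this is a hedged example, not project output.

```python
# Example session; set_path()/load_dict_suffix() must run first so that
# dict_data.url_suffix is populated, exactly as init_options() would do.
from lib.common import set_path, load_dict_suffix, gen_urls

set_path()
load_dict_suffix()

print(len(gen_urls('http://localhost/test.php')))  # 52 -> every template applies to a static file
print(len(gen_urls('http://localhost/admin')))     # 32 -> {NAME}/{EXT} templates are skipped for routes
print(gen_urls('http://localhost/')))              # []  -> nothing to fuzz at the site root
```
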
--------------------------------------------------------------------------------
/lib/data.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

from .datatype import AttribDict

paths = AttribDict()
spider_data = AttribDict()
dict_data = AttribDict()
conf = AttribDict()

--------------------------------------------------------------------------------
/lib/datatype.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import copy
import types


class AttribDict(dict):
    """
    This class defines the project object, inheriting from the built-in
    dictionary type.

    >>> foo = AttribDict()
    >>> foo.bar = 1
    >>> foo.bar
    1
    """

    def __init__(self, indict=None, attribute=None):
        if indict is None:
            indict = {}

        # Set any attributes here - before initialisation
        # these remain as normal attributes
        self.attribute = attribute
        dict.__init__(self, indict)
        self.__initialised = True

        # After initialisation, setting attributes
        # is the same as setting an item

    def __getattr__(self, item):
        """
        Maps values to attributes
        Only called if there *is NOT* an attribute with this name
        """
        try:
            return self.__getitem__(item)
        except KeyError:
            raise Exception("unable to access item '%s'" % item)

    def __setattr__(self, item, value):
        """
        Maps attributes to values
        Only if we are initialised
        """
        # This test allows attributes to be set in the __init__ method
        if "_AttribDict__initialised" not in self.__dict__:
            return dict.__setattr__(self, item, value)

        # Any normal attributes are handled normally
        elif item in self.__dict__:
            dict.__setattr__(self, item, value)

        else:
            self.__setitem__(item, value)

    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, dict):
        self.__dict__ = dict

    def __deepcopy__(self, memo):
        retVal = self.__class__()
        memo[id(self)] = retVal

        for attr in dir(self):
            if not attr.startswith('_'):
                value = getattr(self, attr)
                if not isinstance(value, (types.BuiltinFunctionType, types.FunctionType, types.MethodType)):
                    setattr(retVal, attr, copy.deepcopy(value, memo))

        for key, value in self.items():
            retVal.__setitem__(key, copy.deepcopy(value, memo))

        return retVal

--------------------------------------------------------------------------------
/lib/envcheck.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

"""
Use as 'import envcheck'
It has to be the first non-standard import, before the project enters its main() function
"""

import sys

PYVERSION = sys.version.split()[0]

if PYVERSION < "3":
    exit("[CRITICAL] incompatible Python version detected ('%s'). "
         "For successfully running this project you'll have to use version 3.x"
         % PYVERSION)

extensions = ("scrapy", "docopt")
try:
    for _ in extensions:
        __import__(_)
except ImportError:
    errMsg = "[CRITICAL] missing one or more requirements (%s). " % (", ".join("'%s'" % _ for _ in extensions))
    errMsg += "Please run \"pip3 install -r requirements.txt\"."
    exit(errMsg)

--------------------------------------------------------------------------------
/output/DO_NOT_DELETE_THIS_FOLDER:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xyntax/FileSensor/20db79361e19cdd68b162058587ce5af0c2c5a18/output/DO_NOT_DELETE_THIS_FOLDER

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scrapy
docopt
twisted
lxml
parsel
w3lib
cryptography
pyopenssl

--------------------------------------------------------------------------------
/scrapy_project/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/scrapy_project/crawl.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import os
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings


def run_spider():
    os.environ['SCRAPY_SETTINGS_MODULE'] = 'scrapy_project.settings'
    settings = get_project_settings()
    runner = CrawlerRunner(settings)

    d = runner.crawl('filesensor')
    d.addBoth(lambda _: reactor.stop())
    reactor.run()  # the script will block here until the crawling is finished
--------------------------------------------------------------------------------
/scrapy_project/settings.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Scrapy settings for scrapy_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     http://doc.scrapy.org/en/latest/topics/settings.html
#     http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#     http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'scrapy_project'

SPIDER_MODULES = ['scrapy_project.spiders']
NEWSPIDER_MODULE = 'scrapy_project.spiders'

LOG_ENABLED = True

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'scrapy_project (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#    'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'scrapy_project.middlewares.MyCustomSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
#    'scrapy_project.middlewares.MyCustomDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
#    'scrapy_project.pipelines.SomePipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
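
Most of the file above is the stock Scrapy template; only `BOT_NAME`, `SPIDER_MODULES`, `NEWSPIDER_MODULE`, `LOG_ENABLED` and `ROBOTSTXT_OBEY` are set explicitly. If you need to slow the scanner down or identify it to the target, uncommenting a few of the template knobs is enough. The values below are arbitrary examples, not project defaults.

```python
# Example overrides for scrapy_project/settings.py (illustrative values only).
USER_AGENT = 'FileSensor (+https://github.com/Xyntax/FileSensor)'
DOWNLOAD_DELAY = 0.5                  # wait between requests to the same site
CONCURRENT_REQUESTS_PER_DOMAIN = 8    # cap parallel requests per domain
AUTOTHROTTLE_ENABLED = True           # adapt the delay to observed latency
```
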
--------------------------------------------------------------------------------
/scrapy_project/spiders/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

--------------------------------------------------------------------------------
/scrapy_project/spiders/filesensor.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# project = https://github.com/Xyntax/FileSensor
# author = i@cdxy.me

import scrapy
import re
from urllib.parse import urlparse
from lib.data import spider_data
from lib.common import gen_urls


class FileSensorSpider(scrapy.Spider):
    name = 'filesensor'
    handle_httpstatus_list = [301, 302, 204, 206, 403, 500]

    def __init__(self):
        super(FileSensorSpider, self).__init__()
        self.url = spider_data.start_urls
        print('[START] ' + self.url)
        if not self.url.startswith('http://') and not self.url.startswith('https://'):
            self.url = 'http://%s/' % self.url
        self.allowed_domains = [re.sub(r'^www\.', '', urlparse(self.url).hostname)]

    def start_requests(self):
        return [scrapy.Request(self.url, callback=self.parse, dont_filter=True)]

    def parse(self, response):
        spider_data.crawled.append(response.url)
        print('[%s]%s' % (response.status, response.url))

        # generate new urls with /dict/suffix.txt
        for new_url in gen_urls(response.url):
            # use a separate callback so generated URLs are not crawled recursively
            yield scrapy.Request(new_url, callback=self.vul_found)

        extracted_url = []
        try:
            # TODO handle this
            extracted_url.extend(response.xpath('//*/@href | //*/@src | //form/@action').extract())
        except:
            return

        # ignore empty links and bare in-page anchors ('#')
        extracted_url = set(extracted_url) - {'#', ''}

        # recursively crawl new links
        for url in extracted_url:
            next_url = response.urljoin(url)
            yield scrapy.Request(next_url, callback=self.parse)

    def vul_found(self, response):
        # filter out custom 404 pages (status code 200) using the [--404] option
        if spider_data.custom_404_regex and re.findall(spider_data.custom_404_regex, str(response.body)):
            return

        msg = '[%s]%s' % (response.status, response.url)
        spider_data.found.append(msg)
        print('[!]' + msg)
--------------------------------------------------------------------------------
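
The `--404` option ultimately drives the `re.findall()` check in `vul_found()` above. A self-contained illustration follows; note that `str(response.body)` is the repr of a bytes object in Python 3, so the regex is matched inside a `b'...'` string.

```python
# Stand-alone illustration of the custom 404 filter used in vul_found().
import re

custom_404_regex = r'404 File not Found!'

fake_404 = str(b'<html><body>404 File not Found!</body></html>')   # "b'<html>...'"
real_hit = str(b'<?php /* forgotten backup */ ?>')

print(bool(re.findall(custom_404_regex, fake_404)))  # True  -> dropped, looks like a custom 404 page
print(bool(re.findall(custom_404_regex, real_hit)))  # False -> kept and reported as a finding
```

Because the match runs on the bytes repr, a pattern containing non-ASCII text (for example a Chinese 404 banner) may need to target the escaped `\xNN` form rather than the literal characters.
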