├── tests
│   ├── __init__.py
│   ├── test_req_res.py
│   ├── test_common.py
│   └── test_main.py
├── requirements.txt
├── pathbuster
│   ├── __init__.py
│   ├── utils
│   │   └── common.py
│   ├── classes
│   │   ├── config.py
│   │   └── response.py
│   └── pathbuster.py
├── pyproject.toml
├── .github
│   └── workflows
│       └── publish-to-pypi.yml
├── LICENSE
├── .gitignore
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
requests
pytest
--------------------------------------------------------------------------------
/pathbuster/__init__.py:
--------------------------------------------------------------------------------
import sys
import os

# Make the package directory importable so intra-package imports such as
# `from classes.config import Config` resolve at runtime.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
--------------------------------------------------------------------------------
/tests/test_req_res.py:
--------------------------------------------------------------------------------
from pathbuster.pathbuster import Response


def test_init():
    bodyb = b'one two\nthree four'
    headers = {'Cookie': 'test=1234;'}
    parent_url = 'http://example.com'
    req = Response('http://example.com/admin', 200, 'OK', bodyb, headers, parent_url, 'meta1')
    assert len(req.headers) == 1
    assert req.headers['Cookie'] == 'test=1234;'
    assert req.strbody == 'one two\nthree four'
    assert req.bodylen == len(bodyb)
    assert req.scheme == 'http'
    assert req.parent_url == parent_url
    assert req.bodylines == 2
    assert req.bodywords == 3
--------------------------------------------------------------------------------
/pathbuster/utils/common.py:
--------------------------------------------------------------------------------
import string
import random
from hashlib import md5


def random_str(length=30):
    """Generate a random alphanumeric string of the given length."""
    letters = string.ascii_letters + string.digits
    return ''.join(random.choice(letters) for _ in range(length))


def count_lines(text: str):
    if len(text) > 0:
        return text.count("\n") + 1
    else:
        return 0


def count_words(text: str):
    # Note: only single spaces are treated as word separators, so words
    # split by newlines count as one (see the TODOs in tests/test_common.py).
    if len(text) > 0:
        return text.count(" ") + 1
    else:
        return 0


def md5str(s):
    return md5(s.encode()).hexdigest()
--------------------------------------------------------------------------------
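A quick illustration of the counting helpers above (editor's sketch, mirroring the assertions in tests/test_common.py): `count_words` only recognizes single spaces as separators, which is why the tests carry TODOs for newline-separated words.

```python
from pathbuster.utils.common import count_lines, count_words

# count_lines treats "\n" as a line separator.
assert count_lines("Hello\nWorld") == 2
# count_words counts spaces only, so a newline does not split words.
assert count_words("Hello World") == 2
assert count_words("Hello\nWorld") == 1
```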
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "pathbuster"
version = "0.3.0"
authors = [
    { name="Vladimir Sopernikov" },
]
description = "PathBuster - multiple hosts Web path scanner"
readme = "README.md"
requires-python = ">=3.7"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "requests"
]

[project.urls]
"Homepage" = "https://github.com/rivalsec/pathbuster"
"Bug Tracker" = "https://github.com/rivalsec/pathbuster/issues"

[project.scripts]
pathbuster = "pathbuster.pathbuster:main"
--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
import pytest
from pathbuster.utils.common import count_lines, count_words

def test_count_lines():
    assert count_lines("") == 0  # Empty string should return 0
    assert count_lines("Hello\nWorld") == 2  # Two lines separated by a newline character
    assert count_lines("Hello\n\nWorld") == 3  # Three lines with an empty line in between
    assert count_lines("Hello World") == 1  # Single line without any newline characters

def test_count_words():
    assert count_words("") == 0  # Empty string should return 0
    assert count_words("Hello World") == 2  # Two words separated by a space
    assert count_words("Hello\nWorld") == 1  # TODO: fix? Two words separated by a newline currently count as one
    assert count_words("Hello\n\nWorld") == 1  # TODO: fix? Two words separated by an empty line currently count as one
--------------------------------------------------------------------------------
/pathbuster/classes/config.py:
--------------------------------------------------------------------------------
class Config:
    __slots__ = [
        "proxies", "timeout", "headers", "max_errors", "http_method",
        "max_response_size", "store_response", "filter_regex",
        "json_print", "follow_redirects", "max_redirects", "exclude_codes",
        "extensions", "stats", "res_dir", "stats_interval"
    ]

    def __init__(self):
        # global settings
        self.proxies = None
        self.timeout = 30
        self.headers = dict()
        self.max_errors = 5
        self.http_method = 'GET'
        self.max_response_size = 250000
        self.store_response = False
        self.filter_regex = None
        self.json_print = False
        self.follow_redirects = False
        self.max_redirects = 3
        self.exclude_codes = []
        self.extensions = ['']
        self.stats = None
        self.res_dir = None
        self.stats_interval = 60
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
name: Publish to PyPI and TestPyPI
on: push
jobs:
  build-n-publish:
    name: Build and publish to PyPI and TestPyPI
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'
      - name: Install pypa/build
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      # - name: Publish distribution to Test PyPI
      #   uses: pypa/gh-action-pypi-publish@release/v1
      #   with:
      #     password: ${{ secrets.TEST_PYPI_API_TOKEN }}
      #     repository-url: https://test.pypi.org/legacy/
      #     skip-existing: true
      - name: Publish distribution to PyPI (only on tags)
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
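The publish step above is gated on `refs/tags`, so a release is cut by pushing a tag (editor's example; the tag name is a placeholder matching the version in pyproject.toml):

```
git tag v0.3.0
git push origin v0.3.0
```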
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Vladimir Sopernikov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/pathbuster/classes/response.py:
--------------------------------------------------------------------------------
import json
from utils.common import count_lines, count_words, md5str
import urllib.parse


class Response:
    __slots__ = [
        "base_url", "url", "status", "reason", "headers",
        "parent_url", "bodylen", "strbody",
        "bodywords", "bodylines", "meta", "location",
        "scheme", "host", "path_hash", "body"
    ]

    def __init__(self, url, status, reason, body, headers, parent_url, meta=None):
        self.base_url = None
        self.url = url
        self.status = status
        self.reason = reason
        self.headers = headers
        self.parent_url = parent_url
        if "Content-Length" in headers:
            self.bodylen = int(headers["Content-Length"])
        else:
            self.bodylen = len(body)
        self.strbody = body.decode('utf-8', errors='ignore')
        self.bodywords = count_words(self.strbody)
        self.bodylines = count_lines(self.strbody)
        self.meta = []
        if meta:
            self.meta.append(meta)
        if 'location' in headers:
            self.location = headers['location']
        else:
            self.location = None
        up = urllib.parse.urlparse(url)
        self.scheme = up[0]
        self.host = up[1]
        self.path_hash = md5str(up[2])
        self.body = body

    def add_meta(self, s):
        self.meta.append(s)

    def __str__(self):
        s = f"{self.url}\t{self.status}\tBytes:{self.bodylen}/Lines:{self.bodylines}/Words:{self.bodywords}"
        if self.location:
            s += f"\t-> {self.location}"
        if self.meta:
            meta = ', '.join(self.meta)
            s += f"\t{meta}"
        return s

    def is_similar(self, other: 'Response'):
        if self.status == other.status and self.bodywords == other.bodywords and self.bodylines == other.bodylines:
            return True
        return False

    def to_json(self, store_response=False):
        jkeys = ['url', 'status', 'reason', 'parent_url', 'meta', 'scheme', 'host']
        if store_response:
            jkeys.append('strbody')
        jres = {k: getattr(self, k) for k in jkeys}
        return json.dumps(jres)
--------------------------------------------------------------------------------
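A short sketch of how Response is constructed and rendered (editor's note; it mirrors tests/test_req_res.py, and the URL and header values are illustrative):

```python
from pathbuster.pathbuster import Response

res = Response('http://example.com/admin', 200, 'OK', b'one two\nthree four',
               {'Cookie': 'test=1234;'}, 'http://example.com', 'meta1')
print(res)            # tab-separated: url, status, Bytes:18/Lines:2/Words:3, meta
print(res.to_json())  # same fields as a JSON object
```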
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/


test/
pathbuster-res/
.vscode/
.DS_Store
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
import pathbuster.pathbuster as pathbuster
from pathbuster.classes.response import Response

required_args = [
    '-u', './test/testurls',
    '-p', './test/testwordlist',
]


def test_filter_regex():
    pathbuster.conf.filter_regex = 'Admin'
    pathbuster.conf.exclude_codes = [301,]

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'<title>Member etc', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == False

    res2 = Response(url='http://example.com/test', status=404, reason='OK', body=b'bla \n Admin Panel ', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == True

    # code is filtered
    res2 = Response(url='http://example.com/test', status=301, reason='OK', body=b'bla \n Admin Panel ', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == False


def test_not_ac():
    # only status code filter
    args = required_args.copy()
    args.extend(['-e', '401,404,400'])
    pathbuster.parse_args(args)

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == True

    res2 = Response(url='http://example.com/test', status=400, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == False

    res3 = Response(url='http://example.com/test', status=401, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res3) == False


def test_empty_e():
    pathbuster.conf.exclude_codes = []

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == True

    res2 = Response(url='http://example.com/test', status=400, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == True


def test_ac():
    args = required_args.copy()
    args.extend(['-ac'])
    pathbuster.parse_args(args)
    pf_res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'1 2 3\n4 5', headers={}, parent_url='http://example.com')
    pf_res2 = Response(url='http://example.com/redirect', status=301, reason='OK', body=b'1 2 3\n4 5\n6 7', headers={}, parent_url='http://example.com')
    pathbuster.preflight_samples = {
        'http://example.com': [pf_res1, pf_res2],
    }
    assert pathbuster.result_valid(pf_res1) == False
    assert pathbuster.result_valid(pf_res2) == False
    res = Response(url='http://example.com/test2', status=200, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example.com')
    assert pathbuster.result_valid(res) == True
    res2 = Response(url='http://example.com/test22', status=301, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example.com')
    assert pathbuster.result_valid(res2) == True
    res3 = Response(url='http://example2.com/test222', status=200, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example2.com')
    assert pathbuster.result_valid(res3) == True
--------------------------------------------------------------------------------
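To run the suite (editor's note): tests that call parse_args need the ./test/testurls and ./test/testwordlist fixtures from required_args to exist, because argparse.FileType opens the -u/-p files immediately.

```
pip install -r requirements.txt
python -m pytest tests/
```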
/README.md:
--------------------------------------------------------------------------------
# PathBuster - multiple hosts Web path scanner

This scanner is designed to check paths on multiple hosts at the same time.
It takes one path, checks it across all hosts in multiple threads, then moves on to the next path, and so on.

This gives us the following benefits:
- no single host receives a heavy load (as it would if many paths were checked in several threads against one host).
- reduces the chance of the scanner being banned by a WAF.
- saves time: there is no need to run the scanner against each host separately.
- a large number of results at once.

For convenience, the results are written to two files at once in the pathbuster-res folder:
- a per-host file (all paths found for that host)
- a per-status-code file (all 200 responses, all 301 responses, etc.)

![image](https://user-images.githubusercontent.com/50343281/114876542-de8ab200-9e17-11eb-9c1c-78702fd2d4f1.png)


Before scanning starts, the program requests a random string from each server and, if the response code is not excluded by the program settings, records a sample of the response (status code and size) for later comparison.
This excludes a large number of false positives (for example, when a server answers 200 OK to every request), and it also surfaces responses that differ from the recorded samples even when the status code is the same.
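This calibration runs when the `-ac` flag is passed, for example:

```
pathbuster -u /path/to/URLS_FILE -p /path/to/wordlist -ac -srd pathbuster-res
```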
## Installation:
```
pip3 install -U pathbuster
```

## Basic usage:
```
pathbuster -u /path/to/URLS_FILE -p /path/to/wordlist -srd pathbuster-res
```

## Passive check with Nuclei
```
pathbuster -u /path/to/URLS_FILE -p /path/to/wordlist --store_response -srd pathbuster-res
```
![image](https://user-images.githubusercontent.com/50343281/149454129-c3c262f3-d3e1-4125-bb87-c334839ac338.png)


## options:
```
  -h, --help            show this help message and exit
  -m HTTP_METHOD, --http_method HTTP_METHOD
                        HTTP method to use (default: GET)
  -u URLS_FILE, --urls_file URLS_FILE
                        urls file (base url) (default: None)
  -p PATHS_FILE, --paths_file PATHS_FILE
                        paths wordlist (default: None)
  -e EXCLUDE_CODES, --exclude_codes EXCLUDE_CODES
                        Exclude status codes, separated by commas (Example: 404,403) (default: 404)
  -x EXTENSIONS, --extensions EXTENSIONS
                        Extension list separated by commas (Example: php,asp) (default: )
  -ac                   Automatically calibrate filtering options (default: False)
  -sr, --store_response
                        Store found HTTP responses (default: False)
  -srd STORE_RESPONSE_DIR, --store_response_dir STORE_RESPONSE_DIR
                        Output directory (default: None)
  -fe FILTER_REGEX, --filter-regex FILTER_REGEX
                        Filter responses with the specified regex (Example: -fe admin) (default: None)
  -json                 store output in JSONL(ines) format (default: False)
  -f, --follow_redirects
                        Follow HTTP redirects (same host only) (default: False)
  -H HEADER, --header HEADER
                        Add custom HTTP request header, support multiple flags (Example: -H "Referer: example.com" -H "Accept: */*") (default: None)
  --proxy PROXY         proxy ip:port (default: None)
  --max_response_size MAX_RESPONSE_SIZE
                        Maximum response size in bytes (default: 250000)
  --max_errors MAX_ERRORS
                        Maximum errors before url exclude (default: 5)
  -t THREADS, --threads THREADS
                        Number of threads (keep number of threads less than the number of hosts) (default: 10)
  -ua USER_AGENT, --user_agent USER_AGENT
                        User agent (default: Mozilla/5.0 (compatible; pathbuster/0.1; +https://github.com/rivalsec/pathbuster))
  --stats_interval STATS_INTERVAL
                        number of seconds to wait between showing a statistics update (default: 60)
  -maxr MAX_REDIRECTS, --max_redirects MAX_REDIRECTS
                        Max number of redirects to follow (default: 5)
```
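A combined example using several of the options above (all values are placeholders):

```
pathbuster -u urls.txt -p wordlist.txt -x php,asp -H "Referer: example.com" --proxy 127.0.0.1:8080 -t 20
```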
--------------------------------------------------------------------------------
/pathbuster/pathbuster.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import requests
import argparse
import threading
from requests.packages import urllib3
from io import BytesIO
import os
import urllib.parse
import sys
import time
import re
from classes.config import Config
from classes.response import Response
from utils.common import random_str


urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

get_work_locker = threading.Lock()
print_locker = threading.Lock()
preflight_iter = None
task_iter = None
preflight_samples = {}  # for preflight results
err_table = dict()
uniq_locs = set()


# global settings
conf = Config()


def work_prod(urls, paths, extensions=[''], update_stats=False):
    for path in paths:
        for ext in extensions:
            p = path.lstrip('/')
            if ext:
                p += f".{ext.lstrip('.')}"
            if update_stats:
                stats['path'] = p
            for url in urls:
                if update_stats:
                    stats['reqs_done'] += 1
                if url in err_table and err_table[url] >= conf.max_errors:
                    continue
                yield (url.rstrip('/'), p)


def truncated_stream_res(s: requests.Response, max_size: int):
    bytes_read = 0
    with BytesIO() as buf:
        for chunk in s.iter_content(None, False):
            bytes_read += buf.write(chunk)
            if bytes_read > max_size:
                break
        r = buf.getvalue()
    return r


def process_url(url, parent=None):
    with requests.request(conf.http_method, url, headers=conf.headers, timeout=conf.timeout, verify=False, stream=True, allow_redirects=False, proxies=conf.proxies) as s:
        body = truncated_stream_res(s, conf.max_response_size)
        return Response(url, s.status_code, s.reason, body, s.headers, parent_url=parent)


def lprint(s, **kwargs):
    with print_locker:
        print(s, **kwargs)


def save_res(s: Response):
    fn = f'{conf.res_dir}/{s.scheme}_{s.host}.txt'
    with print_locker:
        with open(fn, "a") as f:
            f.write(str(s) + "\n")
        with open(f'{conf.res_dir}/_{s.status}.txt', 'a') as f:
            f.write(str(s) + '\n')
    if conf.store_response and s.bodylen:
        site_dir = f'{conf.res_dir}/responses/{s.scheme}_{s.host}'
        res_fn = f'{site_dir}/{s.path_hash}.txt'
        if not os.path.exists(site_dir):
            os.mkdir(site_dir)
        with print_locker:
            with open(f'{conf.res_dir}/_index.txt', 'a') as f:
                f.write(f'{res_fn}\t{s.url}\n')
        with open(res_fn, 'wb') as f:
            f.write(f'HTTP/2 {s.status} {s.reason}\n'.encode())
            for k, v in s.headers.items():
                # skipped because it breaks nuclei's parser in passive mode
                if k.title() == 'Transfer-Encoding':
                    continue
                f.write(f'{k.title()}: {v}\n'.encode())
            f.write('\n'.encode())
            f.write(s.body)


def preflight_worker():
    while True:
        with get_work_locker:
            try:
                url, path = next(preflight_iter)
            except StopIteration:
                return

        try:
            res = process_url(f'{url}/{path}', url)
        except Exception as e:
            err_table[url] = err_table.get(url, 0) + 1
            # lprint(str(e), file=sys.stderr)
            continue

        # collect samples (status code, body metrics) for future comparison
        # if the response status of the random url is not excluded by settings
        if res.status not in conf.exclude_codes:
            if url not in preflight_samples:
                preflight_samples[url] = []

            if len(preflight_samples[url]) == 0 or samples_diff(res, url):
                lprint(f"{res} status code not excluded, add to preflight samples", file=sys.stderr)
                preflight_samples[url].append(res)


def samples_diff(res: Response, url: str):
    """Does res differ from ALL samples recorded for url?"""
    for sample in preflight_samples.get(url, []):
        if res.is_similar(sample):
            return False
    return True


def result_valid(res: Response):
    if res.status in conf.exclude_codes:
        return False

    if conf.filter_regex:
        if re.search(conf.filter_regex, res.body.decode('utf-8', 'ignore')):
            res.add_meta(f"{conf.filter_regex} match")
        else:
            return False

    # auto-calibration: compare against the preflight samples, if any
    if len(preflight_samples) > 0:
        if samples_diff(res, res.parent_url):
            res.add_meta('(preflight differ)')
        else:
            return False

    # passed all filters
    return True
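
# Editor's illustration of the filter order above, mirroring
# tests/test_main.py: with conf.exclude_codes = [301] and
# conf.filter_regex = 'Admin', a 200 response whose body lacks 'Admin'
# fails the regex filter, a 404 containing 'Admin' passes (404 is not
# excluded in that test), and a 301 is rejected before the regex runs.
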
def worker_process(url, parent, redirect_count=0):
    try:
        res = process_url(url, parent)
    except requests.exceptions.RequestException as e:
        err_table[url] = err_table.get(url, 0) + 1
        # lprint(str(e))
        return

    if result_valid(res):
        if conf.json_print:
            lprint(res.to_json(conf.store_response))
        else:
            lprint(f"{res}")
        if conf.res_dir:
            save_res(res)
        # follow same-host redirects on valid results
        if res.location and conf.follow_redirects and redirect_count < conf.max_redirects:
            if res.location.startswith('http://') or res.location.startswith('https://'):
                location = res.location
            else:
                location = urllib.parse.urljoin(res.url, res.location)

            loc_p = urllib.parse.urlparse(location)
            loc_wo_query = f'{loc_p.scheme}://{loc_p.netloc}{loc_p.path}'
            if loc_p.netloc == res.host and loc_wo_query not in uniq_locs:
                redirect_count += 1
                uniq_locs.add(loc_wo_query)
                worker_process(location, parent, redirect_count)


def worker():
    while True:
        with get_work_locker:
            try:
                url, path = next(task_iter)
            except StopIteration:
                return
        urlpath = f"{url}/{path}"
        worker_process(urlpath, url)


def statworker(looptime=5):
    while True:
        time.sleep(looptime)
        time_passed = time.time() - stats['starttime']
        req_left = stats['allreqs'] - stats['reqs_done']
        vel = int(stats["reqs_done"] / time_passed * 60)
        try:
            timeleft = req_left // vel
        except ZeroDivisionError:
            timeleft = 0
        lprint(f'[Statistics] path: {stats["path"]}, {stats["reqs_done"]}/{stats["allreqs"]} requests, speed {vel} req/min (about {timeleft} min left)', file=sys.stderr)


def start_thread_pool(threads, worker):
    workers = []
    for i in range(threads):
        t = threading.Thread(target=worker, name='worker {}'.format(i), args=())
        t.start()
        workers.append(t)

    for w in workers:
        w.join()


def parse_args(sys_args):
    global conf

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     description='multiple hosts web path scanner')
    parser.add_argument('-m', '--http_method', type=str, help='HTTP method to use', default='GET')
    parser.add_argument('-u', '--urls_file', type=argparse.FileType(mode='r', encoding='UTF-8'), help='urls file (base url)', required=True)
    parser.add_argument('-p', '--paths_file', type=argparse.FileType(mode='r', encoding='UTF-8'), help='paths wordlist', required=True)
    parser.add_argument('-e', '--exclude_codes', type=str, help="Exclude status codes, separated by commas (Example: 404,403)", default="404")
    parser.add_argument('-x', '--extensions', type=str, help="Extension list separated by commas (Example: php,asp)", default="")
    parser.add_argument('-ac', action='store_true', help='Automatically calibrate filtering options')
    parser.add_argument('-sr', '--store_response', action='store_true', help='Store found HTTP responses')
    parser.add_argument('-srd', '--store_response_dir', type=str, help='Output directory')
    parser.add_argument('-fe', '--filter-regex', type=str, help='Filter responses with the specified regex (Example: -fe admin)', default=None)
    parser.add_argument('-json', action='store_true', help='store output in JSONL(ines) format')
    parser.add_argument('-f', '--follow_redirects', action='store_true', help='Follow HTTP redirects (same host only)')
    parser.add_argument('-H', '--header', action='append', help="Add custom HTTP request header, support multiple flags (Example: -H \"Referer: example.com\" -H \"Accept: */*\")")
    parser.add_argument('--proxy', type=str, help='proxy ip:port', default=None)
    parser.add_argument('--max_response_size', type=int, help='Maximum response size in bytes', default=250000)
    parser.add_argument('--max_errors', type=int, help='Maximum errors before url exclude', default=5)
    parser.add_argument('-t', '--threads', type=int, help='Number of threads (keep number of threads less than the number of hosts)', default=10)
    parser.add_argument('-ua', '--user_agent', type=str, help="User agent", default="Mozilla/5.0 (compatible; pathbuster/0.1; +https://github.com/rivalsec/pathbuster)")
    parser.add_argument('--stats_interval', type=int, help="number of seconds to wait between showing a statistics update", default=60)
    parser.add_argument('-maxr', '--max_redirects', type=int, help='Max number of redirects to follow', default=5)

    args = parser.parse_args(sys_args)

    if args.proxy:
        conf.proxies = {
            'http': 'http://' + args.proxy,
            'https': 'http://' + args.proxy
        }

    conf.headers["User-Agent"] = args.user_agent
    if args.header:
        for h in args.header:
            k, v = [x.strip() for x in h.split(':', maxsplit=1)]
            conf.headers[k] = v

    if args.exclude_codes:
        conf.exclude_codes = [int(x.strip()) for x in args.exclude_codes.strip(',').split(',')]

    if args.extensions:
        conf.extensions.extend([x.strip() for x in args.extensions.strip().strip(',').split(',')])

    conf.max_errors = args.max_errors
    conf.http_method = args.http_method
    conf.max_response_size = args.max_response_size
    conf.store_response = args.store_response
    conf.filter_regex = args.filter_regex
    conf.json_print = args.json
    conf.follow_redirects = args.follow_redirects
    conf.max_redirects = args.max_redirects
    conf.res_dir = args.store_response_dir
    conf.stats_interval = args.stats_interval
    return args
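
# Editor's note: parse_args takes an argv-style list, which is how the
# tests drive it, e.g.
#   parse_args(['-u', 'urls.txt', '-p', 'wordlist.txt', '-e', '401,404'])
# (the file names here are placeholders; argparse.FileType opens the
# -u/-p arguments immediately, so the files must exist).
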
def auto_calibration(urls, threads):
    global preflight_iter
    print("Collecting auto-calibration samples...", file=sys.stderr)
    # auto calibration like in ffuf
    acStrings = [
        random_str(16),
        random_str(16) + '/',
        '.' + random_str(16) + '/',
        '.htaccess' + random_str(16),
        'admin' + random_str(16) + '/'
    ]
    acStrings.extend([random_str(16) + '.' + ext for ext in conf.extensions if ext])
    preflight_iter = work_prod(urls, acStrings)
    start_thread_pool(threads, preflight_worker)

def fuzz(urls, paths, extensions, threads, ac=False):
    global task_iter, stats

    if conf.res_dir:
        if not os.path.exists(conf.res_dir):
            os.mkdir(conf.res_dir)
        if conf.store_response and not os.path.exists(conf.res_dir + "/responses"):
            os.mkdir(conf.res_dir + "/responses")

    if ac:
        auto_calibration(urls, threads)

    # stats
    stats = {
        "allreqs": len(urls) * len(paths) * len(extensions),
        "reqs_done": 0,
        "path": "",
        "starttime": time.time(),
    }
    st = threading.Thread(target=statworker, daemon=True, name='StatThread', args=(conf.stats_interval,))
    st.start()

    task_iter = work_prod(urls, paths, extensions, True)
    start_thread_pool(threads, worker)


def main():
    global stats, task_iter
    args = parse_args(sys.argv[1:])

    urls = [l.strip() for l in args.urls_file]
    args.urls_file.close()

    paths = [l.strip() for l in args.paths_file]
    args.paths_file.close()

    fuzz(urls, paths, conf.extensions, args.threads, args.ac)

    print('THE END', file=sys.stderr)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
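A minimal programmatic sketch (editor's addition; the URL and paths are placeholders) showing how the pieces above compose without the CLI entry point — the same way tests/test_main.py drives the module, extended to a full run:

```python
import pathbuster.pathbuster as pb

pb.conf.exclude_codes = [404]  # same effect as `-e 404`
pb.conf.headers["User-Agent"] = "Mozilla/5.0 (compatible; pathbuster)"
# fuzz(urls, paths, extensions, threads, ac): one path is checked across
# all urls before the next path is taken.
pb.fuzz(['http://example.com'], ['admin/', 'login'], [''], threads=2, ac=False)
```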