├── tests
│   ├── __init__.py
│   ├── test_req_res.py
│   ├── test_common.py
│   └── test_main.py
├── requirements.txt
├── pathbuster
│   ├── __init__.py
│   ├── utils
│   │   └── common.py
│   ├── classes
│   │   ├── config.py
│   │   └── response.py
│   └── pathbuster.py
├── pyproject.toml
├── .github
│   └── workflows
│       └── publish-to-pypi.yml
├── LICENSE
├── .gitignore
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
requests
pytest
--------------------------------------------------------------------------------
/pathbuster/__init__.py:
--------------------------------------------------------------------------------
import sys
import os

# Put the package directory itself on sys.path so that the flat imports used
# inside the package (e.g. `from utils.common import ...`) keep resolving
# when the package is imported as `pathbuster`.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
--------------------------------------------------------------------------------
/tests/test_req_res.py:
--------------------------------------------------------------------------------
from pathbuster.pathbuster import Response


def test_init():
    bodyb = b'one two\nthree four'
    headers = {'Cookie': 'test=1234;'}
    parent_url = 'http://example.com'
    res = Response('http://example.com/admin', 200, 'OK', bodyb, headers, parent_url, 'meta1')
    assert len(res.headers) == 1
    assert res.headers['Cookie'] == 'test=1234;'
    assert res.strbody == 'one two\nthree four'
    assert res.bodylen == len(bodyb)
    assert res.scheme == 'http'
    assert res.parent_url == parent_url
    assert res.bodylines == 2
    assert res.bodywords == 3
--------------------------------------------------------------------------------
/pathbuster/utils/common.py:
--------------------------------------------------------------------------------
import string
import random
from hashlib import md5


def random_str(length=30):
    """Generate a random alphanumeric string of the given length."""
    letters = string.ascii_letters + string.digits
    return ''.join(random.choice(letters) for _ in range(length))


def count_lines(text: str):
    """Number of lines in text; an empty string has zero lines."""
    if len(text) > 0:
        return text.count("\n") + 1
    else:
        return 0


def count_words(text: str):
    """Number of space-separated words; newlines are not treated as
    separators (see the TODOs in tests/test_common.py)."""
    if len(text) > 0:
        return text.count(" ") + 1
    else:
        return 0


def md5str(s):
    return md5(s.encode()).hexdigest()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "pathbuster"
version = "0.3.0"
authors = [
  { name="Vladimir Sopernikov" },
]
description = "PathBuster - multiple hosts Web path scanner"
readme = "README.md"
requires-python = ">=3.7"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "requests"
]

[project.urls]
"Homepage" = "https://github.com/rivalsec/pathbuster"
"Bug Tracker" = "https://github.com/rivalsec/pathbuster/issues"

[project.scripts]
pathbuster = "pathbuster.pathbuster:main"
--------------------------------------------------------------------------------
/tests/test_common.py:
--------------------------------------------------------------------------------
from pathbuster.utils.common import count_lines, count_words


def test_count_lines():
    assert count_lines("") == 0  # empty string has zero lines
    assert count_lines("Hello\nWorld") == 2  # two lines separated by a newline
    assert count_lines("Hello\n\nWorld") == 3  # three lines, one of them empty
    assert count_lines("Hello World") == 1  # single line without newlines


def test_count_words():
    assert count_words("") == 0  # empty string has zero words
    assert count_words("Hello World") == 2  # two words separated by a space
    assert count_words("Hello\nWorld") == 1  # TODO: fix? two words separated by a newline
    assert count_words("Hello\n\nWorld") == 1  # TODO: fix? two words with an empty line in between
--------------------------------------------------------------------------------
/pathbuster/classes/config.py:
--------------------------------------------------------------------------------
class Config:
    """Global settings shared across worker threads."""
    __slots__ = [
        "proxies", "timeout", "headers", "max_errors", "http_method",
        "max_response_size", "store_response", "filter_regex",
        "json_print", "follow_redirects", "max_redirects", "exclude_codes",
        "extensions", "stats", "res_dir", "stats_interval"
    ]

    def __init__(self):
        self.proxies = None
        self.timeout = 30
        self.headers = dict()
        self.max_errors = 5
        self.http_method = 'GET'
        self.max_response_size = 250000
        self.store_response = False
        self.filter_regex = None
        self.json_print = False
        self.follow_redirects = False
        self.max_redirects = 3
        self.exclude_codes = []
        self.extensions = ['']
        self.stats = None
        self.res_dir = None
        self.stats_interval = 60
--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
name: Publish to PyPI and TestPyPI
on: push
jobs:
  build-n-publish:
    name: Build and publish to PyPI and TestPyPI
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'
      - name: Install pypa/build
        run: |
          python -m pip install --upgrade pip
          pip install build
      - name: Build package
        run: python -m build
      # - name: Publish distribution to Test PyPI
      #   uses: pypa/gh-action-pypi-publish@release/v1
      #   with:
      #     password: ${{ secrets.TEST_PYPI_API_TOKEN }}
      #     repository-url: https://test.pypi.org/legacy/
      #     skip-existing: true
      - name: Publish distribution to PyPI (only on tags)
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Vladimir Sopernikov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/pathbuster/classes/response.py:
--------------------------------------------------------------------------------
import json
from utils.common import count_lines, count_words, md5str
import urllib.parse


class Response:
    __slots__ = [
        "base_url", "url", "status", "reason", "headers",
        "parent_url", "bodylen", "strbody",
        "bodywords", "bodylines", "meta", "location",
        "scheme", "host", "path_hash", "body"
    ]

    def __init__(self, url, status, reason, body, headers, parent_url, meta=None):
        self.base_url = None
        self.url = url
        self.status = status
        self.reason = reason
        self.headers = headers
        self.parent_url = parent_url
        # prefer the declared Content-Length: the body may have been truncated
        if "Content-Length" in headers:
            self.bodylen = int(headers["Content-Length"])
        else:
            self.bodylen = len(body)
        self.strbody = body.decode('utf-8', errors='ignore')
        self.bodywords = count_words(self.strbody)
        self.bodylines = count_lines(self.strbody)
        self.meta = []
        if meta:
            self.meta.append(meta)
        if 'location' in headers:
            self.location = headers['location']
        else:
            self.location = None
        up = urllib.parse.urlparse(url)
        self.scheme = up[0]
        self.host = up[1]
        self.path_hash = md5str(up[2])
        self.body = body

    def add_meta(self, s):
        self.meta.append(s)

    def __str__(self):
        s = f"{self.url}\t{self.status}\tBytes:{self.bodylen}/Lines:{self.bodylines}/Words:{self.bodywords}"
        if self.location:
            s += f"\t-> {self.location}"
        if self.meta:
            meta = ', '.join(self.meta)
            s += f"\t{meta}"
        return s

    def is_similar(self, other: 'Response'):
        """Two responses count as the same page when status code,
        word count and line count all match."""
        return (self.status == other.status
                and self.bodywords == other.bodywords
                and self.bodylines == other.bodylines)

    def to_json(self, store_response=False):
        jkeys = ['url', 'status', 'reason', 'parent_url', 'meta', 'scheme', 'host']
        if store_response:
            jkeys.append('strbody')
        jres = {k: getattr(self, k) for k in jkeys}
        return json.dumps(jres)
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/


test/
pathbuster-res/
.vscode/
.DS_Store
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
import pathbuster.pathbuster as pathbuster
from pathbuster.classes.response import Response

required_args = [
    '-u', './test/testurls',
    '-p', './test/testwordlist',
]


def test_filter_regex():
    pathbuster.conf.filter_regex = 'Admin'
    pathbuster.conf.exclude_codes = [301]

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'Member etc', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == False

    res2 = Response(url='http://example.com/test', status=404, reason='OK', body=b'bla \n Admin Panel ', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == True

    # status code is excluded
    res3 = Response(url='http://example.com/test', status=301, reason='OK', body=b'bla \n Admin Panel ', headers={}, parent_url=None)
    assert pathbuster.result_valid(res3) == False


def test_not_ac():
    # only the status code filter is active
    args = required_args.copy()
    args.extend(['-e', '401,404,400'])
    pathbuster.parse_args(args)

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == True

    res2 = Response(url='http://example.com/test', status=400, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == False

    res3 = Response(url='http://example.com/test', status=401, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res3) == False


def test_empty_e():
    pathbuster.conf.exclude_codes = []

    res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res1) == True

    res2 = Response(url='http://example.com/test', status=400, reason='NOT OK', body=b'', headers={}, parent_url=None)
    assert pathbuster.result_valid(res2) == True


def test_ac():
    args = required_args.copy()
    args.extend(['-ac'])
    pathbuster.parse_args(args)
    pf_res1 = Response(url='http://example.com/test', status=200, reason='OK', body=b'1 2 3\n4 5', headers={}, parent_url='http://example.com')
    pf_res2 = Response(url='http://example.com/redirect', status=301, reason='OK', body=b'1 2 3\n4 5\n6 7', headers={}, parent_url='http://example.com')
    pathbuster.preflight_samples = {
        'http://example.com': [pf_res1, pf_res2],
    }
    # responses matching a preflight sample are filtered out
    assert pathbuster.result_valid(pf_res1) == False
    assert pathbuster.result_valid(pf_res2) == False
    # responses differing from all samples pass
    res = Response(url='http://example.com/test2', status=200, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example.com')
    assert pathbuster.result_valid(res) == True
    res2 = Response(url='http://example.com/test22', status=301, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example.com')
    assert pathbuster.result_valid(res2) == True
    res3 = Response(url='http://example2.com/test222', status=200, reason='OK', body=b'1 2 3\n4 5 6', headers={}, parent_url='http://example2.com')
    assert pathbuster.result_valid(res3) == True
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PathBuster - multiple hosts Web path scanner

This scanner is designed to check paths on multiple hosts at the same time:
one path is taken and checked across all hosts in parallel, then the next path is taken, and so on (see the sketch after the list below).

This gives us the following benefits:
- no heavy load on any single host (as opposed to checking many paths in several threads against one host);
- a lower chance of the scanner being banned by a WAF;
- time saved, since there is no need to run the scanner for each host separately;
- a large number of results at once.
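
A minimal sketch of this scan order (a simplified view of `work_prod` in `pathbuster/pathbuster.py`; hostnames are hypothetical):

```python
urls = ["http://a.example", "http://b.example"]
paths = ["admin", "backup"]

# path-major order: every host sees a given path before the next path starts
for path in paths:
    for url in urls:
        print(f"{url.rstrip('/')}/{path}")
# http://a.example/admin
# http://b.example/admin
# http://a.example/backup
# http://b.example/backup
```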

For convenience, the results are written to two kinds of files at once in the pathbuster-res folder:
- a per-host file (all paths found for that host);
- a per-status-code file (all 200 responses, all 301 responses, etc.).
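
With `-srd pathbuster-res`, the output directory looks roughly like this (hypothetical hosts; the naming follows `save_res` in `pathbuster/pathbuster.py`):

```
pathbuster-res/
├── http_example.com.txt   # all findings for http://example.com
├── _200.txt               # all responses with status 200, across hosts
├── _301.txt
├── _index.txt             # response file -> URL map (with --store_response)
└── responses/             # raw HTTP responses (with --store_response)
    └── http_example.com/
        └── <md5 of path>.txt
```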

Before scanning starts (with `-ac` enabled), the program requests random paths on each server and, if the response status code is not excluded by the settings, records a sample of the response (status code, line and word counts) for later comparison.
This filters out a large number of false positives (for example, when a server answers 200 OK to every request),
while still surfacing responses that differ from the recorded samples even when the status code is the same.
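
The comparison itself is cheap: two responses count as "the same page" when status code, line count and word count all match. A minimal sketch, mirroring `Response.is_similar` in `pathbuster/classes/response.py`:

```python
def is_similar(a, b):
    return (a.status, a.bodylines, a.bodywords) == (b.status, b.bodylines, b.bodywords)
```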

## Installation:
```
pip3 install -U pathbuster
```

## Basic usage:
```
pathbuster -u /path/to/URLS_FILE -p /path/to/wordlist -srd pathbuster-res
```
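
`URLS_FILE` holds one base URL per line, for example (hypothetical hosts):

```
https://example.com
https://sub.example.org:8443
```

and the wordlist one path per line:

```
admin
backup.zip
.git/config
```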

## Passive check with Nuclei
```
pathbuster -u /path/to/URLS_FILE -p /path/to/wordlist --store_response -srd pathbuster-res
```
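
The responses stored under `pathbuster-res/responses` can then be scanned offline with nuclei's passive mode. A possible invocation (the exact flags are an assumption, check `nuclei -h` for your version):

```
nuclei -passive -target pathbuster-res/responses
```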

## Options:
```
  -h, --help            show this help message and exit
  -m HTTP_METHOD, --http_method HTTP_METHOD
                        HTTP method to use (default: GET)
  -u URLS_FILE, --urls_file URLS_FILE
                        urls file (base url) (default: None)
  -p PATHS_FILE, --paths_file PATHS_FILE
                        paths wordlist (default: None)
  -e EXCLUDE_CODES, --exclude_codes EXCLUDE_CODES
                        Exclude status codes, separated by commas (Example: 404,403) (default: 404)
  -x EXTENSIONS, --extensions EXTENSIONS
                        Extension list separated by commas (Example: php,asp) (default: )
  -ac                   Automatically calibrate filtering options (default: False)
  -sr, --store_response
                        Store found HTTP responses (default: False)
  -srd STORE_RESPONSE_DIR, --store_response_dir STORE_RESPONSE_DIR
                        Output directory (default: None)
  -fe FILTER_REGEX, --filter-regex FILTER_REGEX
                        filter response with specified regex (-fe admin) (default: None)
  -json                 store output in JSONL(ines) format (default: False)
  -f, --follow_redirects
                        Follow HTTP redirects (same host only) (default: False)
  -H HEADER, --header HEADER
                        Add custom HTTP request header, support multiple flags (Example: -H "Referer: example.com" -H "Accept: */*") (default: None)
  --proxy PROXY         proxy ip:port (default: None)
  --max_response_size MAX_RESPONSE_SIZE
                        Maximum response size in bytes (default: 250000)
  --max_errors MAX_ERRORS
                        Maximum errors before url exclude (default: 5)
  -t THREADS, --threads THREADS
                        Number of threads (keep number of threads less than the number of hosts) (default: 10)
  -ua USER_AGENT, --user_agent USER_AGENT
                        User agent (default: Mozilla/5.0 (compatible; pathbuster/0.1; +https://github.com/rivalsec/pathbuster))
  --stats_interval STATS_INTERVAL
                        number of seconds to wait between showing a statistics update (default: 60)
  -maxr MAX_REDIRECTS, --max_redirects MAX_REDIRECTS
                        Max number of redirects to follow (default: 5)
```
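
With `-json`, each finding is printed as one JSON line whose keys mirror `Response.to_json` (the values below are hypothetical):

```
{"url": "http://example.com/admin", "status": 200, "reason": "OK", "parent_url": "http://example.com", "meta": ["(preflight differ)"], "scheme": "http", "host": "example.com"}
```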
--------------------------------------------------------------------------------
/pathbuster/pathbuster.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

import requests
import argparse
import threading
from requests.packages import urllib3
from io import BytesIO
import os
import urllib.parse
import sys
import time
import re
from classes.config import Config
from classes.response import Response
from utils.common import random_str


urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

get_work_locker = threading.Lock()
print_locker = threading.Lock()
preflight_iter = None
task_iter = None
preflight_samples = {}  # auto-calibration samples, keyed by base url
err_table = dict()
uniq_locs = set()


# global settings
conf = Config()


def work_prod(urls, paths, extensions=[''], update_stats=False):
    """Yield (url, path) pairs path-major: each path (with each extension)
    is fanned out across all hosts before the next path is taken."""
    for path in paths:
        for ext in extensions:
            p = path.lstrip('/')
            if ext:
                p += f".{ext.lstrip('.')}"
            if update_stats:
                stats['path'] = p
            for url in urls:
                if update_stats:
                    stats['reqs_done'] += 1
                if url in err_table and err_table[url] >= conf.max_errors:
                    continue
                yield (url.rstrip('/'), p)


def truncated_stream_res(s: requests.Response, max_size: int):
    """Read a streamed response body, stopping once max_size bytes are read."""
    bytes_read = 0
    with BytesIO() as buf:
        for chunk in s.iter_content(None, False):
            bytes_read += buf.write(chunk)
            if bytes_read > max_size:
                break
        r = buf.getvalue()
    return r


def process_url(url, parent=None):
    with requests.request(conf.http_method, url, headers=conf.headers, timeout=conf.timeout, verify=False, stream=True, allow_redirects=False, proxies=conf.proxies) as s:
        body = truncated_stream_res(s, conf.max_response_size)
        return Response(url, s.status_code, s.reason, body, s.headers, parent_url=parent)


def lprint(s, **kwargs):
    with print_locker:
        print(s, **kwargs)


def save_res(s: Response):
    fn = f'{conf.res_dir}/{s.scheme}_{s.host}.txt'
    with print_locker:
        with open(fn, "a") as f:
            f.write(str(s) + "\n")
        with open(f'{conf.res_dir}/_{s.status}.txt', 'a') as f:
            f.write(str(s) + '\n')
    if conf.store_response and s.bodylen:
        site_dir = f'{conf.res_dir}/responses/{s.scheme}_{s.host}'
        res_fn = f'{site_dir}/{s.path_hash}.txt'
        # exist_ok avoids a race between worker threads creating the same dir
        os.makedirs(site_dir, exist_ok=True)
        with print_locker:
            with open(f'{conf.res_dir}/_index.txt', 'a') as f:
                f.write(f'{res_fn}\t{s.url}\n')
        with open(res_fn, 'wb') as f:
            f.write(f'HTTP/2 {s.status} {s.reason}\n'.encode())
            for k, v in s.headers.items():
                # removed because of a nuclei parse error in passive mode
                if k.title() == 'Transfer-Encoding':
                    continue
                f.write(f'{k.title()}: {v}\n'.encode())
            f.write('\n'.encode())
            f.write(s.body)


def preflight_worker():
    while True:
        with get_work_locker:
            try:
                url, path = next(preflight_iter)
            except StopIteration:
                return

        try:
            res = process_url(f'{url}/{path}', url)
        except Exception as e:
            err_table[url] = err_table.get(url, 0) + 1
            # lprint(str(e), file=sys.stderr)
            continue

        # collect samples (status code, line and word counts) for future
        # comparison if the random url's response status is not excluded by settings
        if res.status not in conf.exclude_codes:
            if url not in preflight_samples:
                preflight_samples[url] = []

            if len(preflight_samples[url]) == 0 or samples_diff(res, url):
                lprint(f"{res} status code not excluded, add to preflight samples", file=sys.stderr)
                preflight_samples[url].append(res)


def samples_diff(res: Response, url: str):
    """Does res differ from ALL recorded samples for this url?"""
    for sample in preflight_samples.get(url, []):
        if res.is_similar(sample):
            return False
    return True


def result_valid(res: Response):
    if res.status in conf.exclude_codes:
        return False

    if conf.filter_regex:
        if re.search(conf.filter_regex, res.body.decode('utf-8', 'ignore')):
            res.add_meta(f"{conf.filter_regex} match")
        else:
            return False

    # only populated when auto-calibration (-ac) is active
    if len(preflight_samples) > 0:
        if samples_diff(res, res.parent_url):
            res.add_meta('(preflight differ)')
        else:
            return False

    # passed all filters
    return True


def worker_process(url, parent, redirect_count=0):
    try:
        res = process_url(url, parent)
    except requests.exceptions.RequestException as e:
        err_table[url] = err_table.get(url, 0) + 1
        # lprint(str(e))
        return

    if result_valid(res):
        if conf.json_print:
            lprint(res.to_json(conf.store_response))
        else:
            lprint(f"{res}")
        if conf.res_dir:
            save_res(res)
        # follow same-host redirects on valid results
        if res.location and conf.follow_redirects and redirect_count < conf.max_redirects:
            if res.location.startswith('http://') or res.location.startswith('https://'):
                location = res.location
            else:
                location = urllib.parse.urljoin(res.url, res.location)

            loc_p = urllib.parse.urlparse(location)
            loc_wo_query = f'{loc_p.scheme}://{loc_p.netloc}{loc_p.path}'
            if loc_p.netloc == res.host and loc_wo_query not in uniq_locs:
                redirect_count += 1
                uniq_locs.add(loc_wo_query)
                worker_process(location, parent, redirect_count)


def worker():
    while True:
        with get_work_locker:
            try:
                url, path = next(task_iter)
            except StopIteration:
                return
        urlpath = f"{url}/{path}"
        worker_process(urlpath, url)


def statworker(looptime=5):
    while True:
        time.sleep(looptime)
        time_passed = time.time() - stats['starttime']
        req_left = stats['allreqs'] - stats['reqs_done']
        vel = int(stats["reqs_done"] / time_passed * 60)
        try:
            timeleft = req_left // vel
        except ZeroDivisionError:
            timeleft = 0
        lprint(f'[Statistics] path: {stats["path"]}, {stats["reqs_done"]}/{stats["allreqs"]} requests, speed {vel} req/min (about {timeleft} min left)', file=sys.stderr)


def start_thread_pool(threads, worker_fn):
    workers = []
    for i in range(threads):
        t = threading.Thread(target=worker_fn, name=f'worker {i}', args=())
        t.start()
        workers.append(t)

    for w in workers:
        w.join()


def parse_args(sys_args):
    global conf

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     description='multiple hosts web path scanner')
    parser.add_argument('-m', '--http_method', type=str, help='HTTP method to use', default='GET')
    parser.add_argument('-u', '--urls_file', type=argparse.FileType(mode='r', encoding='UTF-8'), help='urls file (base url)', required=True)
    parser.add_argument('-p', '--paths_file', type=argparse.FileType(mode='r', encoding='UTF-8'), help='paths wordlist', required=True)
    parser.add_argument('-e', '--exclude_codes', type=str, help="Exclude status codes, separated by commas (Example: 404,403)", default="404")
    parser.add_argument('-x', '--extensions', type=str, help="Extension list separated by commas (Example: php,asp)", default="")
    parser.add_argument('-ac', action='store_true', help='Automatically calibrate filtering options')
    parser.add_argument('-sr', '--store_response', action='store_true', help='Store found HTTP responses')
    parser.add_argument('-srd', '--store_response_dir', type=str, help='Output directory')
    parser.add_argument('-fe', '--filter-regex', type=str, help='filter response with specified regex (-fe admin)', default=None)
    parser.add_argument('-json', action='store_true', help='store output in JSONL(ines) format')
    parser.add_argument('-f', '--follow_redirects', action='store_true', help='Follow HTTP redirects (same host only)')
    parser.add_argument('-H', '--header', action='append', help="Add custom HTTP request header, support multiple flags (Example: -H \"Referer: example.com\" -H \"Accept: */*\")")
    parser.add_argument('--proxy', type=str, help='proxy ip:port', default=None)
    parser.add_argument('--max_response_size', type=int, help='Maximum response size in bytes', default=250000)
    parser.add_argument('--max_errors', type=int, help='Maximum errors before url exclude', default=5)
    parser.add_argument('-t', '--threads', type=int, help='Number of threads (keep number of threads less than the number of hosts)', default=10)
    parser.add_argument('-ua', '--user_agent', type=str, help="User agent", default="Mozilla/5.0 (compatible; pathbuster/0.1; +https://github.com/rivalsec/pathbuster)")
    parser.add_argument('--stats_interval', type=int, help="number of seconds to wait between showing a statistics update", default=60)
    parser.add_argument('-maxr', '--max_redirects', type=int, help='Max number of redirects to follow', default=5)

    args = parser.parse_args(sys_args)

    if args.proxy:
        conf.proxies = {
            'http': 'http://' + args.proxy,
            'https': 'http://' + args.proxy
        }

    conf.headers["User-Agent"] = args.user_agent
    if args.header:
        for h in args.header:
            k, v = [x.strip() for x in h.split(':', maxsplit=1)]
            conf.headers[k] = v

    if args.exclude_codes:
        conf.exclude_codes = [int(x.strip()) for x in args.exclude_codes.strip(',').split(',')]

    if args.extensions:
        conf.extensions.extend([x.strip() for x in args.extensions.strip().strip(',').split(',')])

    conf.max_errors = args.max_errors
    conf.http_method = args.http_method
    conf.max_response_size = args.max_response_size
    conf.store_response = args.store_response
    conf.filter_regex = args.filter_regex
    conf.json_print = args.json
    conf.follow_redirects = args.follow_redirects
    conf.max_redirects = args.max_redirects
    conf.res_dir = args.store_response_dir
    conf.stats_interval = args.stats_interval
    return args


def auto_calibration(urls, threads):
    global preflight_iter
    print("Collecting auto-calibration samples...", file=sys.stderr)
    # auto calibration like in ffuf
    acStrings = [
        random_str(16),
        random_str(16) + '/',
        '.' + random_str(16) + '/',
        '.htaccess' + random_str(16),
        'admin' + random_str(16) + '/'
    ]
    acStrings.extend([random_str(16) + '.' + ext for ext in conf.extensions if ext])
    preflight_iter = work_prod(urls, acStrings)
    start_thread_pool(threads, preflight_worker)


def fuzz(urls, paths, extensions, threads, ac=False):
    global task_iter, stats

    if conf.res_dir:
        if not os.path.exists(conf.res_dir):
            os.mkdir(conf.res_dir)
        if conf.store_response and not os.path.exists(conf.res_dir + "/responses"):
            os.mkdir(conf.res_dir + "/responses")

    if ac:
        auto_calibration(urls, threads)

    # stats
    stats = {
        "allreqs": len(urls) * len(paths) * len(conf.extensions),
        "reqs_done": 0,
        "path": "",
        "starttime": time.time(),
    }
    st = threading.Thread(target=statworker, daemon=True, name='StatThread', args=(conf.stats_interval,))
    st.start()

    task_iter = work_prod(urls, paths, extensions, True)
    start_thread_pool(threads, worker)


def main():
    args = parse_args(sys.argv[1:])

    # skip empty lines so a trailing newline doesn't produce an empty target
    urls = [l.strip() for l in args.urls_file if l.strip()]
    args.urls_file.close()

    paths = [l.strip() for l in args.paths_file if l.strip()]
    args.paths_file.close()

    fuzz(urls, paths, conf.extensions, args.threads, args.ac)

    print('THE END', file=sys.stderr)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------