├── .gitignore ├── BBScan.py ├── LICENSE ├── README.md ├── lib ├── __init__.py ├── cmdline.py ├── cms_fingerprints.py ├── common.py ├── config.py ├── consle_width.py ├── javascript_parser.py └── report.py ├── report └── .gitignore ├── requirements.txt ├── rules ├── black.list ├── change_log.txt ├── compressed_backup_files.txt ├── config_file.txt ├── directory_traversal.txt ├── disabled │ ├── .gitignore │ ├── resin_admin.txt │ └── zabbix_jsrpc_sqli.txt ├── git_and_svn.txt ├── go_pprof_debug.txt ├── graphite_ssrf.txt ├── java_server_faces2.txt ├── java_web_config_files.txt ├── phpinfo_or_apc.txt ├── phpmyadmin.txt ├── sensitive_url.txt ├── shell_script_disclosure.txt ├── source_code_disclosure.txt ├── ssh_sensitive_file.txt ├── test_page.txt ├── tomcat_manager.txt ├── web_editors.txt ├── web_fingerprint_v3.json └── white.list ├── scripts ├── __init__.py ├── disabled │ ├── .gitignore │ ├── __init__.py │ ├── discuz_backup_file.py │ ├── kong_admin_rest_api.py │ ├── mongodb_unauthorized_access.py │ ├── redis_unauthorized_access.py │ ├── smb_ms17010.py │ └── zookeeper_unauth.py ├── is_admin_site.py ├── log_files.py ├── outlook_web_app.py ├── readme.txt ├── scan_by_hostname_or_folder.py ├── sensitive_folders.py ├── tools │ ├── __init__.py │ └── port_scan.py └── wordpress_backup_file.py └── targets └── .gitignore /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | tests/ 6 | temp/ 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | venv/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | .idea/ 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | *.html 60 | -------------------------------------------------------------------------------- /BBScan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | """ 4 | A fast and light-weight web vulnerability scanner. It helps pen-testers pinpoint possibly vulnerable targets from a large number of web servers. 
5 | https://github.com/lijiejie/BBScan 6 | Li JieJie my[at]lijiejie.com https://www.lijiejie.com 7 | """ 8 | 9 | import os 10 | # first, change working dir 11 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 12 | os.chdir(cur_dir) 13 | 14 | import sys 15 | import codecs 16 | import asyncio 17 | import httpx 18 | import re 19 | from bs4 import BeautifulSoup 20 | import warnings 21 | import time 22 | import glob 23 | import ipaddress 24 | import ssl 25 | import traceback 26 | import importlib 27 | import copy 28 | import string 29 | import random 30 | import dns.asyncresolver 31 | from urllib.parse import urlparse 32 | 33 | from lib.common import clear_queue, parse_url, cal_depth, get_domain_sub, is_port_open, scan_given_ports, \ 34 | is_ip_addr, get_dns_resolver, get_http_title, clear_url 35 | from lib.cmdline import parse_args 36 | from lib.report import save_report 37 | import lib.config as conf 38 | from lib.cms_fingerprints import Fingerprint 39 | import hashlib 40 | from lib.javascript_parser import get_urls_in_js_async 41 | 42 | 43 | if hasattr(ssl, '_create_unverified_context'): 44 | ssl._create_default_https_context = ssl._create_unverified_context 45 | 46 | from bs4 import MarkupResemblesLocatorWarning 47 | warnings.filterwarnings('ignore', category=MarkupResemblesLocatorWarning) 48 | 49 | 50 | fingerprint = Fingerprint() 51 | 52 | 53 | class Scanner(object): 54 | def __init__(self, timeout=900): 55 | self.q_results = q_results 56 | self.args = args 57 | self.start_time = time.time() 58 | self.time_out = timeout 59 | self.links_limit = 100 # max number of folders allowed to scan 60 | 61 | async def init(self): 62 | await self._init_rules() 63 | self._init_scripts() 64 | 65 | self.url_queue = asyncio.Queue() # all urls to scan 66 | self.urls_processed = set() # processed urls 67 | self.urls_enqueued = set() # entered queue urls 68 | self.urls_crawled = set() 69 | 70 | self.lock = asyncio.Lock() 71 | self.results = {} 72 | self.log_file = None 73 | self._404_status = -1 74 | self.conn_pool = None 75 | self.index_status, self.index_headers, self.index_html_doc = None, {}, '' 76 | self.scheme, self.host, self.port, self.path = None, None, None, None 77 | self.domain_sub = '' 78 | self.base_url = '' 79 | self.max_depth = 0 80 | self.len_404_doc = 0 81 | self.has_http = None 82 | self.ports_open = None 83 | self.ports_closed = None 84 | self.no_scripts = None 85 | self.status_502_count = 0 86 | self.timeout_count = 0 87 | self.timeout_scan_aborted = False 88 | self.fingerprint_check = True 89 | self.js_urls = [] 90 | self.index_has_reported = False 91 | self.urls_regex_found = set() 92 | 93 | async def print_msg(self, msg): 94 | await self.q_results.put(msg) 95 | 96 | def reset_scanner(self): 97 | self.start_time = time.time() 98 | clear_queue(self.url_queue) 99 | self.urls_processed.clear() 100 | self.urls_enqueued.clear() 101 | self.urls_crawled.clear() 102 | self.results.clear() 103 | self.log_file = None 104 | self._404_status = -1 105 | # self.conn_pool = None # Bug Fixed, shouldn't set to None right here, used pool can not be closed 106 | self.index_status, self.index_headers, self.index_html_doc = None, {}, '' 107 | self.scheme, self.host, self.port, self.path = None, None, None, None 108 | self.domain_sub = '' 109 | self.base_url = '' 110 | self.status_502_count = 0 111 | self.timeout_count = 0 112 | self.timeout_scan_aborted = False 113 | self.fingerprint_check = True 114 | self.js_urls = [] 115 | self.index_has_reported = False 116 | self.urls_regex_found = set() 117 | 
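# Editor's note, a sketch rather than original source: init_from_url() below consumes a target
# dict assembled by do_port_scan_check() / add_target() further down in this file. With a
# placeholder host, a representative value looks like:
#     {'scheme': 'https', 'host': 'example.com', 'port': 443, 'path': '',
#      'has_http': True, 'ports_open': {443}, 'ports_closed': {80}, 'no_scripts': 0}
# 'no_scripts' is optional and treated as 0 when absent.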
118 | # scan from a given URL 119 | async def init_from_url(self, target): 120 | self.reset_scanner() 121 | self.scheme = target['scheme'] 122 | self.host = target['host'] 123 | self.port = target['port'] 124 | self.path = target['path'] 125 | self.has_http = target['has_http'] 126 | self.ports_open = target['ports_open'] 127 | self.ports_closed = target['ports_closed'] 128 | self.no_scripts = target['no_scripts'] if 'no_scripts' in target else 0 129 | self.domain_sub = get_domain_sub(self.host) 130 | await self.init_final() 131 | return True 132 | 133 | # Fix me: not yet implemented and tested 2024-05-27 134 | async def init_from_log_file(self, log_file): 135 | self.reset_scanner() 136 | self.log_file = log_file 137 | self.scheme, self.host, self.path = self._parse_url_from_file() 138 | self.domain_sub = get_domain_sub(self.host) 139 | if self.host: 140 | if self.host.find(':') > 0: 141 | _ret = self.host.split(':') 142 | self.host = _ret[0] 143 | self.port = _ret[1] 144 | elif self.scheme == 'https': 145 | self.port = 443 146 | elif self.scheme == 'http': 147 | self.port = 80 148 | else: 149 | self.port = None 150 | if await is_port_open(self.host, self.port): 151 | await self.print_msg('[Port Not Open] %s:%s' % (self.host, self.port)) 152 | return False 153 | self.has_http = True 154 | self.no_scripts = 1 155 | await self.init_final() 156 | await self.load_all_urls_from_log_file() 157 | return True 158 | else: 159 | host = os.path.basename(log_file).replace('.log', '') 160 | try: 161 | await dns.asyncresolver.resolve(host, "A") 162 | await self.init_from_url(host) # Fix Me 163 | return True 164 | except Exception as e: 165 | await self.print_msg('[ERROR] Invalid host from log name: %s' % host) 166 | return False 167 | 168 | async def init_final(self): 169 | try: 170 | if self.conn_pool: 171 | await self.conn_pool.aclose() 172 | except Exception as e: 173 | await self.print_msg('conn_pool.aclose exception: %s' % str(e)) 174 | self.conn_pool = None # after close 175 | if self.scheme == 'http' and self.port == 80 or self.scheme == 'https' and self.port == 443: 176 | self.base_url = '%s://%s' % (self.scheme, self.host) 177 | else: 178 | self.base_url = '%s://%s:%s' % (self.scheme, self.host, self.port) 179 | 180 | if self.has_http: 181 | await self.print_msg('Scan %s' % self.base_url) 182 | else: 183 | await self.print_msg('Scan %s:%s' % (self.host, self.port) if self.port else 'Scan %s' % self.host) 184 | 185 | if self.has_http: 186 | limits = httpx.Limits(max_connections=100, max_keepalive_connections=40) 187 | self.conn_pool = httpx.AsyncClient(headers=conf.default_headers, 188 | proxies=args.proxy, verify=False, limits=limits, follow_redirects=False) 189 | 190 | if self.args.require_index_doc: 191 | await self.crawl('/', do_not_process_links=True) 192 | 193 | if self.no_scripts != 1: # 不是重复目标 80 443 跳转的,不需要重复扫描 194 | # 当前目标disable, 或者 全局开启插件扫描 195 | if self.args.scripts_only or not self.no_scripts: 196 | for _ in self.user_scripts: 197 | await self.url_queue.put((_, '/')) 198 | 199 | if not self.has_http or self.args.scripts_only: # 未发现HTTP服务 或 只依赖插件扫描 200 | return 201 | 202 | self.max_depth = cal_depth(self, self.path)[1] + 5 203 | if self.args.no_check404: 204 | self._404_status = 404 205 | else: 206 | await self.check_404_existence() 207 | if self._404_status == -1: 208 | await self.print_msg('[Warning] HTTP 404 check failed: %s' % self.base_url) 209 | # elif self._404_status != 404: 210 | # await self.print_msg('[Warning] %s has no HTTP 404.' 
% self.base_url) 211 | _path, _depth = cal_depth(self, self.path) 212 | 213 | await self.enqueue('/') 214 | if _path != '/' and not self.log_file: 215 | await self.enqueue(_path) 216 | 217 | def _parse_url_from_file(self): 218 | url = '' 219 | with open(self.log_file) as infile: 220 | for _line in infile.readlines(): 221 | _line = _line.strip() 222 | if _line and len(_line.split()) >= 3: 223 | url = _line.split()[1] 224 | break 225 | return parse_url(url) 226 | 227 | # load urls from rules/*.txt 228 | async def _init_rules(self): 229 | self.text_to_find = [] 230 | self.regex_to_find = [] 231 | self.text_to_exclude = [] 232 | self.regex_to_exclude = [] 233 | self.rules_set = set() 234 | self.rules_set_root_only = set() 235 | 236 | p_tag = re.compile('{tag="(.*?)"}') 237 | p_status = re.compile(r'{status=(\d{3})}') 238 | p_content_type = re.compile('{type="(.*?)"}') 239 | p_content_type_no = re.compile('{type_no="(.*?)"}') 240 | 241 | _files = self.args.rule_files if self.args.rule_files else glob.glob('rules/*.txt') 242 | if self.args.fingerprint_only: 243 | _files = [] 244 | 245 | for rule_file in _files: 246 | with codecs.open(rule_file, 'r', encoding='utf-8') as infile: 247 | vul_type = os.path.basename(rule_file)[:-4] 248 | for url in infile.readlines(): 249 | url = url.strip() 250 | if url.startswith('/'): 251 | _ = p_tag.search(url) 252 | tag = _.group(1) if _ else '' 253 | 254 | _ = p_status.search(url) 255 | status = int(_.group(1)) if _ else 0 256 | 257 | _ = p_content_type.search(url) 258 | content_type = _.group(1) if _ else '' 259 | 260 | _ = p_content_type_no.search(url) 261 | content_type_no = _.group(1) if _ else '' 262 | 263 | root_only = True if url.find('{root_only}') >= 0 else False 264 | 265 | rule = (url.split()[0], tag, status, content_type, content_type_no, root_only, vul_type) 266 | if root_only: 267 | if rule not in self.rules_set_root_only: 268 | self.rules_set_root_only.add(rule) 269 | else: 270 | await self.print_msg('Duplicated root only rule: %s' % str(rule)) 271 | else: 272 | if rule not in self.rules_set: 273 | self.rules_set.add(rule) 274 | else: 275 | await self.print_msg('Duplicated rule: %s' % str(rule)) 276 | 277 | re_text = re.compile('{text="(.*)"}') 278 | re_regex_text = re.compile('{regex_text="(.*)"}') 279 | 280 | file_path = 'rules/white.list' 281 | if not os.path.exists(file_path): 282 | await self.print_msg('[ERROR] File not exist: %s' % file_path) 283 | return 284 | for _line in codecs.open(file_path, encoding='utf-8'): 285 | _line = _line.strip() 286 | if not _line or _line.startswith('#'): 287 | continue 288 | _m = re_text.search(_line) 289 | if _m: 290 | self.text_to_find.append(_m.group(1)) 291 | else: 292 | _m = re_regex_text.search(_line) 293 | if _m: 294 | self.regex_to_find.append(re.compile(_m.group(1))) 295 | 296 | file_path = 'rules/black.list' 297 | if not os.path.exists(file_path): 298 | await self.print_msg('[ERROR] File not exist: %s' % file_path) 299 | return 300 | for _line in codecs.open(file_path, encoding='utf-8'): 301 | _line = _line.strip() 302 | if not _line or _line.startswith('#'): 303 | continue 304 | _m = re_text.search(_line) 305 | if _m: 306 | self.text_to_exclude.append(_m.group(1)) 307 | else: 308 | _m = re_regex_text.search(_line) 309 | if _m: 310 | self.regex_to_exclude.append(re.compile(_m.group(1))) 311 | 312 | def _init_scripts(self): 313 | self.user_scripts = [] 314 | if self.args.no_scripts: # 全局禁用插件,无需导入 315 | return 316 | files = 'scripts/*.py' 317 | if self.args.fingerprint_only: 318 | files = 
'scripts/is_admin_site.py' 319 | for _script in glob.glob(files): 320 | script_name_origin = os.path.basename(_script) 321 | script_name = script_name_origin.replace('.py', '') 322 | if self.args.script: # 只导入指定的脚本 323 | if script_name not in self.args.script and script_name_origin not in self.args.script: 324 | continue 325 | if script_name.startswith('_'): 326 | continue 327 | try: 328 | self.user_scripts.append(importlib.import_module('scripts.%s' % script_name)) 329 | except Exception as e: 330 | print('[ERROR] Fail to load script %s' % script_name) 331 | 332 | async def http_request(self, url, headers=conf.default_headers, timeout=30, follow_redirects=False): 333 | try: 334 | if not url: 335 | url = '/' 336 | if not self.conn_pool or self.timeout_scan_aborted: 337 | return -1, {}, '' 338 | if self.args.debug: 339 | await self.print_msg('--> %s' % self.base_url + url) 340 | resp = await self.conn_pool.get(self.base_url + url, 341 | headers=headers, follow_redirects=follow_redirects, timeout=timeout) 342 | if resp.headers.get('content-type', '').find('text') >= 0 \ 343 | or resp.headers.get('content-type', '').find('html') >= 0 \ 344 | or int(resp.headers.get('content-length', '0')) <= 20480: # 1024 * 20 345 | html_doc = resp.text 346 | else: 347 | html_doc = '' 348 | 349 | if resp.status_code == 502: # 502出现超过3次,排除该站点不再扫描 350 | self.status_502_count += 1 351 | if self.status_502_count > 3: 352 | self.timeout_scan_aborted = True 353 | clear_queue(self.url_queue) 354 | try: 355 | if self.conn_pool: 356 | await self.conn_pool.aclose() 357 | except Exception as e: 358 | pass # 359 | self.conn_pool = None 360 | if self.args.debug: 361 | await self.print_msg('Website 502 exceeded: %s' % self.base_url) 362 | 363 | return resp.status_code, resp.headers, html_doc 364 | except httpx.ReadTimeout as e: 365 | self.timeout_count += 1 366 | if self.timeout_count >= 3: 367 | if not self.timeout_scan_aborted: 368 | self.timeout_scan_aborted = True 369 | await self.print_msg('[Warning] timeout exceeded, scan aborted: %s' % self.base_url) 370 | clear_queue(self.url_queue) 371 | return -1, {}, '' 372 | except (httpx.RequestError, httpx.HTTPStatusError, ssl.SSLError) as e: 373 | if self.args.debug: 374 | await self.print_msg('[Request Error] %s %s %s' % (type(e), str(e), self.base_url)) 375 | return -1, {}, '' 376 | except Exception as e: 377 | if self.args.debug: 378 | await self.print_msg('[Request Error] %s %s %s' % (type(e), str(e), self.base_url)) 379 | return -1, {}, '' 380 | 381 | async def check_404_existence(self): 382 | try: 383 | try: 384 | path = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in 385 | range(random.randint(10, 30))) 386 | self._404_status, _, html_doc = await self.http_request('/' + path) 387 | except Exception as e: 388 | await self.print_msg('[Warning] HTTP 404 check failed: %s, %s' % (self.base_url, type(e))) 389 | self._404_status, _, html_doc = -1, {}, '' 390 | if self._404_status != 404: 391 | self.len_404_doc = len(html_doc) 392 | except Exception as e: 393 | await self.print_msg('[Check_404] Exception %s %s' % (self.base_url, str(e))) 394 | 395 | # 396 | async def enqueue(self, url): 397 | try: 398 | url = str(url) 399 | except Exception as e: 400 | return False 401 | try: 402 | url_pattern = re.sub(r'\d+', '{num}', url) 403 | if url_pattern in self.urls_processed or len(self.urls_processed) >= self.links_limit: 404 | return False 405 | 406 | self.urls_processed.add(url_pattern) 407 | # await self.print_msg('Entered Queue: 
%s' % url) 408 | if not self.args.no_crawl: # no crawl 409 | await self.crawl(url) 410 | if self._404_status != -1: # valid web service 411 | rule_set_to_process = [self.rules_set, self.rules_set_root_only] if url == '/' else [self.rules_set] 412 | for rule_set in rule_set_to_process: 413 | for _ in rule_set: 414 | if _[5] and url != '/': # root only 415 | continue 416 | try: 417 | full_url = url.rstrip('/') + _[0] 418 | except Exception as e: 419 | continue 420 | if full_url in self.urls_enqueued: 421 | continue 422 | url_description = {'prefix': url.rstrip('/'), 'full_url': full_url} 423 | item = (url_description, _[1], _[2], _[3], _[4], _[5], _[6]) 424 | await self.url_queue.put(item) 425 | self.urls_enqueued.add(full_url) 426 | 427 | if self.args.full_scan and url.count('/') >= 2: 428 | await self.enqueue('/'.join(url.split('/')[:-2]) + '/') # sub folder enqueue 429 | 430 | if url != '/' and not self.no_scripts: 431 | for script in self.user_scripts: 432 | await self.url_queue.put((script, url)) 433 | return True 434 | except Exception as e: 435 | await self.print_msg('[_enqueue.exception] %s' % str(e)) 436 | return False 437 | 438 | # 439 | async def crawl(self, path, do_not_process_links=False): 440 | try: 441 | # increase body size to 200 KB 442 | request_headers = dict(conf.default_headers, Range='bytes=0-204800') 443 | status, headers, html_doc = await self.http_request(path, headers=request_headers) 444 | 445 | if path == '/': 446 | self.index_status, self.index_headers, self.index_html_doc = status, headers, html_doc 447 | if not self.index_has_reported: 448 | self.index_has_reported = True 449 | title = get_http_title(html_doc) 450 | location = headers.get('Location', '') 451 | server = headers.get('Server', '') 452 | str_headers = '' 453 | for key in self.index_headers: 454 | # 减少非关键HTTP头的输出 455 | if key.lower() in ['connection', 'content-encoding', 'content-security-policy', 456 | 'date', 'p3p', 'x-ua-compatible', 'x-ua-compatible', 'cache-control', 457 | 'x-xss-protection', 'transfer-encoding', 'last-modified', 'etag']: 458 | continue 459 | str_headers += '%s: %s\n' % (key, self.index_headers[key]) 460 | _ = {'status': status, 'url': clear_url(self.base_url), 'title': title, 'server': server, 461 | 'location': location, 'headers': str_headers} 462 | await self.save_result('$Index', _) 463 | 464 | if self.fingerprint_check: 465 | # 检查Web指纹 466 | cms_name = fingerprint.get_cms_name('/^^^get^^^{}^^^', status, headers, html_doc) 467 | if cms_name: 468 | await self.save_result( 469 | '$Fingerprint', cms_name, 470 | msg='[Fingerprint] %s found %s' % (('%s%s' % (self.base_url, path)).rstrip('/'), cms_name)) 471 | 472 | # 首页30x跳转,在第二次请求时,需要parse HTML, follow后获取新的HTML 473 | if not self.args.no_crawl and not do_not_process_links and status in [301, 302]: 474 | resp = await self.conn_pool.get(self.base_url + '/', 475 | headers=conf.default_headers, timeout=20) 476 | location = resp.headers.get('Location', '') 477 | if location.lower().startswith('http'): 478 | scheme, netloc, _path, params, query, fragment = urlparse(location, 'http') 479 | if netloc.find(self.host) < 0: # different host, do not follow 480 | location = '' 481 | else: 482 | location = _path + '?' 
+ query 483 | elif location.lower().startswith('/'): 484 | pass 485 | else: 486 | location = '/' + location 487 | if location: 488 | url, depth = cal_depth(self, resp.headers.get('Location', '')) 489 | if depth <= self.max_depth: 490 | await self.enqueue(url) 491 | # 避免处理错误,直接传入原始path,让httpx处理跳转URL,会重复,多1次请求 492 | status, headers, html_doc = await self.http_request(path, headers=request_headers, 493 | follow_redirects=True) 494 | # 再次检查Web指纹 495 | cms_name = fingerprint.get_cms_name('/^^^get^^^{}^^^', status, headers, html_doc) 496 | if cms_name: 497 | await self.save_result( 498 | '$Fingerprint', cms_name, 499 | msg='[Fingerprint] %s found %s' % ( 500 | ('%s%s' % (self.base_url, path)).rstrip('/'), cms_name)) 501 | 502 | if not self.args.no_crawl and not do_not_process_links and html_doc: 503 | 504 | fav_url_found = False 505 | soup = BeautifulSoup(html_doc, "html.parser") 506 | for tag in ['link', 'script', 'a']: 507 | for link in soup.find_all(tag): 508 | origin_url = url = link.get('href', '').strip() 509 | if not url: 510 | origin_url = url = link.get('src', '').strip() 511 | if url.startswith('..'): 512 | continue 513 | if not url.startswith('/') and url.find('//') < 0: # relative path 514 | url = path + url 515 | url, depth = cal_depth(self, url) 516 | # print(url, depth) 517 | if depth <= self.max_depth: 518 | await self.enqueue(url) 519 | if self.fingerprint_check and tag == 'link' and str(link.get('rel', '')).find('icon') >= 0: 520 | fav_url_found = True 521 | fav_url, depth = cal_depth(self, link.get('href', '').strip()) 522 | if fav_url: # 非当前域名的icon url,不会请求 523 | await self.url_queue.put(('favicon', fav_url, '')) 524 | # 解析js获取URL 525 | if (path == '/' and tag == 'script' and (self.args.api or not self.args.fingerprint_only) and 526 | origin_url not in self.js_urls): 527 | self.js_urls.append(origin_url) 528 | js_url, depth = cal_depth(self, origin_url) 529 | if js_url: 530 | if origin_url.lower().startswith('http') and origin_url.find('://') > 0: 531 | origin_url = origin_url.split('://')[1] 532 | if origin_url.find('/') > 0: 533 | origin_url = '/'.join(origin_url.split('/')[1:]) 534 | await self.url_queue.put(('js_file', origin_url, '')) 535 | 536 | if path == '/' and self.fingerprint_check and not fav_url_found: # 尝试请求默认favicon,计算hash 537 | await self.url_queue.put(('favicon', '/favicon.ico', '')) 538 | 539 | if path == '/' and self.fingerprint_check: 540 | self.fingerprint_check = False # this should only run once for each target 541 | # 将CMS识别的其他请求,添加到队列 542 | for key_name in fingerprint.rules.keys(): 543 | if key_name != '/^^^get^^^{}^^^': # 首页已经默认请求过 544 | await self.url_queue.put(('key_name', key_name, '')) 545 | 546 | ret = self.find_text(html_doc) 547 | if ret: 548 | title = get_http_title(html_doc) 549 | _ = {'status': status, 'url': '%s%s' % (self.base_url, path), 'title': title, 'vul_type': ret[1]} 550 | await self.save_result('/', _) 551 | 552 | except Exception as e: 553 | await self.print_msg('[crawl Exception] %s %s %s' % (path, type(e), str(e))) 554 | 555 | async def load_all_urls_from_log_file(self): 556 | try: 557 | with open(self.log_file) as infile: 558 | for _line in infile.readlines(): 559 | _ = _line.strip().split() 560 | if len(_) == 3 and (_[2].find('^^^200') > 0 or _[2].find('^^^403') > 0 or _[2].find('^^^302') > 0): 561 | url, depth = cal_depth(self, _[1]) 562 | await self.enqueue(url) 563 | except Exception as e: 564 | await self.print_msg('[load_all_urls_from_log_file] %s' % str(e)) 565 | 566 | def find_text(self, html_doc): 567 | for _text in 
self.text_to_find: 568 | if html_doc.find(_text) >= 0: 569 | return True, 'Found [%s]' % _text 570 | for _regex in self.regex_to_find: 571 | if _regex.search(html_doc): 572 | return True, 'Found Regex [%s]' % _regex.pattern 573 | return False 574 | 575 | def find_exclude_text(self, html_doc): 576 | for _text in self.text_to_exclude: 577 | if html_doc.find(_text) >= 0: 578 | return True 579 | for _regex in self.regex_to_exclude: 580 | if _regex.search(html_doc): 581 | return True 582 | return False 583 | 584 | async def is_url_valid(self, url, item): 585 | url_description, tag, status_to_match, content_type, content_type_no, root_only, vul_type = item 586 | status, headers, html_doc = await self.http_request(url) 587 | cur_content_type = headers.get('content-type', '') 588 | cur_content_length = headers.get('content-length', len(html_doc)) 589 | 590 | if self.find_exclude_text(html_doc): # excluded text found 591 | return False 592 | 593 | if 0 <= int(cur_content_length) <= 10: # text too short 594 | return False 595 | 596 | if cur_content_type.find('image/') >= 0: # exclude image 597 | return False 598 | 599 | if content_type != 'application/json' and cur_content_type.find('application/json') >= 0 and \ 600 | not url.endswith('.json'): # invalid json 601 | return False 602 | 603 | if content_type and cur_content_type.find(content_type) < 0 \ 604 | or content_type_no and cur_content_type.find(content_type_no) >= 0: 605 | return False # content type mismatch 606 | 607 | if tag and html_doc.find(tag) < 0: 608 | return False # tag mismatch 609 | 610 | if self.find_text(html_doc): 611 | valid_item = True 612 | else: 613 | # status code check 614 | if status_to_match == 206 and status != 206: 615 | return False 616 | if status_to_match in (200, 206) and status in (200, 206): 617 | valid_item = True 618 | elif status_to_match and status != status_to_match: 619 | return False 620 | elif status in (403, 404) and status != status_to_match: 621 | return False 622 | else: 623 | valid_item = True 624 | 625 | if status == self._404_status and url != '/': 626 | len_doc = len(html_doc) 627 | len_sum = self.len_404_doc + len_doc 628 | if len_sum == 0 or (0.4 <= float(len_doc) / len_sum <= 0.6): 629 | return False 630 | return valid_item 631 | 632 | async def save_result(self, prefix, item, msg=None): 633 | async with self.lock: 634 | if prefix not in self.results: 635 | self.results[prefix] = [] 636 | if item not in self.results[prefix]: 637 | self.results[prefix].append(item) 638 | if msg: 639 | await self.print_msg(msg) 640 | 641 | async def scan_worker(self): 642 | while True: 643 | if time.time() - self.start_time > self.time_out and not self.timeout_scan_aborted: 644 | self.timeout_scan_aborted = True 645 | clear_queue(self.url_queue) 646 | await self.print_msg('[ERROR] Timed out task: %s' % self.base_url) 647 | return 648 | try: 649 | item = self.url_queue.get_nowait() 650 | except Exception as e: 651 | return 652 | try: 653 | if len(item) == 3: 654 | if item[0] == 'favicon': 655 | resp = await self.conn_pool.get(self.base_url + item[1], 656 | headers=conf.default_headers, 657 | follow_redirects=False, timeout=20) 658 | fav_hash = hashlib.md5(resp.content).hexdigest() 659 | if fav_hash in fingerprint.fav_icons: 660 | cms_name = fingerprint.fav_icons[fav_hash] 661 | await self.save_result('$Fingerprint', cms_name, 662 | msg='[Fingerprint] %s found %s' % (self.base_url, cms_name)) 663 | 664 | elif item[0] == 'key_name': 665 | key_name = item[1] 666 | req_item = fingerprint.requests_to_do[key_name] 667 
| if req_item[2]: 668 | headers = copy.deepcopy(conf.default_headers) 669 | headers.update(req_item[2]) # update headers 670 | else: 671 | headers = conf.default_headers 672 | resp = None 673 | if req_item[1].lower() == 'get': 674 | resp = await self.conn_pool.get(self.base_url + req_item[0], headers=headers) 675 | elif req_item[1].lower() == 'post': 676 | data = req_item[3] 677 | resp = await self.conn_pool.post(self.base_url + req_item[0], headers=headers, data=data) 678 | 679 | if resp: 680 | cms_name = fingerprint.get_cms_name(key_name, resp.status_code, resp.headers, resp.text) 681 | if cms_name: 682 | await self.save_result('$Fingerprint', cms_name, 683 | '[Fingerprint] %s found %s' % (self.base_url, cms_name)) 684 | 685 | elif item[0] == 'js_file': 686 | _path = item[1] if item[1].startswith('/') else '/' + item[1] 687 | status, headers, js_doc = await self.http_request(_path) 688 | if headers['content-type'].find('javascript') >= 0: 689 | urls_regex, all_path_items, data_leak_found = await get_urls_in_js_async( 690 | asyncio.get_event_loop(), js_doc, self.base_url + item[1], self.args.api, self) 691 | # 目前并没有尝试请求匹配到的两组 疑似API接口,有误报,需要先优化正则,减少误报后,再添加 692 | # 对于接口测试,这里应该是1个非常重要的检测点 693 | if self.args.api: 694 | self.urls_regex_found = self.urls_regex_found.union(urls_regex) 695 | 696 | for item in all_path_items: 697 | if type(item[2]) is str: 698 | if self.args.api: 699 | urls_regex.add(item[2]) 700 | # await self.url_queue.put(('api_endpoint', item[2], '')) 701 | url, depth = cal_depth(self, item[2]) 702 | if depth <= self.max_depth: 703 | await self.enqueue(url) 704 | 705 | if data_leak_found: 706 | for item in data_leak_found: 707 | _ = {'status': 200, 'url': self.base_url + _path, 708 | 'title': '%s (%s)' % (item[1], item[2]), 'vul_type': 'JS Info Leak'} 709 | await self.save_result('/', _, '[JS Info Leak] %s : %s' % (_['url'], _['title'])) 710 | 711 | continue 712 | elif len(item) == 2: # Script Scan 713 | check_func = getattr(item[0], 'do_check') 714 | # await self.print_msg('Begin %s %s' % (os.path.basename(item[0].__file__), item[1])) 715 | await check_func(self, item[1]) 716 | # await self.print_msg('End %s %s' % (os.path.basename(item[0].__file__), item[1])) 717 | continue 718 | else: 719 | url_description, tag, status_to_match, content_type, content_type_no, root_only, vul_type = item 720 | prefix = url_description['prefix'] 721 | url = url_description['full_url'] 722 | 723 | if url.find('{sub}') >= 0: 724 | if not self.domain_sub: 725 | continue 726 | url = url.replace('{sub}', self.domain_sub) 727 | 728 | except Exception as e: 729 | await self.print_msg('[scan_worker.1] %s, %s, %s' % (str(e), self.base_url, item)) 730 | # await self.print_msg(traceback.format_exc()) 731 | continue 732 | if not item or not url: 733 | break 734 | 735 | try: 736 | valid_item = await self.is_url_valid(url, item) 737 | 738 | if valid_item: 739 | _ = url.split('/') 740 | _[-1] = 'fptest' + _[-1] 741 | url_fp_test = '/'.join(_) # add false positive test prefix 742 | ret = await self.is_url_valid(url_fp_test, item) 743 | if ret: 744 | valid_item = False 745 | if valid_item: 746 | status, headers, html_doc = await self.http_request(url) 747 | title = get_http_title(html_doc) 748 | _ = {'status': status, 'url': '%s%s' % (self.base_url, url), 'title': title, 'vul_type': vul_type} 749 | await self.save_result(prefix, _) 750 | except Exception as e: 751 | await self.print_msg('[scan_worker.2][%s] %s, %s' % (url, str(e), item)) 752 | # await self.print_msg(traceback.format_exc()) 753 | 754 | async 
def scan(self, threads=6): 755 | try: 756 | all_threads = [] 757 | for i in range(threads): 758 | t = self.scan_worker() 759 | all_threads.append(t) 760 | await asyncio.gather(*all_threads) 761 | 762 | for key in self.results.keys(): 763 | # too many URLs found under this folder, deduplicate results 764 | if len(self.results[key]) > 10: 765 | vul_type_count = {} 766 | for item in copy.deepcopy(self.results[key]): 767 | if item['vul_type'] not in vul_type_count: 768 | vul_type_count[item['vul_type']] = 1 769 | else: 770 | vul_type_count[item['vul_type']] += 1 771 | if vul_type_count[item['vul_type']] >= 3: 772 | self.results[key].remove(item) 773 | return clear_url(self.base_url), self.results, self.urls_regex_found 774 | 775 | except Exception as e: 776 | await self.print_msg('[scan exception] %s' % str(e)) 777 | finally: 778 | try: 779 | await self.conn_pool.aclose() 780 | except Exception as e: 781 | pass 782 | 783 | 784 | async def scan_process(): 785 | s = Scanner(args.timeout * 60) 786 | await s.init() 787 | while True: 788 | try: 789 | target = q_targets.get_nowait() 790 | except asyncio.queues.QueueEmpty as e: 791 | if conf.process_targets_done and q_targets.qsize() == 0: 792 | break 793 | else: 794 | await asyncio.sleep(0.1) 795 | continue 796 | 797 | if 'target' in target: 798 | ret = await s.init_from_url(target['target']) 799 | elif 'file' in target: 800 | ret = await s.init_from_log_file(target['file']) 801 | else: 802 | continue 803 | 804 | if ret: 805 | item = await s.scan(threads=args.t) 806 | if item[1]: 807 | await q_results.put(copy.deepcopy(item)) 808 | 809 | 810 | async def add_target(target, is_neighbor=False): 811 | if is_neighbor: 812 | target['no_scripts'] = 1 # 邻居IP,不启用插件. Bug fixed: 2024/05/03 813 | if args.debug: 814 | await q_results.put('New target: %s' % target) 815 | await q_targets.put({'target': target}) 816 | if args.save_ports and target['ports_open']: 817 | conf.ports_saved_to_file = True 818 | if not args.ports_file: 819 | args.ports_file = open(args.save_ports, 'w') 820 | for port in target['ports_open']: 821 | args.ports_file.write('%s:%s\n' % (target['host'], port)) 822 | args.ports_file.flush() 823 | conf.tasks_count += 1 824 | 825 | 826 | def is_intranet(ip): 827 | try: 828 | ret = ip.split('.') 829 | if len(ret) != 4: 830 | return True 831 | if ret[0] == '10': 832 | return True 833 | if ret[0] == '172' and 16 <= int(ret[1]) <= 31: 834 | return True 835 | if ret[0] == '192' and ret[1] == '168': 836 | return True 837 | return False 838 | except Exception as e: 839 | return False 840 | 841 | 842 | resolver = dns.asyncresolver.Resolver() 843 | 844 | 845 | async def domain_lookup_check(queue_targets_origin, processed_targets, queue_targets): 846 | while True: 847 | try: 848 | url = queue_targets_origin.get_nowait() 849 | except asyncio.queues.QueueEmpty as e: 850 | break 851 | # scheme netloc path 852 | if url.find('://') < 0: 853 | netloc = url[:url.find('/')] if url.find('/') > 0 else url 854 | else: 855 | scheme, netloc, path, params, query, fragment = urlparse(url, 'http') 856 | 857 | # host port 858 | host = netloc.split(':')[0] if netloc.find(':') >= 0 else netloc 859 | 860 | if is_ip_addr(host): 861 | processed_targets.append(host) 862 | if args.skip_intranet and is_intranet(host): 863 | await q_results.put('Private IP target skipped: %s [%s]' % (url, host)) 864 | else: 865 | await queue_targets.put((url, 0, host)) 866 | else: 867 | for i in range(5): 868 | try: 869 | answers = await resolver.resolve(host, "A") 870 | 
processed_targets.append(answers[0].address) 871 | if args.skip_intranet and is_intranet(answers[0].address): 872 | await q_results.put('Private IP target skipped: %s [%s]' % (url, answers[0].address)) 873 | else: 874 | await queue_targets.put((url, 0, answers[0].address)) 875 | break 876 | except dns.resolver.NXDOMAIN as e: 877 | await q_results.put('No such domain: %s' % host) 878 | break 879 | except Exception as e: 880 | if i == 4: # Failed after 4 retries 881 | await q_results.put('Domain lookup failed [%s]: %s' % (e.__class__.__name__, host)) 882 | 883 | 884 | async def do_port_scan_check(queue_targets): 885 | """ 886 | 检测目标的端口是否开放,输入的目标是URL,也可能是网段下的相邻IP 887 | """ 888 | while True: 889 | try: 890 | url, is_neighbor, ip_addr = queue_targets.get_nowait() # is_neighbor = 1 为相邻网段的IP,优先级降低 891 | except asyncio.queues.QueueEmpty as e: 892 | break 893 | try: 894 | # scheme netloc path 895 | if url.find('://') < 0: 896 | scheme = 'unknown' 897 | netloc = url[:url.find('/')] if url.find('/') > 0 else url 898 | path = '' 899 | else: 900 | scheme, netloc, path, params, query, fragment = urlparse(url, 'http') 901 | 902 | # host port 903 | if netloc.find(':') >= 0: 904 | _ = netloc.split(':') 905 | host = _[0] 906 | try: 907 | port = int(_[1]) 908 | except: 909 | port = None 910 | else: 911 | host = netloc 912 | port = None 913 | 914 | if scheme == 'https' and port is None: 915 | port = 443 916 | elif scheme == 'http' and port is None: 917 | port = 80 918 | 919 | if scheme == 'unknown': 920 | if port == 80: 921 | scheme = 'http' 922 | if port == 443: 923 | scheme = 'https' 924 | 925 | ports_open = set() 926 | ports_closed = set() 927 | 928 | # 插件不依赖HTTP连接池, 且仅启用插件扫描, 则不需要检查80/443端口的HTTP服务, 直接扫描 require_ports 929 | if args.scripts_only and args.require_no_http: 930 | ports_open, ports_closed = await scan_given_ports(ip_addr, args.require_ports, ports_open, ports_closed) 931 | target = {'scheme': scheme, 'host': host, 'port': port, 'path': path, 932 | 'has_http': False, 'ports_open': ports_open, 'ports_closed': ports_closed} 933 | await add_target(target) # 在只扫插件的情况下,相邻IP也需要启用 934 | continue 935 | 936 | if port: 937 | # 指定了 标准端口 或 非标准端口 938 | has_http = await is_port_open(ip_addr, port) 939 | if has_http: 940 | ports_open.add(port) 941 | else: 942 | ports_closed.add(port) 943 | if not args.no_scripts: 944 | ports_open, ports_closed = \ 945 | await scan_given_ports(ip_addr, args.require_ports, ports_open, ports_closed) 946 | 947 | target = {'scheme': scheme, 'host': host, 'port': port, 'path': path, 'has_http': has_http, 948 | 'ports_open': ports_open, 'ports_closed': ports_closed} 949 | await add_target(target) 950 | 951 | else: 952 | # 只有域名和IP情况下, 扫默认端口 953 | port_open_80 = await is_port_open(ip_addr, 80) 954 | port_open_443 = await is_port_open(ip_addr, 443) 955 | 956 | if port_open_80: 957 | ports_open.add(80) 958 | else: 959 | ports_closed.add(80) 960 | if port_open_443: 961 | ports_open.add(443) 962 | else: 963 | ports_closed.add(443) 964 | if not args.no_scripts: 965 | ports_open, ports_closed = \ 966 | await scan_given_ports(ip_addr, args.require_ports, ports_open, ports_closed) 967 | 968 | if port_open_80 and port_open_443: 969 | target = {'scheme': 'https', 'host': host, 'port': 443, 'path': path, 970 | 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed} 971 | await add_target(target, is_neighbor) 972 | # 排除 301 HTTP 跳转 HTTPS的目标 973 | async with httpx.AsyncClient() as client: 974 | r = await client.get('http://%s' % host, follow_redirects=False, timeout=20) 975 | if r 
and not \ 976 | (r.status_code == 301 and r.headers.get('Location', '').lower().startswith('https')): 977 | target = {'scheme': 'http', 'host': host, 'port': 80, 'path': path, 978 | 'has_http': True, 'no_scripts': 1, 979 | 'ports_open': ports_open, 'ports_closed': ports_closed} 980 | await add_target(target) 981 | 982 | elif port_open_443: 983 | target = {'scheme': 'https', 'host': host, 'port': 443, 'path': path, 984 | 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed} 985 | # 即使指定的目标,允许插件扫描,邻居也将不启用,节省扫描时间 986 | await add_target(target, is_neighbor) 987 | elif port_open_80: 988 | target = {'scheme': 'http', 'host': host, 'port': 80, 'path': path, 989 | 'has_http': True, 'ports_open': ports_open, 'ports_closed': ports_closed} 990 | await add_target(target, is_neighbor) 991 | elif args.no_scripts: 992 | # 80 443 端口不开放, 禁用插件扫描 993 | await q_results.put('No ports open: %s' % host) 994 | elif not is_neighbor or args.scripts_only: 995 | # 直接输入目标 或者 对相邻IP应用插件 996 | # 80 443 未开放,此时只能检测其他端口的漏洞 997 | # 如果没有任何开放的端口,直接跳过该目标 998 | if ports_open: 999 | target = {'scheme': 'http', 'host': host, 'port': 80, 'path': path, 1000 | 'has_http': False, 'ports_open': ports_open, 'ports_closed': ports_closed} 1001 | await add_target(target) 1002 | else: 1003 | await q_results.put('[Warning] Target has no open ports: %s' % url) 1004 | except (httpx.RequestError, httpx.HTTPStatusError) as e: 1005 | pass 1006 | except Exception as e: 1007 | # import traceback 1008 | # await q_results.put(traceback.format_exc()) 1009 | await q_results.put('[port_scan_check.exception] URL is %s, %s' % (url, str(e))) 1010 | 1011 | 1012 | async def port_scan_check(queue_targets): 1013 | threads = [do_port_scan_check(queue_targets) for _ in range(250)] 1014 | await asyncio.gather(*threads) 1015 | 1016 | 1017 | async def prepare_targets(target_list): 1018 | """ 1019 | Process URL / IP / Domain, port scan 1020 | 处理域名、IP,扫描目标端口80 443等端口是否开放 1021 | """ 1022 | queue_targets_origin = asyncio.Queue() 1023 | for target in target_list: 1024 | if target.strip() and len(target) > 5: 1025 | # work with https://github.com/lijiejie/subDomainsBrute 1026 | # Delimiter should be "," 1027 | hosts = target.replace(',', ' ').strip().split() 1028 | await queue_targets_origin.put(hosts[0]) 1029 | 1030 | processed_targets = [] 1031 | # 将域名解析和端口扫描拆分,可节省约2s.更简单的做法, 可以将DNS解析和端口扫描合并为一个函数,但会损失 2s 1032 | await q_results.put('Domain lookup start.') 1033 | queue_targets = asyncio.Queue() 1034 | # Be careful: 当 DNS查询并发过高时,在家庭网络下会出现较多超时 1035 | threads = [domain_lookup_check(queue_targets_origin, processed_targets, queue_targets) for _ in range(50)] 1036 | await asyncio.gather(*threads) 1037 | 1038 | if args.network != 32: 1039 | await q_results.put('Process sub network start.') 1040 | num_entered_queue = 0 1041 | for ip in processed_targets: 1042 | if ip.find('/') > 0: # 子网本身已经处理过 1043 | continue 1044 | _network = u'%s/%s' % ('.'.join(ip.split('.')[:3]), args.network) 1045 | if _network in processed_targets: 1046 | continue 1047 | processed_targets.append(_network) 1048 | 1049 | if args.network >= 20: 1050 | sub_nets = [ipaddress.IPv4Network(u'%s/%s' % (ip, args.network), strict=False).hosts()] 1051 | else: 1052 | sub_nets = ipaddress.IPv4Network(u'%s/%s' % (ip, args.network), strict=False).subnets(new_prefix=22) 1053 | for sub_net in sub_nets: 1054 | if sub_net in processed_targets: 1055 | continue 1056 | if type(sub_net) is ipaddress.IPv4Network: # add network only 1057 | processed_targets.append(str(sub_net)) 1058 | for _ip in sub_net: 1059 | 
_ip = str(_ip) 1060 | if _ip not in processed_targets: 1061 | await queue_targets.put((_ip, 1, _ip)) 1062 | num_entered_queue += 1 1063 | if num_entered_queue > 65535: # 队列不宜太长,如果超过一个B段,分多次处理 1064 | await port_scan_check(queue_targets) 1065 | num_entered_queue = 0 1066 | if queue_targets.qsize() > 0: # 还有剩余未处理目标 1067 | await port_scan_check(queue_targets) 1068 | # save ports data 1069 | if args.save_ports and args.ports_file: 1070 | args.ports_file.close() 1071 | 1072 | conf.process_targets_done = True 1073 | await q_results.put('* Targets DNS resolve and port scan all done.') 1074 | 1075 | 1076 | async def main(): 1077 | for input_file in args.input_files: 1078 | if args.host: 1079 | target_list = args.host 1080 | # Targets input via commandline args, create double processes at most 1081 | if args.network == 32 and len(target_list) * 2 < args.p: 1082 | args.p = len(target_list) * 2 1083 | elif args.f or args.d: 1084 | with codecs.open(input_file, encoding='utf-8') as inFile: 1085 | target_list = inFile.readlines() 1086 | # Targets input via file, create double processes at most 1087 | if args.network == 32 and len(target_list) * 2 < args.p: 1088 | args.p = len(target_list) * 2 1089 | try: 1090 | clear_queue(q_results) 1091 | clear_queue(q_targets) 1092 | # save report thread 1093 | asyncio.create_task(save_report(args, q_results, input_file)) 1094 | 1095 | conf.process_targets_done = False 1096 | start_time = time.time() 1097 | 1098 | if args.crawler: 1099 | # 爬虫URL导入,在3.0版本后,还未经测试,仅保留了原逻辑。 待测试 1100 | input_files = glob.glob(args.crawler + '/*.log') 1101 | for _file in input_files: 1102 | await q_targets.put({'file': _file}) 1103 | conf.tasks_count += 1 1104 | if conf.tasks_count < args.p: 1105 | args.p = conf.tasks_count 1106 | conf.process_targets_done = True 1107 | else: 1108 | conf.tasks_count = 0 1109 | asyncio.create_task(prepare_targets(target_list)) 1110 | 1111 | all_process = [scan_process() for _ in range(args.p)] 1112 | await q_results.put('%s scan process started' % args.p) 1113 | await asyncio.gather(*all_process) 1114 | 1115 | cost_time = time.time() - start_time 1116 | cost_min = int(cost_time / 60) 1117 | cost_min = '%s min ' % cost_min if cost_min > 0 else '' 1118 | cost_seconds = '%.1f' % (cost_time % 60) 1119 | await q_results.put('Scanned %s targets in %s%s seconds' % (conf.tasks_count, cost_min, cost_seconds)) 1120 | except KeyboardInterrupt as e: 1121 | conf.stop_me = True 1122 | await q_results.put('Scan aborted by user') 1123 | if conf.output_file_name: 1124 | await q_results.put('If you are interested, partial report is: %s' % conf.output_file_name) 1125 | exit(-1) 1126 | except Exception as e: 1127 | traceback.print_exc() 1128 | await q_results.put('[main.exception] %s %s' % (type(e), str(e))) 1129 | 1130 | conf.stop_me = True 1131 | await asyncio.sleep(3.0) # report 需要一些时间写入和唤起浏览器 1132 | 1133 | 1134 | if __name__ == '__main__': 1135 | args = parse_args() 1136 | print('* BBScan %s https://github.com/lijiejie/BBScan *' % conf.version) 1137 | if args.no_scripts: 1138 | print('* Scripts scan was disabled') 1139 | if args.require_ports: 1140 | print('* Scripts scan port check: %s' % ','.join([str(x) for x in args.require_ports])) 1141 | if sys.version_info.major >= 3 and sys.version_info.minor >= 10: 1142 | loop = asyncio.new_event_loop() 1143 | else: 1144 | loop = asyncio.get_event_loop() 1145 | q_targets = asyncio.Queue() # targets Queue 1146 | q_results = asyncio.Queue() # results Queue 1147 | loop.run_until_complete(main()) 1148 | 
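# Editor's sketch (not part of the original file): scan_worker() above resolves each user script
# with getattr(module, 'do_check') and awaits do_check(scanner, url), so a plugin dropped into
# scripts/ only needs to expose one coroutine with that signature. A minimal, hypothetical
# example -- '/server-status' and the matched text are illustrative, not a bundled rule:
#
#     async def do_check(self, url):
#         # 'self' is the Scanner instance; reuse its connection pool, helpers and result store
#         if url != '/' or not self.conn_pool:
#             return
#         status, headers, html_doc = await self.http_request('/server-status')
#         if status == 200 and html_doc.find('Apache Server Status') >= 0:
#             await self.save_result('/', {'status': status,
#                                          'url': self.base_url + '/server-status',
#                                          'title': 'Apache Server Status',
#                                          'vul_type': 'example_server_status'},
#                                    msg='[Example] %s exposes /server-status' % self.base_url)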
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BBScan 3.0 # 2 | 3 | `BBScan` 是一个高并发的、轻量级的Web漏洞扫描工具。它帮助安全工程师从大量目标中,快速发现,定位可能存在弱点的目标,辅助半自动化测试。 4 | 5 | `BBScan` is a fast and light-weight web vulnerability scanner. It helps pen-testers pinpoint possibly vulnerable targets from a large number of web servers. 6 | 7 | * Scan common web vulnerabilities: **Data Leaks** / **Directory Traversal** / **Admin Backends** 8 | * Extract **API Endpoints** from .js files, scan for **Token/Secrets/Pass/Key Leaks** 9 | * Recognize **Web Fingerprints**: web frameworks, programming languages, CMS, middleware, open source software or commercial product names 10 | 11 | ### Test Reports 12 | 13 | Brute-force subdomain names for *.baidu.com *.qq.com *.bytedance.com with [subDomainsBrute](https://github.com/lijiejie/subDomainsBrute) and then 14 | 15 | feed the output files to BBScan; the scan reports are shown below 16 | 17 | * [qq.com_report.html](https://www.lijiejie.com/python/BBScan/qq.com_report.html) 18 | 19 | * [bytedance.com_report.html](https://www.lijiejie.com/python/BBScan/bytedance.com_report.html) 20 | 21 | * [baidu.com_report.html](https://www.lijiejie.com/python/BBScan/baidu.com_report.html) 22 | 23 | ### Install ### 24 | 25 | Requires Python 3.6+ 26 | 27 | pip3 install -r requirements.txt 28 | 29 | ### Change Log 30 | 31 | * **2024-05-27** 32 | * **New Features**: 33 | * CMS recognition, with web fingerprints from [FingerprintHub](https://github.com/0x727/FingerprintHub). Credit to [@0x727](https://github.com/0x727) 34 | * JavaScript parsing: extract and join API endpoints, detect Key/Secret/Token leaks 35 | * Extract URLs via regular expressions, From: https://github.com/Threezh1/JSFinder Credit to [@Threezh1](https://github.com/Threezh1) 36 | * **Fewer false negatives**: reduced DNS query count for better stability 37 | * **Fewer false positives**: improved false-positive verification logic 38 | * **Improved reports**: more usable web report output 39 | 40 | ### Usage 41 | 42 | * ##### Scan from file 43 | 44 | ``` 45 | python BBScan.py -f urls.txt --api 46 | ``` 47 | 48 | * ##### Scan from command line 49 | 50 | ``` 51 | python BBScan.py --host www.test.com https://test2.com http://test3.com:8080 10.1.2.3 52 | ``` 53 | 54 | * ##### Scan with specified rules only 55 | 56 | ``` 57 | python BBScan.py --rule git_and_svn -f urls.txt 58 | ``` 59 | 60 | ### Key Arguments ### 61 | 62 | * `--network MASK` 63 | 64 | You can include other IPs 
under the same network to a scan 65 | 66 | * `--host www.baidu.com --network 24` 67 | * `-f urls.txt --network 28` 68 | 69 | * `--fp, --fingerprint` 70 | 71 | Under this mode, only fingerprint scan performed only, this helps to save some time by disable rule/script based scan. 72 | 73 | * `--api` 74 | 75 | Gather and display all API interfaces extracted from .js file 76 | 77 | * `--skip, --skip-intranet` 78 | 79 | Skip scanning private IP targets. 80 | 81 | ``` (venv_py) python BBScan.py 82 | usage: BBScan.py [options] 83 | 84 | 85 | 86 | Targets: 87 | 88 | --host [HOST [HOST ...]] 89 | Scan several hosts from command line 90 | -f TargetFile Load new line delimited targets from TargetFile 91 | -d TargetDirectory Load all *.txt files from TargetDirectory 92 | --crawler CrawlDirectory 93 | Load all *.log crawl files from CrawlDirectory 94 | --network MASK Scan all Target/MASK neighbour hosts, 95 | should be an integer between 8 and 31 96 | --skip, --skip-intranet 97 | Do not scan private IPs, when you are not under the same network with the target 98 | 99 | Rule Based SCAN: 100 | 101 | --rule [RuleFileName [RuleFileName ...]] 102 | Import specified rule files only. 103 | -n, --no-crawl No crawling, sub folders will not be processed 104 | --no-check404 No HTTP 404 existence check 105 | --full Process all sub directories 106 | --fp, --fingerprint Disable rule and script scan, only check fingerprint 107 | 108 | Script Based SCAN: 109 | 110 | --scripts-only Scan with user scripts only 111 | --script [ScriptName [ScriptName ...]] 112 | Execute specified scripts only 113 | --no-scripts Disable all scripts 114 | 115 | CONCURRENT: 116 | 117 | -p PROCESS Num of processes running concurrently, 30 by default 118 | -t THREADS Num of scan threads for each scan process, 3 by default 119 | 120 | OTHER: 121 | 122 | --proxy Proxy Set HTTP proxy server 123 | --timeout Timeout Max scan minutes for each target, 10 by default 124 | --api Gather and display all API interfaces extracted from .js file 125 | --save-ports PortsDataFile 126 | Save open ports to PortsDataFile 127 | --debug Show verbose debug info 128 | --no-browser Do not open web browser to view report 129 | 130 | ``` -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lijiejie/BBScan/29b9f11b1a33a18e6d755a80e3fefa6a01aa48f2/lib/__init__.py -------------------------------------------------------------------------------- /lib/cmdline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # 4 | # Parse command line arguments 5 | # 6 | 7 | 8 | import argparse 9 | import sys 10 | import os 11 | import glob 12 | import re 13 | import codecs 14 | from lib.config import version 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(prog='BBScan', 19 | formatter_class=argparse.RawTextHelpFormatter, 20 | description='* A fast vulnerability Scanner. 
*\n' 21 | '* Find sensitive info disclosure vulnerabilities ' 22 | 'from large number of targets *\n' 23 | 'By LiJieJie (https://www.lijiejie.com)', 24 | usage='BBScan.py [options]') 25 | 26 | group_target = parser.add_argument_group('Targets', '') 27 | group_target.add_argument('--host', metavar='HOST', type=str, default='', nargs='*', 28 | help='Scan several hosts from command line') 29 | group_target.add_argument('-f', metavar='TargetFile', type=str, default='', 30 | help='Load new line delimited targets from TargetFile') 31 | group_target.add_argument('-d', metavar='TargetDirectory', type=str, default='', 32 | help='Load all *.txt files from TargetDirectory') 33 | group_target.add_argument('--crawler', metavar='CrawlDirectory', type=str, default='', 34 | help='Load all *.log crawl files from CrawlDirectory') 35 | group_target.add_argument('--network', metavar='MASK', type=int, default=32, 36 | help='Scan all Target/MASK neighbour hosts, \nshould be an integer between 8 and 31') 37 | group_target.add_argument('--skip', '--skip-intranet', dest='skip_intranet', 38 | default=False, action='store_true', 39 | help='Do not scan private IPs, when you are not under the same network with the target') 40 | 41 | group_http = parser.add_argument_group('Rule Based SCAN', '') 42 | group_http.add_argument('--rule', metavar='RuleFileName', type=str, default='', nargs='*', 43 | help='Import specified rule files only.') 44 | group_http.add_argument('-n', '--no-crawl', dest='no_crawl', default=False, action='store_true', 45 | help='No crawling, sub folders will not be processed') 46 | group_http.add_argument('--no-check404', dest='no_check404', default=False, action='store_true', 47 | help='No HTTP 404 existence check') 48 | group_http.add_argument('--full', dest='full_scan', default=False, action='store_true', 49 | help='Process all sub directories') 50 | group_http.add_argument('--fp', '--fingerprint', dest='fingerprint_only', default=False, action='store_true', 51 | help='Disable rule and script scan, only check fingerprint') 52 | 53 | group_scripts = parser.add_argument_group('Script Based SCAN', '') 54 | group_scripts.add_argument('--scripts-only', dest='scripts_only', default=False, action='store_true', 55 | help='Scan with user scripts only') 56 | group_scripts.add_argument('--script', metavar='ScriptName', type=str, default='', nargs='*', 57 | help='Execute specified scripts only') 58 | group_scripts.add_argument('--no-scripts', dest='no_scripts', default=False, action='store_true', 59 | help='Disable all scripts') 60 | 61 | group_concurrent = parser.add_argument_group('CONCURRENT', '') 62 | group_concurrent.add_argument('-p', metavar='PROCESS', type=int, default=30, 63 | help='Num of processes running concurrently, 30 by default') 64 | group_concurrent.add_argument('-t', metavar='THREADS', type=int, default=3, 65 | help='Num of scan threads for each scan process, 3 by default') 66 | 67 | group_other = parser.add_argument_group('OTHER', '') 68 | 69 | group_other.add_argument('--proxy', metavar='Proxy', type=str, default=None, 70 | help='Set HTTP proxy server') 71 | 72 | group_other.add_argument('--timeout', metavar='Timeout', type=int, default=10, 73 | help='Max scan minutes for each target, 10 by default') 74 | 75 | # Disabled for now, will be added back later 76 | # group_other.add_argument('-md', default=False, action='store_true', 77 | # help='Save scan report as markdown format') 78 | 79 | group_other.add_argument('--api', default=False, action='store_true', 80 | help='Gather and display all 
API interfaces extracted from .js file') 81 | 82 | group_other.add_argument('--save-ports', metavar='PortsDataFile', dest='save_ports', type=str, default='', 83 | help='Save open ports to PortsDataFile') 84 | 85 | group_other.add_argument('--debug', default=False, action='store_true', 86 | help='Show verbose debug info') 87 | 88 | group_other.add_argument('--no-browser', dest='no_browser', default=False, action='store_true', 89 | help='Do not open web browser to view report') 90 | 91 | group_other.add_argument('-v', action='version', 92 | version='%(prog)s ' + version + ' (https://github.com/lijiejie/BBScan)') 93 | 94 | if len(sys.argv) == 1: 95 | sys.argv.append('-h') 96 | 97 | args = parser.parse_args() 98 | check_args(args) 99 | if args.f: 100 | args.input_files = [args.f] 101 | elif args.d: 102 | args.input_files = glob.glob(args.d + '/*.txt') 103 | elif args.crawler: 104 | args.input_files = ['crawler'] 105 | elif args.host: 106 | args.input_files = ['hosts'] 107 | 108 | return args 109 | 110 | 111 | def check_args(args): 112 | if not (args.f or args.d or args.host or args.crawler): 113 | msg = 'Args missing! One of following args needs to be specified \n' \ 114 | ' -f TargetFile \n' \ 115 | ' -d TargetDirectory \n' \ 116 | ' --crawler TargetDirectory \n' \ 117 | ' --host www.host1.com www.host2.com 8.8.8.8' 118 | print(msg) 119 | exit(-1) 120 | 121 | if args.f and not os.path.isfile(args.f): 122 | print('[ERROR] TargetFile not found: %s' % args.f) 123 | exit(-1) 124 | 125 | if args.d and not os.path.isdir(args.d): 126 | print('[ERROR] TargetDirectory not found: %s' % args.d) 127 | exit(-1) 128 | 129 | args.network = int(args.network) 130 | if not (8 <= args.network <= 32): 131 | print('[ERROR] Network should be an integer between 24 and 31') 132 | exit(-1) 133 | 134 | args.rule_files = [] 135 | if args.rule: 136 | for rule_name in args.rule: 137 | if not rule_name.endswith('.txt'): 138 | rule_name += '.txt' 139 | if not os.path.exists('rules/%s' % rule_name): 140 | print('[ERROR] Rule file not found: %s' % rule_name) 141 | exit(-1) 142 | args.rule_files.append('rules/%s' % rule_name) 143 | 144 | args.require_no_http = True # all scripts do not need http conn pool 145 | args.require_index_doc = False # scripts need index html doc 146 | args.require_ports = set() # ports need by scripts 147 | pattern = re.compile(r'ports_to_check.*?=(.*)') 148 | 149 | if not args.no_scripts: 150 | if args.script: 151 | for script_name in args.script: 152 | if not script_name.lower().endswith('.py'): 153 | script_name += '.py' 154 | if not os.path.exists('scripts/%s' % script_name): 155 | print('* Script file not found: %s' % script_name) 156 | exit(-1) 157 | 158 | for _script in glob.glob('scripts/*.py'): 159 | script_name_origin = os.path.basename(_script) 160 | script_name = script_name_origin.replace('.py', '') 161 | if args.script and script_name not in args.script and script_name_origin not in args.script: 162 | continue 163 | if script_name.startswith('_'): 164 | continue 165 | with codecs.open(_script, encoding='utf-8') as f: 166 | content = f.read() 167 | if content.find('self.http_request') > 0: 168 | args.require_no_http = False 169 | if content.find('self.index_') > 0: 170 | args.require_no_http = False 171 | args.require_index_doc = True 172 | 173 | m = pattern.search(content) 174 | if m: 175 | m_str = m.group(1).strip() 176 | if m_str.find('#') > 0: # remove comments 177 | m_str = m_str[:m_str.find('#')] 178 | if m_str.find('[') < 0: 179 | if int(m_str) not in args.require_ports: 180 | 
args.require_ports.add(int(m_str)) 181 | else: 182 | for port in eval(m_str): 183 | if port not in args.require_ports: 184 | args.require_ports.add(int(port)) 185 | 186 | # save open ports to file 187 | if args.save_ports: 188 | args.ports_file = None 189 | 190 | if args.proxy and args.proxy.find('://') < 0: 191 | args.proxy = 'http://%s' % args.proxy 192 | 193 | # 只需要指纹识别时,不需要404检查,也不需要抓取子页 194 | if args.fingerprint_only: 195 | args.no_check404 = True 196 | # args.no_crawl = True 197 | -------------------------------------------------------------------------------- /lib/cms_fingerprints.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # Indentify web app fingerprints: framework, programming languages, web server, CMS, 3 | # middle-ware, open source software or commercial product etc 4 | # Rules copy from https://github.com/0x727/FingerprintHub 5 | 6 | import hashlib 7 | import os 8 | import json 9 | import codecs 10 | import httpx 11 | 12 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 13 | rule_dir = os.path.join(cur_dir, '../rules/web_fingerprint_v3.json') 14 | 15 | 16 | class Fingerprint(object): 17 | def __init__(self): 18 | self.fav_icons = {} 19 | self.requests_to_do = {} 20 | self.rules = {} 21 | 22 | with codecs.open(rule_dir, encoding='utf-8') as f: 23 | doc = json.loads(f.read()) 24 | 25 | for rule in doc: 26 | # 处理fav hash 27 | if rule['favicon_hash']: 28 | for _hash in rule['favicon_hash']: 29 | if _hash: 30 | self.fav_icons[_hash] = rule['name'] 31 | 32 | key = '^^^'.join([rule['path'], rule['request_method'], 33 | str(rule['request_headers']), rule['request_data']]) 34 | self.requests_to_do[key] = [rule['path'], rule['request_method'], 35 | rule['request_headers'], rule['request_data']] 36 | if key not in self.rules: 37 | self.rules[key] = [] 38 | self.rules[key].append(rule) 39 | 40 | def get_cms_name_via_icon(self, favicon_hash): 41 | if favicon_hash in self.fav_icons: 42 | return self.fav_icons[favicon_hash] 43 | else: 44 | return 45 | 46 | def get_cms_name(self, key_name, status_code, headers, text, favicon_hash=None): 47 | cms_names = [] 48 | for rule in self.rules[key_name]: 49 | if rule['status_code'] != 0: 50 | # 200 和 206 单独检查 51 | if rule['status_code'] in [200, 206] and status_code in [200, 206]: 52 | pass 53 | else: 54 | if rule['status_code'] != status_code: # code mismatch 55 | continue 56 | mismatch = False 57 | if rule['headers']: 58 | for header_name in rule['headers']: 59 | if rule['headers'][header_name] == '*' and header_name in headers: 60 | continue 61 | if headers.get(header_name, '').find(rule['headers'][header_name]) < 0: 62 | mismatch = True 63 | break 64 | if mismatch: 65 | continue 66 | if rule['keyword']: 67 | for word in rule['keyword']: 68 | if text.lower().find(word) < 0 and text.find(word) < 0: 69 | mismatch = True 70 | break 71 | if mismatch: 72 | continue 73 | if rule['favicon_hash'] and favicon_hash != rule['favicon_hash']: 74 | continue 75 | if rule['name'] not in cms_names: 76 | cms_names.append(rule['name']) 77 | return cms_names 78 | 79 | 80 | if __name__ == '__main__': 81 | from config import default_headers 82 | import copy 83 | f = Fingerprint() 84 | 85 | client = httpx.Client() 86 | data = client.get('https://demo.jumpserver.org/static/img/facio.ico').read() 87 | fav_hash = hashlib.md5(data).hexdigest() 88 | if fav_hash in f.fav_icons: 89 | print('From fav hash:', f.fav_icons[fav_hash]) 90 | 91 | url = 'http://example.com/' 92 | 93 | for key_name in 
f.rules: 94 | item = f.requests_to_do[key_name] 95 | print(key_name) 96 | print() 97 | 98 | if item[2]: 99 | headers = copy.deepcopy(default_headers) 100 | headers.update(item[2]) # update headers 101 | else: 102 | headers = default_headers 103 | 104 | resp = None 105 | if item[1].lower() == 'get': 106 | resp = client.get(url.rstrip('/') + item[0], headers=headers) 107 | elif item[1].lower() == 'post': 108 | data = item[3] 109 | resp = client.post(url.rstrip('/') + item[0], headers=headers, data=item[3]) 110 | else: 111 | raise Exception('invalid method') 112 | 113 | if resp: 114 | cms_name = f.get_cms_name(key_name, resp.status_code, resp.headers, resp.text) 115 | if cms_name: 116 | print('cms name is:', cms_name) 117 | -------------------------------------------------------------------------------- /lib/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # 4 | 5 | from urllib.parse import urlparse 6 | import re 7 | import asyncio 8 | import platform 9 | import socket 10 | import dns.asyncresolver 11 | import time 12 | 13 | 14 | def get_dns_resolver(): 15 | resolver = dns.asyncresolver.Resolver() 16 | for server in ['114.114.114.114', '180.76.76.76', '8.8.8.8']: # Add public DNS Server 17 | if server not in resolver.nameservers: 18 | resolver.nameservers.append(server) 19 | return resolver 20 | 21 | 22 | if platform.system() == 'Windows': 23 | try: 24 | def _call_connection_lost(self, exc): 25 | try: 26 | self._protocol.connection_lost(exc) 27 | finally: 28 | if hasattr(self._sock, 'shutdown'): 29 | try: 30 | if self._sock.fileno() != -1: 31 | self._sock.shutdown(socket.SHUT_RDWR) 32 | except Exception as e: 33 | pass 34 | self._sock.close() 35 | self._sock = None 36 | server = self._server 37 | if server is not None: 38 | server._detach() 39 | self._server = None 40 | 41 | asyncio.proactor_events._ProactorBasePipeTransport._call_connection_lost = _call_connection_lost 42 | except Exception as e: 43 | pass 44 | 45 | 46 | def is_ip_addr(s): 47 | pattern_ip = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$') 48 | ret = pattern_ip.search(s) 49 | return True if ret else False 50 | 51 | 52 | def clear_queue(this_queue): 53 | try: 54 | while True: 55 | this_queue.get_nowait() 56 | except Exception as e: 57 | return 58 | 59 | 60 | def parse_url(url): 61 | _ = urlparse(url, 'http') 62 | if not _.netloc: 63 | _ = urlparse('https://' + url, 'http') 64 | return _.scheme, _.netloc, _.path if _.path else '/' 65 | 66 | 67 | # calculate depth of a given URL, return tuple (url, depth) 68 | def cal_depth(self, url): 69 | if url.find('#') >= 0: 70 | url = url[:url.find('#')] # cut off fragment 71 | if url.find('?') >= 0: 72 | url = url[:url.find('?')] # cut off query string 73 | 74 | while url.find('/./') >= 0: 75 | url = url.replace('/./', '/') 76 | 77 | if url.startswith('//'): 78 | return '', 10000 # //www.baidu.com/index.php 79 | 80 | if not urlparse(url, 'http').scheme.startswith('http'): 81 | return '', 10000 # no HTTP protocol 82 | 83 | if url.lower().startswith('http'): 84 | _ = urlparse(url, 'http') 85 | if _.netloc == self.host or _.netloc == '%s:%s' % (self.host, self.port): # same hostname 86 | url = _.path 87 | else: 88 | return '', 10000 # not the same hostname 89 | 90 | while url.find('//') >= 0: 91 | url = url.replace('//', '/') 92 | 93 | if not url: 94 | return '/', 1 # http://www.example.com 95 | 96 | if url[0] != '/': 97 | url = '/' + url 98 | 99 | url = url[: url.rfind('/') + 1] 100 | 
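    # if the last path segment looks like a file name (it contains a dot), drop it and keep only its parent folder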
101 | if url.split('/')[-2].find('.') > 0: 102 | url = '/'.join(url.split('/')[:-2]) + '/' 103 | 104 | depth = url.count('/') 105 | # print('cal_depth', url, depth) 106 | return url, depth 107 | 108 | 109 | async def save_script_result(self, status, url, title, vul_type=''): 110 | async with self.lock: 111 | # print '[+] [%s] %s' % (status, url) 112 | if url not in self.results: 113 | self.results[url] = [] 114 | _ = {'status': status, 'url': url, 'title': title, 'vul_type': vul_type} 115 | self.results[url].append(_) 116 | 117 | 118 | def get_domain_sub(host): 119 | if re.search(r'\d+\.\d+\.\d+\.\d+', host.split(':')[0]): 120 | return '' 121 | else: 122 | return host.split('.')[0] 123 | 124 | 125 | def escape(html): 126 | return html.replace('&', '&amp;').\ 127 | replace('<', '&lt;').replace('>', '&gt;').\ 128 | replace('"', '&quot;').replace("'", '&#39;') 129 | 130 | 131 | sem = asyncio.Semaphore(100) 132 | 133 | resolver = get_dns_resolver() 134 | 135 | 136 | async def is_port_open(host, port): 137 | if not port: 138 | return True 139 | 140 | try: 141 | async with sem: 142 | start_time = time.time() 143 | if not is_ip_addr(host): 144 | answers = await resolver.resolve(host, "A") 145 | host = answers[0].address 146 | 147 | fut = asyncio.open_connection(host, int(port)) 148 | reader, writer = await asyncio.wait_for(fut, timeout=10) 149 | writer.close() 150 | try: 151 | await writer.wait_closed() # application data after close notify (_ssl.c:2730) 152 | except Exception as e: 153 | print('is_port_open.wait_closed.exception:', type(e)) 154 | return True 155 | except (asyncio.exceptions.TimeoutError, ConnectionRefusedError) as e: 156 | pass 157 | except Exception as e: 158 | print('is_port_open.exception:', e.__class__.__name__, str(e), host, port, 159 | 'elapsed %.2f seconds' % (time.time() - start_time)) 160 | return False 161 | 162 | 163 | def get_http_title(html_doc): 164 | if not html_doc: 165 | return '' 166 | m = re.search('
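For reference, a minimal title extractor of this shape (a sketch only; the function name, exact regex and whitespace handling here are assumptions, not the project's verbatim code) could look like:

```
import re

def get_http_title_sketch(html_doc):
    # hypothetical reconstruction for illustration: return the text of the first <title> tag
    if not html_doc:
        return ''
    m = re.search(r'<title>(.*?)</title>', html_doc, re.IGNORECASE | re.DOTALL)
    return m.group(1).strip() if m else ''
```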
[HTML report template — markup stripped; recoverable placeholders:]
198-199 | Summary line: Scanned ${tasks_processed_count} targets in ${cost_min} ${cost_seconds} seconds. ${vulnerable_hosts_count} vulnerable hosts found in total.
200-218 | Per-target results table — columns: Target | Fingerprint | Status | Web Server | Title | Response Headers; cell placeholders: ${url}, ${fingerprint}, ${status}, ${server}, ${title}, ${headers}
223-225 | Per-finding rows: ${vul_type}, ${status}, ${title}, ${url}
231 | API endpoints section: ${api_urls}
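The `${...}` fields above look like Python `string.Template` placeholders. As a minimal sketch of how such a summary line can be rendered (the template text is copied from the fields above, but `summary_tpl` and the sample values are invented for illustration; the real template in the library source carries the full HTML markup):

```
from string import Template

summary_tpl = Template(
    'Scanned ${tasks_processed_count} targets in ${cost_min} ${cost_seconds} seconds. '
    '${vulnerable_hosts_count} vulnerable hosts found in total.'
)

# substitute() raises KeyError for missing placeholders; safe_substitute() would leave them untouched
print(summary_tpl.substitute(
    tasks_processed_count=120,
    cost_min='2 min',
    cost_seconds=35,
    vulnerable_hosts_count=7,
))
```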
The server encountered an internal error or"} 17 | 18 | {text="http://www.qq.com/babygohome/?pgv_ref=404"} 19 | 20 | {text="