├── 1.txt
├── README.md
└── scanner.py

--------------------------------------------------------------------------------
/1.txt:
--------------------------------------------------------------------------------
8.142.115.47:8020
36.111.150.14
https://47.114.89.157:8082/
116.62.208.139:8081
https://47.114.89.157:8082
120.55.96.60:8062
183.157.107.130:8008
https://121.43.105.118
https://121.43.105.139
125.117.178.170:8181
183.130.3.149:8089
120.77.81.20:443
47.113.118.151:8061
47.102.120.213:8059

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# index-of-scanner

A coroutine-based concurrent scanner for sensitive web files. It pinpoints exposed backup files, leaked configuration, and publicly reachable version-control directories, providing professional-grade asset mapping for penetration testing and security hardening.

![Python version](https://img.shields.io/badge/Python-3.8%2B-blue)
![License](https://img.shields.io/badge/License-MIT-green)
![Release](https://img.shields.io/badge/Release-v2.1.0-orange)

A professional-grade scanner for sensitive web files, built as an asset-mapping solution for penetration-testing engineers.

## 🚀 Core Features

- **Smart sensitive-file detection**
  Accurately identifies 12 classes of sensitive assets, including `backup files`, `version-control directories`, and `keys/certificates`
- **Triple detection engine**
  Three-way verification: `extension matching` + `path regex` + `compound-archive detection`
- **Efficient concurrent scanning**
  Dynamic coroutine control (50-200 concurrent requests), averaging roughly 1,500 URLs per minute
- **Smart deduplication**
  Backed by a scalable Bloom filter; memory footprint stays under 2 MB per 10,000 URLs
- **Professional reporting**
  Automatically generates Excel-compatible CSV reports (UTF-8-SIG encoding)
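
The deduplication layer is `pybloom_live`'s `ScalableBloomFilter`, constructed with the same parameters as in `ScannerPro.__init__`. A minimal, standalone sketch of the idea (the URLs are illustrative):

```python
# Sketch of the URL-dedup approach; URLs here are examples only.
from pybloom_live import ScalableBloomFilter

seen = ScalableBloomFilter(initial_capacity=100000, error_rate=0.001)

for url in ["http://example.com/db.sql", "http://example.com/db.sql"]:
    if url in seen:   # probabilistic membership test
        continue
    seen.add(url)
    print("scheduling", url)  # only the first occurrence is scheduled
```

A Bloom filter may report rare false positives (a URL wrongly treated as already seen) but never false negatives, an acceptable trade-off for skipping duplicate scan targets at this memory footprint.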
## 🛠️ Usage Guide

### Basic scan
```bash
# Command-line mode
python scanner.py targets.txt

# Interactive mode
python scanner.py
> Enter the path to the targets file: targets.txt
```

### Target file format
Example `targets.txt`:
```text
http://example.com
admin.example.com/api/
192.168.1.100:8080
```

### Live output preview
![image](https://github.com/user-attachments/assets/e20de44f-ea8e-40a7-b355-0b99cc858cac)

## 📊 Sample Report

Example `security_audit_report_20240520_143200.csv`:

| Risk level | URL | Detection basis |
|------------|-----|-----------------|
| Critical | http://example.com/.git/ | path match: `\.(git\|svn)/` |
| Critical | http://example.com/db.sql | sensitive extension: `sql` |

## ⚙️ Configuration

Adjust the scan policy by editing the `CONFIG` dictionary:
```python
CONFIG = {
    "max_depth": 3,                   # crawl depth
    "concurrency_range": (50, 200),   # dynamic concurrency range
    "sensitive_ext": {                # extension blacklist
        'sql', 'bak', 'pem', ...
    },
    "sensitive_paths": [              # path regex rules
        re.compile(r'/(backup|archive)/', re.I),
        ...
    ]
}
```
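
The rule lists compose by plain mutation; the additions below are hypothetical examples, not part of the shipped `CONFIG`:

```python
# Hypothetical policy extensions -- examples only, not in the default CONFIG.
import re
from scanner import CONFIG

CONFIG["sensitive_paths"].append(
    re.compile(r'/\.(idea|vscode)/', re.I)  # exposed IDE metadata directories
)
CONFIG["sensitive_ext"].add('tfstate')      # Terraform state can embed secrets
```

Rules added this way take effect for every subsequent check, since `_is_sensitive` reads the lists at call time.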
## ⚠️ Notes

1. Scan only systems you are authorized to test; unauthorized scanning is prohibited
2. Trial the tool in an isolated environment before using it against production systems
3. Tune `concurrency_range` to balance speed against resource usage
4. Scan logs are written to `scan_pro.log` in the working directory

## 📜 License

Released under the [MIT License](LICENSE); use for illegal purposes is prohibited.

---

**Changelog**
New in `v2.1.0`:
- Smart interactive startup mode
- Dynamic progress monitoring
- Improved CSV encoding for Chinese text
- Automatic blocking of high-risk ports

--------------------------------------------------------------------------------
/scanner.py:
--------------------------------------------------------------------------------
import os
import sys
import re
import csv
import time
import signal
import asyncio
import aiohttp
from urllib.parse import urlparse, urlunparse, urljoin, parse_qs
from collections import defaultdict
from typing import Set, Tuple, Dict

from bs4 import BeautifulSoup
from pybloom_live import ScalableBloomFilter
from fake_useragent import UserAgent
import logging

# Guard against deep recursion while parsing
sys.setrecursionlimit(10000)

# Military-grade scan configuration
CONFIG = {
    "max_depth": 3,
    "request_timeout": 35,
    "concurrency_range": (50, 200),
    "forbidden_ports": {22, 3306, 3389},
    "sensitive_ext": {
        'config', 'ini', 'env', 'zip', 'rar', '7z', 'tar', 'gz', 'bz2', 'xz',
        'bak', 'key', 'conf', 'properties', 'sql', 'db', 'dbf', 'pem', 'crt',
        'jks', 'p12', 'audit', 'dmg', 'iso', 'img', 'vmdk', 'apk', 'jar'
    },
    "sensitive_paths": [
        re.compile(r'/(backup|archive)/', re.I),
        re.compile(r'\.(git|svn)/', re.I)
    ],
    "ignore_ext": {'png', 'jpg', 'jpeg', 'gif'}
}
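
# Illustrative classifications under the rules above (examples only):
#   /backup/site.tar.gz -> flagged: compound archive 'tar.gz'
#                          (the '/(backup|archive)/' regex would match as well)
#   /.git/config        -> flagged: path regex '\.(git|svn)/'
#   /logo.png           -> never scheduled: extension listed in "ignore_ext"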

# Enterprise-grade logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("scan_pro.log", mode='a', encoding='utf-8'),
        logging.StreamHandler()
    ]
)


class ScannerPro:
    def __init__(self):
        """Initialize the scan engine."""
        self.dedup_filter = ScalableBloomFilter(initial_capacity=100000, error_rate=0.001)
        self.ua = UserAgent()
        self.stats = defaultdict(int)
        self.findings = defaultdict(list)
        self.blocked_domains: Set[str] = set()
        self.active_tasks: Dict[str, Set[asyncio.Task]] = defaultdict(set)
        self.concurrency_ctrl = asyncio.Semaphore(CONFIG["concurrency_range"][1])
        self._shutdown = False
        self.session = None
        self.scanned_domains: Set[str] = set()  # domains that have been scanned
        signal.signal(signal.SIGINT, self._graceful_shutdown)

    async def _scan_worker(self, url: str, depth: int = 0):
        """Scan worker with lightning-fast cancellation."""
        if self._shutdown:
            raise asyncio.CancelledError("Shutdown requested")

        try:
            normalized_url = await self._normalize_url(url)
            parsed = urlparse(normalized_url)
            full_domain = parsed.netloc

            if full_domain in self.blocked_domains:
                return

            # Record the domain as scanned
            self.scanned_domains.add(full_domain)

            is_sensitive, reason = self._is_sensitive(normalized_url)
            if is_sensitive:
                self.findings["critical"].append({
                    "url": normalized_url,
                    "reason": reason
                })
                self.blocked_domains.add(full_domain)
                logging.critical(f"🚨 Sensitive file found, blocking domain [{full_domain}]")
                await self._cancel_domain_tasks(full_domain)
                return

            async with self.concurrency_ctrl:
                async with self.session.get(
                    normalized_url,
                    allow_redirects=False,
                    timeout=aiohttp.ClientTimeout(total=CONFIG["request_timeout"])
                ) as resp:
                    self.stats['total_requests'] += 1

                    # Crawl HTML responses for further links, bounded by CONFIG["max_depth"]
                    if depth < CONFIG["max_depth"] and 'text/html' in resp.headers.get('Content-Type', ''):
                        content = await resp.text()
                        soup = BeautifulSoup(content, 'lxml')
                        links = [urljoin(normalized_url, tag['href']) for tag in soup.select('a[href]')]
                        await self._schedule_tasks(links, depth + 1)

        except ValueError as e:
            # Raised by _normalize_url for forbidden ports or blocked domains
            logging.debug(f"Target skipped: {e}")
        except (aiohttp.ClientError, asyncio.CancelledError) as e:
            if isinstance(e, asyncio.CancelledError):
                raise  # re-raise immediately so cancellation stays fast
            self.stats['failed_requests'] += 1
            logging.debug(f"Request error: {e}")
        except Exception as e:
            self.stats['failed_requests'] += 1
            logging.error(f"Unexpected error: {e}")

    async def run(self, targets: list):
        """Main entry point with lightning-fast shutdown."""
        try:
            async with aiohttp.ClientSession(
                headers={"User-Agent": self.ua.random},
                connector=aiohttp.TCPConnector(
                    ssl=False,
                    limit=200,
                    limit_per_host=20
                )
            ) as self.session:
                monitor_task = asyncio.create_task(self._progress_monitor())
                main_task = asyncio.create_task(self._schedule_tasks(targets, 0))

                try:
                    await asyncio.wait_for(main_task, timeout=3600)
                except (asyncio.CancelledError, KeyboardInterrupt, asyncio.TimeoutError):
                    self._shutdown = True
                    logging.warning("Emergency shutdown in progress...")

                    # Lightning-fast termination: cancel every outstanding task
                    all_tasks = {t for tasks in self.active_tasks.values() for t in tasks}
                    for task in all_tasks:
                        task.cancel()

                    # Brief wait (at most 2 s); asyncio.wait requires a non-empty set
                    if all_tasks:
                        await asyncio.wait(
                            all_tasks,
                            timeout=min(2.0, len(all_tasks) * 0.01),
                            return_when=asyncio.ALL_COMPLETED
                        )
                finally:
                    monitor_task.cancel()
                    try:
                        await monitor_task
                    except asyncio.CancelledError:
                        pass

                # The async-with context closes the session; generate the report right away
                report_file = await self.generate_report()
                if report_file:
                    print(f"\n🔚 Scan finished | report file: {os.path.abspath(report_file)}")
                print(f"Domains scanned: {len(self.scanned_domains)}")
        except Exception as e:
            logging.error(f"Session error: {e}")
            raise

    def _graceful_shutdown(self, signum, frame):
        """Graceful shutdown on SIGINT."""
        logging.warning("Termination signal received, saving scan state...")
        self._shutdown = True

    async def _normalize_url(self, raw_url: str) -> str:
        """URL normalization (military-grade)."""
        url = raw_url.strip()
        # Case-insensitive scheme check so 'HTTP://...' is not double-prefixed
        if not re.match(r'^https?://', url, re.I):
            url = f'http://{url}'

        parsed = urlparse(url)
        if parsed.port in CONFIG["forbidden_ports"]:
            raise ValueError(f"Access to high-risk port forbidden: {parsed.geturl()}")

        # Lowercase scheme/host only; server paths may be case-sensitive
        full_domain = parsed.netloc.lower()
        if full_domain in self.blocked_domains:
            raise ValueError(f"Domain already blocked: {full_domain}")

        # Query sanitization: drop tracking and auth-style parameters
        query = parse_qs(parsed.query)
        clean_query = '&'.join(
            f"{k}={v[0]}" for k, v in query.items()
            if not k.startswith(('utm_', 'token', 'auth'))
        )
        return urlunparse((
            parsed.scheme.lower(), full_domain, parsed.path.rstrip('/'),
            parsed.params, clean_query, parsed.fragment
        ))
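
    # Illustrative normalization result (example input only):
    #   'Example.COM:8080/data/?id=1&utm_source=x'
    #     -> 'http://example.com:8080/data?id=1'
    #   (scheme added, host lowercased, trailing slash stripped,
    #    tracking-style query parameters removed)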

    def _is_sensitive(self, url: str) -> Tuple[bool, str]:
        """Smart sensitive-resource detection."""
        parsed = urlparse(url)
        path = parsed.path.lower()

        if '.' in path:
            # Compound-archive detection on the last two dot-separated parts
            parts = path.split('.')
            combined_ext = '.'.join(parts[-2:])
            if combined_ext in {'tar.gz', 'tar.bz2', 'tar.xz'}:
                return True, f"compound archive: {combined_ext}"

            # Single-extension blacklist
            if (ext := parts[-1]) in CONFIG["sensitive_ext"]:
                return True, f"sensitive extension: {ext}"

        # Path regex rules
        for pattern in CONFIG["sensitive_paths"]:
            if pattern.search(path):
                return True, f"path match: {pattern.pattern}"

        return False, ""

    async def _schedule_tasks(self, urls: list, depth: int):
        """Enhanced task scheduling with dedup and per-domain bookkeeping."""
        tasks = []
        for url in {u for u in urls if u}:
            parsed = urlparse(url)
            domain = parsed.netloc

            # Skip static assets listed in "ignore_ext"
            if parsed.path.rsplit('.', 1)[-1].lower() in CONFIG["ignore_ext"]:
                continue

            if domain in self.blocked_domains or url in self.dedup_filter:
                continue

            self.dedup_filter.add(url)
            task = asyncio.create_task(
                self._scan_worker(url, depth),
                name=f"ScanWorker:{domain}"
            )

            # Bind 'domain' per iteration; a bare closure would see the last loop value
            def safe_remove(t, domain=domain):
                try:
                    self.active_tasks[domain].remove(t)
                except KeyError:
                    pass

            self.active_tasks[domain].add(task)
            task.add_done_callback(safe_remove)
            tasks.append(task)

        if tasks:
            try:
                await asyncio.wait_for(
                    asyncio.shield(asyncio.gather(*tasks, return_exceptions=True)),
                    timeout=CONFIG["request_timeout"] * 2
                )
            except (asyncio.TimeoutError, asyncio.CancelledError):
                pass

    async def _cancel_domain_tasks(self, domain: str):
        """Atomic termination of a blocked domain's task group."""
        if domain not in self.active_tasks:
            return

        cancel_tasks = self.active_tasks.pop(domain)
        # Never cancel or await the task that triggered the block
        cancel_tasks.discard(asyncio.current_task())
        if not cancel_tasks:
            return

        logging.info(f"🛑 Cancelling task group [{domain}], count: {len(cancel_tasks)}")

        # Batch lightning cancellation
        for task in cancel_tasks:
            if not task.done():
                task.cancel()

        # Brief wait (at most 500 ms)
        await asyncio.wait(
            cancel_tasks,
            timeout=min(0.5, len(cancel_tasks) * 0.001),
            return_when=asyncio.ALL_COMPLETED
        )

    def _emergency_cancel(self, task: asyncio.Task):
        """Millisecond-level task termination (helper, currently unused)."""
        try:
            task.cancel()
        except Exception:
            pass

    async def _progress_monitor(self):
        """Live performance monitor."""
        start = time.time()
        while not self._shutdown:
            elapsed = time.time() - start
            sys.stdout.write(
                f"\r🚀 Scanning | requests: {self.stats['total_requests']} | "
                f"blocked: {len(self.blocked_domains)} | "
                f"critical: {len(self.findings['critical'])} | "
                f"elapsed: {elapsed:.1f}s"
            )
            sys.stdout.flush()
            await asyncio.sleep(0.5)

    async def generate_report(self):
        """Generate a concise CSV report."""
        filename = f"security_audit_report_{time.strftime('%Y%m%d_%H%M%S')}.csv"
        try:
            # UTF-8-SIG lets Excel detect the encoding correctly
            with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.writer(f)
                writer.writerow(["Risk level", "URL", "Detection basis"])
                for item in self.findings["critical"]:
                    writer.writerow(["critical", item["url"], item["reason"]])
            logging.info(f"Report path: {os.path.abspath(filename)}")
            return filename
        except Exception as e:
            logging.error(f"Report generation failed: {e}")
            return None


if __name__ == "__main__":
    targets = []
    try:
        if len(sys.argv) == 1:
            print("\n🔐 index-of security scanner v2.3")
            print("━" * 40)
            input_file = input("Enter the path to the targets file: ").strip(' "\'')
            if not os.path.exists(input_file):
                raise FileNotFoundError(f"File not found: {input_file}")
            with open(input_file, encoding='utf-8') as f:
                targets = [ln.strip() for ln in f if ln.strip()]

        elif len(sys.argv) == 2:
            with open(sys.argv[1], encoding='utf-8') as f:
                targets = [ln.strip() for ln in f if ln.strip()]

        else:
            print("Invalid arguments")
            print("Usage: python scanner.py [targets file]")
            sys.exit(1)

        engine = ScannerPro()
        asyncio.run(engine.run(targets))

    except KeyboardInterrupt:
        print("\n⚠️ Interrupted by user")
    except FileNotFoundError as e:
        print(f"\n❌ File error: {e}")
    except Exception as e:
        print(f"\n‼️ System error: {e}")
        logging.exception("Fatal error")

--------------------------------------------------------------------------------