├── 1.txt
├── README.md
└── scanner.py

--------------------------------------------------------------------------------
/1.txt:
--------------------------------------------------------------------------------
8.142.115.47:8020
36.111.150.14
https://47.114.89.157:8082/
116.62.208.139:8081
https://47.114.89.157:8082
120.55.96.60:8062
183.157.107.130:8008
https://121.43.105.118
https://121.43.105.139
125.117.178.170:8181
183.130.3.149:8089
120.77.81.20:443
47.113.118.151:8061
47.102.120.213:8059

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# index-of-scanner

A coroutine-based concurrent scanner for sensitive web files. It pinpoints exposed backup files, leaked configuration, and publicly reachable version-control directories, providing professional-grade asset mapping for penetration testing and security hardening.

![Python version](https://img.shields.io/badge/Python-3.8%2B-blue)
![License](https://img.shields.io/badge/License-MIT-green)
![Release](https://img.shields.io/badge/Release-v2.1.0-orange)

A professional-grade scanner for sensitive web files, built as an asset-mapping solution for penetration-testing engineers.

## 🚀 Core Features

- **Smart sensitive-file detection**
  Accurately identifies 12 classes of sensitive assets, including `backup files`, `version-control directories`, and `keys/certificates`
- **Triple detection engine**
  Three-way verification: `extension matching` + `path regex` + `compound-archive detection`
- **Efficient concurrent scanning**
  Dynamic coroutine control (50-200 concurrent requests), averaging roughly 1,500 URLs per minute
- **Smart deduplication**
  Backed by a scalable Bloom filter; memory footprint stays under 2 MB per 10,000 URLs
- **Professional reporting**
  Automatically generates Excel-compatible CSV reports (UTF-8-SIG encoding)
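
The deduplication layer is `pybloom_live`'s `ScalableBloomFilter`, constructed with the same parameters as in `ScannerPro.__init__`. A minimal, standalone sketch of the idea (the URLs are illustrative):

```python
# Sketch of the URL-dedup approach; URLs here are examples only.
from pybloom_live import ScalableBloomFilter

seen = ScalableBloomFilter(initial_capacity=100000, error_rate=0.001)

for url in ["http://example.com/db.sql", "http://example.com/db.sql"]:
    if url in seen:   # probabilistic membership test
        continue
    seen.add(url)
    print("scheduling", url)  # only the first occurrence is scheduled
```

A Bloom filter may report rare false positives (a URL wrongly treated as already seen) but never false negatives, an acceptable trade-off for skipping duplicate scan targets at this memory footprint.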
## 🛠️ Usage Guide

### Basic scan
```bash
# Command-line mode
python scanner.py targets.txt

# Interactive mode
python scanner.py
> Enter the path to the targets file: targets.txt
```

### Target file format
Example `targets.txt`:
```text
http://example.com
admin.example.com/api/
192.168.1.100:8080
```

### Live output preview
![image](https://github.com/user-attachments/assets/e20de44f-ea8e-40a7-b355-0b99cc858cac)

## 📊 Sample Report

Example `security_audit_report_20240520_143200.csv`:

| Risk level | URL | Detection basis |
|------------|-----|-----------------|
| Critical | http://example.com/.git/ | path match: `\.(git\|svn)/` |
| Critical | http://example.com/db.sql | sensitive extension: `sql` |

## ⚙️ Configuration

Adjust the scan policy by editing the `CONFIG` dictionary:
```python
CONFIG = {
    "max_depth": 3,                   # crawl depth
    "concurrency_range": (50, 200),   # dynamic concurrency range
    "sensitive_ext": {                # extension blacklist
        'sql', 'bak', 'pem', ...
    },
    "sensitive_paths": [              # path regex rules
        re.compile(r'/(backup|archive)/', re.I),
        ...
    ]
}
```
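
The rule lists compose by plain mutation; the additions below are hypothetical examples, not part of the shipped `CONFIG`:

```python
# Hypothetical policy extensions -- examples only, not in the default CONFIG.
import re
from scanner import CONFIG

CONFIG["sensitive_paths"].append(
    re.compile(r'/\.(idea|vscode)/', re.I)  # exposed IDE metadata directories
)
CONFIG["sensitive_ext"].add('tfstate')      # Terraform state can embed secrets
```

Rules added this way take effect for every subsequent check, since `_is_sensitive` reads the lists at call time.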
## ⚠️ Notes

1. Scan only systems you are authorized to test; unauthorized scanning is prohibited
2. Trial the tool in an isolated environment before using it against production systems
3. Tune `concurrency_range` to balance speed against resource usage
4. Scan logs are written to `scan_pro.log` in the working directory

## 📜 License

Released under the [MIT License](LICENSE); use for illegal purposes is prohibited.

---

**Changelog**
New in `v2.1.0`:
- Smart interactive startup mode
- Dynamic progress monitoring
- Improved CSV encoding for Chinese text
- Automatic blocking of high-risk ports

--------------------------------------------------------------------------------
/scanner.py:
--------------------------------------------------------------------------------
import os
import sys
import re
import csv
import time
import signal
import asyncio
import aiohttp
from urllib.parse import urlparse, urlunparse, urljoin, parse_qs
from collections import defaultdict
from typing import Set, Tuple, Dict

from bs4 import BeautifulSoup
from pybloom_live import ScalableBloomFilter
from fake_useragent import UserAgent
import logging

# Guard against deep recursion while parsing
sys.setrecursionlimit(10000)

# Military-grade scan configuration
CONFIG = {
    "max_depth": 3,
    "request_timeout": 35,
    "concurrency_range": (50, 200),
    "forbidden_ports": {22, 3306, 3389},
    "sensitive_ext": {
        'config', 'ini', 'env', 'zip', 'rar', '7z', 'tar', 'gz', 'bz2', 'xz',
        'bak', 'key', 'conf', 'properties', 'sql', 'db', 'dbf', 'pem', 'crt',
        'jks', 'p12', 'audit', 'dmg', 'iso', 'img', 'vmdk', 'apk', 'jar'
    },
    "sensitive_paths": [
        re.compile(r'/(backup|archive)/', re.I),
        re.compile(r'\.(git|svn)/', re.I)
    ],
    "ignore_ext": {'png', 'jpg', 'jpeg', 'gif'}
}
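
# Illustrative classifications under the rules above (examples only):
#   /backup/site.tar.gz -> flagged: compound archive 'tar.gz'
#                          (the '/(backup|archive)/' regex would match as well)
#   /.git/config        -> flagged: path regex '\.(git|svn)/'
#   /logo.png           -> never scheduled: extension listed in "ignore_ext"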

# Enterprise-grade logging configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    handlers=[
        logging.FileHandler("scan_pro.log", mode='a', encoding='utf-8'),
        logging.StreamHandler()
    ]
)


class ScannerPro:
    def __init__(self):
        """Initialize the scan engine."""
        self.dedup_filter = ScalableBloomFilter(initial_capacity=100000, error_rate=0.001)
        self.ua = UserAgent()
        self.stats = defaultdict(int)
        self.findings = defaultdict(list)
        self.blocked_domains: Set[str] = set()
        self.active_tasks: Dict[str, Set[asyncio.Task]] = defaultdict(set)
        self.concurrency_ctrl = asyncio.Semaphore(CONFIG["concurrency_range"][1])
        self._shutdown = False
        self.session = None
        self.scanned_domains: Set[str] = set()  # domains that have been scanned
        signal.signal(signal.SIGINT, self._graceful_shutdown)

    async def _scan_worker(self, url: str, depth: int = 0):
        """Scan worker with lightning-fast cancellation."""
        if self._shutdown:
            raise asyncio.CancelledError("Shutdown requested")

        try:
            normalized_url = await self._normalize_url(url)
            parsed = urlparse(normalized_url)
            full_domain = parsed.netloc

            if full_domain in self.blocked_domains:
                return

            # Record the domain as scanned
            self.scanned_domains.add(full_domain)

            is_sensitive, reason = self._is_sensitive(normalized_url)
            if is_sensitive:
                self.findings["critical"].append({
                    "url": normalized_url,
                    "reason": reason
                })
                self.blocked_domains.add(full_domain)
                logging.critical(f"🚨 Sensitive file found, blocking domain [{full_domain}]")
                await self._cancel_domain_tasks(full_domain)
                return

            async with self.concurrency_ctrl:
                async with self.session.get(
                    normalized_url,
                    allow_redirects=False,
                    timeout=aiohttp.ClientTimeout(total=CONFIG["request_timeout"])
                ) as resp:
                    self.stats['total_requests'] += 1

                    # Crawl HTML responses for further links, bounded by CONFIG["max_depth"]
                    if depth < CONFIG["max_depth"] and 'text/html' in resp.headers.get('Content-Type', ''):
                        content = await resp.text()
                        soup = BeautifulSoup(content, 'lxml')
                        links = [urljoin(normalized_url, tag['href']) for tag in soup.select('a[href]')]
                        await self._schedule_tasks(links, depth + 1)

        except ValueError as e:
            # Raised by _normalize_url for forbidden ports or blocked domains
            logging.debug(f"Target skipped: {e}")
        except (aiohttp.ClientError, asyncio.CancelledError) as e:
            if isinstance(e, asyncio.CancelledError):
                raise  # re-raise immediately so cancellation stays fast
            self.stats['failed_requests'] += 1
            logging.debug(f"Request error: {e}")
        except Exception as e:
            self.stats['failed_requests'] += 1
            logging.error(f"Unexpected error: {e}")

    async def run(self, targets: list):
        """Main entry point with lightning-fast shutdown."""
        try:
            async with aiohttp.ClientSession(
                headers={"User-Agent": self.ua.random},
                connector=aiohttp.TCPConnector(
                    ssl=False,
                    limit=200,
                    limit_per_host=20
                )
            ) as self.session:
                monitor_task = asyncio.create_task(self._progress_monitor())
                main_task = asyncio.create_task(self._schedule_tasks(targets, 0))

                try:
                    await asyncio.wait_for(main_task, timeout=3600)
                except (asyncio.CancelledError, KeyboardInterrupt, asyncio.TimeoutError):
                    self._shutdown = True
                    logging.warning("Emergency shutdown in progress...")

                    # Lightning-fast termination: cancel every outstanding task
                    all_tasks = {t for tasks in self.active_tasks.values() for t in tasks}
                    for task in all_tasks:
                        task.cancel()

                    # Brief wait (at most 2 s); asyncio.wait requires a non-empty set
                    if all_tasks:
                        await asyncio.wait(
                            all_tasks,
                            timeout=min(2.0, len(all_tasks) * 0.01),
                            return_when=asyncio.ALL_COMPLETED
                        )
                finally:
                    monitor_task.cancel()
                    try:
                        await monitor_task
                    except asyncio.CancelledError:
                        pass

                # The async-with context closes the session; generate the report right away
                report_file = await self.generate_report()
                if report_file:
                    print(f"\n🔚 Scan finished | report file: {os.path.abspath(report_file)}")
                print(f"Domains scanned: {len(self.scanned_domains)}")
        except Exception as e:
            logging.error(f"Session error: {e}")
            raise

    def _graceful_shutdown(self, signum, frame):
        """Graceful shutdown on SIGINT."""
        logging.warning("Termination signal received, saving scan state...")
        self._shutdown = True

    async def _normalize_url(self, raw_url: str) -> str:
        """URL normalization (military-grade)."""
        url = raw_url.strip()
        # Case-insensitive scheme check so 'HTTP://...' is not double-prefixed
        if not re.match(r'^https?://', url, re.I):
            url = f'http://{url}'

        parsed = urlparse(url)
        if parsed.port in CONFIG["forbidden_ports"]:
            raise ValueError(f"Access to high-risk port forbidden: {parsed.geturl()}")

        # Lowercase scheme/host only; server paths may be case-sensitive
        full_domain = parsed.netloc.lower()
        if full_domain in self.blocked_domains:
            raise ValueError(f"Domain already blocked: {full_domain}")

        # Query sanitization: drop tracking and auth-style parameters
        query = parse_qs(parsed.query)
        clean_query = '&'.join(
            f"{k}={v[0]}" for k, v in query.items()
            if not k.startswith(('utm_', 'token', 'auth'))
        )
        return urlunparse((
            parsed.scheme.lower(), full_domain, parsed.path.rstrip('/'),
            parsed.params, clean_query, parsed.fragment
        ))
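
    # Illustrative normalization result (example input only):
    #   'Example.COM:8080/data/?id=1&utm_source=x'
    #     -> 'http://example.com:8080/data?id=1'
    #   (scheme added, host lowercased, trailing slash stripped,
    #    tracking-style query parameters removed)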

    def _is_sensitive(self, url: str) -> Tuple[bool, str]:
        """Smart sensitive-resource detection."""
        parsed = urlparse(url)
        path = parsed.path.lower()

        if '.' in path:
            # Compound-archive detection on the last two dot-separated parts
            parts = path.split('.')
            combined_ext = '.'.join(parts[-2:])
            if combined_ext in {'tar.gz', 'tar.bz2', 'tar.xz'}:
                return True, f"compound archive: {combined_ext}"

            # Single-extension blacklist
            if (ext := parts[-1]) in CONFIG["sensitive_ext"]:
                return True, f"sensitive extension: {ext}"

        # Path regex rules
        for pattern in CONFIG["sensitive_paths"]:
            if pattern.search(path):
                return True, f"path match: {pattern.pattern}"

        return False, ""

    async def _schedule_tasks(self, urls: list, depth: int):
        """Enhanced task scheduling with dedup and per-domain bookkeeping."""
        tasks = []
        for url in {u for u in urls if u}:
            parsed = urlparse(url)
            domain = parsed.netloc

            # Skip static assets listed in "ignore_ext"
            if parsed.path.rsplit('.', 1)[-1].lower() in CONFIG["ignore_ext"]:
                continue

            if domain in self.blocked_domains or url in self.dedup_filter:
                continue

            self.dedup_filter.add(url)
            task = asyncio.create_task(
                self._scan_worker(url, depth),
                name=f"ScanWorker:{domain}"
            )

            # Bind 'domain' per iteration; a bare closure would see the last loop value
            def safe_remove(t, domain=domain):
                try:
                    self.active_tasks[domain].remove(t)
                except KeyError:
                    pass

            self.active_tasks[domain].add(task)
            task.add_done_callback(safe_remove)
            tasks.append(task)

        if tasks:
            try:
                await asyncio.wait_for(
                    asyncio.shield(asyncio.gather(*tasks, return_exceptions=True)),
                    timeout=CONFIG["request_timeout"] * 2
                )
            except (asyncio.TimeoutError, asyncio.CancelledError):
                pass

    async def _cancel_domain_tasks(self, domain: str):
        """Atomic termination of a blocked domain's task group."""
        if domain not in self.active_tasks:
            return

        cancel_tasks = self.active_tasks.pop(domain)
        # Never cancel or await the task that triggered the block
        cancel_tasks.discard(asyncio.current_task())
        if not cancel_tasks:
            return

        logging.info(f"🛑 Cancelling task group [{domain}], count: {len(cancel_tasks)}")

        # Batch lightning cancellation
        for task in cancel_tasks:
            if not task.done():
                task.cancel()

        # Brief wait (at most 500 ms)
        await asyncio.wait(
            cancel_tasks,
            timeout=min(0.5, len(cancel_tasks) * 0.001),
            return_when=asyncio.ALL_COMPLETED
        )

    def _emergency_cancel(self, task: asyncio.Task):
        """Millisecond-level task termination (helper, currently unused)."""
        try:
            task.cancel()
        except Exception:
            pass

    async def _progress_monitor(self):
        """Live performance monitor."""
        start = time.time()
        while not self._shutdown:
            elapsed = time.time() - start
            sys.stdout.write(
                f"\r🚀 Scanning | requests: {self.stats['total_requests']} | "
                f"blocked: {len(self.blocked_domains)} | "
                f"critical: {len(self.findings['critical'])} | "
                f"elapsed: {elapsed:.1f}s"
            )
            sys.stdout.flush()
            await asyncio.sleep(0.5)

    async def generate_report(self):
        """Generate a concise CSV report."""
        filename = f"security_audit_report_{time.strftime('%Y%m%d_%H%M%S')}.csv"
        try:
            # UTF-8-SIG lets Excel detect the encoding correctly
            with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
                writer = csv.writer(f)
                writer.writerow(["Risk level", "URL", "Detection basis"])
                for item in self.findings["critical"]:
                    writer.writerow(["critical", item["url"], item["reason"]])
            logging.info(f"Report path: {os.path.abspath(filename)}")
            return filename
        except Exception as e:
            logging.error(f"Report generation failed: {e}")
            return None


if __name__ == "__main__":
    targets = []
    try:
        if len(sys.argv) == 1:
            print("\n🔐 index-of security scanner v2.3")
            print("━" * 40)
            input_file = input("Enter the path to the targets file: ").strip(' "\'')
            if not os.path.exists(input_file):
                raise FileNotFoundError(f"File not found: {input_file}")
            with open(input_file, encoding='utf-8') as f:
                targets = [ln.strip() for ln in f if ln.strip()]

        elif len(sys.argv) == 2:
            with open(sys.argv[1], encoding='utf-8') as f:
                targets = [ln.strip() for ln in f if ln.strip()]

        else:
            print("Invalid arguments")
            print("Usage: python scanner.py [targets file]")
            sys.exit(1)

        engine = ScannerPro()
        asyncio.run(engine.run(targets))

    except KeyboardInterrupt:
        print("\n⚠️ Interrupted by user")
    except FileNotFoundError as e:
        print(f"\n❌ File error: {e}")
    except Exception as e:
        print(f"\n‼️ System error: {e}")
        logging.exception("Fatal error")

--------------------------------------------------------------------------------