├── .gitignore ├── README.md ├── bladerazor.py ├── cdn_servers.yaml ├── cmd ├── knowledge_import.py └── run.py ├── config.py ├── exploits ├── __init__.py ├── attack_surface_research.py ├── deploy_attack.py ├── scanner │ ├── __init__.py │ └── nuclei_scanner_tool.py ├── vul_scan_expert.py └── web │ ├── __init__.py │ ├── curl_tool.py │ ├── gobuster_dir_tool.py │ ├── host_crawler_tool.py │ └── html_parse_info_tool.py ├── helpers ├── __init__.py ├── alienvault_api.py ├── crawler.py ├── fingers │ ├── __init__.py │ ├── assets │ │ ├── arl_finger.json │ │ ├── custom.json │ │ ├── finger.json │ │ └── web_fingerprint_v3.json │ └── web_fingers.py ├── fofa_api.py ├── gobuster.py ├── html_information_leak_analyze.py ├── masscan.py ├── nmap.py ├── nuclei.py ├── security_trails_api.py └── utils.py ├── persistence ├── __init__.py ├── database.py ├── orm.py └── vectordb.py ├── rag ├── __init__.py ├── rag.py └── rag_search_tool.py ├── recon ├── __init__.py ├── active │ ├── __init__.py │ ├── masscan_search_tool.py │ └── nmap_search_tool.py ├── cyber_assets_researcher.py └── passive │ ├── __init__.py │ ├── alienvault_search_tool.py │ ├── cdn_check.py │ ├── fofa_search_tool.py │ └── security_trails_search_tool.py ├── requirements.txt ├── team.py ├── workflow_attack_plan.py ├── workflow_deploy_attack.py └── workflow_pre_attack.py /.gitignore: -------------------------------------------------------------------------------- 1 | ### JetBrains template 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea 7 | .idea/ 8 | .idea/* 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # AWS User-specific 16 | .idea/**/aws.xml 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json

### macOS template
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Blade Razor 刃影

An AI-Driven Pentesting Solution.

The asset reconnaissance tooling draws on [OneForAll](https://github.com/shmilylty/OneForAll).

## About

This project was created solely to validate the use of AI in penetration testing.

Penetration testing is, at its core, a security assessment of a target system that applies the experience and knowledge of security professionals through a range of tools and techniques.

With the arrival of LLMs and AI-agent frameworks such as langchain, we can now put the capabilities of large models to work on concrete tasks.

Our experiments confirm that applying LLMs to penetration testing is feasible, and the following four factors are decisive for the end result:

1. Choice of LLM
   Only the parameter count really matters; there is no need to worry about whether the model was fine-tuned for this particular domain, since domain-specific knowledge can be supplied through RAG.
2. RAG
   Effectively an external knowledge base independent of the LLM; as long as the private knowledge base is thorough enough, the model's "experience" will be rich enough.
3. Prompts
   Prompts let us simulate roles such as hacker, security researcher, or security engineer, which helps the LLM better understand what we need.
4. External tools
   The richer the arsenal, the better.

## TODO

Too many items to list...

## Usage

    # Create a Python 3.12 virtual environment; virtualenv works too
    conda create -n bladerazor python=3.12.3
    conda activate bladerazor

    # Update
    conda update --all
    pip install pip-review
    pip-review --local --auto

    # Install dependencies
    pip install -r requirements.txt

## Environment Variables

LLM-related configuration follows langchain's conventions; see the [langchain docs](https://python.langchain.com/v0.1/docs/integrations/llms/) for details.

```
OPENAI_API_BASE=https://xxx # change this if you use ollama or similar
OPENAI_API_KEY=xxxx
OPENAI_MODEL_NAME=gpt-4o

# crewai telemetry; best disabled
OTEL_SDK_DISABLED=true


FOFA_API_KEY=xxxxxxxx
FOFA_EMAIL=fofauser@emailaddress
FOFA_VERSION=ent

GOBUSTER_PATH=/your/gobuster/path/folder
GOBUSTER_WORDLIST_PATH=/your/wordlists/small.txt

NUCLEI_PATH=/your/nuclei/path/folder
NUCLEI_TEMPLATES_PATH=/your/nuclei/path/folder/templates

SECURITYTRAILS_API_KEY=xxxxxxxx
```

## Importing the Knowledge Base

The tool is `cmd/knowledge_import.py`; it uses OpenAI's embedding model by default. See the source for details.

## Running

The entry point currently lives in `bladerazor.py`; see the source for details.

## Database

    docker run --name bladerazor-pg \
        -e POSTGRES_USER=bladerazor \
        -e POSTGRES_PASSWORD=123456 \
        -e POSTGRES_DB=bladerazor \
        -p 15432:5432 \
        -d pgvector/pgvector:pg16

## LLM

Local models are constrained by the available hardware; neither ollama nor LM Studio has run them satisfactorily here.

Only the following models have been tested so far:

| LLM           | Result | Recommended |
|---------------|--------|-------------|
| gpt-3.5-turbo | usable | ⭐⭐⭐     |
| gpt-4o        | usable | ⭐⭐⭐⭐   |

## How It Works

```mermaid
---
title: Pre-attack phase
---
stateDiagram-v2
    state "Reconnaissance" as Recon
    state "Asset mapping" as AssetMapping
    state "Port scanning" as PortScan
    state "Vulnerability scanning" as VulScan
    state "Directory bruteforcing" as DirectoryBruteforcing
    state new_recon_assets_state <<choice>>
    state new_mapping_assets_state <<choice>>
    [*] --> Recon
    Recon --> new_recon_assets_state
    new_recon_assets_state --> Recon: new assets found
    new_recon_assets_state --> AssetMapping: no new assets
    AssetMapping --> new_mapping_assets_state
    new_mapping_assets_state --> Recon: new assets found
    new_mapping_assets_state --> PortScan: no new assets
    PortScan --> VulScan
    VulScan --> DirectoryBruteforcing
    DirectoryBruteforcing --> [*]
```

```mermaid
---
title: Attack phase
---
stateDiagram-v2
    state "Intelligence analysis" as IntelligenceAnalysis
    state "Attack surface analysis" as AttackSurfaceResearch
    state "Foothold research" as EstablishingFootholdResearch
    state "Attack plan review" as AttackPlanReview
    state "Deploy and execute the attack" as DeployAndExecuteTheAttack
    state attack_plan_review_state <<choice>>
    [*] --> IntelligenceAnalysis
    IntelligenceAnalysis --> AttackSurfaceResearch
    AttackSurfaceResearch --> EstablishingFootholdResearch
    EstablishingFootholdResearch --> AttackPlanReview
    AttackPlanReview --> attack_plan_review_state
    attack_plan_review_state --> EstablishingFootholdResearch: rework
    attack_plan_review_state --> [*]: rejected
    attack_plan_review_state --> DeployAndExecuteTheAttack: approved
    DeployAndExecuteTheAttack --> [*]
```
--------------------------------------------------------------------------------
/bladerazor.py:
--------------------------------------------------------------------------------
import logging
import os

import opentelemetry.sdk.trace
from embedchain.config import BaseLlmConfig
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.llm.openai import OpenAILlm
from sqlalchemy import
func, and_ 9 | 10 | from exploits.attack_surface_research import AttackSurfaceResearch 11 | from workflow_attack_plan import WorkFlowAttackPlan 12 | from workflow_deploy_attack import WorkFlowDeployAttack 13 | from workflow_pre_attack import WorkFlowPreAttack 14 | from persistence.vectordb import NewEmbedChain 15 | from rag.rag import RAG 16 | from rag.rag_search_tool import RagSearchTool 17 | from team import Team 18 | 19 | opentelemetry.sdk.trace.logger.setLevel(logging.CRITICAL) 20 | from langchain_openai import ChatOpenAI 21 | 22 | from persistence.database import DB 23 | from persistence.orm import PenTestTask, Vul, WebInfo, Port 24 | from recon.cyber_assets_researcher import CyberAssetsResearchers 25 | 26 | if __name__ == '__main__': 27 | # TODO 移至 cmd 目录 28 | # PROXY_SOCKS = "http://localhost:1080" 29 | # os.environ['http_proxy'] = PROXY_SOCKS 30 | # os.environ['HTTP_PROXY'] = PROXY_SOCKS 31 | # os.environ['https_proxy'] = PROXY_SOCKS 32 | # os.environ['HTTPS_PROXY'] = PROXY_SOCKS 33 | # os.environ["OPENAI_API_KEY"] = OPENAI_KEY 34 | # os.environ["OPENAI_MODEL_NAME"] = "gpt-3.5-turbo" 35 | 36 | debug = True 37 | # 初始化数据库 38 | db = DB( 39 | user='bladerazor', 40 | password='123456', 41 | host='localhost', 42 | port=15432, 43 | dbname='bladerazor', 44 | # echo=debug, 45 | ) 46 | 47 | # 初始化RAG 48 | embder = OpenAIEmbedder() 49 | rag = RAG(db=db, embder=embder) 50 | 51 | # ragtool = RagSearchTool(rag) 52 | 53 | # 初始化LLM 54 | llm = ChatOpenAI( 55 | temperature=0.9, 56 | # max_tokens=16385, 57 | # http_client=httpx.Client(proxy=os.environ['PROXY']), 58 | # http_client=httpx.AsyncClient(proxy=os.environ['PROXY']), 59 | ) 60 | 61 | # 初始化团队 62 | team = Team( 63 | db=db, 64 | rag=rag, 65 | llm=llm, 66 | debug=debug, 67 | nmap_path=os.getenv('NMAP_PATH'), 68 | nuclei_path=os.environ['NUCLEI_PATH'], 69 | nuclei_templates_path=os.environ['NUCLEI_TEMPLATES_PATH'], 70 | gobuster_path=os.environ['GOBUSTER_PATH'], 71 | gobuster_wordlist_path=os.environ['GOBUSTER_WORDLIST_PATH'], 72 | ) 73 | 74 | # 目标 75 | target = 'https://www.example.com/' 76 | print('target', target) 77 | task_id = 0 78 | 79 | with db.DBSession() as session: 80 | task = PenTestTask() 81 | task.target = target 82 | task.name = target 83 | session.add(task) 84 | session.commit() 85 | task_id = task.id 86 | 87 | 88 | 89 | # 工作流 - 预攻击 90 | workflow = WorkFlowPreAttack( 91 | db=db, 92 | team=team, 93 | debug=debug 94 | ) 95 | workflow.run(task_id, target) 96 | 97 | # 工作流 - 攻击计划 98 | workflowAttack = WorkFlowAttackPlan( 99 | db=db, 100 | team=team, 101 | debug=debug 102 | ) 103 | 104 | workflowAttack.run(task_id) 105 | 106 | # 工作流 - 部署攻击 107 | workflowDeployAttack = WorkFlowDeployAttack( 108 | db=db, 109 | team=team, 110 | debug=debug 111 | ) 112 | 113 | workflowDeployAttack.run(task_id) 114 | -------------------------------------------------------------------------------- /cmd/knowledge_import.py: -------------------------------------------------------------------------------- 1 | from embedchain.embedder.openai import OpenAIEmbedder 2 | 3 | from persistence.database import DB 4 | from rag.rag import RAG 5 | 6 | 7 | def add_knowledge(): 8 | print('add_knowledge') 9 | pass 10 | 11 | 12 | if __name__ == '__main__': 13 | # TODO 修改为命令行模式 14 | db = DB( 15 | user='bladerazor', 16 | password='123456', 17 | host='localhost', 18 | port=15432, 19 | dbname='bladerazor', 20 | # echo=debug, 21 | ) 22 | 23 | # 默认嵌入式模型为 text-embedding-ada-002 24 | embder = OpenAIEmbedder() 25 | 26 | rag = RAG(db=db, embder=embder) 27 | # 添加目录,例如某个从git克隆的知识库 28 | 
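    # 注:以下目录与URL均为示例占位符,运行前请替换为实际的知识库来源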
rag.add_knowledge_folder('/my/knowledge/folder') 29 | # 添加url,例如某个在线的知识 30 | rag.add_knowledge_url("urlfor://online/sec/knowledge") -------------------------------------------------------------------------------- /cmd/run.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__': 2 | # TODO 程序入口 3 | pass 4 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from loguru import logger 4 | 5 | stdout_fmt = '{time:HH:mm:ss,SSS} ' \ 6 | '[{level: <5}] ' \ 7 | '{module}:{line} - ' \ 8 | '{message}' 9 | 10 | logger.remove() 11 | logger.level(name='TRACE', color='') 12 | logger.level(name='DEBUG', color='') 13 | logger.level(name='INFOR', no=20, color='') 14 | logger.level(name='QUITE', no=25, color='') 15 | logger.level(name='ALERT', no=30, color='') 16 | logger.level(name='ERROR', color='') 17 | logger.level(name='FATAL', no=50, color='') 18 | 19 | logger.add(sys.stderr, level='DEBUG', format=stdout_fmt, enqueue=True) 20 | -------------------------------------------------------------------------------- /exploits/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/__init__.py -------------------------------------------------------------------------------- /exploits/attack_surface_research.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | from crewai_tools import BaseTool 5 | 6 | from persistence.database import DB 7 | from config import logger 8 | from rag.rag import RAG 9 | from rag.rag_search_tool import RagSearchTool 10 | 11 | 12 | class AttackSurfaceResearch: 13 | """ 14 | 攻击面研究 15 | 根据已探明的网络资产,进行漏洞分析与利用。 16 | """ 17 | 18 | def __init__(self, db: DB, rag: RAG, llm=None, verbose: bool = False): 19 | self.llm = llm 20 | self.db = db 21 | self.rag = rag 22 | self.verbose = verbose 23 | 24 | def _getKnowledgeTools(self) -> []: 25 | tools = [] 26 | tools.append(RagSearchTool(self.rag, self.llm, self.verbose)) 27 | return tools 28 | 29 | def task_information_leak_research(self, agent: Agent, assets: str) -> Task: 30 | logger.info("初始化任务 信息泄露研究") 31 | return Task( 32 | agent=agent, 33 | description=dedent( 34 | f""" 35 | 识别并分析有用的实体,需要注意以下几点: 36 | * 实体可能是敏感信息,如密码、密钥等。 37 | * 实体可能是系统信息,如版本信息、路径信息等。 38 | * 实体可能是配置信息,如数据库配置、系统配置等。 39 | * 实体可能是用户信息,如用户名等。 40 | 41 | 请特别留意以下资产: 42 | {assets} 43 | """), 44 | expected_output=dedent( 45 | """ 46 | 最终答案应包括你发现的所有敏感信息的列表。 47 | 只输出已发现的敏感信息。 48 | 不要将原始输入作为输出。 49 | 确保信息分类明确,例如:密码、密钥、版本信息、路径信息、数据库配置、系统配置、用户名等。 50 | """), 51 | ) 52 | 53 | def agent_intelligence_analysis_expert(self, llm=None) -> Agent: 54 | logger.info("初始化代理 情报分析专家") 55 | agent = Agent( 56 | role='情报分析专家', 57 | goal='根据目标的所有信息,提取有用的情报,为后续工作提供支持', 58 | backstory=dedent( 59 | """ 60 | 你是一名经验丰富的情报分析专家,隶属于攻击团队。 61 | 你的主要任务是利用目标的所有已知信息,查找对工作有利的情报。 62 | 你擅长发现潜在的情报,如敏感信息、漏洞信息等,这些信息有助于了解系统结构并找到潜在漏洞。 63 | 64 | 以下是需要特别注意的方面,包括但不限于: 65 | * 源代码 66 | * 注释 67 | * 隐藏字段 68 | * 错误信息 69 | * 错误页面中的堆栈跟踪信息 70 | * 版本信息 71 | * 文件路径 72 | * 电子邮件地址 73 | * IP地址 74 | * Meta标签 75 | * SQL报错信息 76 | 77 | 通过详细分析这些方面的信息,你将能够提取有用的情报,为团队的后续工作提供关键支持。 78 | """), 79 | verbose=self.verbose, 80 | allow_delegation=True, 81 | max_rpm=300, 82 | # max_iter=1, 83 | llm=llm, 84 | 
cache=False, 85 | ) 86 | 87 | if llm is not None: 88 | agent.llm = llm 89 | return agent 90 | 91 | def intelligenceAnalysisCrew(self, target: str): 92 | agents = [] 93 | tasks = [] 94 | 95 | agiae = self.agent_intelligence_analysis_expert(self.llm) 96 | agents.append(agiae) 97 | tasks.append(self.task_information_leak_research(agiae, target)) 98 | 99 | logger.info("初始化智能体 情报分析") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False, 105 | cache=False 106 | ) 107 | 108 | def task_attack_surface_research(self, agent: Agent, target: str, intelligence: str) -> Task: 109 | logger.info("初始化任务 攻击面研究") 110 | return Task( 111 | agent=agent, 112 | description=dedent( 113 | f""" 114 | 对目标的技术栈、指纹信息、暴露的端口服务、漏扫结果以及掌握的情报进行深入分析研究,以找到所有可能的攻击面。 115 | 116 | 你应当遵循以下思路: 117 | 1. 目标是CMS还是程序?例如WordPress或phpMyAdmin。 118 | 确定目标类型后,才能找到对应的漏洞。 119 | 2. 哪些前置代理了流量?例如CDN、Nginx或Apache。 120 | 有些程序自带前置(如Go),有些程序需要前置代理(如PHP需要Nginx或Apache)。 121 | 如果存在前置,那么需要分别找漏洞,甚至有些漏洞需要前置与后端配合。 122 | 3. 有哪些情报可以利用? 123 | 如果页面中包含一些敏感信息(如报错信息、注释、路径等),这些信息有助于推测目标程序及版本。 124 | 125 | 按上述思路操作,避免发生低级错误(如在PHP的目标上找Java的漏洞)。 126 | 127 | 需要注意的方面包括但不限于以下内容: 128 | * 对于已明确指纹信息的目标,根据指纹信息找到对应的漏洞。 129 | * 不要错误地将其他产品的漏洞应用到目标上。 130 | * 注意产品一致性,例如Microsoft Office的漏洞不适用于WordPress,Windows的漏洞不适用于Linux。 131 | * 确保漏洞与目标匹配,例如Java框架的漏洞不适用于PHP框架。 132 | * 考虑漏洞的利用条件,例如是否需要登录、是否需要特定的请求等。 133 | * 描述漏洞的利用方法,例如是否需要上传文件、是否需要执行命令等。 134 | * 评估漏洞的风险等级,例如是否可以获取root权限、是否可以获取敏感信息等。 135 | * 考虑漏洞的利用复杂度,例如是否需要特定的环境、是否需要特定的工具等。 136 | * 如果前置有CDN,可能存在WAF,需要考虑绕过WAF的方法或找到原始服务器的方法。 137 | * 对于配置不当造成的安全威胁,仅关注对目标造成实质性影响的漏洞(如信息泄漏、XSS攻击等)。 138 | 139 | 使用错误的漏洞攻击已确定程序(如使用PHP的漏洞攻击Java程序)将会造成灾难性后果! 140 | 141 | 目标: 142 | {target} 143 | 144 | 已掌握的情报: 145 | {intelligence} 146 | """), 147 | expected_output=dedent( 148 | """ 149 | 最终答案应包括你发现的所有攻击面列表,并根据严重程度排序,越详细越好。 150 | 确保每个攻击面都包含以下信息: 151 | * 漏洞点,具体用于攻击的点,如url地址 152 | * 攻击步骤 153 | * 攻击方法 154 | * 使用工具 155 | * 攻击风险 156 | * 可能的漏洞利用条件和方法 157 | * 漏洞的风险等级和利用复杂度 158 | """), 159 | ) 160 | 161 | def task_establishing_foothold_research_rework(self, agent: Agent, plan: str, 162 | review: str) -> Task: 163 | logger.info("初始化任务 打点研究返工") 164 | return Task( 165 | agent=agent, 166 | description=dedent( 167 | f""" 168 | 根据攻击计划的审核结果,重新进行研究,确保所有潜在攻击面都得到充分考虑。 169 | 170 | 攻击计划: 171 | {plan} 172 | 173 | 审核结果: 174 | {review} 175 | 176 | 你需要进行以下步骤: 177 | 1. 分析审核结果,找出需要修改或补充的部分。 178 | 2. 重新评估每个攻击面,确保考虑到所有可能的漏洞。 179 | 3. 对每个攻击面进行详细研究,找出可能的漏洞利用方法。 180 | 4. 根据严重程度对漏洞进行排序,确保最关键的漏洞优先处理。 181 | 5. 
确保研究结果详细,包括漏洞的利用条件、方法、工具、风险等级和利用复杂度。 182 | 183 | 避免低级错误,例如将不适用于目标系统的漏洞应用到目标上。 184 | """), 185 | expected_output=dedent( 186 | """ 187 | 最终答案应包括每个攻击面的漏洞利用方法,并根据严重程度排序,越详细越好。你是攻击队的一员,不需要提供修复建议,不要编造答案。 188 | 189 | 确保每个漏洞描述包括: 190 | * 攻击步骤 191 | * 攻击方法 192 | * 使用工具 193 | * 攻击风险 194 | * 漏洞的利用条件和方法 195 | * 漏洞的风险等级和利用复杂度 196 | """), 197 | ) 198 | 199 | def task_establishing_foothold_research(self, agent: Agent) -> Task: 200 | logger.info("初始化任务 打点研究") 201 | return Task( 202 | agent=agent, 203 | description=dedent( 204 | f""" 205 | 根据攻击面列表,结合本地知识库,为每一个攻击面找到所有攻击方法。 206 | 207 | 请遵循以下原则: 208 | * 不需要技术检测类的方案,如指纹识别等。 209 | * 对于已明确指纹信息的目标,需要根据指纹信息找到对应的漏洞。 210 | * 不要错误地将其他产品的漏洞应用到目标上。 211 | * 注意产品一致性,例如Microsoft Office的漏洞不适用于WordPress,Windows的漏洞不适用于Linux。 212 | * 确保漏洞与目标匹配,例如Java框架的漏洞不适用于PHP框架。 213 | * 考虑漏洞的利用条件,例如是否需要登录、是否需要特定的请求等。 214 | * 描述漏洞的利用方法,例如是否需要上传文件、是否需要执行命令等。 215 | * 评估漏洞的风险等级,例如是否可以获取root权限、是否可以获取敏感信息等。 216 | * 考虑漏洞的利用复杂度,例如是否需要特定的环境、是否需要特定的工具等。 217 | * 如果前置有CDN,可能存在WAF,需要考虑绕过WAF的方法或找到原始服务器的方法。 218 | * 对于配置不当造成的安全威胁,仅关注对目标造成实质性影响的漏洞(如信息泄漏、XSS攻击等)。 219 | 220 | 你应当确认每个漏洞的类型、风险等级以及利用复杂度。 221 | 222 | 避免低级错误,例如在已确定技术栈的程序中使用错误的漏洞(如使用PHP的漏洞攻击Java程序),这将会造成灾难性后果! 223 | """), 224 | expected_output=dedent( 225 | """ 226 | 最终答案应包括每个攻击面的漏洞利用方法,并根据严重程度排序,越详细越好。你是攻击队的一员,不需要提供修复建议,不要编造答案。 227 | 228 | 确保每个漏洞描述包括: 229 | * 攻击步骤 230 | * 攻击方法 231 | * 使用工具 232 | * 攻击风险 233 | * 漏洞的利用条件和方法 234 | * 漏洞的风险等级和利用复杂度 235 | """), 236 | ) 237 | 238 | def agent_establishing_foothold_research_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 239 | logger.info("初始化代理 打点研究专家") 240 | agent = Agent( 241 | role='打点研究专家', 242 | goal='根据目标的已知情报,结合本地知识库,研究并制定一套渗透方案', 243 | backstory=dedent( 244 | """ 245 | 你是一名经验丰富的打点研究专家,隶属于攻击团队。 246 | 你的主要任务是利用已知的各种情报,对目标制定一套详细且可执行的渗透方案。 247 | 方案中不需要给出修复建议,而是专注于攻击策略的制定。 248 | 249 | 任务要求: 250 | 1. 利用RAG搜索工具,围绕目标的技术栈进行深入搜索。 251 | 2. 详细描述每一步攻击步骤、方法、工具以及可能的攻击风险。 252 | 3. 输出的攻击方案需要包括: 253 | - 攻击路径的选择 254 | - 可能的攻击入口点 255 | - 每个步骤的具体执行方法 256 | - 所需工具及其使用方法 257 | - 潜在的风险评估及规避策略 258 | 259 | 使用搜索工具如RAG时应注意以下几点: 260 | 1. 应使用详细的关键词,针对性地搜索与目标技术栈相关的漏洞和配置缺陷。 261 | 2. 避免查询与目标明显不相关的内容,如特定的IP、域名或无关的技术栈。 262 | 3. 系统性的审核这些工具的输出,确保每项结果都与实际的安全目标直接相关。 263 | 审核过程中应遵循的步骤包括: 264 | 1. 核实每一条输出结果是否与搜索关键词直接相关。任何与查询技术栈不符的结果都应被标记为不相关。 265 | 2. 识别和记录结果中的任何可能误导的信息,如错误的技术细节或与查询不符的上下文。 266 | 3. 对每个不相关或误导性的结果提供简要的分析说明,解释为什么这些结果不适合当前的搜索需求。 267 | 4. 
提出修改搜索策略的建议,如更换或细化关键词,以提高未来搜索的相关性和效果。 268 | 269 | 示例: 270 | 搜索关键词:'Apache Struts OGNL injection' 271 | 搜索结果:'Apache Struts version 2.3.31 RCE vulnerability details' 272 | 审核步骤:确认搜索结果与关键词的相关性,验证漏洞详情的准确性和目标系统的适用性。 273 | 274 | 如果结果涉及到不相关的框架,如'.NET framework vulnerability',则标记为不相关,并在报告中指出。 275 | 276 | 你的工作至关重要,攻击方案的质量将直接影响团队后续工作的开展和成功率。 277 | 278 | 如果无法找到合适的攻击方案,直接输出字符串`FAIL`。 279 | """), 280 | tools=tools, 281 | verbose=self.verbose, 282 | allow_delegation=True, 283 | max_rpm=300, 284 | # max_iter=1, 285 | llm=llm, 286 | cache=False, 287 | ) 288 | 289 | if llm is not None: 290 | agent.llm = llm 291 | return agent 292 | 293 | def establishingFootholdResearchCrew(self, target: str, intelligence: str): 294 | agents = [] 295 | tasks = [] 296 | tools = self._getKnowledgeTools() 297 | 298 | ag = self.agent_establishing_foothold_research_expert(self.llm, tools) 299 | agents.append(ag) 300 | 301 | tasks.append(self.task_attack_surface_research(ag, target, intelligence)) 302 | tasks.append(self.task_establishing_foothold_research(ag)) 303 | 304 | logger.info("初始化智能体 打点研究") 305 | return Crew( 306 | agents=agents, 307 | tasks=tasks, 308 | verbose=self.verbose, 309 | share_crew=False, 310 | cache=False 311 | ) 312 | 313 | def task_attack_plan_review(self, agent: Agent, assets: str, intelligence: str, plan: str) -> Task: 314 | logger.info("初始化任务 攻击方案审核") 315 | return Task( 316 | agent=agent, 317 | description=dedent( 318 | f""" 319 | 结合本地知识库,对攻击面与攻击计划进行审核。 320 | 你的任务是审核攻击面与攻击计划是否匹配。 321 | 322 | 审核思路: 323 | 1. 目标的技术栈是否明确? 324 | 确认方案是否按照已知情报正确识别出目标的程序(如WordPress或phpMyAdmin)。 325 | 2. 使用的漏洞是否匹配目标技术栈? 326 | 确认方案是否使用了与目标技术栈不匹配的漏洞(如使用PHP的漏洞攻击Java程序)。 327 | 如果使用了不匹配的漏洞,请移除相应方案。 328 | 329 | 基本原则: 330 | * 攻击方案可能很长,不要因为一点而全盘否定。 331 | * 不要因为攻击面缺失实体而否定攻击方案。 332 | * 确保漏洞与目标匹配(如Java框架的漏洞不适用于PHP框架)。 333 | * 注意漏洞的利用条件(如操作系统是否与目标匹配)。 334 | * 不能遗漏高危攻击面。 335 | * 不要混淆目标实体与漏洞实体。目标实体是你要进行渗透的对象,漏洞实体是漏洞的利用条件、特性以及关联的程序类型等信息的集合。 336 | * 如果仅有少部分不合理,可以提出优化建议,而非直接否决。 337 | 338 | 避免低级错误,例如对已确定技术栈的程序使用错误的漏洞(如使用PHP的漏洞攻击Java程序),这将会造成灾难性后果。 339 | 340 | 审核内容: 341 | 攻击面: 342 | {assets} 343 | 344 | 已掌握的情报: 345 | {intelligence} 346 | 347 | 攻击计划: 348 | {plan} 349 | """), 350 | expected_output=dedent( 351 | """ 352 | 最终答案应为以下三种类型之一: 353 | 354 | 如果审核通过: 355 | 只需要回答字符串`PASS` 356 | 如果不通过且方案完全不可行: 357 | 以字符串`FAIL`开头,后面跟上不可行的原因 358 | 如果不通过但需要优化: 359 | 返回优化后的攻击方案,并以字符串`OPTIMIZE`开头 360 | """), 361 | ) 362 | 363 | def agent_attack_plan_review_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 364 | logger.info("初始化代理 攻击方案审核专家") 365 | agent = Agent( 366 | role='攻击方案审核专家', 367 | goal='根据目标的已知情报,结合本地知识库,对攻击方案进行审核', 368 | backstory=dedent( 369 | """ 370 | 你是一名经验丰富的攻击方案审核专家,隶属于攻击团队。 371 | 你的主要任务是利用已知的各种情报与知识对攻击方案进行审核。 372 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 373 | 374 | 使用搜索工具如RAG时应注意以下几点: 375 | 1. 应使用详细的关键词,针对性地搜索与目标技术栈相关的漏洞和配置缺陷。 376 | 2. 避免查询与目标明显不相关的内容,如特定的IP、域名或无关的技术栈。 377 | 3. 系统性的审核这些工具的输出,确保每项结果都与实际的安全目标直接相关。 378 | 审核过程中应遵循的步骤包括: 379 | 1. 核实每一条输出结果是否与搜索关键词直接相关。任何与查询技术栈不符的结果都应被标记为不相关。 380 | 2. 识别和记录结果中的任何可能误导的信息,如错误的技术细节或与查询不符的上下文。 381 | 3. 对每个不相关或误导性的结果提供简要的分析说明,解释为什么这些结果不适合当前的搜索需求。 382 | 4. 提出修改搜索策略的建议,如更换或细化关键词,以提高未来搜索的相关性和效果。 383 | 384 | 示例: 385 | 搜索关键词:'Apache Struts OGNL injection' 386 | 搜索结果:'Apache Struts version 2.3.31 RCE vulnerability details' 387 | 审核步骤:确认搜索结果与关键词的相关性,验证漏洞详情的准确性和目标系统的适用性。 388 | 389 | 如果结果涉及到不相关的框架,如'.NET framework vulnerability',则标记为不相关,并在报告中指出。 390 | 391 | 392 | 你深知: 393 | * 没有绝对的安全,只有相对的安全。 394 | * 一个好的攻击方案是成功的一半。 395 | * 防火墙并不是绝对的障碍,总有绕过的方法。 396 | * 你的任务是找到最佳和最适合当前情报的攻击方案,而不仅仅是寻找可用的方案。 397 | 398 | 在审核过程中,你需要: 399 | 1. 
核实攻击方案是否充分利用已知情报。 400 | 2. 确保方案的实施方式与目标技术栈兼容。 401 | 3. 评估方案的可行性,避免使用不匹配或过时的攻击策略。 402 | 4. 优化方案,以提高攻击成功率和效率。 403 | """), 404 | tools=tools, 405 | verbose=self.verbose, 406 | allow_delegation=True, 407 | max_rpm=300, 408 | # max_iter=1, 409 | llm=llm, 410 | cache=False, 411 | ) 412 | 413 | if llm is not None: 414 | agent.llm = llm 415 | return agent 416 | 417 | def attackPlanReviewCrew(self, assets: str, intelligence: str, plan: str, review: str | None = None): 418 | agents = [] 419 | tasks = [] 420 | tools = self._getKnowledgeTools() 421 | 422 | ag = self.agent_attack_plan_review_expert(self.llm, tools) 423 | agents.append(ag) 424 | 425 | tasks.append(self.task_attack_surface_research(ag, assets, intelligence)) 426 | tasks.append(self.task_establishing_foothold_research(ag)) 427 | tasks.append(self.task_attack_plan_review(ag, assets, intelligence, plan)) 428 | 429 | if review is not None: 430 | tasks.append(self.task_establishing_foothold_research_rework(ag, plan, review)) 431 | 432 | logger.info("初始化智能体 攻击方案审核") 433 | return Crew( 434 | agents=agents, 435 | tasks=tasks, 436 | verbose=self.verbose, 437 | share_crew=False, 438 | cache=True 439 | ) 440 | -------------------------------------------------------------------------------- /exploits/deploy_attack.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | from crewai_tools import BaseTool 5 | 6 | from exploits.web.curl_tool import CurlTool 7 | from persistence.database import DB 8 | from rag.rag import RAG 9 | from config import logger 10 | 11 | 12 | class DeployAttack: 13 | """ 14 | 部署攻击 15 | """ 16 | 17 | def __init__(self, db: DB, rag: RAG, llm=None, verbose: bool = False): 18 | self.llm = llm 19 | self.db = db 20 | self.rag = rag 21 | self.verbose = verbose 22 | 23 | def _getPenTestTools(self) -> []: 24 | tools = [] 25 | tools.append(CurlTool()) 26 | return tools 27 | 28 | def task_establishing_foothold_attack(self, agent: Agent, asset: str, plan: str) -> Task: 29 | logger.info("初始化任务 实施打点") 30 | return Task( 31 | agent=agent, 32 | description=dedent( 33 | f""" 34 | 你的任务是根据攻击计划,针对目标灵活运用策略并合理使用各种工具,成功利用漏洞,最终取得敏感信息或得到控制权。 35 | 这一过程将涉及多种渗透工具和策略,以确保在目标网络或系统中安全且隐蔽地实施打点行动。 36 | 在整个任务过程中,务必保持隐蔽性和安全性,避免任何可能引发目标系统警觉的行为。 37 | 38 | 你应该根据你所使用的工具的特性,分析其输出,以此来确定你的攻击是否成功。 39 | 40 | 注意,首先判断攻击计划是否合理,然后根据计划执行攻击。 41 | 42 | 目标: 43 | {asset} 44 | 45 | 攻击计划: 46 | {plan} 47 | """), 48 | expected_output=dedent( 49 | """ 50 | 对于敏感信息泄漏类型的任务, 51 | 最终答案应包括你发现的所有敏感信息的列表。 52 | 只输出已发现的敏感信息。 53 | 不要将原始输入作为输出。 54 | 确保信息分类明确,例如:密码、密钥、版本信息、路径信息、数据库配置、系统配置、用户名等。 55 | 56 | 对于控制权获取类型的任务, 57 | 最终答案应包括你成功获取的控制权的列表。 58 | """), 59 | ) 60 | 61 | def agent_establishing_foothold_attacker(self, llm=None, tools: [BaseTool] = []) -> Agent: 62 | logger.info("初始化代理 打点实施专家") 63 | agent = Agent( 64 | role='打点实施专家', 65 | goal='通过高效利用已识别的安全漏洞,在目标网络或系统中建立并维持立足点,确保后续的渗透测试和长期的网络侦察得以成功进行。', 66 | backstory=dedent( 67 | """ 68 | 你是一名经验丰富的打点专家,隶属于攻击团队。 69 | 你的核心任务是通过巧妙的技术手段和深厚的安全知识,在目标系统中建立持久的访问权限。 70 | 你精通多种渗透工具和技术,能够在复杂的网络环境中识别并利用微妙的安全漏洞。 71 | 你的技能不仅限于技术渗透,还包括对攻击策略的深入理解,能够识别并利用复杂系统的微妙弱点。 72 | 你的主要职责是通过技术手段建立初始的访问权,并扩展这些访问权限以形成持久的立足点。 73 | 你的工作对于后续的渗透阶段至关重要,因为它们提供了进行深入分析和数据窃取所需的访问基础。 74 | 75 | 你的专业知识和战术洞察力使你成为攻击团队中不可或缺的一员,你对网络安全的热情和对挑战的渴望推动你不断寻找更有效的渗透方法。 76 | """), 77 | tools=tools, 78 | verbose=self.verbose, 79 | allow_delegation=True, 80 | max_rpm=300, 81 | # max_iter=1, 82 | llm=llm, 83 | cache=False, 84 | ) 85 | 86 | if llm is not None: 87 | agent.llm = llm 88 | return agent 89 
| 90 | def establishingFootholdAttackCrew(self, asset: str, plan: str): 91 | agents = [] 92 | tasks = [] 93 | tools = self._getPenTestTools() 94 | 95 | agefa = self.agent_establishing_foothold_attacker(self.llm, tools=tools) 96 | agents.append(agefa) 97 | tasks.append(self.task_establishing_foothold_attack(agefa, asset, plan)) 98 | 99 | logger.info("初始化智能体 打点实施") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False, 105 | cache=False 106 | ) 107 | -------------------------------------------------------------------------------- /exploits/scanner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/scanner/__init__.py -------------------------------------------------------------------------------- /exploits/scanner/nuclei_scanner_tool.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import tempfile 4 | from datetime import datetime 5 | from typing import Type, Any 6 | 7 | import validators 8 | from crewai_tools import BaseTool 9 | from embedchain import App 10 | from embedchain.models.data_type import DataType 11 | from pydantic.v1 import BaseModel, Field 12 | from sqlalchemy import exc 13 | 14 | from helpers.nuclei import Nuclei 15 | from helpers.utils import is_domain 16 | from persistence.database import DB 17 | from config import logger 18 | from persistence.orm import Vul, DuplicateException 19 | 20 | 21 | class NucleiScannerToolSchema(BaseModel): 22 | """NucleiScannerToolSchema 的查询参数""" 23 | target: str = Field(..., description="host地址或ip地址, 例如:`https://example.com` 或 `example.com` 或 `1.1.1.1`") 24 | template: str = Field(None, description="模板名称, 例如:`http` 或 `vulnerabilities`") 25 | 26 | 27 | class NucleiScannerTool(BaseTool): 28 | name: str = "Nuclei" 29 | description: str = "一款自动化的安全漏洞检测工具,可以进行进行快速、定制化的漏洞扫描。支持域名、URL、IP地址等。" 30 | args_schema: Type[BaseModel] = NucleiScannerToolSchema 31 | db: DB | None = None 32 | embedder: App | None = None 33 | nuclei_path: str | None = None 34 | templates_path: str | None = None 35 | task_id: int | None = None 36 | 37 | class Config: 38 | arbitrary_types_allowed = True 39 | 40 | def _init_embedder(self, embedder_cfg): 41 | self.embedder = App.from_config(config=embedder_cfg) 42 | 43 | data_sources = self.embedder.get_data_sources() 44 | if len(data_sources) > 0: 45 | return 46 | nuclei = Nuclei(nucleiAbsPath=self.nuclei_path, templatesAbsPath=self.templates_path) 47 | tpls = nuclei.nucleiTemplates(False) 48 | outputPath = os.path.join(tempfile.gettempdir(), 'nuclei_tpl_tmp.csv') 49 | datalist = [] 50 | for tpl in tpls: 51 | tpldata = { 52 | 'template': tpl.file, 53 | 'dir': tpl.dir, 54 | 'name': tpl.name.strip() if tpl.name is not None else None, 55 | 'tags': ','.join(tpl.tags) if tpl.tags is not None and len(tpl.tags) > 0 else '', 56 | 'description': tpl.description.strip() if tpl.description is not None else None, 57 | } 58 | 59 | datalist.append(tpldata) 60 | keys = datalist[0].keys() 61 | with open(outputPath, 'w', newline='', encoding='utf-8') as f: 62 | dict_writer = csv.DictWriter(f, fieldnames=keys) 63 | dict_writer.writeheader() 64 | dict_writer.writerows(datalist) 65 | 66 | self.embedder.add(source=outputPath, data_type=DataType.CSV) 67 | os.remove(outputPath) 68 | 69 | def __init__(self, db: DB, task_id: int, embedder_cfg: dict = None, nuclei_path: str = None, 70 | 
templates_path: str = None): 71 | super().__init__() 72 | self.db = db 73 | self.task_id = task_id 74 | self.nuclei_path = nuclei_path 75 | self.templates_path = templates_path 76 | logger.info("初始化工具 Nuclei") 77 | # TODO 需要完善推理逻辑 78 | # self._init_embedder(embedder_cfg) 79 | 80 | def reasoningTemplates(self, target) -> [str]: 81 | # TODO 需要推理合适的模板 82 | # answer, sources = self.embedder.query( 83 | # f'给我多个适合以下目标的模板名称:\n{target}', citations=True, 84 | # kwargs={'n_results': 100}) 85 | return ['http', 'network'] 86 | 87 | def _run( 88 | self, 89 | **kwargs: Any, 90 | ) -> Any: 91 | target = kwargs.pop('target', "") 92 | if target == "": 93 | return "target为空" 94 | target_type = "" 95 | if validators.url(target): 96 | target_type = "url" 97 | elif validators.ipv4(target): 98 | target_type = "ipv4" 99 | elif validators.ipv6(target): 100 | target_type = "ipv6" 101 | elif is_domain(target): 102 | target_type = "domain" 103 | else: 104 | return "target不合法" 105 | 106 | template = kwargs.pop('template', None) 107 | try: 108 | logger.info("Nuclei: {} {}", target, template) 109 | now = datetime.now() 110 | try: 111 | nuclei = Nuclei(nucleiAbsPath=self.nuclei_path, templatesAbsPath=self.templates_path) 112 | result = nuclei.scan(target, self.reasoningTemplates(target_type)) 113 | if len(result) == 0: 114 | return "未找到任何结果" 115 | 116 | with self.db.DBSession() as session: 117 | for data in result: 118 | vuldb = Vul() 119 | vuldb.task_id = self.task_id 120 | vuldb.target = target 121 | vuldb.host = data.host 122 | vuldb.type = data.type 123 | vuldb.vul_name = data.vulnerability_name 124 | vuldb.vul_detail = data.vulnerability_detail 125 | vuldb.vul_point = data.vulnerable_at 126 | vuldb.solution = data.solution 127 | vuldb.cve_id = data.cve_id 128 | vuldb.cwe_id = data.cwe_id 129 | vuldb.severity = data.severity 130 | vuldb.description = data.description 131 | vuldb.extra_info = data.dict( 132 | exclude_none=True, 133 | exclude={'host', 'type', 'vulnerability_name', 'vulnerability_detail', 134 | 'vulnerable_at', 'solution', 'cve_id', 'cwe_id', 'severity', 135 | 'description'}) 136 | vuldb.source = self.name 137 | vuldb.created = now 138 | try: 139 | session.add(vuldb) 140 | session.commit() 141 | except DuplicateException as e: 142 | session.rollback() 143 | except Exception as e: 144 | raise 145 | return f"共发现{len(result)}个漏洞" 146 | except exc.SQLAlchemyError as e: 147 | logger.error("数据库错误: {}", e) 148 | return "数据库错误" 149 | except Exception as e: 150 | logger.error("其他错误: {}", e) 151 | return f"其他错误: {e}" 152 | except Exception as e: 153 | logger.error("获取失败: {}", e) 154 | return f"获取失败: {e}" 155 | -------------------------------------------------------------------------------- /exploits/vul_scan_expert.py: -------------------------------------------------------------------------------- 1 | from ipaddress import ip_address 2 | from textwrap import dedent 3 | 4 | import validators 5 | from crewai import Agent, Task, Crew 6 | from crewai_tools import BaseTool 7 | from sqlalchemy import select, and_ 8 | 9 | from exploits.scanner.nuclei_scanner_tool import NucleiScannerTool 10 | from exploits.web.gobuster_dir_tool import GobusterDirTool 11 | from exploits.web.host_crawler_tool import HostCrawlerTool 12 | from helpers.utils import is_domain 13 | from persistence.database import DB 14 | from config import logger 15 | from persistence.orm import WebInfo, UrlEnum 16 | 17 | 18 | class VulScanExpert: 19 | """ 20 | 漏洞分析专家 21 | 根据已探明的网络资产,进行漏洞分析。 22 | """ 23 | 24 | # TODO 25 | def __init__(self, db: DB, 
llm=None, nuclei_path: str = None, nuclei_templates_path: str = None, 26 | gobuster_path: str = None, gobuster_wordlist_path: str = None, 27 | verbose: bool = False): 28 | self.llm = llm 29 | self.db = db 30 | self.nuclei_path = nuclei_path 31 | self.nuclei_templates_path = nuclei_templates_path 32 | self.gobuster_path = gobuster_path 33 | self.gobuster_wordlist_path = gobuster_wordlist_path 34 | self.verbose = verbose 35 | 36 | def agent_cyber_assets_fingerprinting_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 37 | logger.info("初始化代理 网络资产指纹识别专家") 38 | agent = Agent( 39 | role='网络资产指纹识别专家', 40 | goal='对网络资产进行指纹识别,最终掌握目标的所有特征,为后续工作打下基础。', 41 | backstory=dedent( 42 | """ 43 | 你是一名经验丰富的网络资产指纹识别专家,隶属于攻击团队。 44 | 你的主要任务是利用指纹识别工具分析网络服务的特征,例如端口提供的服务、web程序的开发语言、web程序使用的中间件等等。 45 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 46 | """), 47 | tools=tools, 48 | verbose=self.verbose, 49 | allow_delegation=True, 50 | max_rpm=300, 51 | # max_iter=1, 52 | llm=llm, 53 | cache=False, 54 | ) 55 | 56 | if llm is not None: 57 | agent.llm = llm 58 | return agent 59 | 60 | def task_cyber_assets_fingerprinting(self, agent: Agent, target: str) -> Task: 61 | logger.info("初始化任务 网络资产指纹识别") 62 | return Task( 63 | agent=agent, 64 | description=dedent( 65 | f""" 66 | 使用多种指纹识别工具对目标进行指纹识别,为漏洞扫描和利用提供基础信息: 67 | - 探明暴露的TCP/UDP端口的服务, 例如探明某个端口是SSH服务还是HTTP服务,以及服务的版本信息。 68 | - 探明Web应用程序的类型和版本信息,例如探明某个Web应用程序是WordPress还是Joomla,以及Web应用程序的版本信息。 69 | 70 | 目标: `{target}` 71 | """), 72 | expected_output=dedent( 73 | """ 74 | 最终答案是本次探测结果数量,具体的结果已存储在数据库中。不要编造其他额外内容。 75 | """), 76 | ) 77 | 78 | def _getFingerPrintingTools(self, task_id: int, target) -> []: 79 | tools = [] 80 | if validators.url(target) or is_domain(target): 81 | tools.append(HostCrawlerTool(self.db, task_id)) 82 | return tools 83 | 84 | def _fingerPrintingCrew(self, task_id: int, target: str): 85 | 86 | agents = [] 87 | tasks = [] 88 | tools = self._getFingerPrintingTools(task_id, target) 89 | 90 | if len(tools) > 0: 91 | ag = self.agent_cyber_assets_fingerprinting_expert(self.llm, tools) 92 | agents.append(ag) 93 | 94 | tasks.append(self.task_cyber_assets_fingerprinting(ag, target)) 95 | 96 | if len(agents) == 0 or len(tasks) == 0: 97 | raise Exception("无可用工具") 98 | 99 | logger.info("初始化智能体 指纹识别") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False 105 | ) 106 | 107 | def fingerprintingCrew(self, task_id: int, target: str): 108 | if validators.url(target): 109 | # url 110 | logger.info("url目标 {}", target) 111 | elif is_domain(target): 112 | # domain 113 | logger.info("domain目标 {}", target) 114 | else: 115 | try: 116 | ip_address(target) 117 | logger.info("IP目标 {}", target) 118 | except ValueError: 119 | raise ValueError("目标类型不支持") 120 | return self._fingerPrintingCrew(task_id, target) 121 | 122 | def do_directory_bruteforcing(self, task_id: int): 123 | 124 | datas = [] 125 | with self.db.DBSession() as session: 126 | subquery = select(UrlEnum.web_info_id).where(UrlEnum.task_id == task_id).subquery() 127 | infos = session.query(WebInfo).filter( 128 | and_( 129 | WebInfo.task_id == task_id), 130 | WebInfo.id.notin_(subquery) 131 | ).all() 132 | for info in infos: 133 | datas.append((info.id, info.url)) 134 | 135 | for id, url in datas: 136 | gobuster = GobusterDirTool(self.db, task_id, id, self.gobuster_path, self.gobuster_wordlist_path) 137 | gobuster.run(url=url) 138 | 139 | def _getVulScanTools(self, task_id: int) -> []: 140 | tools = [] 141 | if self.nuclei_path and self.nuclei_templates_path: 142 | tools.append( 
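                # 仅当同时配置了nuclei可执行文件路径与模板路径时才注册该工具;
                # 若最终无任何可用工具,vulScanCrew将抛出「无可用工具」异常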
143 | NucleiScannerTool(self.db, task_id, nuclei_path=self.nuclei_path, 144 | templates_path=self.nuclei_templates_path)) 145 | return tools 146 | 147 | def agent_vulnerability_scanning_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 148 | logger.info("初始化代理 漏扫专家") 149 | agent = Agent( 150 | role='漏洞扫描专家', 151 | goal='对网络资产进行漏洞扫描,以发现潜在的漏洞和弱点', 152 | backstory=dedent( 153 | """ 154 | 你是一名经验丰富的漏洞扫描专家,隶属于攻击团队。 155 | 你的主要任务是利用各种工具对目标进行漏洞扫描,以发现漏洞利用点、关键信息的泄漏、以及其他对渗透有用的各种信息。 156 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 157 | """), 158 | tools=tools, 159 | verbose=self.verbose, 160 | allow_delegation=True, 161 | max_rpm=300, 162 | # max_iter=1, 163 | llm=llm, 164 | cache=False, 165 | ) 166 | 167 | if llm is not None: 168 | agent.llm = llm 169 | return agent 170 | 171 | def task_vulnerability_scanning_expert(self, agent: Agent, target: str) -> Task: 172 | logger.info("初始化任务 漏洞扫描") 173 | return Task( 174 | agent=agent, 175 | description=dedent( 176 | f""" 177 | 对目标进行全面的漏洞扫描,发现尽可能多的安全漏洞。 178 | 179 | 目标: `{target}` 180 | """), 181 | expected_output=dedent( 182 | """ 183 | 最终答案是本次探测结果数量,具体的结果已存储在数据库中。不要编造其他额外内容。 184 | """), 185 | ) 186 | 187 | def vulScanCrew(self, task_id: int, target: str): 188 | if validators.url(target): 189 | # url 190 | logger.info("url目标 {}", target) 191 | elif is_domain(target): 192 | # domain 193 | logger.info("domain目标 {}", target) 194 | else: 195 | try: 196 | ip_address(target) 197 | logger.info("IP目标 {}", target) 198 | except ValueError: 199 | raise ValueError("目标类型不支持") 200 | 201 | agents = [] 202 | tasks = [] 203 | tools = self._getVulScanTools(task_id) 204 | 205 | if len(tools) > 0: 206 | ag = self.agent_vulnerability_scanning_expert(self.llm, tools) 207 | agents.append(ag) 208 | 209 | tasks.append(self.task_vulnerability_scanning_expert(ag, target)) 210 | 211 | if len(agents) == 0 or len(tasks) == 0: 212 | raise Exception("无可用工具") 213 | 214 | logger.info("初始化智能体 漏洞扫描") 215 | return Crew( 216 | agents=agents, 217 | tasks=tasks, 218 | verbose=self.verbose, 219 | share_crew=False 220 | ) 221 | -------------------------------------------------------------------------------- /exploits/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/web/__init__.py -------------------------------------------------------------------------------- /exploits/web/curl_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Any 2 | 3 | import httpx 4 | import validators 5 | from crewai_tools import BaseTool 6 | from pydantic.v1 import BaseModel, Field 7 | 8 | from config import logger 9 | 10 | 11 | class CurlToolSchema(BaseModel): 12 | """CurlToolSchema 的查询参数""" 13 | url: str = Field(..., description="url地址") 14 | method: str = Field(..., description="请求方法,如 'GET' 或 'POST'") 15 | headers: dict[str, str] = Field(description="字典形式的 HTTP 头部") 16 | data: dict[str, str] = Field(description="用于 POST 请求的表单数据") 17 | json_data: Any = Field(description="用于 POST 请求的 JSON 数据", alias='json') 18 | params: str = Field(description="用于 GET 请求的查询参数") 19 | 20 | 21 | class CurlTool(BaseTool): 22 | name: str = "Curl" 23 | description: str = "送HTTP请求并返回原始HTTP响应格式的字符串" 24 | args_schema: Type[BaseModel] = CurlToolSchema 25 | 26 | def __init__(self): 27 | super().__init__() 28 | logger.info("初始化工具 Curl") 29 | 30 | def _run( 31 | self, 32 | **kwargs: Any, 33 | ) -> Any: 34 | url = kwargs.pop('url', "") 35 | 
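        # 参数由agent以松散类型的kwargs传入,发起请求前先校验url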
        if url == "":
            return "url为空"

        if validators.url(url) is False:
            return "url地址不合法"

        method = kwargs.pop('method', "GET")
        headers = kwargs.pop('headers', None)
        data = kwargs.pop('data', None)
        json = kwargs.pop('json', None)
        params = kwargs.pop('params', None)
        try:
            logger.info("Curl: {}", url)
            with httpx.Client(verify=False) as client:
                response = client.request(
                    method=method,
                    url=url,
                    headers=headers,
                    data=data,
                    json=json,
                    params=params
                )
                status_line = f"{response.http_version} {response.status_code} {response.reason_phrase}\r\n"

                # 头部
                headers = ""
                for name, value in response.headers.items():
                    headers += f"{name}: {value}\r\n"

                # 确保响应体以新行结束
                body = response.text
                if not body.endswith('\r\n'):
                    body += '\r\n'

                # 组装完整的响应字符串
                return f"{status_line}{headers}\r\n{body}"

        except Exception as e:
            logger.error("获取失败: {}", e)
            return f"请求失败: {e}"
--------------------------------------------------------------------------------
/exploits/web/gobuster_dir_tool.py:
--------------------------------------------------------------------------------
import uuid
from typing import Type, Any
from urllib.parse import urljoin

from crewai_tools import BaseTool
from pydantic.v1 import BaseModel, Field
from sqlalchemy import exc
from datetime import datetime

from helpers import fingers
from helpers.crawler import crawl_host
from helpers.gobuster import Gobuster
from config import logger
from persistence.database import DB
from persistence.orm import DuplicateException, UrlEnum


class GobusterDirToolSchema(BaseModel):
    """GobusterDirToolSchema 的查询参数"""
    url: str = Field(..., description="host或url地址, 一个host或者完整的url地址")


class GobusterDirTool(BaseTool):
    name: str = "Gobuster"
    description: str = "url路径枚举工具,用于发现隐藏或未公开的url路径"
    args_schema: Type[BaseModel] = GobusterDirToolSchema
    db: DB | None = None
    task_id: int | None = None
    webinfo_id: int | None = None
    gobuster_path: str | None = None
    wordlist_path: str | None = None

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, db: DB, task_id: int, webinfo_id: int, gobuster_path: str = None, wordlist_path: str = None):
        super().__init__()
        self.db = db
        self.task_id = task_id
        self.webinfo_id = webinfo_id
        self.gobuster_path = gobuster_path
        self.wordlist_path = wordlist_path
        logger.info("初始化工具 Gobuster")

    def _run(
            self,
            **kwargs: Any,
    ) -> Any:
        url = kwargs.pop('url', "")
        if url == "":
            return "url为空"

        try:
            logger.info("Gobuster: {}", url)
            gobuster = Gobuster(wordlist=self.wordlist_path, gobusterAbsPath=self.gobuster_path)
            result = gobuster.dir(url)
            if len(result) == 0:
                return "未找到任何结果"
            # 添加一个随机path作为探针;先判空再追加,否则上面的判空检查永远不会生效
            result.append(str(uuid.uuid4()))

            now = datetime.now()
            with self.db.DBSession() as session:
                for r in result:
                    htmls = crawl_host(urljoin(url, r))
                    if len(htmls) == 0:
                        continue
                    for html in htmls:
                        matched = fingers.Match(html.headers, html.body, [i.mmh3hash for i in html.favicons],
                                                [i.md5hash for i in html.favicons])
                        urlenumdb = UrlEnum()
                        urlenumdb.task_id = self.task_id
                        urlenumdb.web_info_id = self.webinfo_id
                        urlenumdb.host = html.host
urlenumdb.schema = html.schema 76 | urlenumdb.url = html.url 77 | urlenumdb.path = r 78 | urlenumdb.current_redirects = html.current_redirects 79 | urlenumdb.redirect_to = html.redirect_to 80 | urlenumdb.title = html.title 81 | urlenumdb.status = html.status 82 | urlenumdb.headers = html.headers 83 | urlenumdb.favicons = [favicon.to_dict() for favicon in html.favicons] 84 | urlenumdb.body = html.body 85 | urlenumdb.created = now 86 | urlenumdb.finger_prints = [match.to_dict() for match in matched] 87 | urlenumdb.source = self.name 88 | try: 89 | session.add(urlenumdb) 90 | session.commit() 91 | except DuplicateException as e: 92 | session.rollback() 93 | except Exception as e: 94 | raise 95 | 96 | return f"共发现{len(result)}个目录" 97 | except exc.SQLAlchemyError as e: 98 | logger.error("数据库错误: {}", e) 99 | return "数据库错误" 100 | except Exception as e: 101 | logger.error("获取失败: {}", e) 102 | return f"获取失败: {e}" 103 | -------------------------------------------------------------------------------- /exploits/web/host_crawler_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Type, Any 3 | from urllib.parse import urlparse, urlunparse 4 | 5 | import validators 6 | from crewai_tools import BaseTool 7 | from pydantic.v1 import BaseModel, Field 8 | from sqlalchemy import exc 9 | 10 | from helpers import fingers 11 | from helpers.crawler import crawl_host 12 | from helpers.utils import is_domain 13 | from persistence.database import DB 14 | from config import logger 15 | from persistence.orm import WebInfo, Cdn, DuplicateException 16 | 17 | 18 | class HostCrawlerToolSchema(BaseModel): 19 | """HostCrawlerToolSchema 的查询参数""" 20 | host: str = Field(..., 21 | description="host或url地址, 一个host或者完整的url地址") 22 | 23 | 24 | class HostCrawlerTool(BaseTool): 25 | name: str = "HostCrawler" 26 | description: str = "根据host或url,获取网页的图标、标题、响应头、HTML正文等信息,同时根据指纹判断其应用信息。" 27 | args_schema: Type[BaseModel] = HostCrawlerToolSchema 28 | db: DB | None = None 29 | task_id: int | None = None 30 | 31 | class Config: 32 | arbitrary_types_allowed = True 33 | 34 | def __init__(self, db: DB, task_id: int): 35 | super().__init__() 36 | self.db = db 37 | self.task_id = task_id 38 | logger.info("初始化工具 HostCrawler") 39 | 40 | def _run( 41 | self, 42 | **kwargs: Any, 43 | ) -> Any: 44 | host = kwargs.pop('host', "") 45 | if host == "": 46 | return "host为空" 47 | 48 | if is_domain(host) is True: 49 | url_parsed = urlparse("http://" + host) 50 | host = urlunparse((url_parsed.scheme, url_parsed.netloc, url_parsed.path, '', '', '')) 51 | 52 | if validators.url(host) is False: 53 | return "host地址不合法" 54 | try: 55 | logger.info("HostCrawler: {}", host) 56 | now = datetime.now() 57 | htmls = crawl_host(host) 58 | if len(htmls) == 0: 59 | return "获取失败" 60 | try: 61 | with self.db.DBSession() as session: 62 | for html in htmls: 63 | matched = fingers.Match(html.headers, html.body, [i.mmh3hash for i in html.favicons], 64 | [i.md5hash for i in html.favicons]) 65 | webinfodb = WebInfo() 66 | webinfodb.target = host 67 | webinfodb.task_id = self.task_id 68 | webinfodb.host = html.host 69 | webinfodb.schema = html.schema 70 | webinfodb.url = html.url 71 | webinfodb.current_redirects = html.current_redirects 72 | webinfodb.redirect_to = html.redirect_to 73 | webinfodb.ip = html.ip 74 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(html.ip)).first() 75 | if ipcdn is not None: 76 | webinfodb.ip_cdn = ipcdn.organization 77 | webinfodb.port = html.port 78 | 
webinfodb.title = html.title 79 | webinfodb.status = html.status 80 | webinfodb.headers = html.headers 81 | webinfodb.favicons = [favicon.to_dict() for favicon in html.favicons] 82 | webinfodb.body = html.body 83 | webinfodb.certs = html.certs 84 | webinfodb.created = now 85 | webinfodb.finger_prints = [match.to_dict() for match in matched] 86 | webinfodb.source = self.name 87 | 88 | try: 89 | session.add(webinfodb) 90 | session.commit() 91 | except DuplicateException as e: 92 | session.rollback() 93 | except Exception as e: 94 | raise 95 | except exc.SQLAlchemyError as e: 96 | logger.error("数据库错误: {}", e) 97 | return "数据库错误" 98 | except Exception as e: 99 | logger.error("其他错误: {}", e) 100 | return f"其他错误: {e}" 101 | return f"共发现{len(htmls)}个特征" 102 | except Exception as e: 103 | logger.error("获取失败: {}", e) 104 | return f"获取失败: {e}" 105 | -------------------------------------------------------------------------------- /exploits/web/html_parse_info_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Any 2 | 3 | from crewai_tools import BaseTool 4 | from pydantic.v1 import BaseModel, Field 5 | from config import logger 6 | from helpers.html_information_leak_analyze import analyze 7 | 8 | 9 | class HtmlInformationLeakAnalyzeToolSchema(BaseModel): 10 | """HtmlInformationLeakAnalyzeToolSchema 的查询参数""" 11 | html: str = Field(..., description="html源代码") 12 | 13 | 14 | class HtmlInformationLeakAnalyzeTool(BaseTool): 15 | name: str = "HtmlInformationLeakAnalyze" 16 | description: str = "从html源代码中得到潜在的敏感信息" 17 | args_schema: Type[BaseModel] = HtmlInformationLeakAnalyzeToolSchema 18 | 19 | def __init__(self, ): 20 | super().__init__() 21 | logger.info("初始化工具 HtmlInformationLeakAnalyze") 22 | 23 | def _run( 24 | self, 25 | **kwargs: Any, 26 | ) -> Any: 27 | html = kwargs.pop('html', "") 28 | if html == "": 29 | return "html为空" 30 | datas = analyze(html) 31 | 32 | if datas == "": 33 | return "未找到任何信息" 34 | 35 | return datas 36 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/alienvault_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | import requests 4 | from pydantic.v1 import BaseModel, Field 5 | from tld import get_tld 6 | 7 | from config import logger 8 | 9 | 10 | class SubDomain(BaseModel): 11 | apex_domain: Optional[str] = Field(description="顶级域名") 12 | sub_domain: Optional[str] = Field(description="子域名") 13 | hostname: Optional[str] = Field(description="域名") 14 | 15 | a: List[str] = Field(description="A记录") 16 | cname: List[str] = Field(description="CNAME记录") 17 | aaaa: List[str] = Field(description="AAAA记录") 18 | mx: List[str] = Field(description="MX记录") 19 | ns: List[str] = Field(description="NS记录") 20 | soa: List[str] = Field(description="SOA记录") 21 | txt: List[str] = Field(description="TXT记录") 22 | 23 | 24 | class AlienVaultResult(BaseModel): 25 | apex_domain: Optional[str] = Field(description="顶级域名") 26 | sub_domains: dict[str, SubDomain] = Field(description="子域名") 27 | 28 | 29 | class AlienVaultApi: 30 | def __init__(self, base_url: str = "https://otx.alienvault.com/api/v1/indicators", proxies: dict = 
None, 31 | timeout: int = 15): 32 | self.base_url = base_url 33 | self.timeout = timeout 34 | self._session = requests.Session() 35 | if proxies: 36 | self._session.proxies.update(proxies) 37 | self._session.trust_env = False 38 | 39 | def _search(self, stype, value) -> List[AlienVaultResult]: 40 | url = f"{self.base_url}/{stype}/{value}/passive_dns" 41 | logger.debug("AlienVault: {upath}", upath=url) 42 | headers = { 43 | "Content-Type": "application/json" 44 | } 45 | response = self._session.request("GET", url, headers=headers, timeout=self.timeout) 46 | response.raise_for_status() 47 | res = response.json() 48 | 49 | domains = {} 50 | if "passive_dns" in res: 51 | for item in res["passive_dns"]: 52 | 53 | domain_obj = get_tld(item["hostname"], fail_silently=True, as_object=True, fix_protocol=True) 54 | apex_domain = domain_obj.fld 55 | sub_domain = domain_obj.subdomain 56 | val = item["address"] 57 | if item["record_type"] == "CNAME": 58 | val = item["address"] 59 | elif item["record_type"] == "SOA" or item["record_type"] == "NS": 60 | val = item["address"] 61 | 62 | if apex_domain in domains: 63 | domain = domains[apex_domain] 64 | else: 65 | domain = AlienVaultResult( 66 | apex_domain=apex_domain, 67 | sub_domains={} 68 | ) 69 | domains[apex_domain] = domain 70 | 71 | if sub_domain in domain.sub_domains: 72 | record = domain.sub_domains[sub_domain] 73 | else: 74 | record = SubDomain( 75 | apex_domain=apex_domain, 76 | sub_domain=sub_domain, 77 | hostname=item["hostname"], 78 | a=[], 79 | cname=[], 80 | aaaa=[], 81 | mx=[], 82 | ns=[], 83 | soa=[], 84 | txt=[] 85 | ) 86 | domain.sub_domains[sub_domain] = record 87 | 88 | if item["record_type"] == "A": 89 | record.a.append(val) 90 | elif item["record_type"] == "AAAA": 91 | record.aaaa.append(val) 92 | elif item["record_type"] == "SOA": 93 | record.soa.append(val) 94 | elif item["record_type"] == "NS": 95 | record.ns.append(val) 96 | elif item["record_type"] == "TXT": 97 | record.txt.append(val) 98 | elif item["record_type"] == "SOA": 99 | record.soa.append(val) 100 | elif item["record_type"] == "MX": 101 | record.mx.append(val) 102 | elif item["record_type"] == "CNAME": 103 | record.cname.append(val) 104 | 105 | return list(domains.values()) 106 | 107 | def search_domain(self, domain: str) -> List[AlienVaultResult]: 108 | return self._search("domain", domain) 109 | 110 | def search_ipv4(self, ip: str) -> List[AlienVaultResult]: 111 | return self._search("IPv4", ip) 112 | 113 | def search_ipv6(self, ip: str) -> List[AlienVaultResult]: 114 | return self._search("IPv6", ip) 115 | -------------------------------------------------------------------------------- /helpers/crawler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | from urllib.parse import urlparse, urlunparse, urljoin 4 | 5 | import httpx 6 | import mmh3 7 | from bs4 import BeautifulSoup 8 | from config import logger 9 | 10 | AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36' 11 | 12 | 13 | class Favicon: 14 | def __init__(self, b64data: str, md5hash: str, mmh3hash: int): 15 | self.b64data = b64data 16 | self.md5hash = md5hash 17 | self.mmh3hash = mmh3hash 18 | 19 | def __eq__(self, other): 20 | return self.md5hash == other.md5hash 21 | 22 | def __hash__(self): 23 | return hash(self.md5hash) 24 | 25 | def to_dict(self): 26 | return { 27 | 'b64data': self.b64data, 28 | 'md5hash': self.md5hash, 29 | 'mmh3hash': self.mmh3hash 
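            # mmh3hash基于带换行的base64编码计算(见_get_favicon_hash),
            # 与Shodan、FOFA等平台常用的favicon指纹约定一致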
30 | } 31 | 32 | def __repr__(self) -> str: 33 | return f"Favicon(md5hash={self.md5hash!r}, mmh3hash={self.mmh3hash!r})" 34 | 35 | 36 | class HttpHtml: 37 | def __init__(self, 38 | host: str, 39 | url: str, 40 | schema: str, 41 | title: str, 42 | headers: dict, 43 | status: int, 44 | body: str, 45 | current_redirects: int = 0, 46 | redirect_to: str = None, 47 | favicons: [Favicon] = None, 48 | ip=None, 49 | port=None, 50 | certs=None): 51 | self.host = host 52 | self.url = url 53 | self.schema = schema 54 | self.current_redirects = current_redirects 55 | self.redirect_to = redirect_to 56 | self.favicons = favicons 57 | self.title = title 58 | self.headers = headers 59 | self.status = status 60 | self.body = body 61 | self.ip = ip 62 | self.port = port 63 | self.certs = certs 64 | 65 | def __repr__(self) -> str: 66 | return f"HttpHtml(host={self.host!r}, favicons={self.favicons!r}, headers={self.headers!r}, title={self.title!r})" 67 | 68 | 69 | def _get_favicon_hash(url: str) -> Favicon | None: 70 | try: 71 | with httpx.Client(headers={'User-Agent': AGENT}, timeout=5, verify=False) as client: 72 | res = client.get(url) 73 | if res.status_code != 200: 74 | return None 75 | if len(res.content) == 0: 76 | return None 77 | b64data = base64.encodebytes(res.content) 78 | md5data = hashlib.md5(res.content).hexdigest() 79 | mmh3hash = mmh3.hash(b64data) 80 | 81 | favicon = Favicon(b64data.decode("utf-8"), md5data, mmh3hash) 82 | 83 | return favicon 84 | except httpx.RequestError as e: 85 | return None 86 | 87 | 88 | LINK_RELS = [ 89 | 'icon', 90 | 'shortcut icon', 91 | 'apple-touch-icon', 92 | 'apple-touch-icon-precomposed', 93 | ] 94 | 95 | META_NAMES = ['msapplication-TileImage', 'og:image'] 96 | 97 | 98 | def _get_favicons_urls(host: str, body: str) -> [str]: 99 | soup = BeautifulSoup(body, features='html.parser') 100 | link_tags = set() 101 | for rel in LINK_RELS: 102 | for link_tag in soup.find_all( 103 | 'link', attrs={'rel': lambda r: r and r.lower() == rel, 'href': True} 104 | ): 105 | link_tags.add(link_tag) 106 | 107 | meta_tags = set() 108 | for meta_tag in soup.find_all('meta', attrs={'content': True}): 109 | meta_type = meta_tag.get('name') or meta_tag.get('property') or '' 110 | meta_type = meta_type.lower() 111 | for name in META_NAMES: 112 | if meta_type == name.lower(): 113 | meta_tags.add(meta_tag) 114 | 115 | urls = set() 116 | hu = urlparse(host) 117 | urls.add(urlunparse((hu.scheme, hu.netloc, 'favicon.ico', '', '', ''))) 118 | 119 | for tag in link_tags | meta_tags: 120 | href = tag.get('href', '') or tag.get('content', '') 121 | href = href.strip() 122 | 123 | if not href or href.startswith('data:image/'): 124 | continue 125 | 126 | if bool(urlparse(href).netloc): 127 | url_parsed = href 128 | else: 129 | url_parsed = urljoin(host, href) 130 | 131 | # repair '//cdn.network.com/favicon.png' or `icon.png?v2` 132 | scheme = urlparse(host).scheme 133 | url_parsed = urlparse(url_parsed, scheme=scheme) 134 | urls.add(urlunparse((url_parsed.scheme, url_parsed.netloc, url_parsed.path, '', '', ''))) 135 | 136 | return list(urls) 137 | 138 | 139 | def _get_favicons_from_urls(urls: [str]) -> [Favicon]: 140 | favicons = set() 141 | if urls is None or len(urls) == 0: 142 | return favicons 143 | for url in urls: 144 | logger.debug("crawl_host: favicon {}", url) 145 | favicon = _get_favicon_hash(url) 146 | if favicon is not None: 147 | favicons.add(favicon) 148 | return list(favicons) 149 | 150 | 151 | def crawl_host(host: str) -> [HttpHtml]: 152 | """ 153 | 从host获取html 154 | :param host: 
要抓取的主机地址 155 | :return: 包含HttpHtml对象的列表,每个对象都是从一次HTTP请求中获得的HTML和相关信息 156 | """ 157 | logger.debug("crawl_host: {}", host) 158 | MAX_REDIRECTS = 3 # 定义最大重定向次数 159 | 160 | def fetch_url(url, current_redirects=0) -> [HttpHtml]: 161 | """ 162 | 辅助函数,用于处理单个URL的获取和重定向 163 | :param url: 请求的URL 164 | :param current_redirects: 当前重定向次数 165 | :return: 收集的Html对象列表 166 | """ 167 | if current_redirects > MAX_REDIRECTS: 168 | logger.debug("crawl_host: Reached max redirects: {}", MAX_REDIRECTS) 169 | return [] 170 | 171 | with httpx.Client(headers={'User-Agent': AGENT}, timeout=5, verify=False) as client: 172 | res = client.get(url) 173 | socket = res.stream._stream._httpcore_stream._stream._connection._network_stream._sock 174 | try: 175 | server_ip, server_port = socket.getpeername() 176 | except OSError: 177 | server_ip, server_port = None, None 178 | 179 | certs = None 180 | if hasattr(socket, '_sslobj') and socket._sslobj: 181 | certs = [c.get_info() for c in socket._sslobj.get_unverified_chain()] 182 | 183 | title = '' 184 | if res.content: 185 | soup = BeautifulSoup(res.content, features='html.parser') 186 | if soup.title: 187 | title = str(soup.title.string) 188 | htmlobj = HttpHtml( 189 | host=res.url.host, 190 | url=str(res.url), 191 | schema=res.url.scheme, 192 | title=title, 193 | headers=dict(res.headers), 194 | status=res.status_code, 195 | body=res.text, 196 | ip=server_ip, 197 | port=server_port, 198 | certs=certs 199 | ) 200 | 201 | favicons_url = _get_favicons_urls(htmlobj.url, htmlobj.body) 202 | htmlobj.favicons = _get_favicons_from_urls(favicons_url) 203 | htmlobj.current_redirects = current_redirects 204 | htmls = [htmlobj] 205 | 206 | # 处理重定向 207 | if res.is_redirect: 208 | new_url = httpx.URL(res.headers['Location']) 209 | if new_url.is_relative_url: 210 | new_url = res.url.join(new_url) 211 | htmlobj.redirect_to = str(new_url) 212 | logger.debug("crawl_host: Redirecting to: {}", htmlobj.redirect_to) 213 | htmls += fetch_url(htmlobj.redirect_to, current_redirects + 1) 214 | 215 | return htmls 216 | 217 | return fetch_url(host) 218 | -------------------------------------------------------------------------------- /helpers/fingers/__init__.py: -------------------------------------------------------------------------------- 1 | from helpers.fingers.web_fingers import WebFingers, MatchItem 2 | 3 | _webFingersObj: WebFingers = WebFingers() 4 | 5 | 6 | def Match(headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 7 | favicon_md5: int | str | list[int] | list[str] = None) -> \ 8 | [MatchItem]: 9 | return _webFingersObj.match(headers, body, favicon_int, favicon_md5) 10 | -------------------------------------------------------------------------------- /helpers/fingers/assets/custom.json: -------------------------------------------------------------------------------- 1 | { 2 | "fingerprint": [ 3 | { 4 | "cms": "thinkadmin报错页面", 5 | "method": "keyword", 6 | "location": "body", 7 | "keyword": [ 8 | "ADMIN_MODULE", 9 | "十年磨一剑-为API开发设计的高性能框架", 10 | "http://www.thinkphp.cn", 11 | "Environment Variables", 12 | "ThinkPHP" 13 | ] 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /helpers/fingers/web_fingers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from enum import Enum 4 | from os import path 5 | 6 | 7 | class MatchType(Enum): 8 | HEADER = "header" 9 | BODY = "body" 10 | FAVICON = "favicon" 11 | 12 | 13 | class MatchItem: 
14 | def __init__(self, name: str, mtype: MatchType, matches: dict[str, str]): 15 | self.name = name 16 | self.match_type = mtype 17 | self.matches = matches 18 | 19 | def to_dict(self): 20 | return { 21 | 'name': self.name, 22 | 'type': self.match_type.value, 23 | 'matches': self.matches 24 | } 25 | 26 | def __repr__(self): 27 | return f"MatchItem(name={self.name!r}, type={self.match_type!r}, matches={self.matches!r})" 28 | 29 | 30 | class FingerPrint: 31 | def __init__(self, 32 | name: str, 33 | headers=None, 34 | body=None, 35 | favicons=None 36 | ): 37 | self.name = name 38 | self.headers = headers 39 | self.body = body 40 | self.favicons = favicons 41 | 42 | def _match_favicon(self, favicon: str = None) -> MatchItem | None: 43 | if favicon is None: 44 | return None 45 | if self.favicons is None: 46 | return None 47 | for sv in self.favicons: 48 | if sv == favicon: 49 | return MatchItem(self.name, MatchType.FAVICON, {favicon: sv}) 50 | return None 51 | 52 | def _match_body(self, body: str = None) -> MatchItem | None: 53 | if body is None: 54 | return None 55 | if self.body is None: 56 | return None 57 | 58 | matches = {} 59 | for sv in self.body: 60 | if sv in body: 61 | matches[sv] = "" 62 | 63 | if len(matches) == len(self.body): 64 | return MatchItem(self.name, MatchType.BODY, matches) 65 | return None 66 | 67 | def _match_headers(self, headers: dict = None) -> MatchItem | None: 68 | if headers is None: 69 | return None 70 | 71 | if self.headers is None: 72 | return None 73 | 74 | if type(self.headers) is dict: 75 | matches = {} 76 | for key, value in self.headers.items(): 77 | if key in headers: 78 | if value in headers[key]: 79 | matches[key] = f"{value} {headers[key]}" 80 | if len(matches) == len(self.headers): 81 | return MatchItem(self.name, MatchType.HEADER, matches) 82 | 83 | elif type(self.headers) is list: 84 | items = headers.items() 85 | matches = {} 86 | for sv in self.headers: 87 | for index, tv in enumerate([i[1] for i in items]): 88 | if sv in tv: 89 | matches[list(headers)[index]] = f"{sv} {tv}" 90 | if len(matches) >= len(self.headers): 91 | return MatchItem(self.name, MatchType.HEADER, matches) 92 | 93 | return None 94 | 95 | def match(self, headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 96 | favicon_md5: int | str | list[int] | list[str] = None) -> MatchItem | None: 97 | matched = self._match_headers(headers) 98 | if matched is not None: 99 | return matched 100 | 101 | matched = self._match_body(body) 102 | if matched is not None: 103 | return matched 104 | 105 | favs = [] 106 | if type(favicon_int) is str or type(favicon_int) is int: 107 | favs.append(str(favicon_int)) 108 | elif type(favicon_int) is list: 109 | for fav in favicon_int: 110 | favs.append(str(fav)) 111 | 112 | if type(favicon_md5) is str or type(favicon_md5) is int: 113 | favs.append(str(favicon_md5)) 114 | elif type(favicon_md5) is list: 115 | for fav in favicon_md5: 116 | favs.append(str(fav)) 117 | 118 | for fav in favs: 119 | matched = self._match_favicon(fav) 120 | if matched is not None: 121 | return matched 122 | 123 | return None 124 | 125 | def __repr__(self): 126 | return f"FingerPrint(name={self.name!r}, headers={self.headers!r}, body={self.body!r}, favicons={self.favicons!r})" 127 | 128 | 129 | class WebFingers: 130 | fingers: [FingerPrint] = [] 131 | 132 | def __init__(self, base_path=os.path.dirname(__file__)): 133 | self.base_path = base_path 134 | self.fingers = self._parse_arl_fingers() + self._parse_web_fingerprint() 135 | 136 | def 
match(self, headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 137 | favicon_md5: int | str | list[int] | list[str] = None) -> \ 138 | [MatchItem]: 139 | matched = [] 140 | for finger in self.fingers: 141 | matechitem = finger.match(headers, body, favicon_int, favicon_md5) 142 | if matechitem is not None: 143 | matched.append(matechitem) 144 | return matched 145 | 146 | def _parse_web_fingerprint(self) -> [FingerPrint]: 147 | fingers = [] 148 | # https://github.com/0x727/FingerprintHub/ 149 | with open(path.join(self.base_path, 'assets/web_fingerprint_v3.json'), encoding='utf-8') as f: 150 | data = json.load(f) 151 | for item in data: 152 | if item['request_data'] != "": 153 | continue 154 | elif item['request_method'] != "get": 155 | continue 156 | elif item['path'] != "/": 157 | continue 158 | 159 | finger = FingerPrint(name=item['name']) 160 | if len(item['headers']) > 0: 161 | finger.headers = item['headers'] 162 | if len(item['keyword']) > 0: 163 | finger.body = item['keyword'] 164 | if len(item['favicon_hash']) > 0: 165 | finger.favicons = item['favicon_hash'] 166 | 167 | fingers.append(finger) 168 | 169 | return fingers 170 | 171 | def _parse_arl_fingers(self) -> [FingerPrint]: 172 | # https://github.com/loecho-sec/ARL-Finger-ADD 173 | datas = [] 174 | with open(path.join(self.base_path, 'assets/arl_finger.json'), encoding='utf-8') as f: 175 | data = json.load(f) 176 | datas += data['fingerprint'] 177 | # https://github.com/EASY233/Finger/ 178 | with open(path.join(self.base_path, 'assets/finger.json'), encoding='utf-8') as f: 179 | data = json.load(f) 180 | datas += data['fingerprint'] 181 | with open(path.join(self.base_path, 'assets/custom.json'), encoding='utf-8') as f: 182 | data = json.load(f) 183 | datas += data['fingerprint'] 184 | 185 | fingers = [] 186 | for item in datas: 187 | finger = FingerPrint(name=item['cms']) 188 | if item['method'] == "keyword": 189 | if item['location'] == "header": 190 | finger.headers = item['keyword'] 191 | else: 192 | finger.body = item['keyword'] 193 | elif item['method'] == "faviconhash": 194 | finger.favicons = item['keyword'] 195 | else: 196 | continue 197 | fingers.append(finger) 198 | 199 | return fingers 200 | -------------------------------------------------------------------------------- /helpers/fofa_api.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from fofa.client import Client 4 | from pydantic.v1 import BaseModel, Field 5 | from config import logger 6 | 7 | 8 | class FofaResult(BaseModel): 9 | ip: Optional[str] = Field(ver='base', description="ip地址") 10 | port: Optional[str] = Field(ver='base', description="port") 11 | protocol: Optional[str] = Field(ver='base', description="协议名") 12 | country: Optional[str] = Field(ver='base', description="国家代码") 13 | country_name: Optional[str] = Field(ver='base', description="国家名") 14 | region: Optional[str] = Field(ver='base', description="区域") 15 | city: Optional[str] = Field(ver='base', description="城市") 16 | as_number: Optional[str] = Field(ver='base', description="asn编号") 17 | as_organization: Optional[str] = Field(ver='base', description="asn组织") 18 | host: Optional[str] = Field(ver='base', description="完全限定域名 (FQDN)") 19 | domain: Optional[str] = Field(ver='base', description="域名") 20 | os: Optional[str] = Field(ver='base', description="操作系统") 21 | server: Optional[str] = Field(ver='base', description="网站server") 22 | icp: Optional[str] = Field(ver='base', 
description="icp备案号") 23 | title: Optional[str] = Field(ver='base', description="网站标题") 24 | jarm: Optional[str] = Field(ver='base', description="jarm 指纹") 25 | header: Optional[str] = Field(ver='base', description="网站header") 26 | banner: Optional[str] = Field(ver='base', description="协议 banner") 27 | base_protocol: Optional[str] = Field(ver='base', description="基础协议,比如tcp/udp") 28 | link: Optional[str] = Field(ver='base', description="资产的URL链接") 29 | cert: Optional[str] = Field(ver='base', description="证书") 30 | certs_issuer_org: Optional[str] = Field(ver='base', description="证书颁发者组织") 31 | certs_issuer_cn: Optional[str] = Field(ver='base', description="证书颁发者通用名称") 32 | certs_subject_org: Optional[str] = Field(ver='base', description="证书持有者组织") 33 | certs_subject_cn: Optional[str] = Field(ver='base', description="证书持有者通用名称") 34 | tls_ja3s: Optional[str] = Field(ver='base', description="ja3s指纹信息") 35 | tls_version: Optional[str] = Field(ver='base', description="tls协议版本") 36 | product: Optional[str] = Field(ver='pro', description="专业版本及以上") 37 | product_category: Optional[str] = Field(ver='pro', description="产品分类") 38 | version: Optional[str] = Field(ver='pro', description="产品版本号") 39 | lastupdatetime: Optional[str] = Field(ver='pro', description="FOFA最后更新时间") 40 | cname: Optional[str] = Field(ver='pro', description="域名cname") 41 | icon_hash: Optional[str] = Field(ver='bus', description="返回的icon_hash值") 42 | certs_valid: Optional[str] = Field(ver='bus', description="证书是否有效") 43 | version: Optional[str] = Field(ver='bus', description="产品版本号") 44 | cname_domain: Optional[str] = Field(ver='bus', description="cname的域名") 45 | body: Optional[str] = Field(ver='bus', description="网站正文内容") 46 | icon: Optional[str] = Field(ver='ent', description="icon 图标") 47 | fid: Optional[str] = Field(ver='ent', description="fid") 48 | structinfo: Optional[str] = Field(ver='ent', description="结构化信息 (部分协议支持、比如elastic、mongodb)") 49 | 50 | @classmethod 51 | def GetFields(cls, ver='base'): 52 | vers = ['base'] 53 | if ver == 'pro': 54 | vers = ['base', 'pro'] 55 | elif ver == 'bus': 56 | vers = ['base', 'pro', 'bus'] 57 | elif ver == 'ent': 58 | vers = ['base', 'pro', 'bus', 'ent'] 59 | 60 | fields = [] 61 | for field in cls.__fields__.values(): 62 | if field.field_info.extra['ver'] in vers: 63 | fields.append(field.name) 64 | return fields 65 | 66 | @classmethod 67 | def LoadFromList(cls, datas: list, fields: list) -> '[FofaResult]': 68 | results = [] 69 | for data in datas: 70 | kv = {} 71 | for i, item in enumerate(data): 72 | kv[fields[i]] = item 73 | results.append(cls.parse_obj(kv)) 74 | return results 75 | 76 | 77 | def _assembly_query_str(fuzzy=True, **kwargs: Any) -> (str, str): 78 | for key, value in kwargs.items(): 79 | if key == "domain": 80 | if fuzzy: 81 | return f"domain=\"{value}\" || host=\"{value}\" || cname=\"{value}\" || cname_domain=\"{value}\"", value 82 | else: 83 | return f"domain=\"{value}\" || cname=\"{value}\" || cname_domain=\"{value}\"", value 84 | elif key == "app": 85 | return f"app=\"{value}\" || product=\"{value}\"", value 86 | else: 87 | return f"{key}=\"{value}\"", value 88 | 89 | 90 | class FofaApi: 91 | """ 92 | 使用FOFA API进行搜索 93 | """ 94 | 95 | def __init__(self, email: str, api_key: str, version: str = 'base'): 96 | self.client = Client(email=email, key=api_key) 97 | self.client._session.trust_env = False 98 | self.fields = FofaResult.GetFields(version) 99 | 100 | def search(self, fuzzy=False, max_size=500, page_size=200, **kwargs: Any) -> [FofaResult]: 101 | """ 102 | 搜索 103 | 
fuzzy: bool, 是否模糊搜索
103 |         max_size: int, 最大返回数量
104 |         page_size: int, 每页数量
105 |         """
106 |         fields = ','.join(self.fields)
107 |         query_str, val = _assembly_query_str(fuzzy=fuzzy, **kwargs)
108 |         next = ""
109 |         results = []
110 |         while True:
111 |             logger.debug("FOFA: {query_str} {next}", query_str=query_str, next=next)
112 |             r = self.client.search_next(query_str=query_str, size=page_size, fields=fields, full=True, next=next)
113 |             data = r['results']
114 |             if fuzzy is False:
115 |                 for item in data:
116 |                     if val in ' '.join(item):
117 |                         results.append(item)
118 |             else:
119 |                 results += data
120 |             if len(results) >= max_size or len(data) <= 0 or r['next'] == "":
121 |                 break
122 |             next = r['next']
123 |         return FofaResult.LoadFromList(results, self.fields)
124 | 
--------------------------------------------------------------------------------
/helpers/gobuster.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/OJ/gobuster
 2 | import os
 3 | import re
 4 | import shutil
 5 | import subprocess
 6 | 
 7 | 
 8 | class GobusterNotFound(Exception):
 9 |     pass
10 | 
11 | 
12 | class WordlistNotFound(Exception):
13 |     pass
14 | 
15 | 
16 | class IllegalArgumentException(Exception):
17 |     pass
18 | 
19 | 
20 | class GobusterException(Exception):
21 |     pass
22 | 
23 | 
24 | class Gobuster:
25 | 
26 |     def __init__(self, wordlist, threads=10, gobusterAbsPath=None):
27 |         self.gobusterPath = gobusterAbsPath
28 |         if self.gobusterPath is None:
29 |             self.gobusterPath = shutil.which("gobuster")
30 | 
31 |         if self.gobusterPath is None or not os.path.exists(self.gobusterPath):
32 |             raise GobusterNotFound("gobuster not found in path")
33 | 
34 |         self.wordlist = wordlist
35 |         if not os.path.exists(self.wordlist):
36 |             raise WordlistNotFound(f"Wordlist not found at {self.wordlist}")
37 |         self.threads = threads
38 |         self.gobusterPath = os.path.dirname(self.gobusterPath)
39 |         self.verbose = False
40 |         self.creationflags = subprocess.CREATE_NO_WINDOW if subprocess.sys.platform == 'win32' else 0
41 | 
42 |         # Allow changing the path where gobuster is installed (instead of expecting it to be in $PATH)
43 |         # Check if the '/' is at the end - and remove it if "yes"
44 |         if gobusterAbsPath is not None and gobusterAbsPath[-1] == "/":
45 |             self.gobusterPath = gobusterAbsPath[:-1]
46 | 
47 |         self.gobusterBinary = "gobuster"
48 |         if self.gobusterPath:
49 |             self.gobusterBinary = os.path.join(self.gobusterPath, self.gobusterBinary)
50 | 
51 |     def dir(self, url, exclude_status_code=None, verbose=False) -> [str]:
52 |         command = [
53 |             '--url', url,
54 |             '--no-tls-validation',
55 |             '--no-status',
56 |             '--hide-length'
57 |         ]
58 | 
59 |         if exclude_status_code is not None:
60 |             command.extend(['--status-codes-blacklist', exclude_status_code])
61 | 
62 |         report = self.exec('dir', command, bufsize=1, verbose=verbose)
63 |         paths = set()
64 |         for line in report.split('\n'):
65 |             paths.add(line.strip())
66 |         return list(paths)
67 | 
68 |     def exec(self, mode, cmd: [], bufsize=-1, verbose=False) -> str:
69 |         command = [self.gobusterBinary, mode, "--no-color", "--no-progress", "--quiet",
70 |                    "--threads", str(self.threads),
71 |                    "--wordlist", self.wordlist]
72 |         command.extend(cmd)
73 |         print(f"[Gobuster] [INFO] {' '.join(command)}")
74 | 
75 |         # 启动进程
76 |         process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
77 |                                    bufsize=bufsize,
78 |                                    creationflags=self.creationflags)
79 | 
80 |         outputs = []
81 |         try:
82 |             for line in iter(process.stdout.readline, ''):
83 |                 line = 
line.strip().lstrip('\r\x1b[2K') 84 | line = re.sub(r'\[.*?\]', '', line).strip() 85 | 86 | if verbose: 87 | print(line) 88 | if line != '': 89 | outputs.append(line) 90 | except KeyboardInterrupt: 91 | print(f"[Gobuster] [INFO] Process interrupted by user.") 92 | finally: 93 | process.stdout.close() 94 | process.wait() 95 | 96 | output = '\n'.join(outputs) 97 | if output.startswith("Error: "): 98 | raise GobusterException(output) 99 | return output 100 | -------------------------------------------------------------------------------- /helpers/html_information_leak_analyze.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from bs4 import BeautifulSoup, Comment 4 | 5 | 6 | def analyze(html: str, pure=False) -> str: 7 | soup = BeautifulSoup(html, 'html.parser') 8 | datas = "" 9 | puretext = '\n'.join([line.rstrip() for line in soup.get_text(separator='\n').split('\n') if line.rstrip()]) 10 | 11 | # 匹配HTML注释 12 | comments = soup.find_all(string=lambda text: isinstance(text, Comment)) 13 | comments = list(set(comments)) 14 | if len(comments) > 0: 15 | datas = f"{datas}\n\nHTML注释: {comments}" 16 | 17 | # 匹配隐藏字段 18 | hidden_fields = [(tag['name'], tag['value']) for tag in soup.find_all('input', type='hidden')] 19 | hidden_fields = list(set(hidden_fields)) 20 | if len(hidden_fields) > 0: 21 | datas = f"{datas}\n隐藏字段: {hidden_fields}" 22 | 23 | # 匹配电子邮件地址 24 | emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', html) 25 | emails = list(set(emails)) 26 | if len(emails) > 0: 27 | datas = f"{datas}\nemail地址: {emails}" 28 | 29 | # 匹配文件路径 30 | file_paths = [tag['src'] for tag in soup.find_all(src=True)] + [tag['href'] for tag in soup.find_all(href=True)] 31 | file_paths.extend(find_paths(puretext)) 32 | file_paths = list(set(file_paths)) 33 | if len(file_paths) > 0: 34 | datas = f"{datas}\n文件路径: {file_paths}" 35 | 36 | # 匹配内部IP地址 37 | ips = re.findall( 38 | r'(?:\d|1?\d\d|2[0-4]\d|25[0-5])(?:\.(?:\d|1?\d\d|2[0-4]\d|25[0-5])){3}', html) 39 | ips = list(set(ips)) 40 | if len(ips) > 0: 41 | datas = f"{datas}\nIP地址: {ips}" 42 | 43 | # 匹配meta标签中的信息 44 | meta_tags = [f"{tag['name']}={tag['content']}" for tag in 45 | soup.find_all('meta', attrs={'name': True, 'content': True})] 46 | meta_tags = list(set(meta_tags)) 47 | if len(meta_tags) > 0: 48 | datas = f"{datas}\nmeta标签中的信息: {meta_tags}" 49 | 50 | # 匹配表单字段 51 | form_fields = [(tag['name'], tag['value']) for tag in 52 | soup.find_all('input', attrs={'name': True, 'value': True})] 53 | form_fields = list(set(form_fields)) 54 | if len(form_fields) > 0: 55 | datas = f"{datas}\n表单字段: {form_fields}" 56 | 57 | if pure and puretext != "": 58 | datas = f"{datas}\n\n纯文本内容:\n`{puretext}`" 59 | 60 | datas = datas.strip() 61 | return datas 62 | 63 | 64 | def find_paths(text: str) -> []: 65 | # 正则表达式模式,用于匹配潜在的路径信息 66 | path_patterns = [ 67 | r"(?:[a-z]:)?(?:[\\\/][a-z0-9_. 
-]*)+", 68 | ] 69 | 70 | paths = set() 71 | for pattern in path_patterns: 72 | matches = re.findall(pattern, text, re.MULTILINE | re.IGNORECASE) 73 | for match in matches: 74 | p = match.strip() 75 | slashs = p.replace('\\', '').replace('/', '').strip() 76 | if slashs == "": 77 | continue 78 | paths.add(match.strip()) 79 | 80 | return list(paths) 81 | -------------------------------------------------------------------------------- /helpers/masscan.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/MyKings/python-masscan/ 3 | """ 4 | from config import logger 5 | import json 6 | import re 7 | import sys 8 | import shlex 9 | 10 | if sys.platform == "win32": 11 | shlex.split = lambda s, comments=False, posix=True: s 12 | import os 13 | import shutil 14 | import subprocess 15 | from pydantic.v1 import BaseModel, Field 16 | from typing import Optional 17 | 18 | 19 | class MasscanNotFound(Exception): 20 | pass 21 | 22 | 23 | class PortResult(BaseModel): 24 | ip: Optional[str] = Field(description="ip") 25 | port: Optional[str] = Field(description="port") 26 | proto: Optional[str] = Field(description="协议") 27 | 28 | 29 | class Masscan: 30 | """ 31 | Class which allows to use masscan from Python. 32 | """ 33 | 34 | def __init__(self, masscanPath=None): 35 | self._masscan_path = shutil.which("masscan", path=masscanPath) 36 | if not self._masscan_path: 37 | raise MasscanNotFound("[ERROR] Masscan not found in path") 38 | 39 | def scan(self, hosts='127.0.0.1', ports="1-65535", arguments='') -> [PortResult]: 40 | """ 41 | Scan given hosts. 42 | 43 | May raise PortScannerError exception if masscan output was not XML 44 | 45 | Test existence of the following key to know 46 | if something went wrong : ['masscan']['scaninfo']['error'] 47 | If not present, everything was ok. 48 | """ 49 | 50 | assert type(hosts) is str, 'Wrong type for [hosts], should be a string [was {0}]'.format( 51 | type(hosts)) # noqa 52 | assert type(ports) in (str, type(None)), 'Wrong type for [ports], should be a string [was {0}]'.format( 53 | type(ports)) # noqa 54 | assert type(arguments) is str, 'Wrong type for [arguments], should be a string [was {0}]'.format( 55 | type(arguments)) # noqa 56 | 57 | h_args = shlex.split(hosts) 58 | f_args = shlex.split(arguments) 59 | 60 | # Launch scan 61 | args = [self._masscan_path, '-oJ', '-'] + h_args + ['-p', ports] * (ports is not None) + f_args 62 | 63 | logger.debug("{args}", args=' '.join(args)) 64 | p = subprocess.Popen( 65 | args, 66 | bufsize=100000, 67 | stdin=subprocess.PIPE, 68 | stdout=subprocess.PIPE, 69 | stderr=subprocess.PIPE 70 | ) 71 | 72 | # wait until finished 73 | # get output 74 | masscan_output, masscan_err = p.communicate() 75 | 76 | if isinstance(masscan_output, bytes): 77 | masscan_output = masscan_output.decode('utf-8') 78 | if isinstance(masscan_err, bytes): 79 | masscan_err = masscan_err.decode('utf-8') 80 | 81 | # If there was something on stderr, there was a problem so abort... in 82 | # fact not always. As stated by AlenLPeacock : 83 | # This actually makes python-masscan mostly unusable on most real-life 84 | # networks -- a particular subnet might have dozens of scannable hosts, 85 | # but if a single one is unreachable or unroutable during the scan, 86 | # masscan.scan() returns nothing. This behavior also diverges significantly 87 | # from commandline masscan, which simply stderrs individual problems but 88 | # keeps on trucking. 
89 | 90 | masscan_err_keep_trace = [] 91 | masscan_warn_keep_trace = [] 92 | if len(masscan_err) > 0: 93 | regex_warning = re.compile('^Warning: .*', re.IGNORECASE) 94 | for line in masscan_err.split(os.linesep): 95 | if len(line) > 0: 96 | rgw = regex_warning.search(line) 97 | if rgw is not None: 98 | # sys.stderr.write(line+os.linesep) 99 | masscan_warn_keep_trace.append(line + os.linesep) 100 | else: 101 | # raise PortScannerError(masscan_err) 102 | masscan_err_keep_trace.append(masscan_err) 103 | return self._load_scan_result(masscan_output) 104 | 105 | def _load_scan_result(self, scan_result: str) -> [PortResult]: 106 | datas = [] 107 | result = json.loads(scan_result) 108 | for r in result: 109 | for port in r['ports']: 110 | datas.append(PortResult(ip=r['ip'], port=port['port'], proto=port['proto'])) 111 | return datas 112 | -------------------------------------------------------------------------------- /helpers/nmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import shlex 3 | from config import logger 4 | 5 | if sys.platform == "win32": 6 | shlex.split = lambda s, comments=False, posix=True: s 7 | 8 | import nmap3 9 | from pydantic.v1 import BaseModel, Field 10 | from typing import Optional 11 | 12 | 13 | class PortResult(BaseModel): 14 | protocol: Optional[str] = Field(description="协议") 15 | ip: Optional[str] = Field(description="ip") 16 | portid: Optional[str] = Field(description="port") 17 | service: Optional[str] = Field(description="服务") 18 | product: Optional[str] = Field(description="产品") 19 | version: Optional[str] = Field(description="版本") 20 | extrainfo: Optional[str] = Field(description="额外信息") 21 | 22 | 23 | class Nmap: 24 | def __init__(self, path: str = None): 25 | self.nmap = nmap3.NmapScanTechniques(path=path) 26 | 27 | def scan(self, target: str, ports: str = '-') -> [PortResult]: 28 | args = f"-p{ports} -Pn -sV -sC -A" 29 | logger.debug("nmap {target} {args}", target=target, args=args) 30 | results = self.nmap.nmap_tcp_scan(target=target, args=args) 31 | results.pop("stats") 32 | results.pop("runtime") 33 | results.pop("task_results") 34 | ports = [] 35 | for ip, host in results.items(): 36 | for port in host["ports"]: 37 | pkv = {} 38 | pkv["ip"] = ip 39 | pkv["protocol"] = port["protocol"] 40 | pkv["portid"] = port["portid"] 41 | if "service" in port: 42 | if port["service"]["name"] != "tcpwrapped": 43 | pkv["service"] = port["service"]["name"] 44 | if "product" in port["service"]: 45 | pkv["product"] = port["service"]["product"] 46 | if "version" in port["service"]: 47 | pkv["version"] = port["service"]["version"] 48 | if "extrainfo" in port["service"]: 49 | pkv["extrainfo"] = port["service"]["extrainfo"] 50 | ports.append(PortResult(**pkv)) 51 | 52 | return ports 53 | -------------------------------------------------------------------------------- /helpers/security_trails_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | from tld import get_tld 3 | import requests 4 | from pydantic.v1 import BaseModel, Field 5 | from helpers.utils import get_ip_type 6 | from config import logger 7 | 8 | keymapping = { 9 | "a": "ip", 10 | "aaaa": "ipv6", 11 | "mx": "hostname", 12 | "ns": "nameserver", 13 | "soa": "email", 14 | "txt": "value", 15 | } 16 | 17 | 18 | class Record(BaseModel): 19 | first_seen: Optional[str] = Field(description="首次发现时间") 20 | value: Optional[str] = Field(description="值") 21 | count: Optional[int] = 
Field(description="数量") 22 | organization: Optional[str] = Field(description="归属组织") 23 | 24 | 25 | class DomainInfo(BaseModel): 26 | apex_domain: Optional[str] = Field(description="顶级域名") 27 | hostname: Optional[str] = Field(description="主机名") 28 | subdomain_count: Optional[int] = Field(description="子域名数量") 29 | a: List[Record] = Field(description="A记录") 30 | aaaa: List[Record] = Field(description="AAAA记录") 31 | mx: List[Record] = Field(description="MX记录") 32 | ns: List[Record] = Field(description="NS记录") 33 | soa: List[Record] = Field(description="SOA记录") 34 | txt: List[Record] = Field(description="TXT记录") 35 | 36 | 37 | class Domain(BaseModel): 38 | apex_domain: Optional[str] = Field(description="顶级域名") 39 | hostname: Optional[str] = Field(description="域名") 40 | subdomain: Optional[str] = Field(description="子域名") 41 | ips: List[str] = Field(description="IP地址") 42 | 43 | 44 | class History(BaseModel): 45 | first_seen: Optional[str] = Field(description="首次发现时间") 46 | last_seen: Optional[str] = Field(description="最后发现时间") 47 | hostname: Optional[str] = Field(description="域名") 48 | ip: Optional[str] = Field(description="IP地址") 49 | 50 | 51 | class SecurityTrailsApi: 52 | def __init__(self, api_key: str, base_url: str = "https://api.securitytrails.com/v1", proxies: dict = None, 53 | timeout: int = 10): 54 | self.api_key = api_key 55 | self.base_url = base_url 56 | self.timeout = timeout 57 | self._session = requests.Session() 58 | if proxies: 59 | self._session.proxies.update(proxies) 60 | self._session.trust_env = False 61 | 62 | def _request(self, method: str, path: str, data: dict = None): 63 | url = f"{self.base_url}{path}" 64 | headers = { 65 | "apikey": self.api_key, 66 | "Content-Type": "application/json" 67 | } 68 | response = self._session.request(method, url, headers=headers, timeout=self.timeout, json=data) 69 | response.raise_for_status() 70 | return response.json() 71 | 72 | def _search(self, **filters) -> List[Domain]: 73 | records = [] 74 | qd = { 75 | "filter": filters 76 | } 77 | maxpage = 100 78 | page = 1 79 | while True: 80 | upath = f"/domains/list?include_ips=true&page={page}" 81 | logger.debug("SecurityTrails: {upath} {qd}", upath=upath, qd=qd) 82 | res = self._request("POST", upath, qd) 83 | for item in res["records"]: 84 | domainobj = get_tld(item["hostname"], fail_silently=True, as_object=True, fix_protocol=True) 85 | kvs = { 86 | "hostname": item["hostname"], 87 | "apex_domain": domainobj.fld, 88 | "subdomain": domainobj.subdomain, 89 | "ips": item["ips"] 90 | } 91 | records.append(Domain(**kvs)) 92 | 93 | if page >= maxpage or page >= res["meta"]["total_pages"]: 94 | break 95 | page += 1 96 | 97 | return records 98 | 99 | def search_domain(self, domain: str) -> List[Domain]: 100 | filters = { 101 | "apex_domain": domain, 102 | } 103 | return self._search(**filters) 104 | 105 | def search_domain_fuzzy(self, keyword: str) -> List[Domain]: 106 | filters = { 107 | "keyword": keyword, 108 | } 109 | return self._search(**filters) 110 | 111 | def search_subdomain(self, subdomain: str) -> List[Domain]: 112 | filters = { 113 | "subdomain": subdomain, 114 | } 115 | return self._search(**filters) 116 | 117 | def search_ip(self, ip: str) -> List[Domain]: 118 | iptype = get_ip_type(ip) 119 | filters = { 120 | iptype: ip, 121 | } 122 | return self._search(**filters) 123 | 124 | def _history(self, hostname: str, types="a") -> List[History]: 125 | records = [] 126 | maxpage = 100 127 | page = 1 128 | while True: 129 | upath = f"/history/{hostname}/dns/{types}?page={page}" 130 | 
logger.debug("SecurityTrails: {upath}", upath=upath) 131 | res = self._request("GET", upath) 132 | for item in res["records"]: 133 | for ip in item["values"]: 134 | kvs = { 135 | "first_seen": item["first_seen"], 136 | "last_seen": item["last_seen"], 137 | "hostname": hostname, 138 | "ip": ip["ip"] 139 | } 140 | records.append(History(**kvs)) 141 | 142 | if page >= maxpage or page >= res["pages"]: 143 | break 144 | page += 1 145 | 146 | return records 147 | 148 | def get_history(self, hostname: str) -> List[History]: 149 | return self._history(hostname, "a") 150 | 151 | def get_current_domain_info(self, domain: str) -> DomainInfo: 152 | upath = f"/domains/{domain}" 153 | logger.debug("SecurityTrails: {upath}", upath=upath) 154 | res = self._request("GET", upath) 155 | data = { 156 | "apex_domain": res["apex_domain"], 157 | "hostname": res["hostname"], 158 | "subdomain_count": 0, 159 | "a": [], 160 | "aaaa": [], 161 | "mx": [], 162 | "ns": [], 163 | "soa": [], 164 | "txt": [], 165 | } 166 | 167 | if res["subdomain_count"] is not None: 168 | data["subdomain_count"] = res["subdomain_count"] 169 | 170 | currentdns = res["current_dns"] 171 | if "first_seen" in currentdns["a"]: 172 | for item in currentdns["a"]["values"]: 173 | data["a"].append(Record( 174 | first_seen=currentdns["a"]["first_seen"], 175 | value=item["ip"], 176 | count=item["ip_count"], 177 | organization=item["ip_organization"] 178 | )) 179 | if "first_seen" in currentdns["aaaa"]: 180 | for item in currentdns["aaaa"]["values"]: 181 | data["aaaa"].append(Record( 182 | first_seen=currentdns["aaaa"]["first_seen"], 183 | value=item["ipv6"], 184 | count=item["ipv6_count"], 185 | organization=item["ipv6_organization"] 186 | )) 187 | if "first_seen" in currentdns["mx"]: 188 | for item in currentdns["mx"]["values"]: 189 | data["mx"].append(Record( 190 | first_seen=currentdns["mx"]["first_seen"], 191 | value=item["hostname"], 192 | count=item["hostname_count"], 193 | organization=item["hostname_organization"] 194 | )) 195 | if "first_seen" in currentdns["ns"]: 196 | for item in currentdns["ns"]["values"]: 197 | data["ns"].append(Record( 198 | first_seen=currentdns["ns"]["first_seen"], 199 | value=item["nameserver"], 200 | count=item["nameserver_count"], 201 | organization=item["nameserver_organization"] 202 | )) 203 | if "first_seen" in currentdns["soa"]: 204 | for item in currentdns["soa"]["values"]: 205 | data["soa"].append(Record( 206 | first_seen=currentdns["soa"]["first_seen"], 207 | value=item["email"], 208 | count=item["email_count"], 209 | )) 210 | if "first_seen" in currentdns["txt"]: 211 | for item in currentdns["txt"]["values"]: 212 | data["txt"].append(Record( 213 | first_seen=currentdns["txt"]["first_seen"], 214 | value=item["value"], 215 | )) 216 | 217 | return DomainInfo(**data) 218 | -------------------------------------------------------------------------------- /helpers/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import tiktoken 4 | from ipaddress import ip_address, IPv4Address, IPv6Address 5 | 6 | import validators 7 | from validators.domain import _iana_tld 8 | 9 | 10 | def num_tokens_from_string(string: str, encoding_name: str) -> int: 11 | """Returns the number of tokens in a text string.""" 12 | # TODO 13 | encoding = tiktoken.encoding_for_model(encoding_name) 14 | num_tokens = len(encoding.encode(string)) 15 | return num_tokens 16 | 17 | 18 | def valid_ip_address(ip: str) -> bool: 19 | try: 20 | return type(ip_address(ip)) is IPv4Address or 
type(ip_address(ip)) is IPv6Address
21 |     except ValueError:
22 |         return False
23 | 
24 | 
25 | def get_ip_type(IP: str) -> str:
26 |     try:
27 |         if type(ip_address(IP)) is IPv4Address:
28 |             return "ipv4"
29 |         else:
30 |             return "ipv6"
31 |     except ValueError:
32 |         return "invalid"
33 | 
34 | 
35 | def is_private_ip(ip: str) -> bool:
36 |     try:
37 |         ip_obj = ip_address(ip)
38 |         return ip_obj.is_private
39 |     except ValueError:
40 |         return False
41 | 
42 | 
43 | def is_domain(
44 |         value: str, /, *, consider_tld: bool = False, rfc_1034: bool = False, rfc_2782: bool = False
45 | ):
46 |     """Return whether or not given value is a valid domain.
47 | 
48 |     Examples:
49 |         >>> domain('example.com')
50 |         # Output: True
51 |         >>> domain('example.com/')
52 |         # Output: ValidationError(func=domain, ...)
53 |         >>> # Supports IDN domains as well::
54 |         >>> domain('xn----gtbspbbmkef.xn--p1ai')
55 |         # Output: True
56 | 
57 |     Args:
58 |         value:
59 |             Domain string to validate.
60 |         consider_tld:
61 |             Restrict domain to TLDs allowed by IANA.
62 |         rfc_1034:
63 |             Allows optional trailing dot in the domain name.
64 |             Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
65 |         rfc_2782:
66 |             Domain name is of type service record.
67 |             Allows optional underscores in the domain name.
68 |             Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).
69 | 
70 | 
71 |     Returns:
72 |         (Literal[True]): If `value` is a valid domain name.
73 |         (ValidationError): If `value` is an invalid domain name.
74 | 
75 |     Raises:
76 |         (UnicodeError): If `value` cannot be encoded into `idna` or decoded into `utf-8`.
77 |     """
78 |     sr = validators.domain(value, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782)
79 |     if sr is True:
80 |         return True
81 | 
82 |     if not value:
83 |         return False
84 | 
85 |     if consider_tld and value.rstrip(".").rsplit(".", 1)[-1].upper() not in _iana_tld():
86 |         return False
87 | 
88 |     try:
89 | 
90 |         service_record = r"_" if rfc_2782 else ""
91 |         trailing_dot = r"\.?$" if rfc_1034 else r"$"
92 | 
93 |         return re.match(
94 |             # First character of the domain
95 |             rf"^(?:[a-z0-9{service_record}]"
96 |             # Sub-domain
97 |             + rf"(?:[a-z0-9-{service_record}]{{0,61}}"
98 |             # Hostname
99 |             + rf"[a-z0-9{service_record}])?\.)"
100 |             # First 61 characters of the gTLD
101 |             + r"+[a-z0-9][a-z0-9-_]{0,61}"
102 |             # Last character of the gTLD
103 |             + rf"[a-z]{trailing_dot}",
104 |             value.encode("idna").decode("utf-8"),
105 |             re.IGNORECASE,
106 |         )
107 |     except UnicodeError as err:
108 |         raise UnicodeError(f"Unable to encode/decode {value}") from err
--------------------------------------------------------------------------------
/persistence/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/persistence/__init__.py
--------------------------------------------------------------------------------
/persistence/database.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import create_engine, func, text
 2 | import threading
 3 | 
 4 | from sqlalchemy.orm import sessionmaker, scoped_session
 5 | 
 6 | from persistence.orm import Base, Cdn
 7 | 
 8 | 
 9 | class DB(object):
10 |     _instance_lock = threading.Lock()
11 | 
12 |     def __init__(self, user: str, password: str, host: str, port: int, dbname: str, echo: bool = False):
13 |         self._engine = create_engine(
14 |             url=f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}",
15 |             echo=echo,  # echo 设为 True 会打印出实际执行的 sql,调试的时候更方便
16 |             
future=True, # 使用 SQLAlchemy 2.0 API,向后兼容 17 | pool_size=5, # 连接池的大小默认为 5 个,设置为 0 时表示连接无限制 18 | pool_recycle=3600, # 设置时间以限制数据库自动断开 19 | ) 20 | with self._engine.connect() as conn: 21 | conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) 22 | conn.commit() 23 | Base.metadata.create_all(self._engine) 24 | self.DBSession = scoped_session(sessionmaker(bind=self._engine)) 25 | with self.DBSession() as session: 26 | cdncount = session.query(func.count(Cdn.id)).scalar() 27 | if cdncount == 0: 28 | import yaml 29 | from ipaddress import ip_network 30 | with open("cdn_servers.yaml", encoding='utf-8') as yamlstream: 31 | try: 32 | yamldata = yaml.safe_load(yamlstream) 33 | for cdn, vals in yamldata["cidr"].items(): 34 | for cidr in vals: 35 | session.add(Cdn(organization=cdn, cidr=ip_network(cidr))) 36 | for cdn, vals in yamldata["cname"].items(): 37 | for cname in vals: 38 | session.add(Cdn(organization=cdn, cname=cname)) 39 | session.commit() 40 | except Exception as e: 41 | raise e 42 | 43 | def __new__(cls, *args, **kwargs): 44 | if not hasattr(DB, "_instance"): 45 | with DB._instance_lock: 46 | if not hasattr(DB, "_instance"): 47 | DB._instance = object.__new__(cls) 48 | return DB._instance 49 | -------------------------------------------------------------------------------- /persistence/vectordb.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Optional, Union 3 | 4 | from embedchain.config import AppConfig 5 | from embedchain.config.vectordb.base import BaseVectorDbConfig 6 | from embedchain.embedder.base import BaseEmbedder 7 | from embedchain.helpers.json_serializable import register_deserializable 8 | from embedchain.vectordb.base import BaseVectorDB 9 | from sqlalchemy import Table, MetaData, Column, Text, String, inspect, func, and_ 10 | from sqlalchemy.dialects.postgresql import JSONB 11 | from sqlalchemy.ext.hybrid import hybrid_property 12 | from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, aliased 13 | from pgvector.sqlalchemy import Vector 14 | 15 | from embedchain import App 16 | from config import logger 17 | 18 | from persistence.database import DB 19 | 20 | 21 | @register_deserializable 22 | class PostgresqlDBConfig(BaseVectorDbConfig): 23 | def __init__( 24 | self, 25 | collection_name: Optional[str] = None, 26 | dir: Optional[str] = None, 27 | ): 28 | """ 29 | Initializes a configuration class instance for an Postgresql client. 
30 | """ 31 | super().__init__(collection_name=collection_name, dir=dir) 32 | 33 | 34 | @register_deserializable 35 | class PgvectorDB(BaseVectorDB): 36 | """ 37 | PostgresqlDB as vector database 38 | """ 39 | 40 | def __init__( 41 | self, 42 | db: DB, 43 | config: PostgresqlDBConfig, 44 | ): 45 | 46 | self.db = db 47 | self.config = config 48 | self.cls_KnowledgeVectors = None 49 | 50 | super().__init__(config) 51 | 52 | def _initialize(self): 53 | pass 54 | 55 | def _get_or_create_db(self): 56 | """Called during initialization""" 57 | return self.db 58 | 59 | def _get_or_create_collection(self): 60 | try: 61 | with self.db.DBSession() as session: 62 | engine = session.get_bind() 63 | if not inspect(engine).has_table(self.config.collection_name): 64 | metadata = MetaData() 65 | metadata.reflect(bind=engine) 66 | table = Table(self.config.collection_name, metadata, 67 | Column('id', String, primary_key=True, nullable=False), 68 | Column('vector', Vector(self.embedder.vector_dimension)), 69 | Column('doc', Text), 70 | Column('meta_data', JSONB) 71 | ) 72 | metadata.create_all(engine, [table]) 73 | session.commit() 74 | except Exception as e: 75 | logger.error(e) 76 | 77 | if self.cls_KnowledgeVectors is None: 78 | class KnowledgeVectors(Base): 79 | __tablename__: str = self.config.collection_name 80 | 81 | id: Mapped[str] = mapped_column(String(), primary_key=True) 82 | vector: Mapped[[float]] = mapped_column(Vector()) 83 | doc: Mapped[str] = mapped_column(Text()) 84 | meta_data: Mapped[object] = mapped_column(JSONB) 85 | 86 | @hybrid_property 87 | def distance(self): 88 | # 仅用于标识,在实际查询中不会用到 89 | pass 90 | 91 | @distance.expression 92 | def distance(cls, query_vector): 93 | return cls.vector.cosine_distance(query_vector) 94 | 95 | KnowledgeVectors.__name__ = f"KnowledgeVectors_{self.config.collection_name}" 96 | self.cls_KnowledgeVectors = KnowledgeVectors 97 | return self.db 98 | 99 | def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None): 100 | result = {"ids": [], "metadatas": []} 101 | try: 102 | with self.db.DBSession() as session: 103 | kncls = self.cls_KnowledgeVectors 104 | 105 | clauses = [] 106 | if ids: 107 | clauses.append(kncls.id.in_(ids)) 108 | if where: 109 | clauses = clauses + [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in 110 | where.items()] 111 | 112 | query = session.query(kncls).filter(and_(*clauses)) 113 | if limit: 114 | query = query.limit(limit) 115 | 116 | datas = query.all() 117 | for data in datas: 118 | result["ids"].append(data.id) 119 | result["metadatas"].append(data.meta_data) 120 | except Exception as e: 121 | logger.error(e) 122 | return result 123 | 124 | def add(self, documents: list[str], metadatas: list[object], ids: list[str]) -> Any: 125 | to_ingest = list(zip(documents, metadatas, ids)) 126 | 127 | try: 128 | with self.db.DBSession() as session: 129 | count = 0 130 | for doc, meta, id in to_ingest: 131 | kvobj = self.cls_KnowledgeVectors() 132 | kvobj.id = id 133 | kvobj.doc = doc 134 | kvobj.meta_data = meta 135 | kvobj.vector = self.embedder.embedding_fn([doc])[0] 136 | session.add(kvobj) 137 | count += 1 138 | if count >= 10: 139 | session.commit() 140 | count = 0 141 | session.commit() 142 | except Exception as e: 143 | logger.error(e) 144 | 145 | def query( 146 | self, 147 | input_query: str, 148 | n_results: int, 149 | where: dict[str, any], 150 | citations: bool = False, 151 | **kwargs: Optional[dict[str, Any]], 152 | ) -> Union[list[tuple[str, 
dict]], list[str]]: 153 | input_query_vector = self.embedder.embedding_fn([input_query]) 154 | query_vector = input_query_vector[0] 155 | 156 | result = [] 157 | try: 158 | with self.db.DBSession() as session: 159 | kncls = self.cls_KnowledgeVectors 160 | 161 | clauses = [] 162 | if where: 163 | clauses = clauses + [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in 164 | where.items()] 165 | 166 | subquery = session.query( 167 | kncls.id, 168 | kncls.vector.cosine_distance(query_vector).label('distance') 169 | ).filter( 170 | and_(*clauses) 171 | ).subquery() 172 | 173 | alias_knowledge_vectors = aliased(kncls, subquery) 174 | 175 | query = session.query( 176 | kncls, 177 | subquery.c.distance 178 | ).join( 179 | subquery, kncls.id == subquery.c.id 180 | ).order_by( 181 | subquery.c.distance.asc() 182 | ).limit(n_results) 183 | 184 | datas = query.all() 185 | for data, distance in datas: 186 | if citations: 187 | metadata = data.meta_data 188 | metadata['score'] = distance 189 | result.append((data.doc, metadata)) 190 | else: 191 | result.append(data.doc) 192 | except Exception as e: 193 | logger.error(e) 194 | return result 195 | 196 | def set_collection_name(self, name: str): 197 | """ 198 | Set the name of the collection. A collection is an isolated space for vectors. 199 | 200 | :param name: Name of the collection. 201 | :type name: str 202 | """ 203 | if not isinstance(name, str): 204 | raise TypeError("Collection name must be a string") 205 | self.config.collection_name = '{prefix}_{suffix}'.format(prefix=name, suffix=self.embedder.config.model) 206 | self._get_or_create_collection() 207 | 208 | def count(self) -> int: 209 | count = 0 210 | try: 211 | with self.db.DBSession() as session: 212 | count = session.query(func.count(self.cls_KnowledgeVectors.id)).scalar() 213 | except Exception as e: 214 | logger.error(e) 215 | return count 216 | 217 | def delete(self, where): 218 | try: 219 | with self.db.DBSession() as session: 220 | kncls = self.cls_KnowledgeVectors 221 | clauses = [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in where.items()] 222 | session.query(kncls).filter(and_(*clauses)).delete() 223 | session.commit() 224 | except Exception as e: 225 | logger.error(e) 226 | 227 | def reset(self): 228 | try: 229 | with self.db.DBSession() as session: 230 | session.query(self.cls_KnowledgeVectors).delete() 231 | session.commit() 232 | except Exception as e: 233 | logger.error(e) 234 | 235 | 236 | class Base(DeclarativeBase): 237 | pass 238 | 239 | 240 | def NewEmbedChain(db: DB, embder: BaseEmbedder, collection_name="knowledge_vectors") -> App: 241 | # embedchain元数据存储在Postgresql数据库中 242 | os.environ['EMBEDCHAIN_DB_URI'] = db.DBSession.bind.url.render_as_string(False) 243 | 244 | cfg = PostgresqlDBConfig(collection_name=collection_name) 245 | pdb = PgvectorDB(db, config=cfg) 246 | 247 | app_config = AppConfig(collect_metrics=False) 248 | 249 | app = App( 250 | config=app_config, 251 | llm=None, 252 | db=pdb, 253 | embedding_model=embder, 254 | config_data=None, 255 | auto_deploy=False, 256 | chunker=None, 257 | cache_config=None, 258 | memory_config=None, 259 | ) 260 | return app 261 | -------------------------------------------------------------------------------- /rag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/rag/__init__.py 
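
With the vector store in place, the RAG layer that follows is a thin wrapper around an embedchain `App`. Below is a minimal wiring sketch of how these pieces fit together. It is not part of the repository: the Postgres credentials and the choice of `OpenAIEmbedder` are assumptions for illustration, and any `BaseEmbedder` implementation should work the same way.

```python
from embedchain.embedder.openai import OpenAIEmbedder  # assumed embedder; needs OPENAI_API_KEY
from embedchain.models.data_type import DataType

from persistence.database import DB
from persistence.vectordb import NewEmbedChain

# Hypothetical connection parameters for illustration only.
# DB() creates the pgvector extension and the ORM tables on first connect.
db = DB(user="postgres", password="postgres", host="127.0.0.1", port=5432, dbname="bladerazor")

app = NewEmbedChain(db, OpenAIEmbedder(), collection_name="knowledge_vectors")
app.add("https://example.com/some-advisory.html", data_type=DataType.WEB_PAGE)  # ingest one page
print(app.search("thinkphp rce", num_documents=3))  # top-3 nearest chunks from pgvector
```

The `cmd/knowledge_import.py` script in the project tree presumably performs this kind of wiring when bulk-importing a knowledge base.
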
--------------------------------------------------------------------------------
/rag/rag.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | from typing import Optional, List
 3 | 
 4 | from embedchain.embedder.base import BaseEmbedder
 5 | from embedchain.loaders.directory_loader import DirectoryLoader
 6 | from embedchain.models.data_type import DataType
 7 | from pydantic.v1 import BaseModel, Field
 8 | # 注意:Field 必须与 BaseModel 同为 pydantic.v1,混用 v2 的 Field 会被当作普通默认值
 9 | 
10 | from persistence.database import DB
11 | from persistence.vectordb import NewEmbedChain
12 | 
13 | 
14 | class Source(BaseModel):
15 |     content: Optional[str] = Field(description="内容")
16 |     data_type: Optional[str] = Field(description="类型")
17 |     src: Optional[str] = Field(description="引用来源")
18 | 
19 |     def __str__(self):
20 |         if self.data_type == DataType.WEB_PAGE:
21 |             return f"来源: {self.src}\n内容:\n{self.content}"
22 |         return self.content
23 | 
24 |     def __repr__(self):
25 |         return self.__str__()
26 | 
27 | 
28 | class Answer(BaseModel):
29 |     answer: Optional[str] = Field(description="答案")
30 |     sources: List[Source] = Field(description="来源")
31 | 
32 |     def __str__(self):
33 |         srcs = "\n\n====================\n\n".join([str(src) for src in self.sources])
34 |         return f"答案: {self.answer}\n\n详情:\n{srcs}"
35 | 
36 | 
37 | class RAG(object):
38 |     _instance_lock = threading.Lock()
39 | 
40 |     def __init__(self, db: DB, embder: BaseEmbedder, collection_name="knowledge_vectors"):
41 |         self._embedchain_app = NewEmbedChain(db=db, embder=embder, collection_name=collection_name)
42 | 
43 |     def __new__(cls, *args, **kwargs):
44 |         if not hasattr(RAG, "_instance"):
45 |             with RAG._instance_lock:
46 |                 if not hasattr(RAG, "_instance"):
47 |                     RAG._instance = object.__new__(cls)
48 |         return RAG._instance
49 | 
50 |     def add_knowledge_url(self, url: str):
51 |         self._embedchain_app.add(url, data_type=DataType.WEB_PAGE)
52 | 
53 |     def add_knowledge_folder(self, folder_path: str):
54 |         lconfig = {
55 |             "recursive": True,
56 |             "extensions": [".txt", ".md", ".readme", ".README"]
57 |         }
58 |         loader = DirectoryLoader(config=lconfig)
59 | 
60 |         self._embedchain_app.add(folder_path, data_type=DataType.DIRECTORY, loader=loader)
61 | 
62 |     def search(self, query: str, num_documents=3):
63 |         return self._embedchain_app.search(query, num_documents=num_documents)
64 | 
65 |     def query(self, query: str) -> Answer:
66 |         answer, sources = self._embedchain_app.query(query, citations=True)
67 | 
68 |         srclist = []
69 |         for content, metadata in sources:
70 |             src = Source(content=content, data_type=metadata['data_type'], src=metadata['url'])
71 |             srclist.append(src)
72 | 
73 |         return Answer(answer=answer, sources=srclist)
74 | 
--------------------------------------------------------------------------------
/rag/rag_search_tool.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from textwrap import dedent
 3 | from typing import Type, Any
 4 | 
 5 | from crewai import Task, Agent, Crew
 6 | from crewai_tools import BaseTool
 7 | from pydantic.v1 import BaseModel, Field
 8 | 
 9 | from helpers.html_information_leak_analyze import find_paths
10 | from helpers.utils import is_domain
11 | from rag.rag import RAG
12 | from config import logger
13 | 
14 | 
15 | class RagSearchToolSchema(BaseModel):
16 |     """RagSearchTool 的查询参数"""
17 |     search_query: str = Field(..., description="搜索的内容,问题应当具有代表性,使用实体关键词,避免使用自然语言")
18 | 
19 | 
20 | class RagSearchTool(BaseTool):
21 |     name: str = "RAG知识搜索"
22 |     description: str = "搜索本地知识库。对于特定ip地址或者特定域名,不可以使用该工具。"
23 | args_schema: Type[BaseModel] = RagSearchToolSchema 24 | masscan_path: str | None = None 25 | rag: RAG | None = None 26 | verbose: bool = False 27 | llm: Any | None = None 28 | 29 | class Config: 30 | arbitrary_types_allowed = True 31 | 32 | def __init__(self, rag: RAG, llm, verbose=False): 33 | super().__init__() 34 | self.rag = rag 35 | self.verbose = verbose 36 | self.llm = llm 37 | logger.info("初始化工具 RAG知识搜索") 38 | 39 | def _run( 40 | self, 41 | **kwargs: Any, 42 | ) -> Any: 43 | search_query = kwargs.pop('search_query') 44 | 45 | # TODO 46 | ips = re.findall( 47 | r'(?:\d|1?\d\d|2[0-4]\d|25[0-5])(?:\.(?:\d|1?\d\d|2[0-4]\d|25[0-5])){3}', search_query) 48 | if len(ips) > 0: 49 | return "不可以使用该工具搜索特定ip地址" 50 | 51 | paths = find_paths(search_query) 52 | if len(paths) > 0: 53 | return "不可以使用该工具搜索特定路径" 54 | 55 | if is_domain(search_query): 56 | return "不可以使用该工具搜索特定域名" 57 | 58 | try: 59 | answer = self.rag.query(search_query) 60 | out = self.review(search_query, str(answer)) 61 | if "PASS" in out: 62 | return answer 63 | elif "FAIL" in out: 64 | return "查询结果不符合要求, 请重新查询" 65 | else: 66 | return out 67 | except Exception as e: 68 | logger.error("知识库搜索失败: {}", e) 69 | return f"查询失败: {e}" 70 | 71 | def review(self, query: str, answer: str) -> str: 72 | agent = Agent( 73 | role='搜索结果审核专家', 74 | goal='确保RAG工具的搜索输出与查询意图完全一致,优化搜索效率和结果的相关性。', 75 | backstory=dedent( 76 | """ 77 | 你是一名专注于搜索技术和结果验证的专家,隶属于技术保障团队。 78 | 你的任务是对RAG等智能搜索工具的输出进行精确的审核,确保每个搜索结果都严格符合预设的查询意图和技术需求。 79 | 80 | 你具备深厚的技术背景,特别擅长: 81 | - 分析和解析复杂的搜索结果。 82 | - 识别与查询意图不符的搜索输出。 83 | - 使用逻辑和技术知识来评估搜索结果的技术相关性。 84 | - 提出实用的改进建议,帮助改进搜索工具的算法和查询策略。 85 | 86 | 你的目标是通过严格的审核流程,提升搜索工具的准确性和效率,从而支持团队的技术决策和安全操作。 87 | """ 88 | ), 89 | verbose=self.verbose, 90 | llm=self.llm, 91 | allow_delegation=True, 92 | max_rpm=300, 93 | cache=False, 94 | ) 95 | 96 | task = Task( 97 | agent=agent, 98 | description=dedent( 99 | f""" 100 | 审核RAG搜索工具输出的结果,确保搜索结果与查询意图保持一致性。 101 | 在进行技术栈、漏洞和配置信息的搜索时,需要特别注意结果的相关性。 102 | 103 | 审核流程如下: 104 | 1. 确认每一项搜索结果是否直接相关于查询的技术栈或问题。例如,如果搜索关键词为Java相关技术,结果应严格限于Java环境,不应包含如.NET或其他无关技术的信息。 105 | 2. 分析搜索结果中的误报,标识出那些明显不符合查询条件的内容。 106 | 3. 
提出改进搜索策略的建议,如调整关键词、使用更精确的查询语句等,以减少未来的误报。 107 | 108 | 这个任务的目的是提高搜索效率和准确性,确保团队能够获得最相关和可行的信息。 109 | 110 | 请根据以下搜索结果,回答是否通过审核: 111 | 查询内容: 112 | {query} 113 | 114 | ------------------------ 115 | 116 | 搜索结果: 117 | {answer} 118 | """), 119 | expected_output=dedent( 120 | """ 121 | 最终答案应为以下三种类型之一,不要编造其他内容: 122 | 123 | 如果审核通过: 124 | 只需要回答字符串`PASS` 125 | 如果不通过,但是存在合理回答: 126 | 移除不合理的回答,然后提供合理的回答 127 | 如果不通过,且没有合理回答: 128 | 只需要回答字符串`FAIL` 129 | """), 130 | ) 131 | 132 | return Crew( 133 | agents=[agent], 134 | tasks=[task], 135 | verbose=self.verbose, 136 | share_crew=False, 137 | cache=True 138 | ).kickoff() 139 | -------------------------------------------------------------------------------- /recon/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 用于情报搜集的工具包。 3 | 4 | - `active` 子包用于主动侦察。 5 | - `passive` 子包用于被动侦察。 6 | 7 | """ -------------------------------------------------------------------------------- /recon/active/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主动信息搜集工具 3 | """ -------------------------------------------------------------------------------- /recon/active/masscan_search_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from ipaddress import ip_address 3 | from typing import Type, Any 4 | from pydantic.v1 import BaseModel, Field 5 | from crewai_tools.tools.base_tool import BaseTool 6 | from sqlalchemy import exc 7 | 8 | from helpers.masscan import Masscan 9 | from helpers.utils import valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Port, DuplicateException 12 | from config import logger 13 | 14 | 15 | class MasscanSearchToolSchema(BaseModel): 16 | """MasscanSearchToolSchema 的查询参数""" 17 | ip: str = Field(..., description="ip地址") 18 | 19 | 20 | class MasscanSearchTool(BaseTool): 21 | name: str = "Masscan" 22 | description: str = "使用Masscan扫描ip地址,扫描全端口,仅用于发现开放端口。扫描速度较快,但结果可能会不准确。" 23 | args_schema: Type[BaseModel] = MasscanSearchToolSchema 24 | masscan_path: str | None = None 25 | db: DB | None = None 26 | task_id: int | None = None 27 | 28 | class Config: 29 | arbitrary_types_allowed = True 30 | 31 | def __init__(self, db: DB, task_id: int, masscan_path: str = None): 32 | super().__init__() 33 | self.db = db 34 | self.task_id = task_id 35 | self.masscan_path = masscan_path 36 | logger.info("初始化工具 Masscan") 37 | 38 | def _run( 39 | self, 40 | **kwargs: Any, 41 | ) -> Any: 42 | masscan = Masscan(self.masscan_path) 43 | ip = kwargs.pop('ip') 44 | if valid_ip_address(ip) is False: 45 | return "IP地址格式错误" 46 | now = datetime.now() 47 | results = [] 48 | openports = [] 49 | try: 50 | results = masscan.scan(hosts=ip) 51 | except Exception as e: 52 | logger.error("masscan扫描失败: {}", e) 53 | return f"扫描失败: {e}" 54 | if len(results) == 0: 55 | return "未找到任何开放端口" 56 | try: 57 | with self.db.DBSession() as session: 58 | for port in results: 59 | openports.append(port.port) 60 | pdb = Port() 61 | pdb.target = ip 62 | pdb.task_id = self.task_id 63 | pdb.ip = ip_address(port.ip).exploded 64 | pdb.protocol = port.proto 65 | pdb.port = port.port 66 | pdb.checked_time = now 67 | pdb.is_passive = False 68 | pdb.source = self.name 69 | try: 70 | session.add(pdb) 71 | session.commit() 72 | except DuplicateException: 73 | session.rollback() 74 | except Exception: 75 | raise 76 | 77 | except exc.SQLAlchemyError as e: 78 | logger.error("数据库错误: {}", e) 79 | return "数据库错误" 80 | 
except Exception as e: 81 | logger.error("其他错误: {}", e) 82 | return f"其他错误: {e}" 83 | 84 | return f"IP: {ip} 共发现{len(openports)}个开放端口\n{','.join(openports)}" 85 | -------------------------------------------------------------------------------- /recon/active/nmap_search_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from ipaddress import ip_address 3 | from typing import Type, Any 4 | from pydantic.v1 import BaseModel, Field 5 | from crewai_tools.tools.base_tool import BaseTool 6 | from sqlalchemy import exc 7 | 8 | from helpers.nmap import Nmap 9 | from helpers.utils import valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Port, DuplicateException 12 | from config import logger 13 | 14 | 15 | class NmapSearchToolSchema(BaseModel): 16 | """NmapSearchToolSchema 的查询参数""" 17 | ip: str = Field(..., description="ip地址") 18 | ports: str = Field("-", description="端口,','分割") 19 | 20 | 21 | class NmapSearchTool(BaseTool): 22 | name: str = "Nmap" 23 | description: str = "使用Nmap扫描ip地址,发现开放端口的服务信息。扫描较慢,但结果精准。" 24 | args_schema: Type[BaseModel] = NmapSearchToolSchema 25 | nmap_path: str | None = None 26 | db: DB | None = None 27 | task_id: int | None = None 28 | 29 | class Config: 30 | arbitrary_types_allowed = True 31 | 32 | def __init__(self, db: DB, task_id: int, nmap_path: str = None): 33 | super().__init__() 34 | self.db = db 35 | self.task_id = task_id 36 | self.nmap_path = nmap_path 37 | logger.info("初始化工具 Nmap") 38 | 39 | def _run( 40 | self, 41 | **kwargs: Any, 42 | ) -> Any: 43 | nmap = Nmap(self.nmap_path) 44 | ip = kwargs.pop('ip') 45 | if valid_ip_address(ip) is False: 46 | return "IP地址格式错误" 47 | 48 | ports = kwargs.pop('ports') 49 | 50 | now = datetime.now() 51 | results = [] 52 | openports = [] 53 | try: 54 | results = nmap.scan(ip, ports) 55 | except Exception as e: 56 | logger.error("nmap扫描失败: {}", e) 57 | return f"扫描失败: {e}" 58 | if len(results) == 0: 59 | return "未找到任何开放端口" 60 | try: 61 | with self.db.DBSession() as session: 62 | for port in results: 63 | openports.append(f"{port.portid} {port.service}") 64 | pdb = Port() 65 | pdb.target = ip 66 | pdb.task_id = self.task_id 67 | pdb.ip = ip_address(port.ip).exploded 68 | pdb.protocol = port.protocol 69 | pdb.port = port.portid 70 | pdb.service = port.service 71 | pdb.product = port.product 72 | pdb.version = port.version 73 | pdb.checked_time = now 74 | pdb.is_passive = False 75 | if port.extrainfo is not None: 76 | pdb.extra_info = { 77 | "info": port.extrainfo, 78 | } 79 | pdb.source = self.name 80 | try: 81 | session.add(pdb) 82 | session.commit() 83 | except DuplicateException: 84 | session.rollback() 85 | except Exception: 86 | raise 87 | 88 | except exc.SQLAlchemyError as e: 89 | logger.error("数据库错误: {}", e) 90 | return "数据库错误" 91 | except Exception as e: 92 | logger.error("其他错误: {}", e) 93 | return f"其他错误: {e}" 94 | 95 | return f"IP: {ip} 共发现{len(openports)}个开放端口\n{','.join(openports)}" 96 | -------------------------------------------------------------------------------- /recon/cyber_assets_researcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | from ipaddress import ip_address 3 | 4 | from crewai import Agent, Task, Crew 5 | from crewai_tools import BaseTool 6 | from textwrap import dedent 7 | 8 | from helpers.utils import is_domain 9 | from persistence.database import DB 10 | from persistence.orm import ip_is_cdn 11 | from 
recon.active.masscan_search_tool import MasscanSearchTool 12 | from recon.active.nmap_search_tool import NmapSearchTool 13 | from recon.passive.alienvault_search_tool import AlienVaultSearchTool 14 | from recon.passive.fofa_search_tool import FofaSearchTool 15 | from recon.passive.security_trails_search_tool import SecurityTrailsSearchTool 16 | import validators 17 | from config import logger 18 | 19 | 20 | class CyberAssetsResearchers: 21 | """ 22 | 网络资产研究员 23 | 主要用于网络资产的被动侦察和主动扫描,以扩大攻击面 24 | """ 25 | 26 | def __init__(self, db: DB, llm=None, masscan_path=None, nmap_path=None, verbose=False, 27 | cdn_autonomous_judgment=False, 28 | cdn_apexdomain_threshold=50, 29 | cdn_subdomain_threshold=3): 30 | self.llm = llm 31 | self.db = db 32 | self.masscan_path = masscan_path 33 | self.nmap_path = nmap_path 34 | self.verbose = verbose 35 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 36 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 37 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 38 | 39 | def agent_cyber_asset_intelligence_scout(self, llm=None, tools: [BaseTool] = []) -> Agent: 40 | logger.info("初始化代理 网络资产情报侦察员") 41 | agent = Agent( 42 | role='网络资产情报侦察员', 43 | goal='通过被动侦察工具获取相关网络资产信息', 44 | backstory=dedent( 45 | """ 46 | 你是一名经验丰富的网络资产情报侦察员,主要任务是发现网络资产以扩大攻击面。 47 | 你精通互联网协议和各种服务器应用程序,特别是对DNS、HTTP、HTTPS、SMTP、POP3、IMAP等协议有深入的了解。 48 | 你擅长使用网络工具搜索目标的相关资产,专业在于识别和过滤搜索结果中与目标相关或不相关的网络资产。 49 | 你主要使用不同的被动信息收集工具,以避免对目标产生任何影响,如避免生成访问日志等。 50 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 51 | """ 52 | ), 53 | tools=tools, 54 | verbose=self.verbose, 55 | allow_delegation=True, 56 | max_rpm=300, 57 | # max_iter=1, 58 | llm=llm, 59 | cache=False, 60 | ) 61 | if llm is not None: 62 | agent.llm = llm 63 | return agent 64 | 65 | def task_cyber_assets_recon(self, agent: Agent, target: str) -> Task: 66 | logger.info("初始化任务 资产侦察") 67 | return Task( 68 | description=dedent( 69 | f""" 70 | 使用多种网络资产搜索引擎搜索目标的相关资产信息。 71 | 尽可能多的获取目标相关资产信息,越多资产这对于后续工作的开展越有利。 72 | 最终结果会被存入数据库以便后续使用。对于同一个目标,每个工具最多只能调用一次。 73 | 根据目标类型选择合适的工具进行搜索。 74 | 需要注意以下几点: 75 | - 如果目标为ipv4或ipv6地址,则必须为公网地址,否则不进行扫描。 76 | 77 | 如果目标为CDN,则跳过目标。 78 | 79 | 目标: `{target}` 80 | """ 81 | ), 82 | expected_output=dedent( 83 | """ 84 | 最终答案是本次搜索结果资产数量,具体的结果已存储在数据库中。不要编造其他额外内容。 85 | """), 86 | agent=agent, 87 | ) 88 | 89 | def agent_port_fast_scanner(self, llm=None, tools: [BaseTool] = []) -> Agent: 90 | logger.info("初始化代理 端口快速扫描员") 91 | agent = Agent( 92 | role='端口快速扫描员', 93 | goal='使用端口扫描工具对目标IP进行快速扫描以获取开放端口', 94 | backstory=dedent( 95 | """ 96 | 你是一名经验丰富的端口扫描员。你只扫描ip地址。 97 | 你擅长使用各种端口扫描工具收集目标的开放端口。 98 | """ 99 | ), 100 | tools=tools, 101 | verbose=self.verbose, 102 | allow_delegation=True, 103 | max_rpm=300, 104 | # max_iter=1, 105 | llm=llm, 106 | cache=False, 107 | ) 108 | if llm is not None: 109 | agent.llm = llm 110 | return agent 111 | 112 | def agent_port_precise_scanner(self, llm=None, tools: [BaseTool] = []) -> Agent: 113 | logger.info("初始化代理 端口精准扫描员") 114 | agent = Agent( 115 | role='端口精准扫描员', 116 | goal='使用端口扫描工具对目标IP的指定端口进行精准扫描以获取端口提供的服务信息', 117 | backstory=dedent( 118 | """ 119 | 你是一名经验丰富的端口扫描员。你只扫描ip的指定端口。 120 | 你擅长使用各种端口扫描工具对目标的指定端口进行探测,收集目标开放端口的服务详情。 121 | 你精通互联网协议和各种服务器应用程序,特别是对DNS、HTTP、HTTPS、SMTP、POP3、IMAP等协议有深入的了解。 122 | """ 123 | ), 124 | tools=tools, 125 | verbose=self.verbose, 126 | allow_delegation=True, 127 | max_rpm=300, 128 | # max_iter=1, 129 | llm=llm, 130 | cache=False, 131 | ) 132 | if llm is not None: 133 | agent.llm = llm 134 | return agent 135 | 136 | def task_port_fast_scan(self, agent: Agent, target: str) -> Task: 
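        # 补充说明(示意):快速扫描与精准扫描构成两阶段流水线,
        # 先由 Masscan 全端口粗扫得到 "ip + 开放端口列表",该自然语言输出再被原样
        # 交给精准扫描任务,由 Nmap 识别各端口的服务。串联方式见下文 portScanCrew:
        #
        #   portout = self._reconPortFastScanCrew(task_id, ip).kickoff()
        #   portout = self._reconPortPreciseScanCrew(task_id, portout).kickoff()
        #
        # 两个 Crew 之间仅靠文本衔接,因此本任务 expected_output 中
        # "端口以 `,` 分割、不要空格" 的格式约束是下游能否正确解析的关键。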
137 | logger.info("初始化任务 快速端口扫描") 138 | return Task( 139 | description=dedent( 140 | f""" 141 | 使用多种快速端口扫描工具,对ip进行端口扫描,以尽快获取目标开放端口信息。 142 | 尽可能多的获取目标开放端口,开放端口的数量对后续工作的开展至关重要。 143 | 144 | 目标: `{target}` 145 | """ 146 | ), 147 | expected_output=dedent( 148 | """ 149 | 最终结果为ip地址以及开放的端口数量及具体的端口列表。端口以`,`分割,不要空格。不要编造其他额外内容。 150 | """), 151 | agent=agent, 152 | ) 153 | 154 | def task_port_precise_scan(self, agent: Agent, target: str) -> Task: 155 | logger.info("初始化任务 精准端口扫描") 156 | return Task( 157 | description=dedent( 158 | f""" 159 | 使用多种精准端口扫描工具,对ip和指定的端口列表进行扫描以获取目标开放端口信息。 160 | 尽可能多的获取目标端口的服务信息,开放端口的服务信息对后续工作的开展至关重要。 161 | 162 | 目标可能是一个ip地址,也可能是ip地址和端口列表的组合。 163 | 目标: 164 | {target} 165 | """ 166 | ), 167 | expected_output=dedent( 168 | """ 169 | 最终结果为ip地址和开放的端口数量及具体的端口与服务列表。不要编造其他额外内容。 170 | """), 171 | agent=agent, 172 | ) 173 | 174 | def _reconPortFastScanCrew(self, task_id: int, target: str): 175 | ip = ip_address(target) 176 | if ip.is_private: 177 | # TODO 178 | raise NotImplementedError("暂不支持内网地址的扫描") 179 | 180 | agents = [] 181 | tasks = [] 182 | tools = [] 183 | 184 | pfag = self.agent_port_fast_scanner( 185 | self.llm, 186 | [ 187 | MasscanSearchTool(self.db, task_id, self.masscan_path), 188 | ] 189 | ) 190 | 191 | agents.append(pfag) 192 | 193 | taskf = self.task_port_fast_scan(pfag, ip.exploded) 194 | tasks.append(taskf) 195 | 196 | if len(agents) == 0: 197 | raise Exception("无可用工具") 198 | 199 | logger.info("初始化智能体 快速端口扫描") 200 | return Crew( 201 | agents=agents, 202 | tasks=tasks, 203 | verbose=self.verbose, 204 | share_crew=False 205 | ) 206 | 207 | def _reconPortPreciseScanCrew(self, task_id: int, target: str): 208 | agents = [] 209 | tasks = [] 210 | tools = [] 211 | 212 | ppag = self.agent_port_precise_scanner( 213 | self.llm, 214 | [ 215 | NmapSearchTool(self.db, task_id, self.nmap_path), 216 | ] 217 | ) 218 | agents.append(ppag) 219 | 220 | taskp = self.task_port_precise_scan(ppag, target) 221 | tasks.append(taskp) 222 | 223 | if len(agents) == 0: 224 | raise Exception("无可用工具") 225 | 226 | logger.info("初始化智能体 精准端口扫描") 227 | return Crew( 228 | agents=agents, 229 | tasks=tasks, 230 | verbose=self.verbose, 231 | share_crew=False 232 | ) 233 | 234 | def _reconIpCrew(self, task_id: int, target: str): 235 | ip = ip_address(target) 236 | if ip.is_private: 237 | # TODO 238 | raise NotImplementedError("暂不支持内网地址的扫描") 239 | with self.db.DBSession() as session: 240 | if ip_is_cdn(session, ip.exploded): 241 | raise Exception(f"目标为CDN地址") 242 | 243 | agents = [] 244 | tasks = [] 245 | tools = self._getPassiveReconTools(task_id) 246 | if len(tools) > 0: 247 | agscout = self.agent_cyber_asset_intelligence_scout(self.llm, tools) 248 | agents.append(agscout) 249 | 250 | taskscout = self.task_cyber_assets_recon(agscout, ip.exploded) 251 | tasks.append(taskscout) 252 | 253 | if len(agents) == 0: 254 | raise Exception("无可用工具") 255 | 256 | logger.info("初始化智能体 IP侦察") 257 | return Crew( 258 | agents=agents, 259 | tasks=tasks, 260 | verbose=self.verbose, 261 | share_crew=False 262 | ) 263 | 264 | def _getPassiveReconTools(self, task_id: int) -> []: 265 | tools = [ 266 | AlienVaultSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 267 | self.cdn_apexdomain_threshold, self.cdn_subdomain_threshold), 268 | ] 269 | if os.environ.get('FOFA_EMAIL') is not None and os.environ.get('FOFA_API_KEY') is not None: 270 | tools.append(FofaSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 271 | self.cdn_apexdomain_threshold, 
self.cdn_subdomain_threshold)) 272 | if os.environ.get('SECURITYTRAILS_API_KEY') is not None: 273 | tools.append( 274 | SecurityTrailsSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 275 | self.cdn_apexdomain_threshold, self.cdn_subdomain_threshold)) 276 | return tools 277 | 278 | def _reconDomainCrew(self, task_id: int, target: str): 279 | agents = [] 280 | tasks = [] 281 | 282 | tools = self._getPassiveReconTools(task_id) 283 | if len(tools) > 0: 284 | agscout = self.agent_cyber_asset_intelligence_scout(self.llm, tools) 285 | agents.append(agscout) 286 | taskscout = self.task_cyber_assets_recon(agscout, target) 287 | tasks.append(taskscout) 288 | 289 | if len(agents) == 0: 290 | raise Exception("无可用工具") 291 | 292 | logger.info("初始化智能体 域名侦察") 293 | return Crew( 294 | agents=agents, 295 | tasks=tasks, 296 | verbose=self.verbose, 297 | share_crew=False 298 | ) 299 | 300 | def reconCrew(self, task_id: int, target: str): 301 | 302 | try: 303 | # ip地址 304 | ipobj = ip_address(target) 305 | logger.info("IP目标 {}", target) 306 | return self._reconIpCrew(task_id, target) 307 | except ValueError: 308 | pass 309 | 310 | if validators.url(target): 311 | # url 312 | logger.info("url目标 {}", target) 313 | return self._reconDomainCrew(task_id, target) 314 | elif is_domain(target, rfc_2782=True): 315 | # domain 316 | logger.info("domain目标 {}", target) 317 | return self._reconDomainCrew(task_id, target) 318 | raise ValueError("目标类型不支持") 319 | 320 | def portScanCrew(self, task_id: int, ip: str): 321 | 322 | portout = self._reconPortFastScanCrew(task_id, ip).kickoff() 323 | logger.info("[{}] {}: {}", task_id, ip, portout) 324 | portout = self._reconPortPreciseScanCrew(task_id, portout).kickoff() 325 | logger.info("[{}] {}: {}", task_id, ip, portout) 326 | -------------------------------------------------------------------------------- /recon/passive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/recon/passive/__init__.py -------------------------------------------------------------------------------- /recon/passive/alienvault_search_tool.py: -------------------------------------------------------------------------------- 1 | from ipaddress import ip_address 2 | from typing import Type, Any 3 | from pydantic.v1 import BaseModel, Field 4 | from crewai_tools.tools.base_tool import BaseTool 5 | from requests import HTTPError 6 | from sqlalchemy import exc, and_, func, or_ 7 | 8 | from helpers.alienvault_api import AlienVaultApi 9 | from helpers.utils import get_ip_type, valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Domain, Cdn, DuplicateException, update_assets_associate_cdn 12 | from config import logger 13 | from recon.passive.cdn_check import CdnCheck 14 | 15 | 16 | class AlienVaultSearchToolSchema(BaseModel): 17 | """SecurityTrailsSearchTool 的查询参数""" 18 | domain: str = Field( 19 | None, 20 | description="域名。例如:`example.com`") 21 | ip: str = Field(None, description="IP地址。例如:`1.1.1.1`。") 22 | 23 | 24 | class AlienVaultSearchTool(BaseTool): 25 | name: str = "AlienVault" 26 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名的解析记录,不适用于内网ip。同一个目标在短时间内也不应当重复查询。" 27 | args_schema: Type[BaseModel] = AlienVaultSearchToolSchema 28 | db: DB | None = None 29 | task_id: int | None = None 30 | llm: Any = None 31 | verbose: bool = False 32 | cdn_autonomous_judgment: bool = False 33 | 
cdn_apexdomain_threshold: int = 1 34 | cdn_subdomain_threshold: int = 1 35 | 36 | class Config: 37 | arbitrary_types_allowed = True 38 | 39 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, cdn_autonomous_judgment=False, 40 | cdn_apexdomain_threshold=50, 41 | cdn_subdomain_threshold=3): 42 | super().__init__() 43 | self.db = db 44 | self.task_id = task_id 45 | self.llm = llm 46 | self.verbose = verbose 47 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 48 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 49 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 50 | 51 | logger.info("初始化工具 AlienVault") 52 | 53 | def _run( 54 | self, 55 | **kwargs: Any, 56 | ) -> Any: 57 | domain = kwargs.pop('domain', "") 58 | ip = kwargs.pop('ip', "") 59 | results = [] 60 | 61 | avapi = AlienVaultApi() 62 | if domain == "" and ip == "": 63 | return "domain和ip不能同时为空" 64 | target = domain 65 | try: 66 | if domain != "": 67 | logger.info("AlienVault查询: {}", domain) 68 | results = avapi.search_domain(domain) 69 | else: 70 | if valid_ip_address(ip) is False: 71 | return "IP地址格式错误" 72 | target = ip 73 | logger.info("AlienVault查询: {}", ip) 74 | results = avapi.search_ipv4(ip) 75 | except HTTPError as e: 76 | logger.error("AlienVault查询失败: {}", e) 77 | return f"查询失败: {e}" 78 | 79 | if len(results) == 0: 80 | return "未发现资产" 81 | 82 | if valid_ip_address(target): 83 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 84 | autonomous_judgment=self.cdn_autonomous_judgment, 85 | apexdomain_threshold=self.cdn_apexdomain_threshold, 86 | subdomain_threshold=self.cdn_subdomain_threshold) 87 | for result in results: 88 | for vdomain in result.sub_domains.values(): 89 | cdn_check.add(result.apex_domain, vdomain.sub_domain) 90 | 91 | if cdn_check.check(): 92 | with self.db.DBSession() as session: 93 | update_assets_associate_cdn(session, ip, cdn_check.get_name()) 94 | return "CDN服务器" 95 | 96 | try: 97 | cdns = {} 98 | with self.db.DBSession() as session: 99 | for result in results: 100 | for vdomain in result.sub_domains.values(): 101 | domaindb = Domain() 102 | domaindb.target = target 103 | domaindb.task_id = self.task_id 104 | domaindb.apex_domain = result.apex_domain 105 | domaindb.host = vdomain.hostname 106 | domaindb.subdomain = vdomain.sub_domain 107 | domaindb.source = self.name 108 | domaindb.cname = [] 109 | domaindb.cname_cdn = [] 110 | hostcdn = session.query(Cdn).filter( 111 | and_( 112 | Cdn.cname != None, 113 | or_( 114 | func.lower(vdomain.hostname).ilike(func.concat('%', Cdn.cname)), 115 | func.lower(result.apex_domain).ilike(func.concat('%', Cdn.cname)) 116 | ) 117 | ) 118 | ).first() 119 | if hostcdn is not None: 120 | domaindb.host_cdn = hostcdn.organization 121 | 122 | domaindb.a = [] 123 | domaindb.a_cdn = [] 124 | domaindb.aaaa = [] 125 | domaindb.aaaa_cdn = [] 126 | domaindb.mx = [] 127 | domaindb.ns = [] 128 | domaindb.soa = [] 129 | domaindb.txt = [] 130 | for record in vdomain.a: 131 | ipobj = ip_address(record) 132 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 133 | ip_type = get_ip_type(ip) 134 | domaindb.a.append(ipobj) 135 | if ipcdn is not None: 136 | domaindb.a_cdn.append(ipcdn.organization) 137 | elif domaindb.host_cdn is not None: 138 | domaindb.a_cdn.append(domaindb.host_cdn) 139 | else: 140 | domaindb.a_cdn.append(None) 141 | 142 | for record in vdomain.aaaa: 143 | ipobj = ip_address(record) 144 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 145 | 
domaindb.aaaa.append(ipobj) 146 | if ipcdn is not None: 147 | domaindb.aaaa_cdn.append(ipcdn.organization) 148 | elif domaindb.host_cdn is not None: 149 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 150 | else: 151 | domaindb.aaaa_cdn.append(None) 152 | 153 | for record in vdomain.cname: 154 | domaindb.cname = record.split(',') 155 | for cn in domaindb.cname: 156 | cnamecdn = session.query(Cdn).filter( 157 | and_( 158 | Cdn.cname != None, 159 | func.lower(cn).ilike(func.concat('%', Cdn.cname)) 160 | ) 161 | ).first() 162 | if cnamecdn is not None: 163 | domaindb.cname_cdn.append(cnamecdn.organization) 164 | else: 165 | domaindb.cname_cdn.append(None) 166 | 167 | for record in vdomain.mx: 168 | domaindb.mx.append(record) 169 | for record in vdomain.ns: 170 | domaindb.ns.append(record) 171 | for record in vdomain.soa: 172 | domaindb.soa.append(record) 173 | for record in vdomain.txt: 174 | domaindb.txt.append(record) 175 | 176 | try: 177 | session.add(domaindb) 178 | session.commit() 179 | 180 | acdns = domaindb.associate_cdn() 181 | cdns.update(acdns) 182 | 183 | except DuplicateException: 184 | session.rollback() 185 | except Exception: 186 | raise 187 | if len(cdns) > 0: 188 | with self.db.DBSession() as session: 189 | for ip, cdn in cdns.items(): 190 | update_assets_associate_cdn(session, ip, cdn) 191 | 192 | except exc.SQLAlchemyError as e: 193 | logger.error("数据库错误: {}", e) 194 | return "数据库错误" 195 | except Exception as e: 196 | logger.error("其他错误: {}", e) 197 | return f"其他错误: {e}" 198 | return f"共发现{len(results)}个资产" 199 | -------------------------------------------------------------------------------- /recon/passive/cdn_check.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | 5 | 6 | class CdnCheck: 7 | 8 | def __init__(self, db, ip, llm=None, verbose=False, autonomous_judgment=True, apexdomain_threshold=50, 9 | subdomain_threshold=3): 10 | """ 11 | :param db: 12 | :param ip: 13 | :param llm: 14 | :param verbose: 15 | :param autonomous_judgment: 是否自主判断,开启时,不考虑数量阈值 16 | :param apexdomain_threshold: 主域名数量阈值 17 | :param subdomain_threshold: 子域名数量阈值 18 | """ 19 | self.llm = llm 20 | self.db = db 21 | self.verbose = verbose 22 | self.ip = ip 23 | self.apexdomain_threshold = apexdomain_threshold 24 | self.subdomain_threshold = subdomain_threshold 25 | self.domains = {} 26 | self.autonomous_judgment = autonomous_judgment 27 | 28 | def get_name(self): 29 | return f"CdnCheck: 自主判断: {self.autonomous_judgment}, 主域名数量阈值: {self.apexdomain_threshold}, 子域名数量阈值: {self.subdomain_threshold}" 30 | 31 | def add(self, apex_domain: str, sub_domain: str): 32 | """ 33 | 添加域名信息 34 | """ 35 | if apex_domain not in self.domains: 36 | self.domains[apex_domain] = set() 37 | self.domains[apex_domain].add(sub_domain) 38 | 39 | def get_statistics(self) -> str: 40 | apex_domains = len(self.domains) 41 | st = f"域名总数: {apex_domains}\n域名列表:\n" 42 | for apex_domain, sub_domains in self.domains.items(): 43 | st += f" {apex_domain}: {len(sub_domains)}\n" 44 | return st 45 | 46 | def check(self) -> bool: 47 | 48 | agent = Agent( 49 | role='CDN服务器甄别人员', 50 | goal='通过ip反查出的域名,判断ip是否为CDN服务器', 51 | backstory=dedent( 52 | """ 53 | 你是一名经验丰富的网络安全专家,专门从事CDN服务提供商的研究和分析。 54 | 你的任务是通过分析关联域名的信息,判断给定的IP地址是否为CDN前置。 55 | 你将利用你丰富的知识和经验,综合关联域名的数量、多样性、结构和模式等因素,做出准确的判断。 56 | """ 57 | ), 58 | verbose=self.verbose, 59 | allow_delegation=False, 60 | max_rpm=300, 61 | # max_iter=1, 62 | llm=self.llm, 63 | cache=False, 64 | ) 65 | 66 
| threshold = '' 67 | if self.autonomous_judgment is False: 68 | threshold = f"\n我们可以简单的认为,当主域名数量超过{self.apexdomain_threshold}个,或者存在多个拥有超过{self.subdomain_threshold}个子域名的主域名时,此IP为CDN前置。" 69 | 70 | task = Task( 71 | description=dedent( 72 | f""" 73 | 分析给定的IP地址及其关联域名的统计,判断该IP地址是否为CDN前置。具体分析包括以下几个方面: 74 | - 关联域名的数量和多样性。 75 | - 域名的结构和模式。 76 | - 主域名的特点。 77 | {threshold} 78 | 79 | 以下数据为 ip `{self.ip}` 反查出的主域名总数,主域名列表,以及每个域名对应的子域名数量: 80 | {self.get_statistics()} 81 | """ 82 | ), 83 | expected_output=dedent( 84 | """ 85 | 最终答案是一个明确的判断,该IP地址是否为CDN前置,输出为描述是与否的特定字符`Y`或`N`。 86 | 不要编造其他额外内容。 87 | """), 88 | agent=agent, 89 | ) 90 | 91 | crew = Crew( 92 | agents=[agent], 93 | tasks=[task], 94 | verbose=self.verbose, 95 | share_crew=False 96 | ) 97 | 98 | out = crew.kickoff() 99 | return "Y" in out 100 | -------------------------------------------------------------------------------- /recon/passive/fofa_search_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from ipaddress import ip_address 4 | from typing import Type, Any 5 | from pydantic.v1 import BaseModel, Field 6 | from crewai_tools.tools.base_tool import BaseTool 7 | from sqlalchemy import exc, and_, func, or_ 8 | 9 | from helpers.fofa_api import FofaApi 10 | from helpers.utils import get_ip_type, valid_ip_address 11 | from persistence.database import DB 12 | from persistence.orm import Port, Domain, Cdn, DuplicateException, update_assets_associate_cdn 13 | from tld import get_tld 14 | from config import logger 15 | from recon.passive.cdn_check import CdnCheck 16 | 17 | 18 | class FofaSearchToolSchema(BaseModel): 19 | """FofaSearchToolTool 的查询参数""" 20 | 21 | # 基础类 22 | domain: str = Field( 23 | None, 24 | description="域名,用于搜索包含此关键字的域名资产,支持精确和模糊搜索。例如:`example.com`") 25 | ip: str = Field(None, description="IP地址,支持单一IPv4地址、IPv4 C段和单一IPv6地址。例如:`1.1.1.1` 或 `1.1.1.1/24`") 26 | org: str = Field(None, description="所属组织,用于搜索包含此组织的资产。例如:`Google`") 27 | 28 | # 标记类 29 | app: str = Field( 30 | None, 31 | description="应用名称,用于搜索包含此应用的资产。小众或自研软件结果精确,通用软件如`Apache` `nginx`结果可能不精确。例如:`Apache`") 32 | 33 | # 网站类 34 | title: str = Field(None, description="网页标题,用于搜索包含此标题的资产。例如:`Google`") 35 | header: str = Field( 36 | None, 37 | description="响应头,用于搜索响应头包含此关键字的资产。小众或自研软件结果精确。例如:`X-Elastic-Product`") 38 | body: str = Field(None, description="HTML正文,用于搜索包含此关键字的资产。例如:`百度一下`") 39 | js_name: str = Field(None, description="HTML正文包含的JS,用于搜索包含此JS引用关键字的资产。例如:`js/jquery.js`") 40 | icon_hash: str = Field(None, description="网站图标的hash值,用于搜索包含此图标hash值的资产。例如:`-247388890`") 41 | icp: str = Field( 42 | None, 43 | description="ICP备案号,用于搜索包含此备案号的资产。中国大陆网站需ICP备案。例如:`京ICP证030173号`") 44 | 45 | # 证书类 46 | cert: str = Field(None, description="证书信息,用于搜索证书中包含此关键字的资产。例如:`Let's Encrypt`") 47 | fuzzy: bool = Field( 48 | default=False, 49 | description="是否模糊搜索,用于拓展资产,但会降低准确性,默认为False。只能与domain参数单独使用。") 50 | 51 | 52 | class FofaSearchTool(BaseTool): 53 | name: str = "FOFA" 54 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名、证书等信息,不适用于内网ip。短时间内大量查询可能会被限制。同一个目标在短时间内也不应当重复查询。" 55 | args_schema: Type[BaseModel] = FofaSearchToolSchema 56 | db: DB | None = None 57 | task_id: int | None = None 58 | llm: Any = None 59 | verbose: bool = False 60 | cdn_autonomous_judgment: bool = False 61 | cdn_apexdomain_threshold: int = 1 62 | cdn_subdomain_threshold: int = 1 63 | 64 | class Config: 65 | arbitrary_types_allowed = True 66 | 67 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, 
cdn_autonomous_judgment=False, 68 | cdn_apexdomain_threshold=50, 69 | cdn_subdomain_threshold=3): 70 | super().__init__() 71 | self.db = db 72 | self.task_id = task_id 73 | self.llm = llm 74 | self.verbose = verbose 75 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 76 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 77 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 78 | logger.info("初始化工具 FOFA") 79 | 80 | def _run( 81 | self, 82 | **kwargs: Any, 83 | ) -> Any: 84 | fofaapi = FofaApi(os.environ.get('FOFA_EMAIL'), os.environ.get('FOFA_API_KEY'), 85 | os.environ.get('FOFA_VERSION', 'base')) 86 | fuzzy = kwargs.pop('fuzzy', False) 87 | target = "" 88 | if kwargs.get('domain') is not None: 89 | target = kwargs.get('domain') 90 | elif kwargs.get('ip') is not None: 91 | target = kwargs.get('ip') 92 | if valid_ip_address(target) is False: 93 | return "IP地址格式错误" 94 | results = [] 95 | try: 96 | logger.info("FOFA查询: {}", kwargs) 97 | results = fofaapi.search(fuzzy=fuzzy, **kwargs) 98 | except Exception as e: 99 | logger.error("fofa查询失败: {}", e) 100 | return f"查询失败: {e}" 101 | if len(results) == 0: 102 | return "未找到任何资产" 103 | 104 | if valid_ip_address(target): 105 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 106 | autonomous_judgment=self.cdn_autonomous_judgment, 107 | apexdomain_threshold=self.cdn_apexdomain_threshold, 108 | subdomain_threshold=self.cdn_subdomain_threshold) 109 | for result in results: 110 | if result.host is not None and result.host != "": 111 | hostobj = get_tld(result.host, fail_silently=True, as_object=True, fix_protocol=True) 112 | if hostobj is not None: 113 | cdn_check.add(hostobj.fld, hostobj.subdomain) 114 | 115 | if cdn_check.check(): 116 | with self.db.DBSession() as session: 117 | update_assets_associate_cdn(session, target, cdn_check.get_name()) 118 | return "CDN服务器" 119 | try: 120 | cdns = {} 121 | with self.db.DBSession() as session: 122 | for data in results: 123 | pdb = Port() 124 | pdb.target = target 125 | pdb.task_id = self.task_id 126 | pdb.ip = ip_address(data.ip) 127 | 128 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(pdb.ip.exploded)).first() 129 | if ipcdn is not None: 130 | pdb.ip_cdn = ipcdn.organization 131 | 132 | pdb.protocol = data.base_protocol 133 | pdb.port = data.port 134 | pdb.service = data.protocol 135 | pdb.product = data.product 136 | pdb.version = data.version.rstrip("/") 137 | if data.lastupdatetime is not None and data.lastupdatetime != "": 138 | pdb.checked_time = datetime.strptime(data.lastupdatetime, "%Y-%m-%d %H:%M:%S") 139 | pdb.is_passive = True 140 | extra_info = {} 141 | domaindb = None 142 | if data.host is not None and data.host != "": 143 | extra_info["host"] = data.host 144 | hostobj = get_tld(data.host, fail_silently=True, as_object=True, fix_protocol=True) 145 | if hostobj is not None: 146 | domaindb = Domain() 147 | domaindb.target = target 148 | domaindb.task_id = self.task_id 149 | if hostobj.subdomain == "": 150 | domaindb.host = hostobj.fld 151 | else: 152 | domaindb.host = hostobj.subdomain + "." 
+ hostobj.fld 153 | domaindb.apex_domain = hostobj.fld 154 | domaindb.subdomain = hostobj.subdomain 155 | 156 | hostcdn = session.query(Cdn).filter( 157 | and_( 158 | Cdn.cname != None, 159 | or_( 160 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 161 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 162 | ) 163 | ) 164 | ).first() 165 | if hostcdn is not None: 166 | domaindb.host_cdn = hostcdn.organization 167 | 168 | if pdb.checked_time is not None: 169 | domaindb.checked_time = pdb.checked_time 170 | domaindb.source = self.name 171 | domaindb.cname = [] 172 | domaindb.cname_cdn = [] 173 | domaindb.a = [] 174 | domaindb.a_cdn = [] 175 | domaindb.aaaa = [] 176 | domaindb.aaaa_cdn = [] 177 | domaindb.mx = [] 178 | domaindb.ns = [] 179 | domaindb.soa = [] 180 | domaindb.txt = [] 181 | if get_ip_type(data.ip) == "ipv4": 182 | ipobj = ip_address(data.ip) 183 | domaindb.a.append(ipobj) 184 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 185 | if ipcdn is not None: 186 | domaindb.a_cdn.append(ipcdn.organization) 187 | elif domaindb.host_cdn is not None: 188 | domaindb.a_cdn.append(domaindb.host_cdn) 189 | else: 190 | domaindb.a_cdn.append(None) 191 | else: 192 | ipobj = ip_address(data.ip) 193 | domaindb.aaaa.append(ipobj) 194 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 195 | if ipcdn is not None: 196 | domaindb.aaaa_cdn.append(ipcdn.organization) 197 | elif domaindb.host_cdn is not None: 198 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 199 | else: 200 | domaindb.aaaa_cdn.append(None) 201 | 202 | if data.as_organization is not None and data.as_organization != "": 203 | extra_info["as_organization"] = data.as_organization 204 | if data.cname is not None and data.cname != "": 205 | cnametld = get_tld(data.cname, fail_silently=True, as_object=True, fix_protocol=True) 206 | extra_info["cname"] = cnametld.parsed_url.hostname 207 | if domaindb is not None: 208 | domaindb.cname = cnametld.parsed_url.hostname.split(',') 209 | for cn in domaindb.cname: 210 | cnamecdn = session.query(Cdn).filter( 211 | and_( 212 | Cdn.cname != None, 213 | func.lower(cn).ilike(func.concat('%', Cdn.cname)) 214 | ) 215 | ).first() 216 | if cnamecdn is not None: 217 | domaindb.cname_cdn.append(cnamecdn.organization) 218 | else: 219 | domaindb.cname_cdn.append(None) 220 | 221 | if data.domain is not None and data.domain != "": 222 | extra_info["domain"] = data.domain 223 | if data.server is not None and data.server != "": 224 | extra_info["server"] = data.server 225 | if data.os is not None and data.os != "": 226 | extra_info["os"] = data.os 227 | if data.icp is not None and data.icp != "": 228 | extra_info["icp"] = data.icp 229 | if data.title is not None and data.title != "": 230 | extra_info["title"] = data.title 231 | if data.cert is not None and data.cert != "": 232 | extra_info["cert"] = { 233 | "data": data.cert, 234 | "issuer_org": data.certs_issuer_org, 235 | "issuer_cn": data.certs_issuer_cn, 236 | "subject_org": data.certs_subject_org, 237 | "subject_cn": data.certs_subject_cn, 238 | } 239 | pdb.extra_info = extra_info 240 | pdb.source = self.name 241 | try: 242 | session.add(pdb) 243 | session.commit() 244 | 245 | acdns = pdb.associate_cdn() 246 | cdns.update(acdns) 247 | except DuplicateException as e: 248 | session.rollback() 249 | except Exception as e: 250 | raise 251 | 252 | if domaindb is not None: 253 | try: 254 | session.add(domaindb) 255 | session.commit() 256 | 257 | acdns = domaindb.associate_cdn() 
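                            # 补充说明(示意):此处采用 "循环内收集、循环外回写" 的模式,
                            # associate_cdn() 返回 {ip: cdn组织} 映射,先汇总进 cdns 字典,
                            # 待本 DBSession 中的逐条 add/commit 全部完成后,再在方法结尾统一
                            # 调用 update_assets_associate_cdn 回写资产与 CDN 的关联,
                            # 推测是为了避免在提交循环中交叉更新其他资产行。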
258 | cdns.update(acdns) 259 | 260 | except DuplicateException as e: 261 | session.rollback() 262 | except Exception as e: 263 | raise 264 | if len(cdns) > 0: 265 | with self.db.DBSession() as session: 266 | for ip, cdn in cdns.items(): 267 | update_assets_associate_cdn(session, ip, cdn) 268 | 269 | except exc.SQLAlchemyError as e: 270 | logger.error("数据库错误: {}", e) 271 | return "数据库错误" 272 | except Exception as e: 273 | logger.error("其他错误: {}", e) 274 | return f"其他错误: {e}" 275 | 276 | return f"共发现{len(results)}个资产" 277 | -------------------------------------------------------------------------------- /recon/passive/security_trails_search_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from ipaddress import ip_address 4 | from typing import Type, Any 5 | from pydantic.v1 import BaseModel, Field 6 | from crewai_tools.tools.base_tool import BaseTool 7 | from requests import HTTPError 8 | from sqlalchemy import exc, and_, or_, func 9 | 10 | from helpers.security_trails_api import SecurityTrailsApi 11 | from helpers.utils import get_ip_type, valid_ip_address 12 | from persistence.database import DB 13 | from persistence.orm import Domain, Cdn, DuplicateException, update_assets_associate_cdn 14 | from tld import get_tld 15 | from config import logger 16 | from recon.passive.cdn_check import CdnCheck 17 | 18 | 19 | class SecurityTrailsSearchToolSchema(BaseModel): 20 | """SecurityTrailsSearchTool 的查询参数""" 21 | domain: str = Field( 22 | None, 23 | description="域名,用于搜索包含此关键字的域名资产,支持精确和模糊搜索。例如:`example.com`") 24 | ip: str = Field(None, description="IP地址,支持单一IPv4地址。例如:`1.1.1.1`。使用此参数时,不能携带其他参数。") 25 | history: bool = Field( 26 | default=False, 27 | description="是否查询域名解析历史,仅对domain有效,默认为False。只能与domain参数单独使用。") 28 | fuzzy: bool = Field( 29 | default=False, 30 | description="是否模糊搜索,用于拓展资产,但会降低准确性,默认为False。只能与domain参数单独使用。") 31 | 32 | 33 | class SecurityTrailsSearchTool(BaseTool): 34 | name: str = "SecurityTrails" 35 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名、子域名以及域名的解析记录,不适用于内网ip。短时间内大量查询可能会被限制。同一个目标在短时间内也不应当重复查询。" 36 | args_schema: Type[BaseModel] = SecurityTrailsSearchToolSchema 37 | db: DB | None = None 38 | task_id: int | None = None 39 | llm: Any = None 40 | verbose: bool = False 41 | cdn_autonomous_judgment: bool = False 42 | cdn_apexdomain_threshold: int = 1 43 | cdn_subdomain_threshold: int = 1 44 | 45 | class Config: 46 | arbitrary_types_allowed = True 47 | 48 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, cdn_autonomous_judgment=False, 49 | cdn_apexdomain_threshold=50, 50 | cdn_subdomain_threshold=3): 51 | super().__init__() 52 | self.db = db 53 | self.task_id = task_id 54 | self.llm = llm 55 | self.verbose = verbose 56 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 57 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 58 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 59 | logger.info("初始化工具 SecurityTrails") 60 | 61 | def _run( 62 | self, 63 | **kwargs: Any, 64 | ) -> Any: 65 | stapi = SecurityTrailsApi(os.environ.get('SECURITYTRAILS_API_KEY')) 66 | fuzzy = kwargs.pop('fuzzy', False) 67 | history = kwargs.pop('history', False) 68 | domain = kwargs.pop('domain', "") 69 | ip = kwargs.pop('ip', "") 70 | results = [] 71 | 72 | if history: 73 | if domain == "": 74 | return "domain为空。history参数仅对domain有效" 75 | try: 76 | logger.info("SecurityTrails查询历史解析: {}", domain) 77 | results = stapi.get_history(domain) 78 | except HTTPError as e: 79 | 
logger.error("SecurityTrails查询失败: {}", e) 80 | return f"查询失败: {e}" 81 | try: 82 | cdns = {} 83 | with self.db.DBSession() as session: 84 | for result in results: 85 | hostobj = get_tld(result.hostname, fail_silently=True, as_object=True, fix_protocol=True) 86 | domaindb = Domain() 87 | domaindb.target = domain 88 | domaindb.task_id = self.task_id 89 | domaindb.apex_domain = hostobj.fld 90 | domaindb.host = result.hostname 91 | domaindb.subdomain = hostobj.subdomain 92 | 93 | hostcdn = session.query(Cdn).filter( 94 | and_( 95 | Cdn.cname != None, 96 | or_( 97 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 98 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 99 | ) 100 | ) 101 | ).first() 102 | if hostcdn is not None: 103 | domaindb.host_cdn = hostcdn.organization 104 | 105 | domaindb.source = self.name 106 | domaindb.cname = [] 107 | domaindb.cname_cdn = [] 108 | domaindb.a = [] 109 | domaindb.a_cdn = [] 110 | domaindb.aaaa = [] 111 | domaindb.aaaa_cdn = [] 112 | domaindb.mx = [] 113 | domaindb.ns = [] 114 | domaindb.soa = [] 115 | domaindb.txt = [] 116 | 117 | if result.first_seen != "": 118 | domaindb.first_seen = datetime.strptime(result.first_seen, "%Y-%m-%d") 119 | if result.last_seen != "": 120 | domaindb.last_seen = datetime.strptime(result.last_seen, "%Y-%m-%d") 121 | 122 | if result.ip != "": 123 | ipobj = ip_address(result.ip) 124 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 125 | 126 | ip_type = get_ip_type(result.ip) 127 | if ip_type == "ipv4": 128 | domaindb.a.append(ipobj) 129 | if ipcdn is not None: 130 | domaindb.a_cdn.append(ipcdn.organization) 131 | elif domaindb.host_cdn is not None: 132 | domaindb.a_cdn.append(domaindb.host_cdn) 133 | else: 134 | domaindb.a_cdn.append(None) 135 | else: 136 | domaindb.aaaa.append(ipobj) 137 | if ipcdn is not None: 138 | domaindb.aaaa_cdn.append(ipcdn.organization) 139 | elif domaindb.host_cdn is not None: 140 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 141 | else: 142 | domaindb.aaaa_cdn.append(None) 143 | try: 144 | session.add(domaindb) 145 | session.commit() 146 | 147 | acdns = domaindb.associate_cdn() 148 | cdns.update(acdns) 149 | 150 | except DuplicateException: 151 | session.rollback() 152 | except Exception: 153 | raise 154 | 155 | if len(cdns) > 0: 156 | with self.db.DBSession() as session: 157 | for ip, cdn in cdns.items(): 158 | update_assets_associate_cdn(session, ip, cdn) 159 | 160 | except exc.SQLAlchemyError as e: 161 | logger.error("数据库错误: {}", e) 162 | return "数据库错误" 163 | except Exception as e: 164 | logger.error("其他错误: {}", e) 165 | return f"其他错误: {e}" 166 | return f"共发现{len(results)}个资产" 167 | try: 168 | target = domain 169 | if fuzzy: 170 | if domain == "": 171 | return "domain为空。fuzzy参数仅对domain有效" 172 | logger.info("SecurityTrails模糊查询: {}", domain) 173 | results = stapi.search_domain_fuzzy(domain) 174 | elif domain != "": 175 | logger.info("SecurityTrails查询: {}", domain) 176 | results = stapi.search_domain(domain) 177 | elif ip != "": 178 | if valid_ip_address(ip) is False: 179 | return "IP地址格式错误" 180 | target = ip 181 | logger.info("SecurityTrails查询: {}", ip) 182 | results = stapi.search_ip(ip) 183 | else: 184 | return "domain和ip不能同时为空" 185 | except HTTPError as e: 186 | logger.error("SecurityTrails查询失败: {}", e) 187 | return f"查询失败: {e}" 188 | 189 | if len(results) == 0: 190 | return "未发现资产" 191 | 192 | if valid_ip_address(target): 193 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 194 | 
autonomous_judgment=self.cdn_autonomous_judgment, 195 | apexdomain_threshold=self.cdn_apexdomain_threshold, 196 | subdomain_threshold=self.cdn_subdomain_threshold) 197 | for result in results: 198 | cdn_check.add(result.apex_domain, result.subdomain) 199 | 200 | if cdn_check.check(): 201 | with self.db.DBSession() as session: 202 | update_assets_associate_cdn(session, target, cdn_check.get_name()) 203 | return "CDN服务器" 204 | 205 | try: 206 | cdns = {} 207 | with self.db.DBSession() as session: 208 | for result in results: 209 | domaindb = Domain() 210 | domaindb.target = target 211 | domaindb.task_id = self.task_id 212 | domaindb.apex_domain = result.apex_domain 213 | domaindb.host = result.hostname 214 | domaindb.subdomain = result.subdomain 215 | 216 | hostcdn = session.query(Cdn).filter( 217 | and_( 218 | Cdn.cname != None, 219 | or_( 220 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 221 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 222 | ) 223 | ) 224 | ).first() 225 | if hostcdn is not None: 226 | domaindb.host_cdn = hostcdn.organization 227 | 228 | domaindb.source = self.name 229 | domaindb.cname = [] 230 | domaindb.cname_cdn = [] 231 | domaindb.a = [] 232 | domaindb.a_cdn = [] 233 | domaindb.aaaa = [] 234 | domaindb.aaaa_cdn = [] 235 | domaindb.mx = [] 236 | domaindb.ns = [] 237 | domaindb.soa = [] 238 | domaindb.txt = [] 239 | for ip in result.ips: 240 | ipobj = ip_address(ip) 241 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 242 | ip_type = get_ip_type(ip) 243 | if ip_type == "ipv4": 244 | domaindb.a.append(ipobj) 245 | if ipcdn is not None: 246 | domaindb.a_cdn.append(ipcdn.organization) 247 | elif domaindb.host_cdn is not None: 248 | domaindb.a_cdn.append(domaindb.host_cdn) 249 | else: 250 | domaindb.a_cdn.append(None) 251 | else: 252 | domaindb.aaaa.append(ipobj) 253 | if ipcdn is not None: 254 | domaindb.aaaa_cdn.append(ipcdn.organization) 255 | elif domaindb.host_cdn is not None: 256 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 257 | else: 258 | domaindb.aaaa_cdn.append(None) 259 | try: 260 | session.add(domaindb) 261 | session.commit() 262 | 263 | acdns = domaindb.associate_cdn() 264 | cdns.update(acdns) 265 | except DuplicateException: 266 | session.rollback() 267 | except Exception: 268 | raise 269 | if len(cdns) > 0: 270 | with self.db.DBSession() as session: 271 | for ip, cdn in cdns.items(): 272 | update_assets_associate_cdn(session, ip, cdn) 273 | 274 | except exc.SQLAlchemyError as e: 275 | logger.error("数据库错误: {}", e) 276 | return "数据库错误" 277 | except Exception as e: 278 | logger.error("其他错误: {}", e) 279 | return f"其他错误: {e}" 280 | return f"共发现{len(results)}个资产" 281 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.3 2 | crewai==0.36.0 3 | crewai_tools==0.4.8 4 | embedchain==0.1.116 5 | fake_useragent==1.5.1 6 | FOFA_py==2.0.3 7 | httpx==0.27.0 8 | langchain_openai==0.1.15 9 | langgraph==0.1.7 10 | loguru==0.7.2 11 | mmh3==4.1.0 12 | psycopg2_binary==2.9.9 13 | pydantic==2.8.2 14 | python3_nmap==1.6.0 15 | PyYAML==6.0.1 16 | Requests==2.32.3 17 | SQLAlchemy==2.0.30 18 | tiktoken==0.7.0 19 | tld==0.13 20 | validators==0.28.3 21 | -------------------------------------------------------------------------------- /team.py: -------------------------------------------------------------------------------- 1 | from typing
import Optional 2 | 3 | from exploits.attack_surface_research import AttackSurfaceResearch 4 | from exploits.deploy_attack import DeployAttack 5 | from exploits.vul_scan_expert import VulScanExpert 6 | from persistence.database import DB 7 | from rag.rag import RAG 8 | from recon.cyber_assets_researcher import CyberAssetsResearchers 9 | 10 | 11 | class Team: 12 | """团队""" 13 | 14 | cyberAssetsResearchers: CyberAssetsResearchers | None = None 15 | vulScanExpert: VulScanExpert | None = None 16 | attackSurfaceResearch: AttackSurfaceResearch | None = None 17 | deployAttack: DeployAttack | None = None 18 | 19 | def __init__(self, 20 | db: DB, 21 | rag: RAG, 22 | llm, 23 | debug: Optional[bool] = None, 24 | masscan_path: Optional[str] = None, 25 | nmap_path: Optional[str] = None, 26 | gobuster_path: str = None, 27 | gobuster_wordlist_path: str = None, 28 | nuclei_path: Optional[str] = None, 29 | nuclei_templates_path: Optional[str] = None): 30 | self.cyberAssetsResearchers = CyberAssetsResearchers( 31 | db=db, 32 | llm=llm, 33 | masscan_path=masscan_path, 34 | nmap_path=nmap_path, 35 | verbose=debug 36 | ) 37 | 38 | self.vulScanExpert = VulScanExpert( 39 | db=db, 40 | llm=llm, 41 | gobuster_path=gobuster_path, 42 | gobuster_wordlist_path=gobuster_wordlist_path, 43 | nuclei_path=nuclei_path, 44 | nuclei_templates_path=nuclei_templates_path, 45 | verbose=debug 46 | ) 47 | 48 | self.attackSurfaceResearch = AttackSurfaceResearch( 49 | db=db, 50 | rag=rag, 51 | llm=llm, 52 | verbose=debug 53 | ) 54 | 55 | self.deployAttack = DeployAttack( 56 | db=db, 57 | rag=rag, 58 | llm=llm, 59 | verbose=debug 60 | ) 61 | 62 | def get_recon_crew(self, task_id: int, target: str): 63 | """ 64 | 获取侦察队伍 65 | """ 66 | return self.cyberAssetsResearchers.reconCrew(task_id, target) 67 | 68 | def get_mapping_crew(self, task_id: int, target: str): 69 | """ 70 | 获取测绘队伍 71 | """ 72 | return self.vulScanExpert.fingerprintingCrew(task_id, target) 73 | 74 | def get_vulscan_crew(self, task_id: int, target: str): 75 | """ 76 | 获取漏扫队伍 77 | """ 78 | return self.vulScanExpert.vulScanCrew(task_id, target) 79 | 80 | def get_intelligence_analysis_crew(self, target: str): 81 | """ 82 | 获取情报分析队伍 83 | """ 84 | return self.attackSurfaceResearch.intelligenceAnalysisCrew(target) 85 | 86 | def get_establishing_foothold_research_crew(self, target: str, intelligence: str): 87 | """ 88 | 获取攻击面研究队伍 89 | """ 90 | return self.attackSurfaceResearch.establishingFootholdResearchCrew(target, intelligence) 91 | 92 | def get_attack_plan_review_crew(self, assets: str, intelligence: str, plan: str, review: str | None = None): 93 | """ 94 | 获取攻击计划审核队伍 95 | """ 96 | return self.attackSurfaceResearch.attackPlanReviewCrew(assets, intelligence, plan, review) 97 | 98 | def get_establishing_foothold_deploy_attack_crew(self, asset: str, plan: str): 99 | """ 100 | 获取攻击队伍 101 | """ 102 | return self.deployAttack.establishingFootholdAttackCrew(asset, plan) 103 | -------------------------------------------------------------------------------- /workflow_attack_plan.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from langgraph.graph import StateGraph 3 | from typing import TypedDict, Optional 4 | from langgraph.graph.graph import CompiledGraph 5 | from sqlalchemy import and_, func 6 | 7 | from persistence.database import DB 8 | from persistence.orm import Port, WebInfo, Vul, Workflow, UrlEnum 9 | from team import Team 10 | from config import logger 11 | 12 | WORK = 'attack_plan' 13 | 14 | 15 | class 
StatePlanReview(IntEnum): 16 | INIT = 0 # 初始化 17 | PREPARATION = 1 # 准备工作 18 | MAKE_PLAN = 2 # 制定计划 19 | REVIEW = 3 # 审核 20 | PASS = 4 # 通过 21 | FAIL = 5 # 否决 22 | REWORK = 6 # 重做 23 | 24 | 25 | class Target: 26 | target: str 27 | asset: str 28 | plan: str | None 29 | review: str | None 30 | status: StatePlanReview 31 | 32 | def __init__(self, db: DB, task_id: int, target: str, asset: str, intelligence: str | None, plan: str | None, 33 | review: str | None, 34 | workflow_id=0, status=StatePlanReview.INIT): 35 | self.db = db 36 | self.task_id = task_id 37 | self.workflow_id = workflow_id 38 | self.target = target 39 | self.asset = asset 40 | self.intelligence = intelligence 41 | self.plan = plan 42 | self.review = review 43 | self.status = status 44 | 45 | def init(self): 46 | with self.db.DBSession() as session: 47 | if self.workflow_id == 0: 48 | wf = Workflow() 49 | wf.work = WORK 50 | wf.task_id = self.task_id 51 | wf.status = int(self.status) 52 | wf.data = {'asset': self.asset, 'intelligence': self.intelligence, 'plan': self.plan, 53 | 'review': self.review, 'target': self.target} 54 | session.add(wf) 55 | session.flush() 56 | session.commit() 57 | self.workflow_id = wf.id 58 | 59 | def update_status(self): 60 | with self.db.DBSession() as session: 61 | wf = session.query(Workflow).filter( 62 | and_( 63 | Workflow.id == self.workflow_id 64 | ) 65 | ).first() 66 | wf.status = int(self.status) 67 | wf.data = {'asset': self.asset, 'intelligence': self.intelligence, 'plan': self.plan, 'review': self.review, 68 | 'target': self.target} 69 | session.commit() 70 | 71 | 72 | class TaskStateAttackPlan(TypedDict): 73 | task_id: int 74 | targets: list[Target] 75 | 76 | 77 | class TaskNodesAttackPlan: 78 | team: Team | None = None 79 | db: DB | None = None 80 | 81 | def __init__(self, db: DB, team: Team): 82 | self.db = db 83 | self.team = team 84 | 85 | def init_task(self, state: TaskStateAttackPlan): 86 | """ 87 | 初始化任务 88 | """ 89 | if len(state['targets']) == 0: 90 | # 必须处理完所有遗留任务 91 | state['targets'] = self._assets_intelligence(state['task_id']) 92 | for target in state['targets']: 93 | target.init() 94 | 95 | return state 96 | 97 | def preparation(self, state: TaskStateAttackPlan): 98 | """ 99 | 准备工作 100 | """ 101 | for target in state['targets']: 102 | if target.status in [StatePlanReview.INIT, StatePlanReview.PREPARATION, StatePlanReview.REWORK]: 103 | try: 104 | crew = self.team.get_intelligence_analysis_crew(target.asset) 105 | out = crew.kickoff() 106 | target.intelligence = out 107 | target.status = StatePlanReview.MAKE_PLAN 108 | target.update_status() 109 | logger.info("[preparation {}] {}\n{}", state['task_id'], target.target, out) 110 | except ValueError as e: 111 | logger.debug("[preparation {}] {}\n{}", state['task_id'], target.target, e) 112 | except Exception as e: 113 | logger.error("[preparation {}] {}\n{}", state['task_id'], target.target, e) 114 | return state 115 | 116 | def make_plan(self, state: TaskStateAttackPlan): 117 | """ 118 | 制定计划 119 | """ 120 | for target in state['targets']: 121 | if target.status in [StatePlanReview.MAKE_PLAN, StatePlanReview.REWORK]: 122 | try: 123 | if target.status == StatePlanReview.REWORK and target.review is not None: 124 | crew = self.team.get_attack_plan_review_crew(target.asset, target.intelligence, target.plan, 125 | target.review) 126 | else: 127 | crew = self.team.get_establishing_foothold_research_crew(target.asset, target.intelligence) 128 | out = crew.kickoff() 129 | if out.startswith("FAIL") is False: 130 | target.plan = out 
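                    # 补充说明(示意):计划生成后置为 REVIEW,交由 attack_plan_review 中的
                    # 审核 Crew 裁决;两者之间约定的文本协议为:
                    #   以 PASS 开头 -> StatePlanReview.PASS,review 取 "PASS " 之后的正文
                    #   以 FAIL 开头 -> StatePlanReview.FAIL
                    #   其他任意内容 -> 视为整改意见,置为 REWORK,经条件边回到 preparation 重做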
131 | target.status = StatePlanReview.REVIEW 132 | target.update_status() 133 | logger.info("[make_plan {}] {}\n{}", state['task_id'], target.target, out) 134 | except ValueError as e: 135 | logger.debug("[make_plan {}] {}\n{}", state['task_id'], target.target, e) 136 | except Exception as e: 137 | logger.error("[make_plan {}] {}\n{}", state['task_id'], target.target, e) 138 | return state 139 | 140 | def attack_plan_review(self, state: TaskStateAttackPlan): 141 | """ 142 | 检查计划 143 | """ 144 | for target in state['targets']: 145 | if target.status in [StatePlanReview.REVIEW]: 146 | try: 147 | crew = self.team.get_attack_plan_review_crew(target.asset, target.intelligence, target.plan) 148 | out = crew.kickoff().strip() 149 | if out.startswith("PASS"): 150 | target.status = StatePlanReview.PASS 151 | target.review = out[5:] 152 | elif out.startswith("FAIL"): 153 | target.status = StatePlanReview.FAIL 154 | target.review = out[5:] 155 | else: 156 | target.status = StatePlanReview.REWORK 157 | target.review = out 158 | target.update_status() 159 | logger.info("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, out) 160 | except ValueError as e: 161 | logger.debug("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, e) 162 | except Exception as e: 163 | logger.error("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, e) 164 | return state 165 | 166 | def finish(self, state: TaskStateAttackPlan): 167 | """ 168 | 结束任务 169 | """ 170 | # TODO 171 | # for target in state['targets']: 172 | # print('finish', target.target) 173 | # print(target.plan) 174 | return state 175 | 176 | def edge_shuld_finish(self, state: TaskStateAttackPlan): 177 | """ 178 | 条件边 179 | """ 180 | 181 | for target in state['targets']: 182 | if target.status in [StatePlanReview.REWORK]: 183 | return 'rework' 184 | return 'pass' 185 | 186 | def _assets_intelligence(self, task_id: int) -> [Target]: 187 | """ 188 | 获取已探明的资产 189 | """ 190 | datas = {} 191 | with self.db.DBSession() as session: 192 | infos = session.query(WebInfo).filter( 193 | and_( 194 | WebInfo.task_id == task_id, 195 | WebInfo.finger_prints != None, 196 | func.jsonb_array_length(WebInfo.finger_prints) >= 1 197 | ) 198 | ).all() 199 | urlenums = session.query(UrlEnum).filter( 200 | and_( 201 | UrlEnum.task_id == task_id, 202 | UrlEnum.finger_prints != None, 203 | func.jsonb_array_length(UrlEnum.finger_prints) >= 1 204 | ) 205 | ).all() 206 | webs = infos + urlenums 207 | if len(webs) == 0: 208 | infos = session.query(WebInfo).filter(WebInfo.task_id == task_id).all() 209 | urlenums = session.query(UrlEnum).filter(UrlEnum.task_id == task_id).all() 210 | webs = infos + urlenums 211 | for web in webs: 212 | if isinstance(web, WebInfo): 213 | host = web.host 214 | target = web.target 215 | else: 216 | host = web.web_info.host 217 | target = web.web_info.target 218 | if target not in datas: 219 | datas[host] = [] 220 | datas[host].append(web.to_prompt_template()) 221 | 222 | vuls = session.query(Vul).filter(Vul.task_id == task_id).all() 223 | for vul in vuls: 224 | if vul.target not in datas: 225 | datas[vul.target] = [] 226 | datas[vul.target].append(vul.to_prompt_template()) 227 | 228 | ports = session.query(Port).filter( 229 | and_( 230 | Port.task_id == task_id, 231 | Port.ip_cdn == None 232 | ) 233 | ).all() 234 | for port in ports: 235 | if port.ip not in datas: 236 | datas[port.ip] = [] 237 | datas[port.ip].append(port.to_prompt_template()) 238 | 239 | targets = [] 240 | for target, data in datas.items(): 241 | 
targets.append( 242 | Target( 243 | db=self.db, 244 | task_id=task_id, 245 | target=target, 246 | asset=f"目标: {target}\n{'\n\n---------------\n\n'.join(data)}", 247 | intelligence=None, 248 | plan=None, 249 | review=None 250 | )) 251 | return targets 252 | 253 | 254 | class WorkFlowAttackPlan: 255 | app: CompiledGraph | None = None 256 | debug: bool = False 257 | team: Team | None = None 258 | db: DB | None = None 259 | 260 | def __init__(self, db: DB, team: Team, debug: Optional[bool] = None): 261 | self.db = db 262 | self.team = team 263 | self.debug = debug 264 | 265 | nodes = TaskNodesAttackPlan(db, team) 266 | workflow = StateGraph(TaskStateAttackPlan) 267 | workflow.add_node('init_task', nodes.init_task) 268 | workflow.add_node('preparation', nodes.preparation) 269 | workflow.add_node('make_plan', nodes.make_plan) 270 | workflow.add_node('attack_plan_review', nodes.attack_plan_review) 271 | workflow.add_node('finish', nodes.finish) 272 | 273 | workflow.set_entry_point('init_task') 274 | workflow.set_finish_point('finish') 275 | 276 | workflow.add_edge('init_task', 'preparation') 277 | workflow.add_edge('preparation', 'make_plan') 278 | workflow.add_edge('make_plan', 'attack_plan_review') 279 | workflow.add_conditional_edges( 280 | source='attack_plan_review', 281 | path=nodes.edge_shuld_finish, 282 | path_map={ 283 | 'rework': 'preparation', 284 | 'pass': 'finish' 285 | } 286 | ) 287 | 288 | self.app = workflow.compile(debug=debug) 289 | 290 | def run(self, taskid: int): 291 | state = { 292 | 'task_id': taskid, 293 | 'targets': [] 294 | } 295 | 296 | with self.db.DBSession() as session: 297 | wfs = session.query(Workflow).filter( 298 | and_( 299 | Workflow.task_id == taskid, 300 | Workflow.work == WORK, 301 | Workflow.status.notin_([StatePlanReview.PASS, StatePlanReview.FAIL]) 302 | ) 303 | ).all() 304 | for wf in wfs: 305 | state['targets'].append( 306 | Target( 307 | db=self.db, 308 | task_id=wf.task_id, 309 | target=wf.data['target'], 310 | asset=wf.data['asset'], 311 | intelligence=wf.data['intelligence'], 312 | plan=wf.data['plan'], 313 | review=wf.data['review'], 314 | workflow_id=wf.id, 315 | status=StatePlanReview(wf.status) 316 | ) 317 | ) 318 | 319 | self.app.invoke(state) 320 | -------------------------------------------------------------------------------- /workflow_deploy_attack.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from langgraph.graph import StateGraph 3 | from typing import TypedDict, Optional 4 | from langgraph.graph.graph import CompiledGraph 5 | from sqlalchemy import and_ 6 | 7 | import workflow_attack_plan 8 | from persistence.database import DB 9 | from persistence.orm import Workflow 10 | from team import Team 11 | from config import logger 12 | 13 | WORK = 'deploy_attack' 14 | 15 | 16 | class StateDeployAttack(IntEnum): 17 | INIT = 0 # 初始化 18 | DEPLOY = 1 # 部署 19 | ATTACK = 2 # 攻击 20 | REWORK = 3 # 重试 21 | FINISH = 99 # 结束 22 | 23 | 24 | class Target: 25 | target: str 26 | asset: str 27 | plan: str 28 | status: StateDeployAttack 29 | 30 | def __init__(self, db: DB, task_id: int, target: str, asset: str, plan: str, workflow_id=0, status=StateDeployAttack.INIT): 31 | self.db = db 32 | self.task_id = task_id 33 | self.workflow_id = workflow_id 34 | self.target = target 35 | self.asset = asset 36 | self.plan = plan 37 | self.status = status 38 | 39 | def init(self): 40 | with self.db.DBSession() as session: 41 | if self.workflow_id == 0: 42 | wf = Workflow() 43 | wf.work = WORK 44 | 
wf.task_id = self.task_id 45 | wf.status = int(self.status) 46 | wf.data = {'target': self.target, 'asset': self.asset, 'plan': self.plan} 47 | session.add(wf) 48 | session.flush() 49 | session.commit() 50 | self.workflow_id = wf.id 51 | 52 | def update_status(self): 53 | with self.db.DBSession() as session: 54 | wf = session.query(Workflow).filter( 55 | and_( 56 | Workflow.id == self.workflow_id 57 | ) 58 | ).first() 59 | wf.status = int(self.status) 60 | wf.data = {'target': self.target, 'asset': self.asset, 'plan': self.plan} 61 | session.commit() 62 | 63 | 64 | class TaskStateDeployAttack(TypedDict): 65 | task_id: int 66 | targets: list[Target] 67 | 68 | 69 | class TaskNodesDeployAttack: 70 | team: Team | None = None 71 | db: DB | None = None 72 | 73 | def __init__(self, db: DB, team: Team): 74 | self.db = db 75 | self.team = team 76 | 77 | def init_task(self, state: TaskStateDeployAttack): 78 | """ 79 | 初始化任务 80 | """ 81 | if len(state['targets']) == 0: 82 | # 必须处理完所有遗留任务 83 | state['targets'] = self._load_attack_plan(state['task_id']) 84 | for target in state['targets']: 85 | target.init() 86 | 87 | return state 88 | 89 | def deploy(self, state: TaskStateDeployAttack): 90 | for target in state['targets']: 91 | if target.status in [StateDeployAttack.INIT, StateDeployAttack.REWORK, StateDeployAttack.DEPLOY]: 92 | # TODO 93 | target.status = StateDeployAttack.ATTACK 94 | target.update_status() 95 | 96 | return state 97 | 98 | def attack(self, state: TaskStateDeployAttack): 99 | for target in state['targets']: 100 | if target.status in [StateDeployAttack.ATTACK]: 101 | try: 102 | crew = self.team.get_establishing_foothold_deploy_attack_crew(target.asset, target.plan) 103 | out = crew.kickoff() 104 | # TODO 判定攻击是否成功 105 | target.status = StateDeployAttack.FINISH 106 | target.update_status() 107 | 108 | logger.info("[attack {}] {}\n{}", state['task_id'], target.target, out) 109 | except Exception as e: 110 | logger.error("[attack {}] {}\n{}", state['task_id'], target.target, e) 111 | 112 | return state 113 | 114 | def finish(self, state: TaskStateDeployAttack): 115 | """ 116 | 结束任务 TODO 117 | """ 118 | return state 119 | 120 | def edge_shuld_finish(self, state: TaskStateDeployAttack): 121 | """ 122 | 条件边 123 | """ 124 | 125 | for target in state['targets']: 126 | if target.status in [StateDeployAttack.REWORK]: 127 | return 'rework' 128 | return 'pass' 129 | 130 | def _load_attack_plan(self, task_id: int) -> [Target]: 131 | targets = [] 132 | with self.db.DBSession() as session: 133 | wfs = session.query(Workflow).filter( 134 | and_( 135 | Workflow.task_id == task_id, 136 | Workflow.work == workflow_attack_plan.WORK, 137 | Workflow.status == workflow_attack_plan.StatePlanReview.PASS 138 | ) 139 | ).all() 140 | for wf in wfs: 141 | targets.append( 142 | Target( 143 | db=self.db, 144 | task_id=wf.task_id, 145 | target=wf.data['target'], 146 | asset=wf.data['asset'], 147 | plan=wf.data['plan'] 148 | ) 149 | ) 150 | return targets 151 | 152 | 153 | class WorkFlowDeployAttack: 154 | app: CompiledGraph | None = None 155 | debug: bool = False 156 | team: Team | None = None 157 | db: DB | None = None 158 | 159 | def __init__(self, db: DB, team: Team, debug: Optional[bool] = None): 160 | self.db = db 161 | self.team = team 162 | self.debug = debug 163 | 164 | nodes = TaskNodesDeployAttack(db, team) 165 | workflow = StateGraph(TaskStateDeployAttack) 166 | workflow.add_node('init_task', nodes.init_task) 167 | workflow.add_node('deploy', nodes.deploy) 168 | workflow.add_node('attack', nodes.attack) 169 
| workflow.add_node('finish', nodes.finish) 170 | 171 | workflow.set_entry_point('init_task') 172 | workflow.set_finish_point('finish') 173 | 174 | workflow.add_edge('init_task', 'deploy') 175 | workflow.add_edge('deploy', 'attack') 176 | 177 | workflow.add_conditional_edges( 178 | source='attack', 179 | path=nodes.edge_shuld_finish, 180 | path_map={ 181 | 'rework': 'deploy', 182 | 'pass': 'finish' 183 | } 184 | ) 185 | 186 | self.app = workflow.compile(debug=debug) 187 | 188 | def run(self, taskid: int): 189 | state = { 190 | 'task_id': taskid, 191 | 'targets': [] 192 | } 193 | 194 | with self.db.DBSession() as session: 195 | wfs = session.query(Workflow).filter( 196 | and_( 197 | Workflow.task_id == taskid, 198 | Workflow.work == WORK, 199 | Workflow.status != StateDeployAttack.FINISH 200 | ) 201 | ).all() 202 | for wf in wfs: 203 | state['targets'].append( 204 | Target( 205 | db=self.db, 206 | task_id=wf.task_id, 207 | target=wf.data['target'], 208 | asset=wf.data['asset'], 209 | plan=wf.data['plan'], 210 | workflow_id=wf.id, 211 | status=StateDeployAttack(wf.status) 212 | ) 213 | ) 214 | 215 | self.app.invoke(state) 216 | --------------------------------------------------------------------------------
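附:最小串联示例(示意草图,非仓库源码)。下面的代码展示 team.py 与两个 workflow 如何组合成 "制定攻击计划 -> 部署攻击" 的流水线;其中 DB、RAG 与 llm 的具体构造参数未在本节源码中给出,均为假设占位,Team 与两个 WorkFlow 的调用方式则以上文源码为准。

# demo_workflow.py  示意草图(假设 DB/RAG/llm 的构造方式)
from persistence.database import DB
from rag.rag import RAG
from team import Team
from workflow_attack_plan import WorkFlowAttackPlan
from workflow_deploy_attack import WorkFlowDeployAttack

db = DB(...)    # 假设:按 persistence/database.py 的实际签名构造
rag = RAG(...)  # 假设:按 rag/rag.py 的实际签名构造
llm = None      # 假设:任意 langchain 兼容的 LLM 实例

# 工具路径参数(masscan_path、nmap_path、nuclei_path 等)均为可选,此处省略
team = Team(db=db, rag=rag, llm=llm, debug=True)

task_id = 1  # 假设:侦察 / 漏扫流程已为该任务写入数据
WorkFlowAttackPlan(db, team, debug=True).run(task_id)    # 生成并审核攻击计划(PASS/FAIL/REWORK)
WorkFlowDeployAttack(db, team, debug=True).run(task_id)  # 仅对审核为 PASS 的计划执行部署与攻击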