├── .gitignore ├── README.md ├── bladerazor.py ├── cdn_servers.yaml ├── cmd ├── knowledge_import.py └── run.py ├── config.py ├── exploits ├── __init__.py ├── attack_surface_research.py ├── deploy_attack.py ├── scanner │ ├── __init__.py │ └── nuclei_scanner_tool.py ├── vul_scan_expert.py └── web │ ├── __init__.py │ ├── curl_tool.py │ ├── gobuster_dir_tool.py │ ├── host_crawler_tool.py │ └── html_parse_info_tool.py ├── helpers ├── __init__.py ├── alienvault_api.py ├── crawler.py ├── fingers │ ├── __init__.py │ ├── assets │ │ ├── arl_finger.json │ │ ├── custom.json │ │ ├── finger.json │ │ └── web_fingerprint_v3.json │ └── web_fingers.py ├── fofa_api.py ├── gobuster.py ├── html_information_leak_analyze.py ├── masscan.py ├── nmap.py ├── nuclei.py ├── security_trails_api.py └── utils.py ├── persistence ├── __init__.py ├── database.py ├── orm.py └── vectordb.py ├── rag ├── __init__.py ├── rag.py └── rag_search_tool.py ├── recon ├── __init__.py ├── active │ ├── __init__.py │ ├── masscan_search_tool.py │ └── nmap_search_tool.py ├── cyber_assets_researcher.py └── passive │ ├── __init__.py │ ├── alienvault_search_tool.py │ ├── cdn_check.py │ ├── fofa_search_tool.py │ └── security_trails_search_tool.py ├── requirements.txt ├── team.py ├── workflow_attack_plan.py ├── workflow_deploy_attack.py └── workflow_pre_attack.py /.gitignore: -------------------------------------------------------------------------------- 1 | ### JetBrains template 2 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 3 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 4 | 5 | # User-specific stuff 6 | .idea 7 | .idea/ 8 | .idea/* 9 | .idea/**/workspace.xml 10 | .idea/**/tasks.xml 11 | .idea/**/usage.statistics.xml 12 | .idea/**/dictionaries 13 | .idea/**/shelf 14 | 15 | # AWS User-specific 16 | .idea/**/aws.xml 17 | 18 | # Generated files 19 | .idea/**/contentModel.xml 20 | 21 | # Sensitive or high-churn files 22 | .idea/**/dataSources/ 23 | .idea/**/dataSources.ids 24 | .idea/**/dataSources.local.xml 25 | .idea/**/sqlDataSources.xml 26 | .idea/**/dynamic.xml 27 | .idea/**/uiDesigner.xml 28 | .idea/**/dbnavigator.xml 29 | 30 | # Gradle 31 | .idea/**/gradle.xml 32 | .idea/**/libraries 33 | 34 | # Gradle and Maven with auto-import 35 | # When using Gradle or Maven with auto-import, you should exclude module files, 36 | # since they will be recreated, and may cause churn. Uncomment if using 37 | # auto-import. 
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# SonarLint plugin
.idea/sonarlint/

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json

### macOS template
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Blade Razor 刃影

An AI-Driven Pentesting Solution.

The asset reconnaissance tooling draws on [OneForAll](https://github.com/shmilylty/OneForAll).

## About

This project was created solely to validate the use of AI in penetration testing.

Penetration testing is, at its core, a security assessment of a target system that applies the experience and knowledge of security professionals through a range of tools and techniques.

With the arrival of LLMs and AI-agent frameworks such as langchain, we can now put the capabilities of large models to work on concrete tasks.

Our experiments confirm that applying LLMs to penetration testing is feasible, and the following four factors are decisive for the end result:

1. Choice of LLM
   Only the parameter count really matters; there is no need to worry about whether the model was fine-tuned for this particular domain, since domain-specific knowledge can be supplied through RAG.
2. RAG
   Effectively an external knowledge base independent of the LLM; as long as the private knowledge base is thorough enough, the model's "experience" will be rich enough.
3. Prompts
   Prompts let us simulate roles such as hacker, security researcher, or security engineer, which helps the LLM better understand what we need.
4. External tools
   The richer the arsenal, the better.

## TODO

Too many items to list...

## Usage

    # Create a Python 3.12 virtual environment; virtualenv works too
    conda create -n bladerazor python=3.12.3
    conda activate bladerazor

    # Update
    conda update --all
    pip install pip-review
    pip-review --local --auto

    # Install dependencies
    pip install -r requirements.txt

## Environment Variables

LLM-related configuration follows langchain's conventions; see the [langchain docs](https://python.langchain.com/v0.1/docs/integrations/llms/) for details.

```
OPENAI_API_BASE=https://xxx # change this if you use ollama or similar
OPENAI_API_KEY=xxxx
OPENAI_MODEL_NAME=gpt-4o

# crewai telemetry; best disabled
OTEL_SDK_DISABLED=true


FOFA_API_KEY=xxxxxxxx
FOFA_EMAIL=fofauser@emailaddress
FOFA_VERSION=ent

GOBUSTER_PATH=/your/gobuster/path/folder
GOBUSTER_WORDLIST_PATH=/your/wordlists/small.txt

NUCLEI_PATH=/your/nuclei/path/folder
NUCLEI_TEMPLATES_PATH=/your/nuclei/path/folder/templates

SECURITYTRAILS_API_KEY=xxxxxxxx
```

## Importing the Knowledge Base

The tool is `cmd/knowledge_import.py`; it uses OpenAI's embedding model by default. See the source for details.

## Running

The entry point currently lives in `bladerazor.py`; see the source for details.

## Database

    docker run --name bladerazor-pg \
        -e POSTGRES_USER=bladerazor \
        -e POSTGRES_PASSWORD=123456 \
        -e POSTGRES_DB=bladerazor \
        -p 15432:5432 \
        -d pgvector/pgvector:pg16

## LLM

Local models are constrained by the available hardware; neither ollama nor LM Studio has run them satisfactorily here.

Only the following models have been tested so far:

| LLM           | Result | Recommended |
|---------------|--------|-------------|
| gpt-3.5-turbo | usable | ⭐⭐⭐     |
| gpt-4o        | usable | ⭐⭐⭐⭐   |

## How It Works

```mermaid
---
title: Pre-attack phase
---
stateDiagram-v2
    state "Reconnaissance" as Recon
    state "Asset mapping" as AssetMapping
    state "Port scanning" as PortScan
    state "Vulnerability scanning" as VulScan
    state "Directory bruteforcing" as DirectoryBruteforcing
    state new_recon_assets_state <<choice>>
    state new_mapping_assets_state <<choice>>
    [*] --> Recon
    Recon --> new_recon_assets_state
    new_recon_assets_state --> Recon: new assets found
    new_recon_assets_state --> AssetMapping: no new assets
    AssetMapping --> new_mapping_assets_state
    new_mapping_assets_state --> Recon: new assets found
    new_mapping_assets_state --> PortScan: no new assets
    PortScan --> VulScan
    VulScan --> DirectoryBruteforcing
    DirectoryBruteforcing --> [*]
```

```mermaid
---
title: Attack phase
---
stateDiagram-v2
    state "Intelligence analysis" as IntelligenceAnalysis
    state "Attack surface analysis" as AttackSurfaceResearch
    state "Foothold research" as EstablishingFootholdResearch
    state "Attack plan review" as AttackPlanReview
    state "Deploy and execute the attack" as DeployAndExecuteTheAttack
    state attack_plan_review_state <<choice>>
    [*] --> IntelligenceAnalysis
    IntelligenceAnalysis --> AttackSurfaceResearch
    AttackSurfaceResearch --> EstablishingFootholdResearch
    EstablishingFootholdResearch --> AttackPlanReview
    AttackPlanReview --> attack_plan_review_state
    attack_plan_review_state --> EstablishingFootholdResearch: rework
    attack_plan_review_state --> [*]: rejected
    attack_plan_review_state --> DeployAndExecuteTheAttack: approved
    DeployAndExecuteTheAttack --> [*]
```
--------------------------------------------------------------------------------
/bladerazor.py:
--------------------------------------------------------------------------------
import logging
import os

import opentelemetry.sdk.trace
from embedchain.config import BaseLlmConfig
from embedchain.embedder.openai import OpenAIEmbedder
from embedchain.llm.openai import OpenAILlm
from sqlalchemy import
func, and_ 9 | 10 | from exploits.attack_surface_research import AttackSurfaceResearch 11 | from workflow_attack_plan import WorkFlowAttackPlan 12 | from workflow_deploy_attack import WorkFlowDeployAttack 13 | from workflow_pre_attack import WorkFlowPreAttack 14 | from persistence.vectordb import NewEmbedChain 15 | from rag.rag import RAG 16 | from rag.rag_search_tool import RagSearchTool 17 | from team import Team 18 | 19 | opentelemetry.sdk.trace.logger.setLevel(logging.CRITICAL) 20 | from langchain_openai import ChatOpenAI 21 | 22 | from persistence.database import DB 23 | from persistence.orm import PenTestTask, Vul, WebInfo, Port 24 | from recon.cyber_assets_researcher import CyberAssetsResearchers 25 | 26 | if __name__ == '__main__': 27 | # TODO 移至 cmd 目录 28 | # PROXY_SOCKS = "http://localhost:1080" 29 | # os.environ['http_proxy'] = PROXY_SOCKS 30 | # os.environ['HTTP_PROXY'] = PROXY_SOCKS 31 | # os.environ['https_proxy'] = PROXY_SOCKS 32 | # os.environ['HTTPS_PROXY'] = PROXY_SOCKS 33 | # os.environ["OPENAI_API_KEY"] = OPENAI_KEY 34 | # os.environ["OPENAI_MODEL_NAME"] = "gpt-3.5-turbo" 35 | 36 | debug = True 37 | # 初始化数据库 38 | db = DB( 39 | user='bladerazor', 40 | password='123456', 41 | host='localhost', 42 | port=15432, 43 | dbname='bladerazor', 44 | # echo=debug, 45 | ) 46 | 47 | # 初始化RAG 48 | embder = OpenAIEmbedder() 49 | rag = RAG(db=db, embder=embder) 50 | 51 | # ragtool = RagSearchTool(rag) 52 | 53 | # 初始化LLM 54 | llm = ChatOpenAI( 55 | temperature=0.9, 56 | # max_tokens=16385, 57 | # http_client=httpx.Client(proxy=os.environ['PROXY']), 58 | # http_client=httpx.AsyncClient(proxy=os.environ['PROXY']), 59 | ) 60 | 61 | # 初始化团队 62 | team = Team( 63 | db=db, 64 | rag=rag, 65 | llm=llm, 66 | debug=debug, 67 | nmap_path=os.getenv('NMAP_PATH'), 68 | nuclei_path=os.environ['NUCLEI_PATH'], 69 | nuclei_templates_path=os.environ['NUCLEI_TEMPLATES_PATH'], 70 | gobuster_path=os.environ['GOBUSTER_PATH'], 71 | gobuster_wordlist_path=os.environ['GOBUSTER_WORDLIST_PATH'], 72 | ) 73 | 74 | # 目标 75 | target = 'https://www.example.com/' 76 | print('target', target) 77 | task_id = 0 78 | 79 | with db.DBSession() as session: 80 | task = PenTestTask() 81 | task.target = target 82 | task.name = target 83 | session.add(task) 84 | session.commit() 85 | task_id = task.id 86 | 87 | 88 | 89 | # 工作流 - 预攻击 90 | workflow = WorkFlowPreAttack( 91 | db=db, 92 | team=team, 93 | debug=debug 94 | ) 95 | workflow.run(task_id, target) 96 | 97 | # 工作流 - 攻击计划 98 | workflowAttack = WorkFlowAttackPlan( 99 | db=db, 100 | team=team, 101 | debug=debug 102 | ) 103 | 104 | workflowAttack.run(task_id) 105 | 106 | # 工作流 - 部署攻击 107 | workflowDeployAttack = WorkFlowDeployAttack( 108 | db=db, 109 | team=team, 110 | debug=debug 111 | ) 112 | 113 | workflowDeployAttack.run(task_id) 114 | -------------------------------------------------------------------------------- /cmd/knowledge_import.py: -------------------------------------------------------------------------------- 1 | from embedchain.embedder.openai import OpenAIEmbedder 2 | 3 | from persistence.database import DB 4 | from rag.rag import RAG 5 | 6 | 7 | def add_knowledge(): 8 | print('add_knowledge') 9 | pass 10 | 11 | 12 | if __name__ == '__main__': 13 | # TODO 修改为命令行模式 14 | db = DB( 15 | user='bladerazor', 16 | password='123456', 17 | host='localhost', 18 | port=15432, 19 | dbname='bladerazor', 20 | # echo=debug, 21 | ) 22 | 23 | # 默认嵌入式模型为 text-embedding-ada-002 24 | embder = OpenAIEmbedder() 25 | 26 | rag = RAG(db=db, embder=embder) 27 | # 添加目录,例如某个从git克隆的知识库 28 | 
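    # 注:以下目录与URL均为示例占位符,运行前请替换为实际的知识库来源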
rag.add_knowledge_folder('/my/knowledge/folder') 29 | # 添加url,例如某个在线的知识 30 | rag.add_knowledge_url("urlfor://online/sec/knowledge") -------------------------------------------------------------------------------- /cmd/run.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__': 2 | # TODO 程序入口 3 | pass 4 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from loguru import logger 4 | 5 | stdout_fmt = '{time:HH:mm:ss,SSS} ' \ 6 | '[{level: <5}] ' \ 7 | '{module}:{line} - ' \ 8 | '{message}' 9 | 10 | logger.remove() 11 | logger.level(name='TRACE', color='') 12 | logger.level(name='DEBUG', color='') 13 | logger.level(name='INFOR', no=20, color='') 14 | logger.level(name='QUITE', no=25, color='') 15 | logger.level(name='ALERT', no=30, color='') 16 | logger.level(name='ERROR', color='') 17 | logger.level(name='FATAL', no=50, color='') 18 | 19 | logger.add(sys.stderr, level='DEBUG', format=stdout_fmt, enqueue=True) 20 | -------------------------------------------------------------------------------- /exploits/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/__init__.py -------------------------------------------------------------------------------- /exploits/attack_surface_research.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | from crewai_tools import BaseTool 5 | 6 | from persistence.database import DB 7 | from config import logger 8 | from rag.rag import RAG 9 | from rag.rag_search_tool import RagSearchTool 10 | 11 | 12 | class AttackSurfaceResearch: 13 | """ 14 | 攻击面研究 15 | 根据已探明的网络资产,进行漏洞分析与利用。 16 | """ 17 | 18 | def __init__(self, db: DB, rag: RAG, llm=None, verbose: bool = False): 19 | self.llm = llm 20 | self.db = db 21 | self.rag = rag 22 | self.verbose = verbose 23 | 24 | def _getKnowledgeTools(self) -> []: 25 | tools = [] 26 | tools.append(RagSearchTool(self.rag, self.llm, self.verbose)) 27 | return tools 28 | 29 | def task_information_leak_research(self, agent: Agent, assets: str) -> Task: 30 | logger.info("初始化任务 信息泄露研究") 31 | return Task( 32 | agent=agent, 33 | description=dedent( 34 | f""" 35 | 识别并分析有用的实体,需要注意以下几点: 36 | * 实体可能是敏感信息,如密码、密钥等。 37 | * 实体可能是系统信息,如版本信息、路径信息等。 38 | * 实体可能是配置信息,如数据库配置、系统配置等。 39 | * 实体可能是用户信息,如用户名等。 40 | 41 | 请特别留意以下资产: 42 | {assets} 43 | """), 44 | expected_output=dedent( 45 | """ 46 | 最终答案应包括你发现的所有敏感信息的列表。 47 | 只输出已发现的敏感信息。 48 | 不要将原始输入作为输出。 49 | 确保信息分类明确,例如:密码、密钥、版本信息、路径信息、数据库配置、系统配置、用户名等。 50 | """), 51 | ) 52 | 53 | def agent_intelligence_analysis_expert(self, llm=None) -> Agent: 54 | logger.info("初始化代理 情报分析专家") 55 | agent = Agent( 56 | role='情报分析专家', 57 | goal='根据目标的所有信息,提取有用的情报,为后续工作提供支持', 58 | backstory=dedent( 59 | """ 60 | 你是一名经验丰富的情报分析专家,隶属于攻击团队。 61 | 你的主要任务是利用目标的所有已知信息,查找对工作有利的情报。 62 | 你擅长发现潜在的情报,如敏感信息、漏洞信息等,这些信息有助于了解系统结构并找到潜在漏洞。 63 | 64 | 以下是需要特别注意的方面,包括但不限于: 65 | * 源代码 66 | * 注释 67 | * 隐藏字段 68 | * 错误信息 69 | * 错误页面中的堆栈跟踪信息 70 | * 版本信息 71 | * 文件路径 72 | * 电子邮件地址 73 | * IP地址 74 | * Meta标签 75 | * SQL报错信息 76 | 77 | 通过详细分析这些方面的信息,你将能够提取有用的情报,为团队的后续工作提供关键支持。 78 | """), 79 | verbose=self.verbose, 80 | allow_delegation=True, 81 | max_rpm=300, 82 | # max_iter=1, 83 | llm=llm, 84 | 
cache=False, 85 | ) 86 | 87 | if llm is not None: 88 | agent.llm = llm 89 | return agent 90 | 91 | def intelligenceAnalysisCrew(self, target: str): 92 | agents = [] 93 | tasks = [] 94 | 95 | agiae = self.agent_intelligence_analysis_expert(self.llm) 96 | agents.append(agiae) 97 | tasks.append(self.task_information_leak_research(agiae, target)) 98 | 99 | logger.info("初始化智能体 情报分析") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False, 105 | cache=False 106 | ) 107 | 108 | def task_attack_surface_research(self, agent: Agent, target: str, intelligence: str) -> Task: 109 | logger.info("初始化任务 攻击面研究") 110 | return Task( 111 | agent=agent, 112 | description=dedent( 113 | f""" 114 | 对目标的技术栈、指纹信息、暴露的端口服务、漏扫结果以及掌握的情报进行深入分析研究,以找到所有可能的攻击面。 115 | 116 | 你应当遵循以下思路: 117 | 1. 目标是CMS还是程序?例如WordPress或phpMyAdmin。 118 | 确定目标类型后,才能找到对应的漏洞。 119 | 2. 哪些前置代理了流量?例如CDN、Nginx或Apache。 120 | 有些程序自带前置(如Go),有些程序需要前置代理(如PHP需要Nginx或Apache)。 121 | 如果存在前置,那么需要分别找漏洞,甚至有些漏洞需要前置与后端配合。 122 | 3. 有哪些情报可以利用? 123 | 如果页面中包含一些敏感信息(如报错信息、注释、路径等),这些信息有助于推测目标程序及版本。 124 | 125 | 按上述思路操作,避免发生低级错误(如在PHP的目标上找Java的漏洞)。 126 | 127 | 需要注意的方面包括但不限于以下内容: 128 | * 对于已明确指纹信息的目标,根据指纹信息找到对应的漏洞。 129 | * 不要错误地将其他产品的漏洞应用到目标上。 130 | * 注意产品一致性,例如Microsoft Office的漏洞不适用于WordPress,Windows的漏洞不适用于Linux。 131 | * 确保漏洞与目标匹配,例如Java框架的漏洞不适用于PHP框架。 132 | * 考虑漏洞的利用条件,例如是否需要登录、是否需要特定的请求等。 133 | * 描述漏洞的利用方法,例如是否需要上传文件、是否需要执行命令等。 134 | * 评估漏洞的风险等级,例如是否可以获取root权限、是否可以获取敏感信息等。 135 | * 考虑漏洞的利用复杂度,例如是否需要特定的环境、是否需要特定的工具等。 136 | * 如果前置有CDN,可能存在WAF,需要考虑绕过WAF的方法或找到原始服务器的方法。 137 | * 对于配置不当造成的安全威胁,仅关注对目标造成实质性影响的漏洞(如信息泄漏、XSS攻击等)。 138 | 139 | 使用错误的漏洞攻击已确定程序(如使用PHP的漏洞攻击Java程序)将会造成灾难性后果! 140 | 141 | 目标: 142 | {target} 143 | 144 | 已掌握的情报: 145 | {intelligence} 146 | """), 147 | expected_output=dedent( 148 | """ 149 | 最终答案应包括你发现的所有攻击面列表,并根据严重程度排序,越详细越好。 150 | 确保每个攻击面都包含以下信息: 151 | * 漏洞点,具体用于攻击的点,如url地址 152 | * 攻击步骤 153 | * 攻击方法 154 | * 使用工具 155 | * 攻击风险 156 | * 可能的漏洞利用条件和方法 157 | * 漏洞的风险等级和利用复杂度 158 | """), 159 | ) 160 | 161 | def task_establishing_foothold_research_rework(self, agent: Agent, plan: str, 162 | review: str) -> Task: 163 | logger.info("初始化任务 打点研究返工") 164 | return Task( 165 | agent=agent, 166 | description=dedent( 167 | f""" 168 | 根据攻击计划的审核结果,重新进行研究,确保所有潜在攻击面都得到充分考虑。 169 | 170 | 攻击计划: 171 | {plan} 172 | 173 | 审核结果: 174 | {review} 175 | 176 | 你需要进行以下步骤: 177 | 1. 分析审核结果,找出需要修改或补充的部分。 178 | 2. 重新评估每个攻击面,确保考虑到所有可能的漏洞。 179 | 3. 对每个攻击面进行详细研究,找出可能的漏洞利用方法。 180 | 4. 根据严重程度对漏洞进行排序,确保最关键的漏洞优先处理。 181 | 5. 
确保研究结果详细,包括漏洞的利用条件、方法、工具、风险等级和利用复杂度。 182 | 183 | 避免低级错误,例如将不适用于目标系统的漏洞应用到目标上。 184 | """), 185 | expected_output=dedent( 186 | """ 187 | 最终答案应包括每个攻击面的漏洞利用方法,并根据严重程度排序,越详细越好。你是攻击队的一员,不需要提供修复建议,不要编造答案。 188 | 189 | 确保每个漏洞描述包括: 190 | * 攻击步骤 191 | * 攻击方法 192 | * 使用工具 193 | * 攻击风险 194 | * 漏洞的利用条件和方法 195 | * 漏洞的风险等级和利用复杂度 196 | """), 197 | ) 198 | 199 | def task_establishing_foothold_research(self, agent: Agent) -> Task: 200 | logger.info("初始化任务 打点研究") 201 | return Task( 202 | agent=agent, 203 | description=dedent( 204 | f""" 205 | 根据攻击面列表,结合本地知识库,为每一个攻击面找到所有攻击方法。 206 | 207 | 请遵循以下原则: 208 | * 不需要技术检测类的方案,如指纹识别等。 209 | * 对于已明确指纹信息的目标,需要根据指纹信息找到对应的漏洞。 210 | * 不要错误地将其他产品的漏洞应用到目标上。 211 | * 注意产品一致性,例如Microsoft Office的漏洞不适用于WordPress,Windows的漏洞不适用于Linux。 212 | * 确保漏洞与目标匹配,例如Java框架的漏洞不适用于PHP框架。 213 | * 考虑漏洞的利用条件,例如是否需要登录、是否需要特定的请求等。 214 | * 描述漏洞的利用方法,例如是否需要上传文件、是否需要执行命令等。 215 | * 评估漏洞的风险等级,例如是否可以获取root权限、是否可以获取敏感信息等。 216 | * 考虑漏洞的利用复杂度,例如是否需要特定的环境、是否需要特定的工具等。 217 | * 如果前置有CDN,可能存在WAF,需要考虑绕过WAF的方法或找到原始服务器的方法。 218 | * 对于配置不当造成的安全威胁,仅关注对目标造成实质性影响的漏洞(如信息泄漏、XSS攻击等)。 219 | 220 | 你应当确认每个漏洞的类型、风险等级以及利用复杂度。 221 | 222 | 避免低级错误,例如在已确定技术栈的程序中使用错误的漏洞(如使用PHP的漏洞攻击Java程序),这将会造成灾难性后果! 223 | """), 224 | expected_output=dedent( 225 | """ 226 | 最终答案应包括每个攻击面的漏洞利用方法,并根据严重程度排序,越详细越好。你是攻击队的一员,不需要提供修复建议,不要编造答案。 227 | 228 | 确保每个漏洞描述包括: 229 | * 攻击步骤 230 | * 攻击方法 231 | * 使用工具 232 | * 攻击风险 233 | * 漏洞的利用条件和方法 234 | * 漏洞的风险等级和利用复杂度 235 | """), 236 | ) 237 | 238 | def agent_establishing_foothold_research_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 239 | logger.info("初始化代理 打点研究专家") 240 | agent = Agent( 241 | role='打点研究专家', 242 | goal='根据目标的已知情报,结合本地知识库,研究并制定一套渗透方案', 243 | backstory=dedent( 244 | """ 245 | 你是一名经验丰富的打点研究专家,隶属于攻击团队。 246 | 你的主要任务是利用已知的各种情报,对目标制定一套详细且可执行的渗透方案。 247 | 方案中不需要给出修复建议,而是专注于攻击策略的制定。 248 | 249 | 任务要求: 250 | 1. 利用RAG搜索工具,围绕目标的技术栈进行深入搜索。 251 | 2. 详细描述每一步攻击步骤、方法、工具以及可能的攻击风险。 252 | 3. 输出的攻击方案需要包括: 253 | - 攻击路径的选择 254 | - 可能的攻击入口点 255 | - 每个步骤的具体执行方法 256 | - 所需工具及其使用方法 257 | - 潜在的风险评估及规避策略 258 | 259 | 使用搜索工具如RAG时应注意以下几点: 260 | 1. 应使用详细的关键词,针对性地搜索与目标技术栈相关的漏洞和配置缺陷。 261 | 2. 避免查询与目标明显不相关的内容,如特定的IP、域名或无关的技术栈。 262 | 3. 系统性的审核这些工具的输出,确保每项结果都与实际的安全目标直接相关。 263 | 审核过程中应遵循的步骤包括: 264 | 1. 核实每一条输出结果是否与搜索关键词直接相关。任何与查询技术栈不符的结果都应被标记为不相关。 265 | 2. 识别和记录结果中的任何可能误导的信息,如错误的技术细节或与查询不符的上下文。 266 | 3. 对每个不相关或误导性的结果提供简要的分析说明,解释为什么这些结果不适合当前的搜索需求。 267 | 4. 
提出修改搜索策略的建议,如更换或细化关键词,以提高未来搜索的相关性和效果。 268 | 269 | 示例: 270 | 搜索关键词:'Apache Struts OGNL injection' 271 | 搜索结果:'Apache Struts version 2.3.31 RCE vulnerability details' 272 | 审核步骤:确认搜索结果与关键词的相关性,验证漏洞详情的准确性和目标系统的适用性。 273 | 274 | 如果结果涉及到不相关的框架,如'.NET framework vulnerability',则标记为不相关,并在报告中指出。 275 | 276 | 你的工作至关重要,攻击方案的质量将直接影响团队后续工作的开展和成功率。 277 | 278 | 如果无法找到合适的攻击方案,直接输出字符串`FAIL`。 279 | """), 280 | tools=tools, 281 | verbose=self.verbose, 282 | allow_delegation=True, 283 | max_rpm=300, 284 | # max_iter=1, 285 | llm=llm, 286 | cache=False, 287 | ) 288 | 289 | if llm is not None: 290 | agent.llm = llm 291 | return agent 292 | 293 | def establishingFootholdResearchCrew(self, target: str, intelligence: str): 294 | agents = [] 295 | tasks = [] 296 | tools = self._getKnowledgeTools() 297 | 298 | ag = self.agent_establishing_foothold_research_expert(self.llm, tools) 299 | agents.append(ag) 300 | 301 | tasks.append(self.task_attack_surface_research(ag, target, intelligence)) 302 | tasks.append(self.task_establishing_foothold_research(ag)) 303 | 304 | logger.info("初始化智能体 打点研究") 305 | return Crew( 306 | agents=agents, 307 | tasks=tasks, 308 | verbose=self.verbose, 309 | share_crew=False, 310 | cache=False 311 | ) 312 | 313 | def task_attack_plan_review(self, agent: Agent, assets: str, intelligence: str, plan: str) -> Task: 314 | logger.info("初始化任务 攻击方案审核") 315 | return Task( 316 | agent=agent, 317 | description=dedent( 318 | f""" 319 | 结合本地知识库,对攻击面与攻击计划进行审核。 320 | 你的任务是审核攻击面与攻击计划是否匹配。 321 | 322 | 审核思路: 323 | 1. 目标的技术栈是否明确? 324 | 确认方案是否按照已知情报正确识别出目标的程序(如WordPress或phpMyAdmin)。 325 | 2. 使用的漏洞是否匹配目标技术栈? 326 | 确认方案是否使用了与目标技术栈不匹配的漏洞(如使用PHP的漏洞攻击Java程序)。 327 | 如果使用了不匹配的漏洞,请移除相应方案。 328 | 329 | 基本原则: 330 | * 攻击方案可能很长,不要因为一点而全盘否定。 331 | * 不要因为攻击面缺失实体而否定攻击方案。 332 | * 确保漏洞与目标匹配(如Java框架的漏洞不适用于PHP框架)。 333 | * 注意漏洞的利用条件(如操作系统是否与目标匹配)。 334 | * 不能遗漏高危攻击面。 335 | * 不要混淆目标实体与漏洞实体。目标实体是你要进行渗透的对象,漏洞实体是漏洞的利用条件、特性以及关联的程序类型等信息的集合。 336 | * 如果仅有少部分不合理,可以提出优化建议,而非直接否决。 337 | 338 | 避免低级错误,例如对已确定技术栈的程序使用错误的漏洞(如使用PHP的漏洞攻击Java程序),这将会造成灾难性后果。 339 | 340 | 审核内容: 341 | 攻击面: 342 | {assets} 343 | 344 | 已掌握的情报: 345 | {intelligence} 346 | 347 | 攻击计划: 348 | {plan} 349 | """), 350 | expected_output=dedent( 351 | """ 352 | 最终答案应为以下三种类型之一: 353 | 354 | 如果审核通过: 355 | 只需要回答字符串`PASS` 356 | 如果不通过且方案完全不可行: 357 | 以字符串`FAIL`开头,后面跟上不可行的原因 358 | 如果不通过但需要优化: 359 | 返回优化后的攻击方案,并以字符串`OPTIMIZE`开头 360 | """), 361 | ) 362 | 363 | def agent_attack_plan_review_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 364 | logger.info("初始化代理 攻击方案审核专家") 365 | agent = Agent( 366 | role='攻击方案审核专家', 367 | goal='根据目标的已知情报,结合本地知识库,对攻击方案进行审核', 368 | backstory=dedent( 369 | """ 370 | 你是一名经验丰富的攻击方案审核专家,隶属于攻击团队。 371 | 你的主要任务是利用已知的各种情报与知识对攻击方案进行审核。 372 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 373 | 374 | 使用搜索工具如RAG时应注意以下几点: 375 | 1. 应使用详细的关键词,针对性地搜索与目标技术栈相关的漏洞和配置缺陷。 376 | 2. 避免查询与目标明显不相关的内容,如特定的IP、域名或无关的技术栈。 377 | 3. 系统性的审核这些工具的输出,确保每项结果都与实际的安全目标直接相关。 378 | 审核过程中应遵循的步骤包括: 379 | 1. 核实每一条输出结果是否与搜索关键词直接相关。任何与查询技术栈不符的结果都应被标记为不相关。 380 | 2. 识别和记录结果中的任何可能误导的信息,如错误的技术细节或与查询不符的上下文。 381 | 3. 对每个不相关或误导性的结果提供简要的分析说明,解释为什么这些结果不适合当前的搜索需求。 382 | 4. 提出修改搜索策略的建议,如更换或细化关键词,以提高未来搜索的相关性和效果。 383 | 384 | 示例: 385 | 搜索关键词:'Apache Struts OGNL injection' 386 | 搜索结果:'Apache Struts version 2.3.31 RCE vulnerability details' 387 | 审核步骤:确认搜索结果与关键词的相关性,验证漏洞详情的准确性和目标系统的适用性。 388 | 389 | 如果结果涉及到不相关的框架,如'.NET framework vulnerability',则标记为不相关,并在报告中指出。 390 | 391 | 392 | 你深知: 393 | * 没有绝对的安全,只有相对的安全。 394 | * 一个好的攻击方案是成功的一半。 395 | * 防火墙并不是绝对的障碍,总有绕过的方法。 396 | * 你的任务是找到最佳和最适合当前情报的攻击方案,而不仅仅是寻找可用的方案。 397 | 398 | 在审核过程中,你需要: 399 | 1. 
核实攻击方案是否充分利用已知情报。 400 | 2. 确保方案的实施方式与目标技术栈兼容。 401 | 3. 评估方案的可行性,避免使用不匹配或过时的攻击策略。 402 | 4. 优化方案,以提高攻击成功率和效率。 403 | """), 404 | tools=tools, 405 | verbose=self.verbose, 406 | allow_delegation=True, 407 | max_rpm=300, 408 | # max_iter=1, 409 | llm=llm, 410 | cache=False, 411 | ) 412 | 413 | if llm is not None: 414 | agent.llm = llm 415 | return agent 416 | 417 | def attackPlanReviewCrew(self, assets: str, intelligence: str, plan: str, review: str | None = None): 418 | agents = [] 419 | tasks = [] 420 | tools = self._getKnowledgeTools() 421 | 422 | ag = self.agent_attack_plan_review_expert(self.llm, tools) 423 | agents.append(ag) 424 | 425 | tasks.append(self.task_attack_surface_research(ag, assets, intelligence)) 426 | tasks.append(self.task_establishing_foothold_research(ag)) 427 | tasks.append(self.task_attack_plan_review(ag, assets, intelligence, plan)) 428 | 429 | if review is not None: 430 | tasks.append(self.task_establishing_foothold_research_rework(ag, plan, review)) 431 | 432 | logger.info("初始化智能体 攻击方案审核") 433 | return Crew( 434 | agents=agents, 435 | tasks=tasks, 436 | verbose=self.verbose, 437 | share_crew=False, 438 | cache=True 439 | ) 440 | -------------------------------------------------------------------------------- /exploits/deploy_attack.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | from crewai_tools import BaseTool 5 | 6 | from exploits.web.curl_tool import CurlTool 7 | from persistence.database import DB 8 | from rag.rag import RAG 9 | from config import logger 10 | 11 | 12 | class DeployAttack: 13 | """ 14 | 部署攻击 15 | """ 16 | 17 | def __init__(self, db: DB, rag: RAG, llm=None, verbose: bool = False): 18 | self.llm = llm 19 | self.db = db 20 | self.rag = rag 21 | self.verbose = verbose 22 | 23 | def _getPenTestTools(self) -> []: 24 | tools = [] 25 | tools.append(CurlTool()) 26 | return tools 27 | 28 | def task_establishing_foothold_attack(self, agent: Agent, asset: str, plan: str) -> Task: 29 | logger.info("初始化任务 实施打点") 30 | return Task( 31 | agent=agent, 32 | description=dedent( 33 | f""" 34 | 你的任务是根据攻击计划,针对目标灵活运用策略并合理使用各种工具,成功利用漏洞,最终取得敏感信息或得到控制权。 35 | 这一过程将涉及多种渗透工具和策略,以确保在目标网络或系统中安全且隐蔽地实施打点行动。 36 | 在整个任务过程中,务必保持隐蔽性和安全性,避免任何可能引发目标系统警觉的行为。 37 | 38 | 你应该根据你所使用的工具的特性,分析其输出,以此来确定你的攻击是否成功。 39 | 40 | 注意,首先判断攻击计划是否合理,然后根据计划执行攻击。 41 | 42 | 目标: 43 | {asset} 44 | 45 | 攻击计划: 46 | {plan} 47 | """), 48 | expected_output=dedent( 49 | """ 50 | 对于敏感信息泄漏类型的任务, 51 | 最终答案应包括你发现的所有敏感信息的列表。 52 | 只输出已发现的敏感信息。 53 | 不要将原始输入作为输出。 54 | 确保信息分类明确,例如:密码、密钥、版本信息、路径信息、数据库配置、系统配置、用户名等。 55 | 56 | 对于控制权获取类型的任务, 57 | 最终答案应包括你成功获取的控制权的列表。 58 | """), 59 | ) 60 | 61 | def agent_establishing_foothold_attacker(self, llm=None, tools: [BaseTool] = []) -> Agent: 62 | logger.info("初始化代理 打点实施专家") 63 | agent = Agent( 64 | role='打点实施专家', 65 | goal='通过高效利用已识别的安全漏洞,在目标网络或系统中建立并维持立足点,确保后续的渗透测试和长期的网络侦察得以成功进行。', 66 | backstory=dedent( 67 | """ 68 | 你是一名经验丰富的打点专家,隶属于攻击团队。 69 | 你的核心任务是通过巧妙的技术手段和深厚的安全知识,在目标系统中建立持久的访问权限。 70 | 你精通多种渗透工具和技术,能够在复杂的网络环境中识别并利用微妙的安全漏洞。 71 | 你的技能不仅限于技术渗透,还包括对攻击策略的深入理解,能够识别并利用复杂系统的微妙弱点。 72 | 你的主要职责是通过技术手段建立初始的访问权,并扩展这些访问权限以形成持久的立足点。 73 | 你的工作对于后续的渗透阶段至关重要,因为它们提供了进行深入分析和数据窃取所需的访问基础。 74 | 75 | 你的专业知识和战术洞察力使你成为攻击团队中不可或缺的一员,你对网络安全的热情和对挑战的渴望推动你不断寻找更有效的渗透方法。 76 | """), 77 | tools=tools, 78 | verbose=self.verbose, 79 | allow_delegation=True, 80 | max_rpm=300, 81 | # max_iter=1, 82 | llm=llm, 83 | cache=False, 84 | ) 85 | 86 | if llm is not None: 87 | agent.llm = llm 88 | return agent 89 
| 90 | def establishingFootholdAttackCrew(self, asset: str, plan: str): 91 | agents = [] 92 | tasks = [] 93 | tools = self._getPenTestTools() 94 | 95 | agefa = self.agent_establishing_foothold_attacker(self.llm, tools=tools) 96 | agents.append(agefa) 97 | tasks.append(self.task_establishing_foothold_attack(agefa, asset, plan)) 98 | 99 | logger.info("初始化智能体 打点实施") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False, 105 | cache=False 106 | ) 107 | -------------------------------------------------------------------------------- /exploits/scanner/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/scanner/__init__.py -------------------------------------------------------------------------------- /exploits/scanner/nuclei_scanner_tool.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import tempfile 4 | from datetime import datetime 5 | from typing import Type, Any 6 | 7 | import validators 8 | from crewai_tools import BaseTool 9 | from embedchain import App 10 | from embedchain.models.data_type import DataType 11 | from pydantic.v1 import BaseModel, Field 12 | from sqlalchemy import exc 13 | 14 | from helpers.nuclei import Nuclei 15 | from helpers.utils import is_domain 16 | from persistence.database import DB 17 | from config import logger 18 | from persistence.orm import Vul, DuplicateException 19 | 20 | 21 | class NucleiScannerToolSchema(BaseModel): 22 | """NucleiScannerToolSchema 的查询参数""" 23 | target: str = Field(..., description="host地址或ip地址, 例如:`https://example.com` 或 `example.com` 或 `1.1.1.1`") 24 | template: str = Field(None, description="模板名称, 例如:`http` 或 `vulnerabilities`") 25 | 26 | 27 | class NucleiScannerTool(BaseTool): 28 | name: str = "Nuclei" 29 | description: str = "一款自动化的安全漏洞检测工具,可以进行进行快速、定制化的漏洞扫描。支持域名、URL、IP地址等。" 30 | args_schema: Type[BaseModel] = NucleiScannerToolSchema 31 | db: DB | None = None 32 | embedder: App | None = None 33 | nuclei_path: str | None = None 34 | templates_path: str | None = None 35 | task_id: int | None = None 36 | 37 | class Config: 38 | arbitrary_types_allowed = True 39 | 40 | def _init_embedder(self, embedder_cfg): 41 | self.embedder = App.from_config(config=embedder_cfg) 42 | 43 | data_sources = self.embedder.get_data_sources() 44 | if len(data_sources) > 0: 45 | return 46 | nuclei = Nuclei(nucleiAbsPath=self.nuclei_path, templatesAbsPath=self.templates_path) 47 | tpls = nuclei.nucleiTemplates(False) 48 | outputPath = os.path.join(tempfile.gettempdir(), 'nuclei_tpl_tmp.csv') 49 | datalist = [] 50 | for tpl in tpls: 51 | tpldata = { 52 | 'template': tpl.file, 53 | 'dir': tpl.dir, 54 | 'name': tpl.name.strip() if tpl.name is not None else None, 55 | 'tags': ','.join(tpl.tags) if tpl.tags is not None and len(tpl.tags) > 0 else '', 56 | 'description': tpl.description.strip() if tpl.description is not None else None, 57 | } 58 | 59 | datalist.append(tpldata) 60 | keys = datalist[0].keys() 61 | with open(outputPath, 'w', newline='', encoding='utf-8') as f: 62 | dict_writer = csv.DictWriter(f, fieldnames=keys) 63 | dict_writer.writeheader() 64 | dict_writer.writerows(datalist) 65 | 66 | self.embedder.add(source=outputPath, data_type=DataType.CSV) 67 | os.remove(outputPath) 68 | 69 | def __init__(self, db: DB, task_id: int, embedder_cfg: dict = None, nuclei_path: str = None, 70 | 
templates_path: str = None): 71 | super().__init__() 72 | self.db = db 73 | self.task_id = task_id 74 | self.nuclei_path = nuclei_path 75 | self.templates_path = templates_path 76 | logger.info("初始化工具 Nuclei") 77 | # TODO 需要完善推理逻辑 78 | # self._init_embedder(embedder_cfg) 79 | 80 | def reasoningTemplates(self, target) -> [str]: 81 | # TODO 需要推理合适的模板 82 | # answer, sources = self.embedder.query( 83 | # f'给我多个适合以下目标的模板名称:\n{target}', citations=True, 84 | # kwargs={'n_results': 100}) 85 | return ['http', 'network'] 86 | 87 | def _run( 88 | self, 89 | **kwargs: Any, 90 | ) -> Any: 91 | target = kwargs.pop('target', "") 92 | if target == "": 93 | return "target为空" 94 | target_type = "" 95 | if validators.url(target): 96 | target_type = "url" 97 | elif validators.ipv4(target): 98 | target_type = "ipv4" 99 | elif validators.ipv6(target): 100 | target_type = "ipv6" 101 | elif is_domain(target): 102 | target_type = "domain" 103 | else: 104 | return "target不合法" 105 | 106 | template = kwargs.pop('template', None) 107 | try: 108 | logger.info("Nuclei: {} {}", target, template) 109 | now = datetime.now() 110 | try: 111 | nuclei = Nuclei(nucleiAbsPath=self.nuclei_path, templatesAbsPath=self.templates_path) 112 | result = nuclei.scan(target, self.reasoningTemplates(target_type)) 113 | if len(result) == 0: 114 | return "未找到任何结果" 115 | 116 | with self.db.DBSession() as session: 117 | for data in result: 118 | vuldb = Vul() 119 | vuldb.task_id = self.task_id 120 | vuldb.target = target 121 | vuldb.host = data.host 122 | vuldb.type = data.type 123 | vuldb.vul_name = data.vulnerability_name 124 | vuldb.vul_detail = data.vulnerability_detail 125 | vuldb.vul_point = data.vulnerable_at 126 | vuldb.solution = data.solution 127 | vuldb.cve_id = data.cve_id 128 | vuldb.cwe_id = data.cwe_id 129 | vuldb.severity = data.severity 130 | vuldb.description = data.description 131 | vuldb.extra_info = data.dict( 132 | exclude_none=True, 133 | exclude={'host', 'type', 'vulnerability_name', 'vulnerability_detail', 134 | 'vulnerable_at', 'solution', 'cve_id', 'cwe_id', 'severity', 135 | 'description'}) 136 | vuldb.source = self.name 137 | vuldb.created = now 138 | try: 139 | session.add(vuldb) 140 | session.commit() 141 | except DuplicateException as e: 142 | session.rollback() 143 | except Exception as e: 144 | raise 145 | return f"共发现{len(result)}个漏洞" 146 | except exc.SQLAlchemyError as e: 147 | logger.error("数据库错误: {}", e) 148 | return "数据库错误" 149 | except Exception as e: 150 | logger.error("其他错误: {}", e) 151 | return f"其他错误: {e}" 152 | except Exception as e: 153 | logger.error("获取失败: {}", e) 154 | return f"获取失败: {e}" 155 | -------------------------------------------------------------------------------- /exploits/vul_scan_expert.py: -------------------------------------------------------------------------------- 1 | from ipaddress import ip_address 2 | from textwrap import dedent 3 | 4 | import validators 5 | from crewai import Agent, Task, Crew 6 | from crewai_tools import BaseTool 7 | from sqlalchemy import select, and_ 8 | 9 | from exploits.scanner.nuclei_scanner_tool import NucleiScannerTool 10 | from exploits.web.gobuster_dir_tool import GobusterDirTool 11 | from exploits.web.host_crawler_tool import HostCrawlerTool 12 | from helpers.utils import is_domain 13 | from persistence.database import DB 14 | from config import logger 15 | from persistence.orm import WebInfo, UrlEnum 16 | 17 | 18 | class VulScanExpert: 19 | """ 20 | 漏洞分析专家 21 | 根据已探明的网络资产,进行漏洞分析。 22 | """ 23 | 24 | # TODO 25 | def __init__(self, db: DB, 
llm=None, nuclei_path: str = None, nuclei_templates_path: str = None, 26 | gobuster_path: str = None, gobuster_wordlist_path: str = None, 27 | verbose: bool = False): 28 | self.llm = llm 29 | self.db = db 30 | self.nuclei_path = nuclei_path 31 | self.nuclei_templates_path = nuclei_templates_path 32 | self.gobuster_path = gobuster_path 33 | self.gobuster_wordlist_path = gobuster_wordlist_path 34 | self.verbose = verbose 35 | 36 | def agent_cyber_assets_fingerprinting_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 37 | logger.info("初始化代理 网络资产指纹识别专家") 38 | agent = Agent( 39 | role='网络资产指纹识别专家', 40 | goal='对网络资产进行指纹识别,最终掌握目标的所有特征,为后续工作打下基础。', 41 | backstory=dedent( 42 | """ 43 | 你是一名经验丰富的网络资产指纹识别专家,隶属于攻击团队。 44 | 你的主要任务是利用指纹识别工具分析网络服务的特征,例如端口提供的服务、web程序的开发语言、web程序使用的中间件等等。 45 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 46 | """), 47 | tools=tools, 48 | verbose=self.verbose, 49 | allow_delegation=True, 50 | max_rpm=300, 51 | # max_iter=1, 52 | llm=llm, 53 | cache=False, 54 | ) 55 | 56 | if llm is not None: 57 | agent.llm = llm 58 | return agent 59 | 60 | def task_cyber_assets_fingerprinting(self, agent: Agent, target: str) -> Task: 61 | logger.info("初始化任务 网络资产指纹识别") 62 | return Task( 63 | agent=agent, 64 | description=dedent( 65 | f""" 66 | 使用多种指纹识别工具对目标进行指纹识别,为漏洞扫描和利用提供基础信息: 67 | - 探明暴露的TCP/UDP端口的服务, 例如探明某个端口是SSH服务还是HTTP服务,以及服务的版本信息。 68 | - 探明Web应用程序的类型和版本信息,例如探明某个Web应用程序是WordPress还是Joomla,以及Web应用程序的版本信息。 69 | 70 | 目标: `{target}` 71 | """), 72 | expected_output=dedent( 73 | """ 74 | 最终答案是本次探测结果数量,具体的结果已存储在数据库中。不要编造其他额外内容。 75 | """), 76 | ) 77 | 78 | def _getFingerPrintingTools(self, task_id: int, target) -> []: 79 | tools = [] 80 | if validators.url(target) or is_domain(target): 81 | tools.append(HostCrawlerTool(self.db, task_id)) 82 | return tools 83 | 84 | def _fingerPrintingCrew(self, task_id: int, target: str): 85 | 86 | agents = [] 87 | tasks = [] 88 | tools = self._getFingerPrintingTools(task_id, target) 89 | 90 | if len(tools) > 0: 91 | ag = self.agent_cyber_assets_fingerprinting_expert(self.llm, tools) 92 | agents.append(ag) 93 | 94 | tasks.append(self.task_cyber_assets_fingerprinting(ag, target)) 95 | 96 | if len(agents) == 0 or len(tasks) == 0: 97 | raise Exception("无可用工具") 98 | 99 | logger.info("初始化智能体 指纹识别") 100 | return Crew( 101 | agents=agents, 102 | tasks=tasks, 103 | verbose=self.verbose, 104 | share_crew=False 105 | ) 106 | 107 | def fingerprintingCrew(self, task_id: int, target: str): 108 | if validators.url(target): 109 | # url 110 | logger.info("url目标 {}", target) 111 | elif is_domain(target): 112 | # domain 113 | logger.info("domain目标 {}", target) 114 | else: 115 | try: 116 | ip_address(target) 117 | logger.info("IP目标 {}", target) 118 | except ValueError: 119 | raise ValueError("目标类型不支持") 120 | return self._fingerPrintingCrew(task_id, target) 121 | 122 | def do_directory_bruteforcing(self, task_id: int): 123 | 124 | datas = [] 125 | with self.db.DBSession() as session: 126 | subquery = select(UrlEnum.web_info_id).where(UrlEnum.task_id == task_id).subquery() 127 | infos = session.query(WebInfo).filter( 128 | and_( 129 | WebInfo.task_id == task_id), 130 | WebInfo.id.notin_(subquery) 131 | ).all() 132 | for info in infos: 133 | datas.append((info.id, info.url)) 134 | 135 | for id, url in datas: 136 | gobuster = GobusterDirTool(self.db, task_id, id, self.gobuster_path, self.gobuster_wordlist_path) 137 | gobuster.run(url=url) 138 | 139 | def _getVulScanTools(self, task_id: int) -> []: 140 | tools = [] 141 | if self.nuclei_path and self.nuclei_templates_path: 142 | tools.append( 
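                # 仅当同时配置了nuclei可执行文件路径与模板路径时才注册该工具;
                # 若最终无任何可用工具,vulScanCrew将抛出「无可用工具」异常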
143 | NucleiScannerTool(self.db, task_id, nuclei_path=self.nuclei_path, 144 | templates_path=self.nuclei_templates_path)) 145 | return tools 146 | 147 | def agent_vulnerability_scanning_expert(self, llm=None, tools: [BaseTool] = []) -> Agent: 148 | logger.info("初始化代理 漏扫专家") 149 | agent = Agent( 150 | role='漏洞扫描专家', 151 | goal='对网络资产进行漏洞扫描,以发现潜在的漏洞和弱点', 152 | backstory=dedent( 153 | """ 154 | 你是一名经验丰富的漏洞扫描专家,隶属于攻击团队。 155 | 你的主要任务是利用各种工具对目标进行漏洞扫描,以发现漏洞利用点、关键信息的泄漏、以及其他对渗透有用的各种信息。 156 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 157 | """), 158 | tools=tools, 159 | verbose=self.verbose, 160 | allow_delegation=True, 161 | max_rpm=300, 162 | # max_iter=1, 163 | llm=llm, 164 | cache=False, 165 | ) 166 | 167 | if llm is not None: 168 | agent.llm = llm 169 | return agent 170 | 171 | def task_vulnerability_scanning_expert(self, agent: Agent, target: str) -> Task: 172 | logger.info("初始化任务 漏洞扫描") 173 | return Task( 174 | agent=agent, 175 | description=dedent( 176 | f""" 177 | 对目标进行全面的漏洞扫描,发现尽可能多的安全漏洞。 178 | 179 | 目标: `{target}` 180 | """), 181 | expected_output=dedent( 182 | """ 183 | 最终答案是本次探测结果数量,具体的结果已存储在数据库中。不要编造其他额外内容。 184 | """), 185 | ) 186 | 187 | def vulScanCrew(self, task_id: int, target: str): 188 | if validators.url(target): 189 | # url 190 | logger.info("url目标 {}", target) 191 | elif is_domain(target): 192 | # domain 193 | logger.info("domain目标 {}", target) 194 | else: 195 | try: 196 | ip_address(target) 197 | logger.info("IP目标 {}", target) 198 | except ValueError: 199 | raise ValueError("目标类型不支持") 200 | 201 | agents = [] 202 | tasks = [] 203 | tools = self._getVulScanTools(task_id) 204 | 205 | if len(tools) > 0: 206 | ag = self.agent_vulnerability_scanning_expert(self.llm, tools) 207 | agents.append(ag) 208 | 209 | tasks.append(self.task_vulnerability_scanning_expert(ag, target)) 210 | 211 | if len(agents) == 0 or len(tasks) == 0: 212 | raise Exception("无可用工具") 213 | 214 | logger.info("初始化智能体 漏洞扫描") 215 | return Crew( 216 | agents=agents, 217 | tasks=tasks, 218 | verbose=self.verbose, 219 | share_crew=False 220 | ) 221 | -------------------------------------------------------------------------------- /exploits/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/exploits/web/__init__.py -------------------------------------------------------------------------------- /exploits/web/curl_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Any 2 | 3 | import httpx 4 | import validators 5 | from crewai_tools import BaseTool 6 | from pydantic.v1 import BaseModel, Field 7 | 8 | from config import logger 9 | 10 | 11 | class CurlToolSchema(BaseModel): 12 | """CurlToolSchema 的查询参数""" 13 | url: str = Field(..., description="url地址") 14 | method: str = Field(..., description="请求方法,如 'GET' 或 'POST'") 15 | headers: dict[str, str] = Field(description="字典形式的 HTTP 头部") 16 | data: dict[str, str] = Field(description="用于 POST 请求的表单数据") 17 | json_data: Any = Field(description="用于 POST 请求的 JSON 数据", alias='json') 18 | params: str = Field(description="用于 GET 请求的查询参数") 19 | 20 | 21 | class CurlTool(BaseTool): 22 | name: str = "Curl" 23 | description: str = "送HTTP请求并返回原始HTTP响应格式的字符串" 24 | args_schema: Type[BaseModel] = CurlToolSchema 25 | 26 | def __init__(self): 27 | super().__init__() 28 | logger.info("初始化工具 Curl") 29 | 30 | def _run( 31 | self, 32 | **kwargs: Any, 33 | ) -> Any: 34 | url = kwargs.pop('url', "") 35 | 
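        # 参数由agent以松散类型的kwargs传入,发起请求前先校验url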
        if url == "":
            return "url为空"

        if validators.url(url) is False:
            return "url地址不合法"

        method = kwargs.pop('method', "GET")
        headers = kwargs.pop('headers', None)
        data = kwargs.pop('data', None)
        json = kwargs.pop('json', None)
        params = kwargs.pop('params', None)
        try:
            logger.info("Curl: {}", url)
            with httpx.Client(verify=False) as client:
                response = client.request(
                    method=method,
                    url=url,
                    headers=headers,
                    data=data,
                    json=json,
                    params=params
                )
                status_line = f"{response.http_version} {response.status_code} {response.reason_phrase}\r\n"

                # 头部
                headers = ""
                for name, value in response.headers.items():
                    headers += f"{name}: {value}\r\n"

                # 确保响应体以新行结束
                body = response.text
                if not body.endswith('\r\n'):
                    body += '\r\n'

                # 组装完整的响应字符串
                return f"{status_line}{headers}\r\n{body}"

        except Exception as e:
            logger.error("获取失败: {}", e)
            return f"请求失败: {e}"
--------------------------------------------------------------------------------
/exploits/web/gobuster_dir_tool.py:
--------------------------------------------------------------------------------
import uuid
from typing import Type, Any
from urllib.parse import urljoin

from crewai_tools import BaseTool
from pydantic.v1 import BaseModel, Field
from sqlalchemy import exc
from datetime import datetime

from helpers import fingers
from helpers.crawler import crawl_host
from helpers.gobuster import Gobuster
from config import logger
from persistence.database import DB
from persistence.orm import DuplicateException, UrlEnum


class GobusterDirToolSchema(BaseModel):
    """GobusterDirToolSchema 的查询参数"""
    url: str = Field(..., description="host或url地址, 一个host或者完整的url地址")


class GobusterDirTool(BaseTool):
    name: str = "Gobuster"
    description: str = "url路径枚举工具,用于发现隐藏或未公开的url路径"
    args_schema: Type[BaseModel] = GobusterDirToolSchema
    db: DB | None = None
    task_id: int | None = None
    webinfo_id: int | None = None
    gobuster_path: str | None = None
    wordlist_path: str | None = None

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, db: DB, task_id: int, webinfo_id: int, gobuster_path: str = None, wordlist_path: str = None):
        super().__init__()
        self.db = db
        self.task_id = task_id
        self.webinfo_id = webinfo_id
        self.gobuster_path = gobuster_path
        self.wordlist_path = wordlist_path
        logger.info("初始化工具 Gobuster")

    def _run(
            self,
            **kwargs: Any,
    ) -> Any:
        url = kwargs.pop('url', "")
        if url == "":
            return "url为空"

        try:
            logger.info("Gobuster: {}", url)
            gobuster = Gobuster(wordlist=self.wordlist_path, gobusterAbsPath=self.gobuster_path)
            result = gobuster.dir(url)
            if len(result) == 0:
                return "未找到任何结果"
            # 添加一个随机path作为探针;先判空再追加,否则上面的判空检查永远不会生效
            result.append(str(uuid.uuid4()))

            now = datetime.now()
            with self.db.DBSession() as session:
                for r in result:
                    htmls = crawl_host(urljoin(url, r))
                    if len(htmls) == 0:
                        continue
                    for html in htmls:
                        matched = fingers.Match(html.headers, html.body, [i.mmh3hash for i in html.favicons],
                                                [i.md5hash for i in html.favicons])
                        urlenumdb = UrlEnum()
                        urlenumdb.task_id = self.task_id
                        urlenumdb.web_info_id = self.webinfo_id
                        urlenumdb.host = html.host
urlenumdb.schema = html.schema 76 | urlenumdb.url = html.url 77 | urlenumdb.path = r 78 | urlenumdb.current_redirects = html.current_redirects 79 | urlenumdb.redirect_to = html.redirect_to 80 | urlenumdb.title = html.title 81 | urlenumdb.status = html.status 82 | urlenumdb.headers = html.headers 83 | urlenumdb.favicons = [favicon.to_dict() for favicon in html.favicons] 84 | urlenumdb.body = html.body 85 | urlenumdb.created = now 86 | urlenumdb.finger_prints = [match.to_dict() for match in matched] 87 | urlenumdb.source = self.name 88 | try: 89 | session.add(urlenumdb) 90 | session.commit() 91 | except DuplicateException as e: 92 | session.rollback() 93 | except Exception as e: 94 | raise 95 | 96 | return f"共发现{len(result)}个目录" 97 | except exc.SQLAlchemyError as e: 98 | logger.error("数据库错误: {}", e) 99 | return "数据库错误" 100 | except Exception as e: 101 | logger.error("获取失败: {}", e) 102 | return f"获取失败: {e}" 103 | -------------------------------------------------------------------------------- /exploits/web/host_crawler_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Type, Any 3 | from urllib.parse import urlparse, urlunparse 4 | 5 | import validators 6 | from crewai_tools import BaseTool 7 | from pydantic.v1 import BaseModel, Field 8 | from sqlalchemy import exc 9 | 10 | from helpers import fingers 11 | from helpers.crawler import crawl_host 12 | from helpers.utils import is_domain 13 | from persistence.database import DB 14 | from config import logger 15 | from persistence.orm import WebInfo, Cdn, DuplicateException 16 | 17 | 18 | class HostCrawlerToolSchema(BaseModel): 19 | """HostCrawlerToolSchema 的查询参数""" 20 | host: str = Field(..., 21 | description="host或url地址, 一个host或者完整的url地址") 22 | 23 | 24 | class HostCrawlerTool(BaseTool): 25 | name: str = "HostCrawler" 26 | description: str = "根据host或url,获取网页的图标、标题、响应头、HTML正文等信息,同时根据指纹判断其应用信息。" 27 | args_schema: Type[BaseModel] = HostCrawlerToolSchema 28 | db: DB | None = None 29 | task_id: int | None = None 30 | 31 | class Config: 32 | arbitrary_types_allowed = True 33 | 34 | def __init__(self, db: DB, task_id: int): 35 | super().__init__() 36 | self.db = db 37 | self.task_id = task_id 38 | logger.info("初始化工具 HostCrawler") 39 | 40 | def _run( 41 | self, 42 | **kwargs: Any, 43 | ) -> Any: 44 | host = kwargs.pop('host', "") 45 | if host == "": 46 | return "host为空" 47 | 48 | if is_domain(host) is True: 49 | url_parsed = urlparse("http://" + host) 50 | host = urlunparse((url_parsed.scheme, url_parsed.netloc, url_parsed.path, '', '', '')) 51 | 52 | if validators.url(host) is False: 53 | return "host地址不合法" 54 | try: 55 | logger.info("HostCrawler: {}", host) 56 | now = datetime.now() 57 | htmls = crawl_host(host) 58 | if len(htmls) == 0: 59 | return "获取失败" 60 | try: 61 | with self.db.DBSession() as session: 62 | for html in htmls: 63 | matched = fingers.Match(html.headers, html.body, [i.mmh3hash for i in html.favicons], 64 | [i.md5hash for i in html.favicons]) 65 | webinfodb = WebInfo() 66 | webinfodb.target = host 67 | webinfodb.task_id = self.task_id 68 | webinfodb.host = html.host 69 | webinfodb.schema = html.schema 70 | webinfodb.url = html.url 71 | webinfodb.current_redirects = html.current_redirects 72 | webinfodb.redirect_to = html.redirect_to 73 | webinfodb.ip = html.ip 74 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(html.ip)).first() 75 | if ipcdn is not None: 76 | webinfodb.ip_cdn = ipcdn.organization 77 | webinfodb.port = html.port 78 | 
webinfodb.title = html.title 79 | webinfodb.status = html.status 80 | webinfodb.headers = html.headers 81 | webinfodb.favicons = [favicon.to_dict() for favicon in html.favicons] 82 | webinfodb.body = html.body 83 | webinfodb.certs = html.certs 84 | webinfodb.created = now 85 | webinfodb.finger_prints = [match.to_dict() for match in matched] 86 | webinfodb.source = self.name 87 | 88 | try: 89 | session.add(webinfodb) 90 | session.commit() 91 | except DuplicateException as e: 92 | session.rollback() 93 | except Exception as e: 94 | raise 95 | except exc.SQLAlchemyError as e: 96 | logger.error("数据库错误: {}", e) 97 | return "数据库错误" 98 | except Exception as e: 99 | logger.error("其他错误: {}", e) 100 | return f"其他错误: {e}" 101 | return f"共发现{len(htmls)}个特征" 102 | except Exception as e: 103 | logger.error("获取失败: {}", e) 104 | return f"获取失败: {e}" 105 | -------------------------------------------------------------------------------- /exploits/web/html_parse_info_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type, Any 2 | 3 | from crewai_tools import BaseTool 4 | from pydantic.v1 import BaseModel, Field 5 | from config import logger 6 | from helpers.html_information_leak_analyze import analyze 7 | 8 | 9 | class HtmlInformationLeakAnalyzeToolSchema(BaseModel): 10 | """HtmlInformationLeakAnalyzeToolSchema 的查询参数""" 11 | html: str = Field(..., description="html源代码") 12 | 13 | 14 | class HtmlInformationLeakAnalyzeTool(BaseTool): 15 | name: str = "HtmlInformationLeakAnalyze" 16 | description: str = "从html源代码中得到潜在的敏感信息" 17 | args_schema: Type[BaseModel] = HtmlInformationLeakAnalyzeToolSchema 18 | 19 | def __init__(self, ): 20 | super().__init__() 21 | logger.info("初始化工具 HtmlInformationLeakAnalyze") 22 | 23 | def _run( 24 | self, 25 | **kwargs: Any, 26 | ) -> Any: 27 | html = kwargs.pop('html', "") 28 | if html == "": 29 | return "html为空" 30 | datas = analyze(html) 31 | 32 | if datas == "": 33 | return "未找到任何信息" 34 | 35 | return datas 36 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/alienvault_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | import requests 4 | from pydantic.v1 import BaseModel, Field 5 | from tld import get_tld 6 | 7 | from config import logger 8 | 9 | 10 | class SubDomain(BaseModel): 11 | apex_domain: Optional[str] = Field(description="顶级域名") 12 | sub_domain: Optional[str] = Field(description="子域名") 13 | hostname: Optional[str] = Field(description="域名") 14 | 15 | a: List[str] = Field(description="A记录") 16 | cname: List[str] = Field(description="CNAME记录") 17 | aaaa: List[str] = Field(description="AAAA记录") 18 | mx: List[str] = Field(description="MX记录") 19 | ns: List[str] = Field(description="NS记录") 20 | soa: List[str] = Field(description="SOA记录") 21 | txt: List[str] = Field(description="TXT记录") 22 | 23 | 24 | class AlienVaultResult(BaseModel): 25 | apex_domain: Optional[str] = Field(description="顶级域名") 26 | sub_domains: dict[str, SubDomain] = Field(description="子域名") 27 | 28 | 29 | class AlienVaultApi: 30 | def __init__(self, base_url: str = "https://otx.alienvault.com/api/v1/indicators", proxies: dict = 
None, 31 | timeout: int = 15): 32 | self.base_url = base_url 33 | self.timeout = timeout 34 | self._session = requests.Session() 35 | if proxies: 36 | self._session.proxies.update(proxies) 37 | self._session.trust_env = False 38 | 39 | def _search(self, stype, value) -> List[AlienVaultResult]: 40 | url = f"{self.base_url}/{stype}/{value}/passive_dns" 41 | logger.debug("AlienVault: {upath}", upath=url) 42 | headers = { 43 | "Content-Type": "application/json" 44 | } 45 | response = self._session.request("GET", url, headers=headers, timeout=self.timeout) 46 | response.raise_for_status() 47 | res = response.json() 48 | 49 | domains = {} 50 | if "passive_dns" in res: 51 | for item in res["passive_dns"]: 52 | 53 | domain_obj = get_tld(item["hostname"], fail_silently=True, as_object=True, fix_protocol=True) 54 | apex_domain = domain_obj.fld 55 | sub_domain = domain_obj.subdomain 56 | val = item["address"] 57 | if item["record_type"] == "CNAME": 58 | val = item["address"] 59 | elif item["record_type"] == "SOA" or item["record_type"] == "NS": 60 | val = item["address"] 61 | 62 | if apex_domain in domains: 63 | domain = domains[apex_domain] 64 | else: 65 | domain = AlienVaultResult( 66 | apex_domain=apex_domain, 67 | sub_domains={} 68 | ) 69 | domains[apex_domain] = domain 70 | 71 | if sub_domain in domain.sub_domains: 72 | record = domain.sub_domains[sub_domain] 73 | else: 74 | record = SubDomain( 75 | apex_domain=apex_domain, 76 | sub_domain=sub_domain, 77 | hostname=item["hostname"], 78 | a=[], 79 | cname=[], 80 | aaaa=[], 81 | mx=[], 82 | ns=[], 83 | soa=[], 84 | txt=[] 85 | ) 86 | domain.sub_domains[sub_domain] = record 87 | 88 | if item["record_type"] == "A": 89 | record.a.append(val) 90 | elif item["record_type"] == "AAAA": 91 | record.aaaa.append(val) 92 | elif item["record_type"] == "SOA": 93 | record.soa.append(val) 94 | elif item["record_type"] == "NS": 95 | record.ns.append(val) 96 | elif item["record_type"] == "TXT": 97 | record.txt.append(val) 98 | elif item["record_type"] == "SOA": 99 | record.soa.append(val) 100 | elif item["record_type"] == "MX": 101 | record.mx.append(val) 102 | elif item["record_type"] == "CNAME": 103 | record.cname.append(val) 104 | 105 | return list(domains.values()) 106 | 107 | def search_domain(self, domain: str) -> List[AlienVaultResult]: 108 | return self._search("domain", domain) 109 | 110 | def search_ipv4(self, ip: str) -> List[AlienVaultResult]: 111 | return self._search("IPv4", ip) 112 | 113 | def search_ipv6(self, ip: str) -> List[AlienVaultResult]: 114 | return self._search("IPv6", ip) 115 | -------------------------------------------------------------------------------- /helpers/crawler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import hashlib 3 | from urllib.parse import urlparse, urlunparse, urljoin 4 | 5 | import httpx 6 | import mmh3 7 | from bs4 import BeautifulSoup 8 | from config import logger 9 | 10 | AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36' 11 | 12 | 13 | class Favicon: 14 | def __init__(self, b64data: str, md5hash: str, mmh3hash: int): 15 | self.b64data = b64data 16 | self.md5hash = md5hash 17 | self.mmh3hash = mmh3hash 18 | 19 | def __eq__(self, other): 20 | return self.md5hash == other.md5hash 21 | 22 | def __hash__(self): 23 | return hash(self.md5hash) 24 | 25 | def to_dict(self): 26 | return { 27 | 'b64data': self.b64data, 28 | 'md5hash': self.md5hash, 29 | 'mmh3hash': self.mmh3hash 
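            # mmh3hash基于带换行的base64编码计算(见_get_favicon_hash),
            # 与Shodan、FOFA等平台常用的favicon指纹约定一致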
30 | } 31 | 32 | def __repr__(self) -> str: 33 | return f"Favicon(md5hash={self.md5hash!r}, mmh3hash={self.mmh3hash!r})" 34 | 35 | 36 | class HttpHtml: 37 | def __init__(self, 38 | host: str, 39 | url: str, 40 | schema: str, 41 | title: str, 42 | headers: dict, 43 | status: int, 44 | body: str, 45 | current_redirects: int = 0, 46 | redirect_to: str = None, 47 | favicons: [Favicon] = None, 48 | ip=None, 49 | port=None, 50 | certs=None): 51 | self.host = host 52 | self.url = url 53 | self.schema = schema 54 | self.current_redirects = current_redirects 55 | self.redirect_to = redirect_to 56 | self.favicons = favicons 57 | self.title = title 58 | self.headers = headers 59 | self.status = status 60 | self.body = body 61 | self.ip = ip 62 | self.port = port 63 | self.certs = certs 64 | 65 | def __repr__(self) -> str: 66 | return f"HttpHtml(host={self.host!r}, favicons={self.favicons!r}, headers={self.headers!r}, title={self.title!r})" 67 | 68 | 69 | def _get_favicon_hash(url: str) -> Favicon | None: 70 | try: 71 | with httpx.Client(headers={'User-Agent': AGENT}, timeout=5, verify=False) as client: 72 | res = client.get(url) 73 | if res.status_code != 200: 74 | return None 75 | if len(res.content) == 0: 76 | return None 77 | b64data = base64.encodebytes(res.content) 78 | md5data = hashlib.md5(res.content).hexdigest() 79 | mmh3hash = mmh3.hash(b64data) 80 | 81 | favicon = Favicon(b64data.decode("utf-8"), md5data, mmh3hash) 82 | 83 | return favicon 84 | except httpx.RequestError as e: 85 | return None 86 | 87 | 88 | LINK_RELS = [ 89 | 'icon', 90 | 'shortcut icon', 91 | 'apple-touch-icon', 92 | 'apple-touch-icon-precomposed', 93 | ] 94 | 95 | META_NAMES = ['msapplication-TileImage', 'og:image'] 96 | 97 | 98 | def _get_favicons_urls(host: str, body: str) -> [str]: 99 | soup = BeautifulSoup(body, features='html.parser') 100 | link_tags = set() 101 | for rel in LINK_RELS: 102 | for link_tag in soup.find_all( 103 | 'link', attrs={'rel': lambda r: r and r.lower() == rel, 'href': True} 104 | ): 105 | link_tags.add(link_tag) 106 | 107 | meta_tags = set() 108 | for meta_tag in soup.find_all('meta', attrs={'content': True}): 109 | meta_type = meta_tag.get('name') or meta_tag.get('property') or '' 110 | meta_type = meta_type.lower() 111 | for name in META_NAMES: 112 | if meta_type == name.lower(): 113 | meta_tags.add(meta_tag) 114 | 115 | urls = set() 116 | hu = urlparse(host) 117 | urls.add(urlunparse((hu.scheme, hu.netloc, 'favicon.ico', '', '', ''))) 118 | 119 | for tag in link_tags | meta_tags: 120 | href = tag.get('href', '') or tag.get('content', '') 121 | href = href.strip() 122 | 123 | if not href or href.startswith('data:image/'): 124 | continue 125 | 126 | if bool(urlparse(href).netloc): 127 | url_parsed = href 128 | else: 129 | url_parsed = urljoin(host, href) 130 | 131 | # repair '//cdn.network.com/favicon.png' or `icon.png?v2` 132 | scheme = urlparse(host).scheme 133 | url_parsed = urlparse(url_parsed, scheme=scheme) 134 | urls.add(urlunparse((url_parsed.scheme, url_parsed.netloc, url_parsed.path, '', '', ''))) 135 | 136 | return list(urls) 137 | 138 | 139 | def _get_favicons_from_urls(urls: [str]) -> [Favicon]: 140 | favicons = set() 141 | if urls is None or len(urls) == 0: 142 | return favicons 143 | for url in urls: 144 | logger.debug("crawl_host: favicon {}", url) 145 | favicon = _get_favicon_hash(url) 146 | if favicon is not None: 147 | favicons.add(favicon) 148 | return list(favicons) 149 | 150 | 151 | def crawl_host(host: str) -> [HttpHtml]: 152 | """ 153 | 从host获取html 154 | :param host: 
要抓取的主机地址 155 | :return: 包含HttpHtml对象的列表,每个对象都是从一次HTTP请求中获得的HTML和相关信息 156 | """ 157 | logger.debug("crawl_host: {}", host) 158 | MAX_REDIRECTS = 3 # 定义最大重定向次数 159 | 160 | def fetch_url(url, current_redirects=0) -> [HttpHtml]: 161 | """ 162 | 辅助函数,用于处理单个URL的获取和重定向 163 | :param url: 请求的URL 164 | :param current_redirects: 当前重定向次数 165 | :return: 收集的Html对象列表 166 | """ 167 | if current_redirects > MAX_REDIRECTS: 168 | logger.debug("crawl_host: Reached max redirects: {}", MAX_REDIRECTS) 169 | return [] 170 | 171 | with httpx.Client(headers={'User-Agent': AGENT}, timeout=5, verify=False) as client: 172 | res = client.get(url) 173 | socket = res.stream._stream._httpcore_stream._stream._connection._network_stream._sock 174 | try: 175 | server_ip, server_port = socket.getpeername() 176 | except OSError: 177 | server_ip, server_port = None, None 178 | 179 | certs = None 180 | if hasattr(socket, '_sslobj') and socket._sslobj: 181 | certs = [c.get_info() for c in socket._sslobj.get_unverified_chain()] 182 | 183 | title = '' 184 | if res.content: 185 | soup = BeautifulSoup(res.content, features='html.parser') 186 | if soup.title: 187 | title = str(soup.title.string) 188 | htmlobj = HttpHtml( 189 | host=res.url.host, 190 | url=str(res.url), 191 | schema=res.url.scheme, 192 | title=title, 193 | headers=dict(res.headers), 194 | status=res.status_code, 195 | body=res.text, 196 | ip=server_ip, 197 | port=server_port, 198 | certs=certs 199 | ) 200 | 201 | favicons_url = _get_favicons_urls(htmlobj.url, htmlobj.body) 202 | htmlobj.favicons = _get_favicons_from_urls(favicons_url) 203 | htmlobj.current_redirects = current_redirects 204 | htmls = [htmlobj] 205 | 206 | # 处理重定向 207 | if res.is_redirect: 208 | new_url = httpx.URL(res.headers['Location']) 209 | if new_url.is_relative_url: 210 | new_url = res.url.join(new_url) 211 | htmlobj.redirect_to = str(new_url) 212 | logger.debug("crawl_host: Redirecting to: {}", htmlobj.redirect_to) 213 | htmls += fetch_url(htmlobj.redirect_to, current_redirects + 1) 214 | 215 | return htmls 216 | 217 | return fetch_url(host) 218 | -------------------------------------------------------------------------------- /helpers/fingers/__init__.py: -------------------------------------------------------------------------------- 1 | from helpers.fingers.web_fingers import WebFingers, MatchItem 2 | 3 | _webFingersObj: WebFingers = WebFingers() 4 | 5 | 6 | def Match(headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 7 | favicon_md5: int | str | list[int] | list[str] = None) -> \ 8 | [MatchItem]: 9 | return _webFingersObj.match(headers, body, favicon_int, favicon_md5) 10 | -------------------------------------------------------------------------------- /helpers/fingers/assets/custom.json: -------------------------------------------------------------------------------- 1 | { 2 | "fingerprint": [ 3 | { 4 | "cms": "thinkadmin报错页面", 5 | "method": "keyword", 6 | "location": "body", 7 | "keyword": [ 8 | "ADMIN_MODULE", 9 | "十年磨一剑-为API开发设计的高性能框架", 10 | "http://www.thinkphp.cn", 11 | "Environment Variables", 12 | "ThinkPHP" 13 | ] 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /helpers/fingers/web_fingers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from enum import Enum 4 | from os import path 5 | 6 | 7 | class MatchType(Enum): 8 | HEADER = "header" 9 | BODY = "body" 10 | FAVICON = "favicon" 11 | 12 | 13 | class MatchItem: 
14 | def __init__(self, name: str, mtype: MatchType, matches: dict[str, str]): 15 | self.name = name 16 | self.match_type = mtype 17 | self.matches = matches 18 | 19 | def to_dict(self): 20 | return { 21 | 'name': self.name, 22 | 'type': self.match_type.value, 23 | 'matches': self.matches 24 | } 25 | 26 | def __repr__(self): 27 | return f"MatchItem(name={self.name!r}, type={self.match_type!r}, matches={self.matches!r})" 28 | 29 | 30 | class FingerPrint: 31 | def __init__(self, 32 | name: str, 33 | headers=None, 34 | body=None, 35 | favicons=None 36 | ): 37 | self.name = name 38 | self.headers = headers 39 | self.body = body 40 | self.favicons = favicons 41 | 42 | def _match_favicon(self, favicon: str = None) -> MatchItem | None: 43 | if favicon is None: 44 | return None 45 | if self.favicons is None: 46 | return None 47 | for sv in self.favicons: 48 | if sv == favicon: 49 | return MatchItem(self.name, MatchType.FAVICON, {favicon: sv}) 50 | return None 51 | 52 | def _match_body(self, body: str = None) -> MatchItem | None: 53 | if body is None: 54 | return None 55 | if self.body is None: 56 | return None 57 | 58 | matches = {} 59 | for sv in self.body: 60 | if sv in body: 61 | matches[sv] = "" 62 | 63 | if len(matches) == len(self.body): 64 | return MatchItem(self.name, MatchType.BODY, matches) 65 | return None 66 | 67 | def _match_headers(self, headers: dict = None) -> MatchItem | None: 68 | if headers is None: 69 | return None 70 | 71 | if self.headers is None: 72 | return None 73 | 74 | if type(self.headers) is dict: 75 | matches = {} 76 | for key, value in self.headers.items(): 77 | if key in headers: 78 | if value in headers[key]: 79 | matches[key] = f"{value} {headers[key]}" 80 | if len(matches) == len(self.headers): 81 | return MatchItem(self.name, MatchType.HEADER, matches) 82 | 83 | elif type(self.headers) is list: 84 | items = headers.items() 85 | matches = {} 86 | for sv in self.headers: 87 | for index, tv in enumerate([i[1] for i in items]): 88 | if sv in tv: 89 | matches[list(headers)[index]] = f"{sv} {tv}" 90 | if len(matches) >= len(self.headers): 91 | return MatchItem(self.name, MatchType.HEADER, matches) 92 | 93 | return None 94 | 95 | def match(self, headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 96 | favicon_md5: int | str | list[int] | list[str] = None) -> MatchItem | None: 97 | matched = self._match_headers(headers) 98 | if matched is not None: 99 | return matched 100 | 101 | matched = self._match_body(body) 102 | if matched is not None: 103 | return matched 104 | 105 | favs = [] 106 | if type(favicon_int) is str or type(favicon_int) is int: 107 | favs.append(str(favicon_int)) 108 | elif type(favicon_int) is list: 109 | for fav in favicon_int: 110 | favs.append(str(fav)) 111 | 112 | if type(favicon_md5) is str or type(favicon_md5) is int: 113 | favs.append(str(favicon_md5)) 114 | elif type(favicon_md5) is list: 115 | for fav in favicon_md5: 116 | favs.append(str(fav)) 117 | 118 | for fav in favs: 119 | matched = self._match_favicon(fav) 120 | if matched is not None: 121 | return matched 122 | 123 | return None 124 | 125 | def __repr__(self): 126 | return f"FingerPrint(name={self.name!r}, headers={self.headers!r}, body={self.body!r}, favicons={self.favicons!r})" 127 | 128 | 129 | class WebFingers: 130 | fingers: [FingerPrint] = [] 131 | 132 | def __init__(self, base_path=os.path.dirname(__file__)): 133 | self.base_path = base_path 134 | self.fingers = self._parse_arl_fingers() + self._parse_web_fingerprint() 135 | 136 | def 
match(self, headers: dict = None, body: str = None, favicon_int: int | str | list[int] | list[str] = None, 137 | favicon_md5: int | str | list[int] | list[str] = None) -> \ 138 | [MatchItem]: 139 | matched = [] 140 | for finger in self.fingers: 141 | matechitem = finger.match(headers, body, favicon_int, favicon_md5) 142 | if matechitem is not None: 143 | matched.append(matechitem) 144 | return matched 145 | 146 | def _parse_web_fingerprint(self) -> [FingerPrint]: 147 | fingers = [] 148 | # https://github.com/0x727/FingerprintHub/ 149 | with open(path.join(self.base_path, 'assets/web_fingerprint_v3.json'), encoding='utf-8') as f: 150 | data = json.load(f) 151 | for item in data: 152 | if item['request_data'] != "": 153 | continue 154 | elif item['request_method'] != "get": 155 | continue 156 | elif item['path'] != "/": 157 | continue 158 | 159 | finger = FingerPrint(name=item['name']) 160 | if len(item['headers']) > 0: 161 | finger.headers = item['headers'] 162 | if len(item['keyword']) > 0: 163 | finger.body = item['keyword'] 164 | if len(item['favicon_hash']) > 0: 165 | finger.favicons = item['favicon_hash'] 166 | 167 | fingers.append(finger) 168 | 169 | return fingers 170 | 171 | def _parse_arl_fingers(self) -> [FingerPrint]: 172 | # https://github.com/loecho-sec/ARL-Finger-ADD 173 | datas = [] 174 | with open(path.join(self.base_path, 'assets/arl_finger.json'), encoding='utf-8') as f: 175 | data = json.load(f) 176 | datas += data['fingerprint'] 177 | # https://github.com/EASY233/Finger/ 178 | with open(path.join(self.base_path, 'assets/finger.json'), encoding='utf-8') as f: 179 | data = json.load(f) 180 | datas += data['fingerprint'] 181 | with open(path.join(self.base_path, 'assets/custom.json'), encoding='utf-8') as f: 182 | data = json.load(f) 183 | datas += data['fingerprint'] 184 | 185 | fingers = [] 186 | for item in datas: 187 | finger = FingerPrint(name=item['cms']) 188 | if item['method'] == "keyword": 189 | if item['location'] == "header": 190 | finger.headers = item['keyword'] 191 | else: 192 | finger.body = item['keyword'] 193 | elif item['method'] == "faviconhash": 194 | finger.favicons = item['keyword'] 195 | else: 196 | continue 197 | fingers.append(finger) 198 | 199 | return fingers 200 | -------------------------------------------------------------------------------- /helpers/fofa_api.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from fofa.client import Client 4 | from pydantic.v1 import BaseModel, Field 5 | from config import logger 6 | 7 | 8 | class FofaResult(BaseModel): 9 | ip: Optional[str] = Field(ver='base', description="ip地址") 10 | port: Optional[str] = Field(ver='base', description="port") 11 | protocol: Optional[str] = Field(ver='base', description="协议名") 12 | country: Optional[str] = Field(ver='base', description="国家代码") 13 | country_name: Optional[str] = Field(ver='base', description="国家名") 14 | region: Optional[str] = Field(ver='base', description="区域") 15 | city: Optional[str] = Field(ver='base', description="城市") 16 | as_number: Optional[str] = Field(ver='base', description="asn编号") 17 | as_organization: Optional[str] = Field(ver='base', description="asn组织") 18 | host: Optional[str] = Field(ver='base', description="完全限定域名 (FQDN)") 19 | domain: Optional[str] = Field(ver='base', description="域名") 20 | os: Optional[str] = Field(ver='base', description="操作系统") 21 | server: Optional[str] = Field(ver='base', description="网站server") 22 | icp: Optional[str] = Field(ver='base', 
description="icp备案号") 23 | title: Optional[str] = Field(ver='base', description="网站标题") 24 | jarm: Optional[str] = Field(ver='base', description="jarm 指纹") 25 | header: Optional[str] = Field(ver='base', description="网站header") 26 | banner: Optional[str] = Field(ver='base', description="协议 banner") 27 | base_protocol: Optional[str] = Field(ver='base', description="基础协议,比如tcp/udp") 28 | link: Optional[str] = Field(ver='base', description="资产的URL链接") 29 | cert: Optional[str] = Field(ver='base', description="证书") 30 | certs_issuer_org: Optional[str] = Field(ver='base', description="证书颁发者组织") 31 | certs_issuer_cn: Optional[str] = Field(ver='base', description="证书颁发者通用名称") 32 | certs_subject_org: Optional[str] = Field(ver='base', description="证书持有者组织") 33 | certs_subject_cn: Optional[str] = Field(ver='base', description="证书持有者通用名称") 34 | tls_ja3s: Optional[str] = Field(ver='base', description="ja3s指纹信息") 35 | tls_version: Optional[str] = Field(ver='base', description="tls协议版本") 36 | product: Optional[str] = Field(ver='pro', description="专业版本及以上") 37 | product_category: Optional[str] = Field(ver='pro', description="产品分类") 38 | version: Optional[str] = Field(ver='pro', description="产品版本号") 39 | lastupdatetime: Optional[str] = Field(ver='pro', description="FOFA最后更新时间") 40 | cname: Optional[str] = Field(ver='pro', description="域名cname") 41 | icon_hash: Optional[str] = Field(ver='bus', description="返回的icon_hash值") 42 | certs_valid: Optional[str] = Field(ver='bus', description="证书是否有效") 43 | version: Optional[str] = Field(ver='bus', description="产品版本号") 44 | cname_domain: Optional[str] = Field(ver='bus', description="cname的域名") 45 | body: Optional[str] = Field(ver='bus', description="网站正文内容") 46 | icon: Optional[str] = Field(ver='ent', description="icon 图标") 47 | fid: Optional[str] = Field(ver='ent', description="fid") 48 | structinfo: Optional[str] = Field(ver='ent', description="结构化信息 (部分协议支持、比如elastic、mongodb)") 49 | 50 | @classmethod 51 | def GetFields(cls, ver='base'): 52 | vers = ['base'] 53 | if ver == 'pro': 54 | vers = ['base', 'pro'] 55 | elif ver == 'bus': 56 | vers = ['base', 'pro', 'bus'] 57 | elif ver == 'ent': 58 | vers = ['base', 'pro', 'bus', 'ent'] 59 | 60 | fields = [] 61 | for field in cls.__fields__.values(): 62 | if field.field_info.extra['ver'] in vers: 63 | fields.append(field.name) 64 | return fields 65 | 66 | @classmethod 67 | def LoadFromList(cls, datas: list, fields: list) -> '[FofaResult]': 68 | results = [] 69 | for data in datas: 70 | kv = {} 71 | for i, item in enumerate(data): 72 | kv[fields[i]] = item 73 | results.append(cls.parse_obj(kv)) 74 | return results 75 | 76 | 77 | def _assembly_query_str(fuzzy=True, **kwargs: Any) -> (str, str): 78 | for key, value in kwargs.items(): 79 | if key == "domain": 80 | if fuzzy: 81 | return f"domain=\"{value}\" || host=\"{value}\" || cname=\"{value}\" || cname_domain=\"{value}\"", value 82 | else: 83 | return f"domain=\"{value}\" || cname=\"{value}\" || cname_domain=\"{value}\"", value 84 | elif key == "app": 85 | return f"app=\"{value}\" || product=\"{value}\"", value 86 | else: 87 | return f"{key}=\"{value}\"", value 88 | 89 | 90 | class FofaApi: 91 | """ 92 | 使用FOFA API进行搜索 93 | """ 94 | 95 | def __init__(self, email: str, api_key: str, version: str = 'base'): 96 | self.client = Client(email=email, key=api_key) 97 | self.client._session.trust_env = False 98 | self.fields = FofaResult.GetFields(version) 99 | 100 | def search(self, fuzzy=False, max_size=500, page_size=200, **kwargs: Any) -> [FofaResult]: 101 | """ 102 | 搜索 103 | 
fuzzy: bool, 是否模糊搜索
103 |         max_size: int, 最大返回数量
104 |         page_size: int, 每页数量
105 |         """
106 |         fields = ','.join(self.fields)
107 |         query_str, val = _assembly_query_str(fuzzy=fuzzy, **kwargs)
108 |         next = ""
109 |         results = []
110 |         while True:
111 |             logger.debug("FOFA: {query_str} {next}", query_str=query_str, next=next)
112 |             r = self.client.search_next(query_str=query_str, size=page_size, fields=fields, full=True, next=next)
113 |             data = r['results']
114 |             if fuzzy is False:
115 |                 for item in data:
116 |                     if val in ' '.join(item):
117 |                         results.append(item)
118 |             else:
119 |                 results += data
120 |             if len(results) >= max_size or len(data) <= 0 or r['next'] == "":
121 |                 break
122 |             next = r['next']
123 |         return FofaResult.LoadFromList(results, self.fields)
124 | 
--------------------------------------------------------------------------------
/helpers/gobuster.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/OJ/gobuster
 2 | import os
 3 | import re
 4 | import shutil
 5 | import subprocess
 6 | 
 7 | 
 8 | class GobusterNotFound(Exception):
 9 |     pass
10 | 
11 | 
12 | class WordlistNotFound(Exception):
13 |     pass
14 | 
15 | 
16 | class IllegalArgumentException(Exception):
17 |     pass
18 | 
19 | 
20 | class GobusterException(Exception):
21 |     pass
22 | 
23 | 
24 | class Gobuster:
25 | 
26 |     def __init__(self, wordlist, threads=10, gobusterAbsPath=None):
27 |         self.gobusterPath = gobusterAbsPath
28 |         if self.gobusterPath is None:
29 |             self.gobusterPath = shutil.which("gobuster")
30 | 
31 |         if self.gobusterPath is None or not os.path.exists(self.gobusterPath):
32 |             raise GobusterNotFound("gobuster not found in path")
33 | 
34 |         self.wordlist = wordlist
35 |         if not os.path.exists(self.wordlist):
36 |             raise WordlistNotFound(f"Wordlist not found at {self.wordlist}")
37 |         self.threads = threads
38 |         self.gobusterPath = os.path.dirname(self.gobusterPath)
39 |         self.verbose = False
40 |         self.creationflags = subprocess.CREATE_NO_WINDOW if subprocess.sys.platform == 'win32' else 0
41 | 
42 |         # Allow changing the path where gobuster is installed (instead of expecting it to be in $PATH)
43 |         # Check if the '/' is at the end - and remove it if "yes"
44 |         if gobusterAbsPath is not None and gobusterAbsPath[-1] == "/":
45 |             self.gobusterPath = gobusterAbsPath[:-1]
46 | 
47 |         self.gobusterBinary = "gobuster"
48 |         if self.gobusterPath:
49 |             self.gobusterBinary = os.path.join(self.gobusterPath, self.gobusterBinary)
50 | 
51 |     def dir(self, url, exclude_status_code=None, verbose=False) -> [str]:
52 |         command = [
53 |             '--url', url,
54 |             '--no-tls-validation',
55 |             '--no-status',
56 |             '--hide-length'
57 |         ]
58 | 
59 |         if exclude_status_code is not None:
60 |             command.extend(['--status-codes-blacklist', exclude_status_code])
61 | 
62 |         report = self.exec('dir', command, bufsize=1, verbose=verbose)
63 |         paths = set()
64 |         for line in report.split('\n'):
65 |             paths.add(line.strip())
66 |         return list(paths)
67 | 
68 |     def exec(self, mode, cmd: [], bufsize=-1, verbose=False) -> str:
69 |         command = [self.gobusterBinary, mode, "--no-color", "--no-progress", "--quiet",
70 |                    "--threads", str(self.threads),
71 |                    "--wordlist", self.wordlist]
72 |         command.extend(cmd)
73 |         print(f"[Gobuster] [INFO] {' '.join(command)}")
74 | 
75 |         # 启动进程
76 |         process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True,
77 |                                    bufsize=bufsize,
78 |                                    creationflags=self.creationflags)
79 | 
80 |         outputs = []
81 |         try:
82 |             for line in iter(process.stdout.readline, ''):
83 |                 line = 
line.strip().lstrip('\r\x1b[2K') 84 | line = re.sub(r'\[.*?\]', '', line).strip() 85 | 86 | if verbose: 87 | print(line) 88 | if line != '': 89 | outputs.append(line) 90 | except KeyboardInterrupt: 91 | print(f"[Gobuster] [INFO] Process interrupted by user.") 92 | finally: 93 | process.stdout.close() 94 | process.wait() 95 | 96 | output = '\n'.join(outputs) 97 | if output.startswith("Error: "): 98 | raise GobusterException(output) 99 | return output 100 | -------------------------------------------------------------------------------- /helpers/html_information_leak_analyze.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from bs4 import BeautifulSoup, Comment 4 | 5 | 6 | def analyze(html: str, pure=False) -> str: 7 | soup = BeautifulSoup(html, 'html.parser') 8 | datas = "" 9 | puretext = '\n'.join([line.rstrip() for line in soup.get_text(separator='\n').split('\n') if line.rstrip()]) 10 | 11 | # 匹配HTML注释 12 | comments = soup.find_all(string=lambda text: isinstance(text, Comment)) 13 | comments = list(set(comments)) 14 | if len(comments) > 0: 15 | datas = f"{datas}\n\nHTML注释: {comments}" 16 | 17 | # 匹配隐藏字段 18 | hidden_fields = [(tag['name'], tag['value']) for tag in soup.find_all('input', type='hidden')] 19 | hidden_fields = list(set(hidden_fields)) 20 | if len(hidden_fields) > 0: 21 | datas = f"{datas}\n隐藏字段: {hidden_fields}" 22 | 23 | # 匹配电子邮件地址 24 | emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', html) 25 | emails = list(set(emails)) 26 | if len(emails) > 0: 27 | datas = f"{datas}\nemail地址: {emails}" 28 | 29 | # 匹配文件路径 30 | file_paths = [tag['src'] for tag in soup.find_all(src=True)] + [tag['href'] for tag in soup.find_all(href=True)] 31 | file_paths.extend(find_paths(puretext)) 32 | file_paths = list(set(file_paths)) 33 | if len(file_paths) > 0: 34 | datas = f"{datas}\n文件路径: {file_paths}" 35 | 36 | # 匹配内部IP地址 37 | ips = re.findall( 38 | r'(?:\d|1?\d\d|2[0-4]\d|25[0-5])(?:\.(?:\d|1?\d\d|2[0-4]\d|25[0-5])){3}', html) 39 | ips = list(set(ips)) 40 | if len(ips) > 0: 41 | datas = f"{datas}\nIP地址: {ips}" 42 | 43 | # 匹配meta标签中的信息 44 | meta_tags = [f"{tag['name']}={tag['content']}" for tag in 45 | soup.find_all('meta', attrs={'name': True, 'content': True})] 46 | meta_tags = list(set(meta_tags)) 47 | if len(meta_tags) > 0: 48 | datas = f"{datas}\nmeta标签中的信息: {meta_tags}" 49 | 50 | # 匹配表单字段 51 | form_fields = [(tag['name'], tag['value']) for tag in 52 | soup.find_all('input', attrs={'name': True, 'value': True})] 53 | form_fields = list(set(form_fields)) 54 | if len(form_fields) > 0: 55 | datas = f"{datas}\n表单字段: {form_fields}" 56 | 57 | if pure and puretext != "": 58 | datas = f"{datas}\n\n纯文本内容:\n`{puretext}`" 59 | 60 | datas = datas.strip() 61 | return datas 62 | 63 | 64 | def find_paths(text: str) -> []: 65 | # 正则表达式模式,用于匹配潜在的路径信息 66 | path_patterns = [ 67 | r"(?:[a-z]:)?(?:[\\\/][a-z0-9_. 
-]*)+", 68 | ] 69 | 70 | paths = set() 71 | for pattern in path_patterns: 72 | matches = re.findall(pattern, text, re.MULTILINE | re.IGNORECASE) 73 | for match in matches: 74 | p = match.strip() 75 | slashs = p.replace('\\', '').replace('/', '').strip() 76 | if slashs == "": 77 | continue 78 | paths.add(match.strip()) 79 | 80 | return list(paths) 81 | -------------------------------------------------------------------------------- /helpers/masscan.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/MyKings/python-masscan/ 3 | """ 4 | from config import logger 5 | import json 6 | import re 7 | import sys 8 | import shlex 9 | 10 | if sys.platform == "win32": 11 | shlex.split = lambda s, comments=False, posix=True: s 12 | import os 13 | import shutil 14 | import subprocess 15 | from pydantic.v1 import BaseModel, Field 16 | from typing import Optional 17 | 18 | 19 | class MasscanNotFound(Exception): 20 | pass 21 | 22 | 23 | class PortResult(BaseModel): 24 | ip: Optional[str] = Field(description="ip") 25 | port: Optional[str] = Field(description="port") 26 | proto: Optional[str] = Field(description="协议") 27 | 28 | 29 | class Masscan: 30 | """ 31 | Class which allows to use masscan from Python. 32 | """ 33 | 34 | def __init__(self, masscanPath=None): 35 | self._masscan_path = shutil.which("masscan", path=masscanPath) 36 | if not self._masscan_path: 37 | raise MasscanNotFound("[ERROR] Masscan not found in path") 38 | 39 | def scan(self, hosts='127.0.0.1', ports="1-65535", arguments='') -> [PortResult]: 40 | """ 41 | Scan given hosts. 42 | 43 | May raise PortScannerError exception if masscan output was not XML 44 | 45 | Test existence of the following key to know 46 | if something went wrong : ['masscan']['scaninfo']['error'] 47 | If not present, everything was ok. 48 | """ 49 | 50 | assert type(hosts) is str, 'Wrong type for [hosts], should be a string [was {0}]'.format( 51 | type(hosts)) # noqa 52 | assert type(ports) in (str, type(None)), 'Wrong type for [ports], should be a string [was {0}]'.format( 53 | type(ports)) # noqa 54 | assert type(arguments) is str, 'Wrong type for [arguments], should be a string [was {0}]'.format( 55 | type(arguments)) # noqa 56 | 57 | h_args = shlex.split(hosts) 58 | f_args = shlex.split(arguments) 59 | 60 | # Launch scan 61 | args = [self._masscan_path, '-oJ', '-'] + h_args + ['-p', ports] * (ports is not None) + f_args 62 | 63 | logger.debug("{args}", args=' '.join(args)) 64 | p = subprocess.Popen( 65 | args, 66 | bufsize=100000, 67 | stdin=subprocess.PIPE, 68 | stdout=subprocess.PIPE, 69 | stderr=subprocess.PIPE 70 | ) 71 | 72 | # wait until finished 73 | # get output 74 | masscan_output, masscan_err = p.communicate() 75 | 76 | if isinstance(masscan_output, bytes): 77 | masscan_output = masscan_output.decode('utf-8') 78 | if isinstance(masscan_err, bytes): 79 | masscan_err = masscan_err.decode('utf-8') 80 | 81 | # If there was something on stderr, there was a problem so abort... in 82 | # fact not always. As stated by AlenLPeacock : 83 | # This actually makes python-masscan mostly unusable on most real-life 84 | # networks -- a particular subnet might have dozens of scannable hosts, 85 | # but if a single one is unreachable or unroutable during the scan, 86 | # masscan.scan() returns nothing. This behavior also diverges significantly 87 | # from commandline masscan, which simply stderrs individual problems but 88 | # keeps on trucking. 
89 | 90 | masscan_err_keep_trace = [] 91 | masscan_warn_keep_trace = [] 92 | if len(masscan_err) > 0: 93 | regex_warning = re.compile('^Warning: .*', re.IGNORECASE) 94 | for line in masscan_err.split(os.linesep): 95 | if len(line) > 0: 96 | rgw = regex_warning.search(line) 97 | if rgw is not None: 98 | # sys.stderr.write(line+os.linesep) 99 | masscan_warn_keep_trace.append(line + os.linesep) 100 | else: 101 | # raise PortScannerError(masscan_err) 102 | masscan_err_keep_trace.append(masscan_err) 103 | return self._load_scan_result(masscan_output) 104 | 105 | def _load_scan_result(self, scan_result: str) -> [PortResult]: 106 | datas = [] 107 | result = json.loads(scan_result) 108 | for r in result: 109 | for port in r['ports']: 110 | datas.append(PortResult(ip=r['ip'], port=port['port'], proto=port['proto'])) 111 | return datas 112 | -------------------------------------------------------------------------------- /helpers/nmap.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import shlex 3 | from config import logger 4 | 5 | if sys.platform == "win32": 6 | shlex.split = lambda s, comments=False, posix=True: s 7 | 8 | import nmap3 9 | from pydantic.v1 import BaseModel, Field 10 | from typing import Optional 11 | 12 | 13 | class PortResult(BaseModel): 14 | protocol: Optional[str] = Field(description="协议") 15 | ip: Optional[str] = Field(description="ip") 16 | portid: Optional[str] = Field(description="port") 17 | service: Optional[str] = Field(description="服务") 18 | product: Optional[str] = Field(description="产品") 19 | version: Optional[str] = Field(description="版本") 20 | extrainfo: Optional[str] = Field(description="额外信息") 21 | 22 | 23 | class Nmap: 24 | def __init__(self, path: str = None): 25 | self.nmap = nmap3.NmapScanTechniques(path=path) 26 | 27 | def scan(self, target: str, ports: str = '-') -> [PortResult]: 28 | args = f"-p{ports} -Pn -sV -sC -A" 29 | logger.debug("nmap {target} {args}", target=target, args=args) 30 | results = self.nmap.nmap_tcp_scan(target=target, args=args) 31 | results.pop("stats") 32 | results.pop("runtime") 33 | results.pop("task_results") 34 | ports = [] 35 | for ip, host in results.items(): 36 | for port in host["ports"]: 37 | pkv = {} 38 | pkv["ip"] = ip 39 | pkv["protocol"] = port["protocol"] 40 | pkv["portid"] = port["portid"] 41 | if "service" in port: 42 | if port["service"]["name"] != "tcpwrapped": 43 | pkv["service"] = port["service"]["name"] 44 | if "product" in port["service"]: 45 | pkv["product"] = port["service"]["product"] 46 | if "version" in port["service"]: 47 | pkv["version"] = port["service"]["version"] 48 | if "extrainfo" in port["service"]: 49 | pkv["extrainfo"] = port["service"]["extrainfo"] 50 | ports.append(PortResult(**pkv)) 51 | 52 | return ports 53 | -------------------------------------------------------------------------------- /helpers/security_trails_api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | from tld import get_tld 3 | import requests 4 | from pydantic.v1 import BaseModel, Field 5 | from helpers.utils import get_ip_type 6 | from config import logger 7 | 8 | keymapping = { 9 | "a": "ip", 10 | "aaaa": "ipv6", 11 | "mx": "hostname", 12 | "ns": "nameserver", 13 | "soa": "email", 14 | "txt": "value", 15 | } 16 | 17 | 18 | class Record(BaseModel): 19 | first_seen: Optional[str] = Field(description="首次发现时间") 20 | value: Optional[str] = Field(description="值") 21 | count: Optional[int] = 
Field(description="数量") 22 | organization: Optional[str] = Field(description="归属组织") 23 | 24 | 25 | class DomainInfo(BaseModel): 26 | apex_domain: Optional[str] = Field(description="顶级域名") 27 | hostname: Optional[str] = Field(description="主机名") 28 | subdomain_count: Optional[int] = Field(description="子域名数量") 29 | a: List[Record] = Field(description="A记录") 30 | aaaa: List[Record] = Field(description="AAAA记录") 31 | mx: List[Record] = Field(description="MX记录") 32 | ns: List[Record] = Field(description="NS记录") 33 | soa: List[Record] = Field(description="SOA记录") 34 | txt: List[Record] = Field(description="TXT记录") 35 | 36 | 37 | class Domain(BaseModel): 38 | apex_domain: Optional[str] = Field(description="顶级域名") 39 | hostname: Optional[str] = Field(description="域名") 40 | subdomain: Optional[str] = Field(description="子域名") 41 | ips: List[str] = Field(description="IP地址") 42 | 43 | 44 | class History(BaseModel): 45 | first_seen: Optional[str] = Field(description="首次发现时间") 46 | last_seen: Optional[str] = Field(description="最后发现时间") 47 | hostname: Optional[str] = Field(description="域名") 48 | ip: Optional[str] = Field(description="IP地址") 49 | 50 | 51 | class SecurityTrailsApi: 52 | def __init__(self, api_key: str, base_url: str = "https://api.securitytrails.com/v1", proxies: dict = None, 53 | timeout: int = 10): 54 | self.api_key = api_key 55 | self.base_url = base_url 56 | self.timeout = timeout 57 | self._session = requests.Session() 58 | if proxies: 59 | self._session.proxies.update(proxies) 60 | self._session.trust_env = False 61 | 62 | def _request(self, method: str, path: str, data: dict = None): 63 | url = f"{self.base_url}{path}" 64 | headers = { 65 | "apikey": self.api_key, 66 | "Content-Type": "application/json" 67 | } 68 | response = self._session.request(method, url, headers=headers, timeout=self.timeout, json=data) 69 | response.raise_for_status() 70 | return response.json() 71 | 72 | def _search(self, **filters) -> List[Domain]: 73 | records = [] 74 | qd = { 75 | "filter": filters 76 | } 77 | maxpage = 100 78 | page = 1 79 | while True: 80 | upath = f"/domains/list?include_ips=true&page={page}" 81 | logger.debug("SecurityTrails: {upath} {qd}", upath=upath, qd=qd) 82 | res = self._request("POST", upath, qd) 83 | for item in res["records"]: 84 | domainobj = get_tld(item["hostname"], fail_silently=True, as_object=True, fix_protocol=True) 85 | kvs = { 86 | "hostname": item["hostname"], 87 | "apex_domain": domainobj.fld, 88 | "subdomain": domainobj.subdomain, 89 | "ips": item["ips"] 90 | } 91 | records.append(Domain(**kvs)) 92 | 93 | if page >= maxpage or page >= res["meta"]["total_pages"]: 94 | break 95 | page += 1 96 | 97 | return records 98 | 99 | def search_domain(self, domain: str) -> List[Domain]: 100 | filters = { 101 | "apex_domain": domain, 102 | } 103 | return self._search(**filters) 104 | 105 | def search_domain_fuzzy(self, keyword: str) -> List[Domain]: 106 | filters = { 107 | "keyword": keyword, 108 | } 109 | return self._search(**filters) 110 | 111 | def search_subdomain(self, subdomain: str) -> List[Domain]: 112 | filters = { 113 | "subdomain": subdomain, 114 | } 115 | return self._search(**filters) 116 | 117 | def search_ip(self, ip: str) -> List[Domain]: 118 | iptype = get_ip_type(ip) 119 | filters = { 120 | iptype: ip, 121 | } 122 | return self._search(**filters) 123 | 124 | def _history(self, hostname: str, types="a") -> List[History]: 125 | records = [] 126 | maxpage = 100 127 | page = 1 128 | while True: 129 | upath = f"/history/{hostname}/dns/{types}?page={page}" 130 | 
logger.debug("SecurityTrails: {upath}", upath=upath) 131 | res = self._request("GET", upath) 132 | for item in res["records"]: 133 | for ip in item["values"]: 134 | kvs = { 135 | "first_seen": item["first_seen"], 136 | "last_seen": item["last_seen"], 137 | "hostname": hostname, 138 | "ip": ip["ip"] 139 | } 140 | records.append(History(**kvs)) 141 | 142 | if page >= maxpage or page >= res["pages"]: 143 | break 144 | page += 1 145 | 146 | return records 147 | 148 | def get_history(self, hostname: str) -> List[History]: 149 | return self._history(hostname, "a") 150 | 151 | def get_current_domain_info(self, domain: str) -> DomainInfo: 152 | upath = f"/domains/{domain}" 153 | logger.debug("SecurityTrails: {upath}", upath=upath) 154 | res = self._request("GET", upath) 155 | data = { 156 | "apex_domain": res["apex_domain"], 157 | "hostname": res["hostname"], 158 | "subdomain_count": 0, 159 | "a": [], 160 | "aaaa": [], 161 | "mx": [], 162 | "ns": [], 163 | "soa": [], 164 | "txt": [], 165 | } 166 | 167 | if res["subdomain_count"] is not None: 168 | data["subdomain_count"] = res["subdomain_count"] 169 | 170 | currentdns = res["current_dns"] 171 | if "first_seen" in currentdns["a"]: 172 | for item in currentdns["a"]["values"]: 173 | data["a"].append(Record( 174 | first_seen=currentdns["a"]["first_seen"], 175 | value=item["ip"], 176 | count=item["ip_count"], 177 | organization=item["ip_organization"] 178 | )) 179 | if "first_seen" in currentdns["aaaa"]: 180 | for item in currentdns["aaaa"]["values"]: 181 | data["aaaa"].append(Record( 182 | first_seen=currentdns["aaaa"]["first_seen"], 183 | value=item["ipv6"], 184 | count=item["ipv6_count"], 185 | organization=item["ipv6_organization"] 186 | )) 187 | if "first_seen" in currentdns["mx"]: 188 | for item in currentdns["mx"]["values"]: 189 | data["mx"].append(Record( 190 | first_seen=currentdns["mx"]["first_seen"], 191 | value=item["hostname"], 192 | count=item["hostname_count"], 193 | organization=item["hostname_organization"] 194 | )) 195 | if "first_seen" in currentdns["ns"]: 196 | for item in currentdns["ns"]["values"]: 197 | data["ns"].append(Record( 198 | first_seen=currentdns["ns"]["first_seen"], 199 | value=item["nameserver"], 200 | count=item["nameserver_count"], 201 | organization=item["nameserver_organization"] 202 | )) 203 | if "first_seen" in currentdns["soa"]: 204 | for item in currentdns["soa"]["values"]: 205 | data["soa"].append(Record( 206 | first_seen=currentdns["soa"]["first_seen"], 207 | value=item["email"], 208 | count=item["email_count"], 209 | )) 210 | if "first_seen" in currentdns["txt"]: 211 | for item in currentdns["txt"]["values"]: 212 | data["txt"].append(Record( 213 | first_seen=currentdns["txt"]["first_seen"], 214 | value=item["value"], 215 | )) 216 | 217 | return DomainInfo(**data) 218 | -------------------------------------------------------------------------------- /helpers/utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | import tiktoken 4 | from ipaddress import ip_address, IPv4Address, IPv6Address 5 | 6 | import validators 7 | from validators.domain import _iana_tld 8 | 9 | 10 | def num_tokens_from_string(string: str, encoding_name: str) -> int: 11 | """Returns the number of tokens in a text string.""" 12 | # TODO 13 | encoding = tiktoken.encoding_for_model(encoding_name) 14 | num_tokens = len(encoding.encode(string)) 15 | return num_tokens 16 | 17 | 18 | def valid_ip_address(ip: str) -> bool: 19 | try: 20 | return type(ip_address(ip)) is IPv4Address or 
type(ip_address(ip)) is IPv6Address
21 |     except ValueError:
22 |         return False
23 | 
24 | 
25 | def get_ip_type(IP: str) -> str:
26 |     try:
27 |         if type(ip_address(IP)) is IPv4Address:
28 |             return "ipv4"
29 |         else:
30 |             return "ipv6"
31 |     except ValueError:
32 |         return "invalid"
33 | 
34 | 
35 | def is_private_ip(ip: str) -> bool:
36 |     try:
37 |         ip_obj = ip_address(ip)
38 |         return ip_obj.is_private
39 |     except ValueError:
40 |         return False
41 | 
42 | 
43 | def is_domain(
44 |         value: str, /, *, consider_tld: bool = False, rfc_1034: bool = False, rfc_2782: bool = False
45 | ):
46 |     """Return whether or not given value is a valid domain.
47 | 
48 |     Examples:
49 |         >>> domain('example.com')
50 |         # Output: True
51 |         >>> domain('example.com/')
52 |         # Output: ValidationError(func=domain, ...)
53 |         >>> # Supports IDN domains as well::
54 |         >>> domain('xn----gtbspbbmkef.xn--p1ai')
55 |         # Output: True
56 | 
57 |     Args:
58 |         value:
59 |             Domain string to validate.
60 |         consider_tld:
61 |             Restrict domain to TLDs allowed by IANA.
62 |         rfc_1034:
63 |             Allows optional trailing dot in the domain name.
64 |             Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
65 |         rfc_2782:
66 |             Domain name is of type service record.
67 |             Allows optional underscores in the domain name.
68 |             Ref: [RFC 2782](https://www.rfc-editor.org/rfc/rfc2782).
69 | 
70 | 
71 |     Returns:
72 |         (Literal[True]): If `value` is a valid domain name.
73 |         (ValidationError): If `value` is an invalid domain name.
74 | 
75 |     Raises:
76 |         (UnicodeError): If `value` cannot be encoded into `idna` or decoded into `utf-8`.
77 |     """
78 |     sr = validators.domain(value, consider_tld=consider_tld, rfc_1034=rfc_1034, rfc_2782=rfc_2782)
79 |     if sr is True:
80 |         return True
81 | 
82 |     if not value:
83 |         return False
84 | 
85 |     if consider_tld and value.rstrip(".").rsplit(".", 1)[-1].upper() not in _iana_tld():
86 |         return False
87 | 
88 |     try:
89 | 
90 |         service_record = r"_" if rfc_2782 else ""
91 |         trailing_dot = r"\.?$" if rfc_1034 else r"$"
92 | 
93 |         return re.match(
94 |             # First character of the domain
95 |             rf"^(?:[a-z0-9{service_record}]"
96 |             # Sub-domain
97 |             + rf"(?:[a-z0-9-{service_record}]{{0,61}}"
98 |             # Hostname
99 |             + rf"[a-z0-9{service_record}])?\.)"
100 |             # First 61 characters of the gTLD
101 |             + r"+[a-z0-9][a-z0-9-_]{0,61}"
102 |             # Last character of the gTLD
103 |             + rf"[a-z]{trailing_dot}",
104 |             value.encode("idna").decode("utf-8"),
105 |             re.IGNORECASE,
106 |         )
107 |     except UnicodeError as err:
108 |         raise UnicodeError(f"Unable to encode/decode {value}") from err
--------------------------------------------------------------------------------
/persistence/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/persistence/__init__.py
--------------------------------------------------------------------------------
/persistence/database.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import create_engine, func, text
 2 | import threading
 3 | 
 4 | from sqlalchemy.orm import sessionmaker, scoped_session
 5 | 
 6 | from persistence.orm import Base, Cdn
 7 | 
 8 | 
 9 | class DB(object):
10 |     _instance_lock = threading.Lock()
11 | 
12 |     def __init__(self, user: str, password: str, host: str, port: int, dbname: str, echo: bool = False):
13 |         self._engine = create_engine(
14 |             url=f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{dbname}",
15 |             echo=echo,  # echo 设为 True 会打印出实际执行的 sql,调试的时候更方便
16 |             
future=True, # 使用 SQLAlchemy 2.0 API,向后兼容 17 | pool_size=5, # 连接池的大小默认为 5 个,设置为 0 时表示连接无限制 18 | pool_recycle=3600, # 设置时间以限制数据库自动断开 19 | ) 20 | with self._engine.connect() as conn: 21 | conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) 22 | conn.commit() 23 | Base.metadata.create_all(self._engine) 24 | self.DBSession = scoped_session(sessionmaker(bind=self._engine)) 25 | with self.DBSession() as session: 26 | cdncount = session.query(func.count(Cdn.id)).scalar() 27 | if cdncount == 0: 28 | import yaml 29 | from ipaddress import ip_network 30 | with open("cdn_servers.yaml", encoding='utf-8') as yamlstream: 31 | try: 32 | yamldata = yaml.safe_load(yamlstream) 33 | for cdn, vals in yamldata["cidr"].items(): 34 | for cidr in vals: 35 | session.add(Cdn(organization=cdn, cidr=ip_network(cidr))) 36 | for cdn, vals in yamldata["cname"].items(): 37 | for cname in vals: 38 | session.add(Cdn(organization=cdn, cname=cname)) 39 | session.commit() 40 | except Exception as e: 41 | raise e 42 | 43 | def __new__(cls, *args, **kwargs): 44 | if not hasattr(DB, "_instance"): 45 | with DB._instance_lock: 46 | if not hasattr(DB, "_instance"): 47 | DB._instance = object.__new__(cls) 48 | return DB._instance 49 | -------------------------------------------------------------------------------- /persistence/vectordb.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Any, Optional, Union 3 | 4 | from embedchain.config import AppConfig 5 | from embedchain.config.vectordb.base import BaseVectorDbConfig 6 | from embedchain.embedder.base import BaseEmbedder 7 | from embedchain.helpers.json_serializable import register_deserializable 8 | from embedchain.vectordb.base import BaseVectorDB 9 | from sqlalchemy import Table, MetaData, Column, Text, String, inspect, func, and_ 10 | from sqlalchemy.dialects.postgresql import JSONB 11 | from sqlalchemy.ext.hybrid import hybrid_property 12 | from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, aliased 13 | from pgvector.sqlalchemy import Vector 14 | 15 | from embedchain import App 16 | from config import logger 17 | 18 | from persistence.database import DB 19 | 20 | 21 | @register_deserializable 22 | class PostgresqlDBConfig(BaseVectorDbConfig): 23 | def __init__( 24 | self, 25 | collection_name: Optional[str] = None, 26 | dir: Optional[str] = None, 27 | ): 28 | """ 29 | Initializes a configuration class instance for an Postgresql client. 
30 | """ 31 | super().__init__(collection_name=collection_name, dir=dir) 32 | 33 | 34 | @register_deserializable 35 | class PgvectorDB(BaseVectorDB): 36 | """ 37 | PostgresqlDB as vector database 38 | """ 39 | 40 | def __init__( 41 | self, 42 | db: DB, 43 | config: PostgresqlDBConfig, 44 | ): 45 | 46 | self.db = db 47 | self.config = config 48 | self.cls_KnowledgeVectors = None 49 | 50 | super().__init__(config) 51 | 52 | def _initialize(self): 53 | pass 54 | 55 | def _get_or_create_db(self): 56 | """Called during initialization""" 57 | return self.db 58 | 59 | def _get_or_create_collection(self): 60 | try: 61 | with self.db.DBSession() as session: 62 | engine = session.get_bind() 63 | if not inspect(engine).has_table(self.config.collection_name): 64 | metadata = MetaData() 65 | metadata.reflect(bind=engine) 66 | table = Table(self.config.collection_name, metadata, 67 | Column('id', String, primary_key=True, nullable=False), 68 | Column('vector', Vector(self.embedder.vector_dimension)), 69 | Column('doc', Text), 70 | Column('meta_data', JSONB) 71 | ) 72 | metadata.create_all(engine, [table]) 73 | session.commit() 74 | except Exception as e: 75 | logger.error(e) 76 | 77 | if self.cls_KnowledgeVectors is None: 78 | class KnowledgeVectors(Base): 79 | __tablename__: str = self.config.collection_name 80 | 81 | id: Mapped[str] = mapped_column(String(), primary_key=True) 82 | vector: Mapped[[float]] = mapped_column(Vector()) 83 | doc: Mapped[str] = mapped_column(Text()) 84 | meta_data: Mapped[object] = mapped_column(JSONB) 85 | 86 | @hybrid_property 87 | def distance(self): 88 | # 仅用于标识,在实际查询中不会用到 89 | pass 90 | 91 | @distance.expression 92 | def distance(cls, query_vector): 93 | return cls.vector.cosine_distance(query_vector) 94 | 95 | KnowledgeVectors.__name__ = f"KnowledgeVectors_{self.config.collection_name}" 96 | self.cls_KnowledgeVectors = KnowledgeVectors 97 | return self.db 98 | 99 | def get(self, ids: Optional[list[str]] = None, where: Optional[dict[str, any]] = None, limit: Optional[int] = None): 100 | result = {"ids": [], "metadatas": []} 101 | try: 102 | with self.db.DBSession() as session: 103 | kncls = self.cls_KnowledgeVectors 104 | 105 | clauses = [] 106 | if ids: 107 | clauses.append(kncls.id.in_(ids)) 108 | if where: 109 | clauses = clauses + [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in 110 | where.items()] 111 | 112 | query = session.query(kncls).filter(and_(*clauses)) 113 | if limit: 114 | query = query.limit(limit) 115 | 116 | datas = query.all() 117 | for data in datas: 118 | result["ids"].append(data.id) 119 | result["metadatas"].append(data.meta_data) 120 | except Exception as e: 121 | logger.error(e) 122 | return result 123 | 124 | def add(self, documents: list[str], metadatas: list[object], ids: list[str]) -> Any: 125 | to_ingest = list(zip(documents, metadatas, ids)) 126 | 127 | try: 128 | with self.db.DBSession() as session: 129 | count = 0 130 | for doc, meta, id in to_ingest: 131 | kvobj = self.cls_KnowledgeVectors() 132 | kvobj.id = id 133 | kvobj.doc = doc 134 | kvobj.meta_data = meta 135 | kvobj.vector = self.embedder.embedding_fn([doc])[0] 136 | session.add(kvobj) 137 | count += 1 138 | if count >= 10: 139 | session.commit() 140 | count = 0 141 | session.commit() 142 | except Exception as e: 143 | logger.error(e) 144 | 145 | def query( 146 | self, 147 | input_query: str, 148 | n_results: int, 149 | where: dict[str, any], 150 | citations: bool = False, 151 | **kwargs: Optional[dict[str, Any]], 152 | ) -> Union[list[tuple[str, 
dict]], list[str]]: 153 | input_query_vector = self.embedder.embedding_fn([input_query]) 154 | query_vector = input_query_vector[0] 155 | 156 | result = [] 157 | try: 158 | with self.db.DBSession() as session: 159 | kncls = self.cls_KnowledgeVectors 160 | 161 | clauses = [] 162 | if where: 163 | clauses = clauses + [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in 164 | where.items()] 165 | 166 | subquery = session.query( 167 | kncls.id, 168 | kncls.vector.cosine_distance(query_vector).label('distance') 169 | ).filter( 170 | and_(*clauses) 171 | ).subquery() 172 | 173 | alias_knowledge_vectors = aliased(kncls, subquery) 174 | 175 | query = session.query( 176 | kncls, 177 | subquery.c.distance 178 | ).join( 179 | subquery, kncls.id == subquery.c.id 180 | ).order_by( 181 | subquery.c.distance.asc() 182 | ).limit(n_results) 183 | 184 | datas = query.all() 185 | for data, distance in datas: 186 | if citations: 187 | metadata = data.meta_data 188 | metadata['score'] = distance 189 | result.append((data.doc, metadata)) 190 | else: 191 | result.append(data.doc) 192 | except Exception as e: 193 | logger.error(e) 194 | return result 195 | 196 | def set_collection_name(self, name: str): 197 | """ 198 | Set the name of the collection. A collection is an isolated space for vectors. 199 | 200 | :param name: Name of the collection. 201 | :type name: str 202 | """ 203 | if not isinstance(name, str): 204 | raise TypeError("Collection name must be a string") 205 | self.config.collection_name = '{prefix}_{suffix}'.format(prefix=name, suffix=self.embedder.config.model) 206 | self._get_or_create_collection() 207 | 208 | def count(self) -> int: 209 | count = 0 210 | try: 211 | with self.db.DBSession() as session: 212 | count = session.query(func.count(self.cls_KnowledgeVectors.id)).scalar() 213 | except Exception as e: 214 | logger.error(e) 215 | return count 216 | 217 | def delete(self, where): 218 | try: 219 | with self.db.DBSession() as session: 220 | kncls = self.cls_KnowledgeVectors 221 | clauses = [func.jsonb_extract_path_text(kncls.meta_data, key) == value for key, value in where.items()] 222 | session.query(kncls).filter(and_(*clauses)).delete() 223 | session.commit() 224 | except Exception as e: 225 | logger.error(e) 226 | 227 | def reset(self): 228 | try: 229 | with self.db.DBSession() as session: 230 | session.query(self.cls_KnowledgeVectors).delete() 231 | session.commit() 232 | except Exception as e: 233 | logger.error(e) 234 | 235 | 236 | class Base(DeclarativeBase): 237 | pass 238 | 239 | 240 | def NewEmbedChain(db: DB, embder: BaseEmbedder, collection_name="knowledge_vectors") -> App: 241 | # embedchain元数据存储在Postgresql数据库中 242 | os.environ['EMBEDCHAIN_DB_URI'] = db.DBSession.bind.url.render_as_string(False) 243 | 244 | cfg = PostgresqlDBConfig(collection_name=collection_name) 245 | pdb = PgvectorDB(db, config=cfg) 246 | 247 | app_config = AppConfig(collect_metrics=False) 248 | 249 | app = App( 250 | config=app_config, 251 | llm=None, 252 | db=pdb, 253 | embedding_model=embder, 254 | config_data=None, 255 | auto_deploy=False, 256 | chunker=None, 257 | cache_config=None, 258 | memory_config=None, 259 | ) 260 | return app 261 | -------------------------------------------------------------------------------- /rag/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/rag/__init__.py 
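
With the vector store in place, the RAG layer that follows is a thin wrapper around an embedchain `App`. Below is a minimal wiring sketch of how these pieces fit together. It is not part of the repository: the Postgres credentials and the choice of `OpenAIEmbedder` are assumptions for illustration, and any `BaseEmbedder` implementation should work the same way.

```python
from embedchain.embedder.openai import OpenAIEmbedder  # assumed embedder; needs OPENAI_API_KEY
from embedchain.models.data_type import DataType

from persistence.database import DB
from persistence.vectordb import NewEmbedChain

# Hypothetical connection parameters for illustration only.
# DB() creates the pgvector extension and the ORM tables on first connect.
db = DB(user="postgres", password="postgres", host="127.0.0.1", port=5432, dbname="bladerazor")

app = NewEmbedChain(db, OpenAIEmbedder(), collection_name="knowledge_vectors")
app.add("https://example.com/some-advisory.html", data_type=DataType.WEB_PAGE)  # ingest one page
print(app.search("thinkphp rce", num_documents=3))  # top-3 nearest chunks from pgvector
```

The `cmd/knowledge_import.py` script in the project tree presumably performs this kind of wiring when bulk-importing a knowledge base.
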
--------------------------------------------------------------------------------
/rag/rag.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | from typing import Optional, List
 3 | 
 4 | from embedchain.embedder.base import BaseEmbedder
 5 | from embedchain.loaders.directory_loader import DirectoryLoader
 6 | from embedchain.models.data_type import DataType
 7 | from pydantic.v1 import BaseModel, Field
 8 | # 注意:Field 必须与 BaseModel 同为 pydantic.v1,混用 v2 的 Field 会被当作普通默认值
 9 | 
10 | from persistence.database import DB
11 | from persistence.vectordb import NewEmbedChain
12 | 
13 | 
14 | class Source(BaseModel):
15 |     content: Optional[str] = Field(description="内容")
16 |     data_type: Optional[str] = Field(description="类型")
17 |     src: Optional[str] = Field(description="引用来源")
18 | 
19 |     def __str__(self):
20 |         if self.data_type == DataType.WEB_PAGE:
21 |             return f"来源: {self.src}\n内容:\n{self.content}"
22 |         return self.content
23 | 
24 |     def __repr__(self):
25 |         return self.__str__()
26 | 
27 | 
28 | class Answer(BaseModel):
29 |     answer: Optional[str] = Field(description="答案")
30 |     sources: List[Source] = Field(description="来源")
31 | 
32 |     def __str__(self):
33 |         srcs = "\n\n====================\n\n".join([str(src) for src in self.sources])
34 |         return f"答案: {self.answer}\n\n详情:\n{srcs}"
35 | 
36 | 
37 | class RAG(object):
38 |     _instance_lock = threading.Lock()
39 | 
40 |     def __init__(self, db: DB, embder: BaseEmbedder, collection_name="knowledge_vectors"):
41 |         self._embedchain_app = NewEmbedChain(db=db, embder=embder, collection_name=collection_name)
42 | 
43 |     def __new__(cls, *args, **kwargs):
44 |         if not hasattr(RAG, "_instance"):
45 |             with RAG._instance_lock:
46 |                 if not hasattr(RAG, "_instance"):
47 |                     RAG._instance = object.__new__(cls)
48 |         return RAG._instance
49 | 
50 |     def add_knowledge_url(self, url: str):
51 |         self._embedchain_app.add(url, data_type=DataType.WEB_PAGE)
52 | 
53 |     def add_knowledge_folder(self, folder_path: str):
54 |         lconfig = {
55 |             "recursive": True,
56 |             "extensions": [".txt", ".md", ".readme", ".README"]
57 |         }
58 |         loader = DirectoryLoader(config=lconfig)
59 | 
60 |         self._embedchain_app.add(folder_path, data_type=DataType.DIRECTORY, loader=loader)
61 | 
62 |     def search(self, query: str, num_documents=3):
63 |         return self._embedchain_app.search(query, num_documents=num_documents)
64 | 
65 |     def query(self, query: str) -> Answer:
66 |         answer, sources = self._embedchain_app.query(query, citations=True)
67 | 
68 |         srclist = []
69 |         for content, metadata in sources:
70 |             src = Source(content=content, data_type=metadata['data_type'], src=metadata['url'])
71 |             srclist.append(src)
72 | 
73 |         return Answer(answer=answer, sources=srclist)
74 | 
--------------------------------------------------------------------------------
/rag/rag_search_tool.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from textwrap import dedent
 3 | from typing import Type, Any
 4 | 
 5 | from crewai import Task, Agent, Crew
 6 | from crewai_tools import BaseTool
 7 | from pydantic.v1 import BaseModel, Field
 8 | 
 9 | from helpers.html_information_leak_analyze import find_paths
10 | from helpers.utils import is_domain
11 | from rag.rag import RAG
12 | from config import logger
13 | 
14 | 
15 | class RagSearchToolSchema(BaseModel):
16 |     """RagSearchTool 的查询参数"""
17 |     search_query: str = Field(..., description="搜索的内容,问题应当具有代表性,使用实体关键词,避免使用自然语言")
18 | 
19 | 
20 | class RagSearchTool(BaseTool):
21 |     name: str = "RAG知识搜索"
22 |     description: str = "搜索本地知识库。对于特定ip地址或者特定域名,不可以使用该工具。"
23 | args_schema: Type[BaseModel] = RagSearchToolSchema 24 | masscan_path: str | None = None 25 | rag: RAG | None = None 26 | verbose: bool = False 27 | llm: Any | None = None 28 | 29 | class Config: 30 | arbitrary_types_allowed = True 31 | 32 | def __init__(self, rag: RAG, llm, verbose=False): 33 | super().__init__() 34 | self.rag = rag 35 | self.verbose = verbose 36 | self.llm = llm 37 | logger.info("初始化工具 RAG知识搜索") 38 | 39 | def _run( 40 | self, 41 | **kwargs: Any, 42 | ) -> Any: 43 | search_query = kwargs.pop('search_query') 44 | 45 | # TODO 46 | ips = re.findall( 47 | r'(?:\d|1?\d\d|2[0-4]\d|25[0-5])(?:\.(?:\d|1?\d\d|2[0-4]\d|25[0-5])){3}', search_query) 48 | if len(ips) > 0: 49 | return "不可以使用该工具搜索特定ip地址" 50 | 51 | paths = find_paths(search_query) 52 | if len(paths) > 0: 53 | return "不可以使用该工具搜索特定路径" 54 | 55 | if is_domain(search_query): 56 | return "不可以使用该工具搜索特定域名" 57 | 58 | try: 59 | answer = self.rag.query(search_query) 60 | out = self.review(search_query, str(answer)) 61 | if "PASS" in out: 62 | return answer 63 | elif "FAIL" in out: 64 | return "查询结果不符合要求, 请重新查询" 65 | else: 66 | return out 67 | except Exception as e: 68 | logger.error("知识库搜索失败: {}", e) 69 | return f"查询失败: {e}" 70 | 71 | def review(self, query: str, answer: str) -> str: 72 | agent = Agent( 73 | role='搜索结果审核专家', 74 | goal='确保RAG工具的搜索输出与查询意图完全一致,优化搜索效率和结果的相关性。', 75 | backstory=dedent( 76 | """ 77 | 你是一名专注于搜索技术和结果验证的专家,隶属于技术保障团队。 78 | 你的任务是对RAG等智能搜索工具的输出进行精确的审核,确保每个搜索结果都严格符合预设的查询意图和技术需求。 79 | 80 | 你具备深厚的技术背景,特别擅长: 81 | - 分析和解析复杂的搜索结果。 82 | - 识别与查询意图不符的搜索输出。 83 | - 使用逻辑和技术知识来评估搜索结果的技术相关性。 84 | - 提出实用的改进建议,帮助改进搜索工具的算法和查询策略。 85 | 86 | 你的目标是通过严格的审核流程,提升搜索工具的准确性和效率,从而支持团队的技术决策和安全操作。 87 | """ 88 | ), 89 | verbose=self.verbose, 90 | llm=self.llm, 91 | allow_delegation=True, 92 | max_rpm=300, 93 | cache=False, 94 | ) 95 | 96 | task = Task( 97 | agent=agent, 98 | description=dedent( 99 | f""" 100 | 审核RAG搜索工具输出的结果,确保搜索结果与查询意图保持一致性。 101 | 在进行技术栈、漏洞和配置信息的搜索时,需要特别注意结果的相关性。 102 | 103 | 审核流程如下: 104 | 1. 确认每一项搜索结果是否直接相关于查询的技术栈或问题。例如,如果搜索关键词为Java相关技术,结果应严格限于Java环境,不应包含如.NET或其他无关技术的信息。 105 | 2. 分析搜索结果中的误报,标识出那些明显不符合查询条件的内容。 106 | 3. 
提出改进搜索策略的建议,如调整关键词、使用更精确的查询语句等,以减少未来的误报。 107 | 108 | 这个任务的目的是提高搜索效率和准确性,确保团队能够获得最相关和可行的信息。 109 | 110 | 请根据以下搜索结果,回答是否通过审核: 111 | 查询内容: 112 | {query} 113 | 114 | ------------------------ 115 | 116 | 搜索结果: 117 | {answer} 118 | """), 119 | expected_output=dedent( 120 | """ 121 | 最终答案应为以下三种类型之一,不要编造其他内容: 122 | 123 | 如果审核通过: 124 | 只需要回答字符串`PASS` 125 | 如果不通过,但是存在合理回答: 126 | 移除不合理的回答,然后提供合理的回答 127 | 如果不通过,且没有合理回答: 128 | 只需要回答字符串`FAIL` 129 | """), 130 | ) 131 | 132 | return Crew( 133 | agents=[agent], 134 | tasks=[task], 135 | verbose=self.verbose, 136 | share_crew=False, 137 | cache=True 138 | ).kickoff() 139 | -------------------------------------------------------------------------------- /recon/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 用于情报搜集的工具包。 3 | 4 | - `active` 子包用于主动侦察。 5 | - `passive` 子包用于被动侦察。 6 | 7 | """ -------------------------------------------------------------------------------- /recon/active/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 主动信息搜集工具 3 | """ -------------------------------------------------------------------------------- /recon/active/masscan_search_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from ipaddress import ip_address 3 | from typing import Type, Any 4 | from pydantic.v1 import BaseModel, Field 5 | from crewai_tools.tools.base_tool import BaseTool 6 | from sqlalchemy import exc 7 | 8 | from helpers.masscan import Masscan 9 | from helpers.utils import valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Port, DuplicateException 12 | from config import logger 13 | 14 | 15 | class MasscanSearchToolSchema(BaseModel): 16 | """MasscanSearchToolSchema 的查询参数""" 17 | ip: str = Field(..., description="ip地址") 18 | 19 | 20 | class MasscanSearchTool(BaseTool): 21 | name: str = "Masscan" 22 | description: str = "使用Masscan扫描ip地址,扫描全端口,仅用于发现开放端口。扫描速度较快,但结果可能会不准确。" 23 | args_schema: Type[BaseModel] = MasscanSearchToolSchema 24 | masscan_path: str | None = None 25 | db: DB | None = None 26 | task_id: int | None = None 27 | 28 | class Config: 29 | arbitrary_types_allowed = True 30 | 31 | def __init__(self, db: DB, task_id: int, masscan_path: str = None): 32 | super().__init__() 33 | self.db = db 34 | self.task_id = task_id 35 | self.masscan_path = masscan_path 36 | logger.info("初始化工具 Masscan") 37 | 38 | def _run( 39 | self, 40 | **kwargs: Any, 41 | ) -> Any: 42 | masscan = Masscan(self.masscan_path) 43 | ip = kwargs.pop('ip') 44 | if valid_ip_address(ip) is False: 45 | return "IP地址格式错误" 46 | now = datetime.now() 47 | results = [] 48 | openports = [] 49 | try: 50 | results = masscan.scan(hosts=ip) 51 | except Exception as e: 52 | logger.error("masscan扫描失败: {}", e) 53 | return f"扫描失败: {e}" 54 | if len(results) == 0: 55 | return "未找到任何开放端口" 56 | try: 57 | with self.db.DBSession() as session: 58 | for port in results: 59 | openports.append(port.port) 60 | pdb = Port() 61 | pdb.target = ip 62 | pdb.task_id = self.task_id 63 | pdb.ip = ip_address(port.ip).exploded 64 | pdb.protocol = port.proto 65 | pdb.port = port.port 66 | pdb.checked_time = now 67 | pdb.is_passive = False 68 | pdb.source = self.name 69 | try: 70 | session.add(pdb) 71 | session.commit() 72 | except DuplicateException: 73 | session.rollback() 74 | except Exception: 75 | raise 76 | 77 | except exc.SQLAlchemyError as e: 78 | logger.error("数据库错误: {}", e) 79 | return "数据库错误" 80 | 
except Exception as e: 81 | logger.error("其他错误: {}", e) 82 | return f"其他错误: {e}" 83 | 84 | return f"IP: {ip} 共发现{len(openports)}个开放端口\n{','.join(openports)}" 85 | -------------------------------------------------------------------------------- /recon/active/nmap_search_tool.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from ipaddress import ip_address 3 | from typing import Type, Any 4 | from pydantic.v1 import BaseModel, Field 5 | from crewai_tools.tools.base_tool import BaseTool 6 | from sqlalchemy import exc 7 | 8 | from helpers.nmap import Nmap 9 | from helpers.utils import valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Port, DuplicateException 12 | from config import logger 13 | 14 | 15 | class NmapSearchToolSchema(BaseModel): 16 | """NmapSearchToolSchema 的查询参数""" 17 | ip: str = Field(..., description="ip地址") 18 | ports: str = Field("-", description="端口,','分割") 19 | 20 | 21 | class NmapSearchTool(BaseTool): 22 | name: str = "Nmap" 23 | description: str = "使用Nmap扫描ip地址,发现开放端口的服务信息。扫描较慢,但结果精准。" 24 | args_schema: Type[BaseModel] = NmapSearchToolSchema 25 | nmap_path: str | None = None 26 | db: DB | None = None 27 | task_id: int | None = None 28 | 29 | class Config: 30 | arbitrary_types_allowed = True 31 | 32 | def __init__(self, db: DB, task_id: int, nmap_path: str = None): 33 | super().__init__() 34 | self.db = db 35 | self.task_id = task_id 36 | self.nmap_path = nmap_path 37 | logger.info("初始化工具 Nmap") 38 | 39 | def _run( 40 | self, 41 | **kwargs: Any, 42 | ) -> Any: 43 | nmap = Nmap(self.nmap_path) 44 | ip = kwargs.pop('ip') 45 | if valid_ip_address(ip) is False: 46 | return "IP地址格式错误" 47 | 48 | ports = kwargs.pop('ports') 49 | 50 | now = datetime.now() 51 | results = [] 52 | openports = [] 53 | try: 54 | results = nmap.scan(ip, ports) 55 | except Exception as e: 56 | logger.error("nmap扫描失败: {}", e) 57 | return f"扫描失败: {e}" 58 | if len(results) == 0: 59 | return "未找到任何开放端口" 60 | try: 61 | with self.db.DBSession() as session: 62 | for port in results: 63 | openports.append(f"{port.portid} {port.service}") 64 | pdb = Port() 65 | pdb.target = ip 66 | pdb.task_id = self.task_id 67 | pdb.ip = ip_address(port.ip).exploded 68 | pdb.protocol = port.protocol 69 | pdb.port = port.portid 70 | pdb.service = port.service 71 | pdb.product = port.product 72 | pdb.version = port.version 73 | pdb.checked_time = now 74 | pdb.is_passive = False 75 | if port.extrainfo is not None: 76 | pdb.extra_info = { 77 | "info": port.extrainfo, 78 | } 79 | pdb.source = self.name 80 | try: 81 | session.add(pdb) 82 | session.commit() 83 | except DuplicateException: 84 | session.rollback() 85 | except Exception: 86 | raise 87 | 88 | except exc.SQLAlchemyError as e: 89 | logger.error("数据库错误: {}", e) 90 | return "数据库错误" 91 | except Exception as e: 92 | logger.error("其他错误: {}", e) 93 | return f"其他错误: {e}" 94 | 95 | return f"IP: {ip} 共发现{len(openports)}个开放端口\n{','.join(openports)}" 96 | -------------------------------------------------------------------------------- /recon/cyber_assets_researcher.py: -------------------------------------------------------------------------------- 1 | import os 2 | from ipaddress import ip_address 3 | 4 | from crewai import Agent, Task, Crew 5 | from crewai_tools import BaseTool 6 | from textwrap import dedent 7 | 8 | from helpers.utils import is_domain 9 | from persistence.database import DB 10 | from persistence.orm import ip_is_cdn 11 | from 
recon.active.masscan_search_tool import MasscanSearchTool 12 | from recon.active.nmap_search_tool import NmapSearchTool 13 | from recon.passive.alienvault_search_tool import AlienVaultSearchTool 14 | from recon.passive.fofa_search_tool import FofaSearchTool 15 | from recon.passive.security_trails_search_tool import SecurityTrailsSearchTool 16 | import validators 17 | from config import logger 18 | 19 | 20 | class CyberAssetsResearchers: 21 | """ 22 | 网络资产研究员 23 | 主要用于网络资产的被动侦察和主动扫描,以扩大攻击面 24 | """ 25 | 26 | def __init__(self, db: DB, llm=None, masscan_path=None, nmap_path=None, verbose=False, 27 | cdn_autonomous_judgment=False, 28 | cdn_apexdomain_threshold=50, 29 | cdn_subdomain_threshold=3): 30 | self.llm = llm 31 | self.db = db 32 | self.masscan_path = masscan_path 33 | self.nmap_path = nmap_path 34 | self.verbose = verbose 35 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 36 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 37 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 38 | 39 | def agent_cyber_asset_intelligence_scout(self, llm=None, tools: [BaseTool] = []) -> Agent: 40 | logger.info("初始化代理 网络资产情报侦察员") 41 | agent = Agent( 42 | role='网络资产情报侦察员', 43 | goal='通过被动侦察工具获取相关网络资产信息', 44 | backstory=dedent( 45 | """ 46 | 你是一名经验丰富的网络资产情报侦察员,主要任务是发现网络资产以扩大攻击面。 47 | 你精通互联网协议和各种服务器应用程序,特别是对DNS、HTTP、HTTPS、SMTP、POP3、IMAP等协议有深入的了解。 48 | 你擅长使用网络工具搜索目标的相关资产,专业在于识别和过滤搜索结果中与目标相关或不相关的网络资产。 49 | 你主要使用不同的被动信息收集工具,以避免对目标产生任何影响,如避免生成访问日志等。 50 | 你的工作至关重要,你的工作成果将直接影响后续工作的开展。 51 | """ 52 | ), 53 | tools=tools, 54 | verbose=self.verbose, 55 | allow_delegation=True, 56 | max_rpm=300, 57 | # max_iter=1, 58 | llm=llm, 59 | cache=False, 60 | ) 61 | if llm is not None: 62 | agent.llm = llm 63 | return agent 64 | 65 | def task_cyber_assets_recon(self, agent: Agent, target: str) -> Task: 66 | logger.info("初始化任务 资产侦察") 67 | return Task( 68 | description=dedent( 69 | f""" 70 | 使用多种网络资产搜索引擎搜索目标的相关资产信息。 71 | 尽可能多的获取目标相关资产信息,越多资产这对于后续工作的开展越有利。 72 | 最终结果会被存入数据库以便后续使用。对于同一个目标,每个工具最多只能调用一次。 73 | 根据目标类型选择合适的工具进行搜索。 74 | 需要注意以下几点: 75 | - 如果目标为ipv4或ipv6地址,则必须为公网地址,否则不进行扫描。 76 | 77 | 如果目标为CDN,则跳过目标。 78 | 79 | 目标: `{target}` 80 | """ 81 | ), 82 | expected_output=dedent( 83 | """ 84 | 最终答案是本次搜索结果资产数量,具体的结果已存储在数据库中。不要编造其他额外内容。 85 | """), 86 | agent=agent, 87 | ) 88 | 89 | def agent_port_fast_scanner(self, llm=None, tools: [BaseTool] = []) -> Agent: 90 | logger.info("初始化代理 端口快速扫描员") 91 | agent = Agent( 92 | role='端口快速扫描员', 93 | goal='使用端口扫描工具对目标IP进行快速扫描以获取开放端口', 94 | backstory=dedent( 95 | """ 96 | 你是一名经验丰富的端口扫描员。你只扫描ip地址。 97 | 你擅长使用各种端口扫描工具收集目标的开放端口。 98 | """ 99 | ), 100 | tools=tools, 101 | verbose=self.verbose, 102 | allow_delegation=True, 103 | max_rpm=300, 104 | # max_iter=1, 105 | llm=llm, 106 | cache=False, 107 | ) 108 | if llm is not None: 109 | agent.llm = llm 110 | return agent 111 | 112 | def agent_port_precise_scanner(self, llm=None, tools: [BaseTool] = []) -> Agent: 113 | logger.info("初始化代理 端口精准扫描员") 114 | agent = Agent( 115 | role='端口精准扫描员', 116 | goal='使用端口扫描工具对目标IP的指定端口进行精准扫描以获取端口提供的服务信息', 117 | backstory=dedent( 118 | """ 119 | 你是一名经验丰富的端口扫描员。你只扫描ip的指定端口。 120 | 你擅长使用各种端口扫描工具对目标的指定端口进行探测,收集目标开放端口的服务详情。 121 | 你精通互联网协议和各种服务器应用程序,特别是对DNS、HTTP、HTTPS、SMTP、POP3、IMAP等协议有深入的了解。 122 | """ 123 | ), 124 | tools=tools, 125 | verbose=self.verbose, 126 | allow_delegation=True, 127 | max_rpm=300, 128 | # max_iter=1, 129 | llm=llm, 130 | cache=False, 131 | ) 132 | if llm is not None: 133 | agent.llm = llm 134 | return agent 135 | 136 | def task_port_fast_scan(self, agent: Agent, target: str) -> Task: 
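        # 补充说明(示意):快速扫描与精准扫描构成两阶段流水线,
        # 先由 Masscan 全端口粗扫得到 "ip + 开放端口列表",该自然语言输出再被原样
        # 交给精准扫描任务,由 Nmap 识别各端口的服务。串联方式见下文 portScanCrew:
        #
        #   portout = self._reconPortFastScanCrew(task_id, ip).kickoff()
        #   portout = self._reconPortPreciseScanCrew(task_id, portout).kickoff()
        #
        # 两个 Crew 之间仅靠文本衔接,因此本任务 expected_output 中
        # "端口以 `,` 分割、不要空格" 的格式约束是下游能否正确解析的关键。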
137 | logger.info("初始化任务 快速端口扫描") 138 | return Task( 139 | description=dedent( 140 | f""" 141 | 使用多种快速端口扫描工具,对ip进行端口扫描,以尽快获取目标开放端口信息。 142 | 尽可能多的获取目标开放端口,开放端口的数量对后续工作的开展至关重要。 143 | 144 | 目标: `{target}` 145 | """ 146 | ), 147 | expected_output=dedent( 148 | """ 149 | 最终结果为ip地址以及开放的端口数量及具体的端口列表。端口以`,`分割,不要空格。不要编造其他额外内容。 150 | """), 151 | agent=agent, 152 | ) 153 | 154 | def task_port_precise_scan(self, agent: Agent, target: str) -> Task: 155 | logger.info("初始化任务 精准端口扫描") 156 | return Task( 157 | description=dedent( 158 | f""" 159 | 使用多种精准端口扫描工具,对ip和指定的端口列表进行扫描以获取目标开放端口信息。 160 | 尽可能多的获取目标端口的服务信息,开放端口的服务信息对后续工作的开展至关重要。 161 | 162 | 目标可能是一个ip地址,也可能是ip地址和端口列表的组合。 163 | 目标: 164 | {target} 165 | """ 166 | ), 167 | expected_output=dedent( 168 | """ 169 | 最终结果为ip地址和开放的端口数量及具体的端口与服务列表。不要编造其他额外内容。 170 | """), 171 | agent=agent, 172 | ) 173 | 174 | def _reconPortFastScanCrew(self, task_id: int, target: str): 175 | ip = ip_address(target) 176 | if ip.is_private: 177 | # TODO 178 | raise NotImplementedError("暂不支持内网地址的扫描") 179 | 180 | agents = [] 181 | tasks = [] 182 | tools = [] 183 | 184 | pfag = self.agent_port_fast_scanner( 185 | self.llm, 186 | [ 187 | MasscanSearchTool(self.db, task_id, self.masscan_path), 188 | ] 189 | ) 190 | 191 | agents.append(pfag) 192 | 193 | taskf = self.task_port_fast_scan(pfag, ip.exploded) 194 | tasks.append(taskf) 195 | 196 | if len(agents) == 0: 197 | raise Exception("无可用工具") 198 | 199 | logger.info("初始化智能体 快速端口扫描") 200 | return Crew( 201 | agents=agents, 202 | tasks=tasks, 203 | verbose=self.verbose, 204 | share_crew=False 205 | ) 206 | 207 | def _reconPortPreciseScanCrew(self, task_id: int, target: str): 208 | agents = [] 209 | tasks = [] 210 | tools = [] 211 | 212 | ppag = self.agent_port_precise_scanner( 213 | self.llm, 214 | [ 215 | NmapSearchTool(self.db, task_id, self.nmap_path), 216 | ] 217 | ) 218 | agents.append(ppag) 219 | 220 | taskp = self.task_port_precise_scan(ppag, target) 221 | tasks.append(taskp) 222 | 223 | if len(agents) == 0: 224 | raise Exception("无可用工具") 225 | 226 | logger.info("初始化智能体 精准端口扫描") 227 | return Crew( 228 | agents=agents, 229 | tasks=tasks, 230 | verbose=self.verbose, 231 | share_crew=False 232 | ) 233 | 234 | def _reconIpCrew(self, task_id: int, target: str): 235 | ip = ip_address(target) 236 | if ip.is_private: 237 | # TODO 238 | raise NotImplementedError("暂不支持内网地址的扫描") 239 | with self.db.DBSession() as session: 240 | if ip_is_cdn(session, ip.exploded): 241 | raise Exception(f"目标为CDN地址") 242 | 243 | agents = [] 244 | tasks = [] 245 | tools = self._getPassiveReconTools(task_id) 246 | if len(tools) > 0: 247 | agscout = self.agent_cyber_asset_intelligence_scout(self.llm, tools) 248 | agents.append(agscout) 249 | 250 | taskscout = self.task_cyber_assets_recon(agscout, ip.exploded) 251 | tasks.append(taskscout) 252 | 253 | if len(agents) == 0: 254 | raise Exception("无可用工具") 255 | 256 | logger.info("初始化智能体 IP侦察") 257 | return Crew( 258 | agents=agents, 259 | tasks=tasks, 260 | verbose=self.verbose, 261 | share_crew=False 262 | ) 263 | 264 | def _getPassiveReconTools(self, task_id: int) -> []: 265 | tools = [ 266 | AlienVaultSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 267 | self.cdn_apexdomain_threshold, self.cdn_subdomain_threshold), 268 | ] 269 | if os.environ.get('FOFA_EMAIL') is not None and os.environ.get('FOFA_API_KEY') is not None: 270 | tools.append(FofaSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 271 | self.cdn_apexdomain_threshold, 
self.cdn_subdomain_threshold)) 272 | if os.environ.get('SECURITYTRAILS_API_KEY') is not None: 273 | tools.append( 274 | SecurityTrailsSearchTool(self.db, task_id, self.llm, self.verbose, self.cdn_autonomous_judgment, 275 | self.cdn_apexdomain_threshold, self.cdn_subdomain_threshold)) 276 | return tools 277 | 278 | def _reconDomainCrew(self, task_id: int, target: str): 279 | agents = [] 280 | tasks = [] 281 | 282 | tools = self._getPassiveReconTools(task_id) 283 | if len(tools) > 0: 284 | agscout = self.agent_cyber_asset_intelligence_scout(self.llm, tools) 285 | agents.append(agscout) 286 | taskscout = self.task_cyber_assets_recon(agscout, target) 287 | tasks.append(taskscout) 288 | 289 | if len(agents) == 0: 290 | raise Exception("无可用工具") 291 | 292 | logger.info("初始化智能体 域名侦察") 293 | return Crew( 294 | agents=agents, 295 | tasks=tasks, 296 | verbose=self.verbose, 297 | share_crew=False 298 | ) 299 | 300 | def reconCrew(self, task_id: int, target: str): 301 | 302 | try: 303 | # ip地址 304 | ipobj = ip_address(target) 305 | logger.info("IP目标 {}", target) 306 | return self._reconIpCrew(task_id, target) 307 | except ValueError: 308 | pass 309 | 310 | if validators.url(target): 311 | # url 312 | logger.info("url目标 {}", target) 313 | return self._reconDomainCrew(task_id, target) 314 | elif is_domain(target, rfc_2782=True): 315 | # domain 316 | logger.info("domain目标 {}", target) 317 | return self._reconDomainCrew(task_id, target) 318 | raise ValueError("目标类型不支持") 319 | 320 | def portScanCrew(self, task_id: int, ip: str): 321 | 322 | portout = self._reconPortFastScanCrew(task_id, ip).kickoff() 323 | logger.info("[{}] {}: {}", task_id, ip, portout) 324 | portout = self._reconPortPreciseScanCrew(task_id, portout).kickoff() 325 | logger.info("[{}] {}: {}", task_id, ip, portout) 326 | -------------------------------------------------------------------------------- /recon/passive/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hangxin1940/bladerazor/0fbeeaef7c4ae3ad2390b2572f3e677ee32bb34b/recon/passive/__init__.py -------------------------------------------------------------------------------- /recon/passive/alienvault_search_tool.py: -------------------------------------------------------------------------------- 1 | from ipaddress import ip_address 2 | from typing import Type, Any 3 | from pydantic.v1 import BaseModel, Field 4 | from crewai_tools.tools.base_tool import BaseTool 5 | from requests import HTTPError 6 | from sqlalchemy import exc, and_, func, or_ 7 | 8 | from helpers.alienvault_api import AlienVaultApi 9 | from helpers.utils import get_ip_type, valid_ip_address 10 | from persistence.database import DB 11 | from persistence.orm import Domain, Cdn, DuplicateException, update_assets_associate_cdn 12 | from config import logger 13 | from recon.passive.cdn_check import CdnCheck 14 | 15 | 16 | class AlienVaultSearchToolSchema(BaseModel): 17 | """SecurityTrailsSearchTool 的查询参数""" 18 | domain: str = Field( 19 | None, 20 | description="域名。例如:`example.com`") 21 | ip: str = Field(None, description="IP地址。例如:`1.1.1.1`。") 22 | 23 | 24 | class AlienVaultSearchTool(BaseTool): 25 | name: str = "AlienVault" 26 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名的解析记录,不适用于内网ip。同一个目标在短时间内也不应当重复查询。" 27 | args_schema: Type[BaseModel] = AlienVaultSearchToolSchema 28 | db: DB | None = None 29 | task_id: int | None = None 30 | llm: Any = None 31 | verbose: bool = False 32 | cdn_autonomous_judgment: bool = False 33 | 
cdn_apexdomain_threshold: int = 1 34 | cdn_subdomain_threshold: int = 1 35 | 36 | class Config: 37 | arbitrary_types_allowed = True 38 | 39 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, cdn_autonomous_judgment=False, 40 | cdn_apexdomain_threshold=50, 41 | cdn_subdomain_threshold=3): 42 | super().__init__() 43 | self.db = db 44 | self.task_id = task_id 45 | self.llm = llm 46 | self.verbose = verbose 47 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 48 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 49 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 50 | 51 | logger.info("初始化工具 AlienVault") 52 | 53 | def _run( 54 | self, 55 | **kwargs: Any, 56 | ) -> Any: 57 | domain = kwargs.pop('domain', "") 58 | ip = kwargs.pop('ip', "") 59 | results = [] 60 | 61 | avapi = AlienVaultApi() 62 | if domain == "" and ip == "": 63 | return "domain和ip不能同时为空" 64 | target = domain 65 | try: 66 | if domain != "": 67 | logger.info("AlienVault查询: {}", domain) 68 | results = avapi.search_domain(domain) 69 | else: 70 | if valid_ip_address(ip) is False: 71 | return "IP地址格式错误" 72 | target = ip 73 | logger.info("AlienVault查询: {}", ip) 74 | results = avapi.search_ipv4(ip) 75 | except HTTPError as e: 76 | logger.error("AlienVault查询失败: {}", e) 77 | return f"查询失败: {e}" 78 | 79 | if len(results) == 0: 80 | return "未发现资产" 81 | 82 | if valid_ip_address(target): 83 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 84 | autonomous_judgment=self.cdn_autonomous_judgment, 85 | apexdomain_threshold=self.cdn_apexdomain_threshold, 86 | subdomain_threshold=self.cdn_subdomain_threshold) 87 | for result in results: 88 | for vdomain in result.sub_domains.values(): 89 | cdn_check.add(result.apex_domain, vdomain.sub_domain) 90 | 91 | if cdn_check.check(): 92 | with self.db.DBSession() as session: 93 | update_assets_associate_cdn(session, ip, cdn_check.get_name()) 94 | return "CDN服务器" 95 | 96 | try: 97 | cdns = {} 98 | with self.db.DBSession() as session: 99 | for result in results: 100 | for vdomain in result.sub_domains.values(): 101 | domaindb = Domain() 102 | domaindb.target = target 103 | domaindb.task_id = self.task_id 104 | domaindb.apex_domain = result.apex_domain 105 | domaindb.host = vdomain.hostname 106 | domaindb.subdomain = vdomain.sub_domain 107 | domaindb.source = self.name 108 | domaindb.cname = [] 109 | domaindb.cname_cdn = [] 110 | hostcdn = session.query(Cdn).filter( 111 | and_( 112 | Cdn.cname != None, 113 | or_( 114 | func.lower(vdomain.hostname).ilike(func.concat('%', Cdn.cname)), 115 | func.lower(result.apex_domain).ilike(func.concat('%', Cdn.cname)) 116 | ) 117 | ) 118 | ).first() 119 | if hostcdn is not None: 120 | domaindb.host_cdn = hostcdn.organization 121 | 122 | domaindb.a = [] 123 | domaindb.a_cdn = [] 124 | domaindb.aaaa = [] 125 | domaindb.aaaa_cdn = [] 126 | domaindb.mx = [] 127 | domaindb.ns = [] 128 | domaindb.soa = [] 129 | domaindb.txt = [] 130 | for record in vdomain.a: 131 | ipobj = ip_address(record) 132 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 133 | ip_type = get_ip_type(ip) 134 | domaindb.a.append(ipobj) 135 | if ipcdn is not None: 136 | domaindb.a_cdn.append(ipcdn.organization) 137 | elif domaindb.host_cdn is not None: 138 | domaindb.a_cdn.append(domaindb.host_cdn) 139 | else: 140 | domaindb.a_cdn.append(None) 141 | 142 | for record in vdomain.aaaa: 143 | ipobj = ip_address(record) 144 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 145 | 
domaindb.aaaa.append(ipobj) 146 | if ipcdn is not None: 147 | domaindb.aaaa_cdn.append(ipcdn.organization) 148 | elif domaindb.host_cdn is not None: 149 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 150 | else: 151 | domaindb.aaaa_cdn.append(None) 152 | 153 | for record in vdomain.cname: 154 | domaindb.cname = record.split(',') 155 | for cn in domaindb.cname: 156 | cnamecdn = session.query(Cdn).filter( 157 | and_( 158 | Cdn.cname != None, 159 | func.lower(cn).ilike(func.concat('%', Cdn.cname)) 160 | ) 161 | ).first() 162 | if cnamecdn is not None: 163 | domaindb.cname_cdn.append(cnamecdn.organization) 164 | else: 165 | domaindb.cname_cdn.append(None) 166 | 167 | for record in vdomain.mx: 168 | domaindb.mx.append(record) 169 | for record in vdomain.ns: 170 | domaindb.ns.append(record) 171 | for record in vdomain.soa: 172 | domaindb.soa.append(record) 173 | for record in vdomain.txt: 174 | domaindb.txt.append(record) 175 | 176 | try: 177 | session.add(domaindb) 178 | session.commit() 179 | 180 | acdns = domaindb.associate_cdn() 181 | cdns.update(acdns) 182 | 183 | except DuplicateException: 184 | session.rollback() 185 | except Exception: 186 | raise 187 | if len(cdns) > 0: 188 | with self.db.DBSession() as session: 189 | for ip, cdn in cdns.items(): 190 | update_assets_associate_cdn(session, ip, cdn) 191 | 192 | except exc.SQLAlchemyError as e: 193 | logger.error("数据库错误: {}", e) 194 | return "数据库错误" 195 | except Exception as e: 196 | logger.error("其他错误: {}", e) 197 | return f"其他错误: {e}" 198 | return f"共发现{len(results)}个资产" 199 | -------------------------------------------------------------------------------- /recon/passive/cdn_check.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | from crewai import Agent, Task, Crew 4 | 5 | 6 | class CdnCheck: 7 | 8 | def __init__(self, db, ip, llm=None, verbose=False, autonomous_judgment=True, apexdomain_threshold=50, 9 | subdomain_threshold=3): 10 | """ 11 | :param db: 12 | :param ip: 13 | :param llm: 14 | :param verbose: 15 | :param autonomous_judgment: 是否自主判断,开启时,不考虑数量阈值 16 | :param apexdomain_threshold: 主域名数量阈值 17 | :param subdomain_threshold: 子域名数量阈值 18 | """ 19 | self.llm = llm 20 | self.db = db 21 | self.verbose = verbose 22 | self.ip = ip 23 | self.apexdomain_threshold = apexdomain_threshold 24 | self.subdomain_threshold = subdomain_threshold 25 | self.domains = {} 26 | self.autonomous_judgment = autonomous_judgment 27 | 28 | def get_name(self): 29 | return f"CdnCheck: 自主判断: {self.autonomous_judgment}, 主域名数量阈值: {self.apexdomain_threshold}, 子域名数量阈值: {self.subdomain_threshold}" 30 | 31 | def add(self, apex_domain: str, sub_domain: str): 32 | """ 33 | 添加域名信息 34 | """ 35 | if apex_domain not in self.domains: 36 | self.domains[apex_domain] = set() 37 | self.domains[apex_domain].add(sub_domain) 38 | 39 | def get_statistics(self) -> str: 40 | apex_domains = len(self.domains) 41 | st = f"域名总数: {apex_domains}\n域名列表:\n" 42 | for apex_domain, sub_domains in self.domains.items(): 43 | st += f" {apex_domain}: {len(sub_domains)}\n" 44 | return st 45 | 46 | def check(self) -> bool: 47 | 48 | agent = Agent( 49 | role='CDN服务器甄别人员', 50 | goal='通过ip反查出的域名,判断ip是否为CDN服务器', 51 | backstory=dedent( 52 | """ 53 | 你是一名经验丰富的网络安全专家,专门从事CDN服务提供商的研究和分析。 54 | 你的任务是通过分析关联域名的信息,判断给定的IP地址是否为CDN前置。 55 | 你将利用你丰富的知识和经验,综合关联域名的数量、多样性、结构和模式等因素,做出准确的判断。 56 | """ 57 | ), 58 | verbose=self.verbose, 59 | allow_delegation=False, 60 | max_rpm=300, 61 | # max_iter=1, 62 | llm=self.llm, 63 | cache=False, 64 | ) 65 | 66 
| threshold = '' 67 | if self.autonomous_judgment is False: 68 | threshold = f"\n我们可以简单的认为,当主域名数量超过{self.apexdomain_threshold}个,或者存在多个拥有超过{self.subdomain_threshold}个子域名的主域名时,此IP为CDN前置。" 69 | 70 | task = Task( 71 | description=dedent( 72 | f""" 73 | 分析给定的IP地址及其关联域名的统计,判断该IP地址是否为CDN前置。具体分析包括以下几个方面: 74 | - 关联域名的数量和多样性。 75 | - 域名的结构和模式。 76 | - 主域名的特点。 77 | {threshold} 78 | 79 | 以下数据为 ip `{self.ip}` 反查出的主域名总数,主域名列表,以及每个域名对应的子域名数量: 80 | {self.get_statistics()} 81 | """ 82 | ), 83 | expected_output=dedent( 84 | """ 85 | 最终答案是一个明确的判断,该IP地址是否为CDN前置,输出为描述是与否的特定字符`Y`或`N`。 86 | 不要编造其他额外内容。 87 | """), 88 | agent=agent, 89 | ) 90 | 91 | crew = Crew( 92 | agents=[agent], 93 | tasks=[task], 94 | verbose=self.verbose, 95 | share_crew=False 96 | ) 97 | 98 | out = crew.kickoff() 99 | return "Y" in out 100 | -------------------------------------------------------------------------------- /recon/passive/fofa_search_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from ipaddress import ip_address 4 | from typing import Type, Any 5 | from pydantic.v1 import BaseModel, Field 6 | from crewai_tools.tools.base_tool import BaseTool 7 | from sqlalchemy import exc, and_, func, or_ 8 | 9 | from helpers.fofa_api import FofaApi 10 | from helpers.utils import get_ip_type, valid_ip_address 11 | from persistence.database import DB 12 | from persistence.orm import Port, Domain, Cdn, DuplicateException, update_assets_associate_cdn 13 | from tld import get_tld 14 | from config import logger 15 | from recon.passive.cdn_check import CdnCheck 16 | 17 | 18 | class FofaSearchToolSchema(BaseModel): 19 | """FofaSearchToolTool 的查询参数""" 20 | 21 | # 基础类 22 | domain: str = Field( 23 | None, 24 | description="域名,用于搜索包含此关键字的域名资产,支持精确和模糊搜索。例如:`example.com`") 25 | ip: str = Field(None, description="IP地址,支持单一IPv4地址、IPv4 C段和单一IPv6地址。例如:`1.1.1.1` 或 `1.1.1.1/24`") 26 | org: str = Field(None, description="所属组织,用于搜索包含此组织的资产。例如:`Google`") 27 | 28 | # 标记类 29 | app: str = Field( 30 | None, 31 | description="应用名称,用于搜索包含此应用的资产。小众或自研软件结果精确,通用软件如`Apache` `nginx`结果可能不精确。例如:`Apache`") 32 | 33 | # 网站类 34 | title: str = Field(None, description="网页标题,用于搜索包含此标题的资产。例如:`Google`") 35 | header: str = Field( 36 | None, 37 | description="响应头,用于搜索响应头包含此关键字的资产。小众或自研软件结果精确。例如:`X-Elastic-Product`") 38 | body: str = Field(None, description="HTML正文,用于搜索包含此关键字的资产。例如:`百度一下`") 39 | js_name: str = Field(None, description="HTML正文包含的JS,用于搜索包含此JS引用关键字的资产。例如:`js/jquery.js`") 40 | icon_hash: str = Field(None, description="网站图标的hash值,用于搜索包含此图标hash值的资产。例如:`-247388890`") 41 | icp: str = Field( 42 | None, 43 | description="ICP备案号,用于搜索包含此备案号的资产。中国大陆网站需ICP备案。例如:`京ICP证030173号`") 44 | 45 | # 证书类 46 | cert: str = Field(None, description="证书信息,用于搜索证书中包含此关键字的资产。例如:`Let's Encrypt`") 47 | fuzzy: bool = Field( 48 | default=False, 49 | description="是否模糊搜索,用于拓展资产,但会降低准确性,默认为False。只能与domain参数单独使用。") 50 | 51 | 52 | class FofaSearchTool(BaseTool): 53 | name: str = "FOFA" 54 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名、证书等信息,不适用于内网ip。短时间内大量查询可能会被限制。同一个目标在短时间内也不应当重复查询。" 55 | args_schema: Type[BaseModel] = FofaSearchToolSchema 56 | db: DB | None = None 57 | task_id: int | None = None 58 | llm: Any = None 59 | verbose: bool = False 60 | cdn_autonomous_judgment: bool = False 61 | cdn_apexdomain_threshold: int = 1 62 | cdn_subdomain_threshold: int = 1 63 | 64 | class Config: 65 | arbitrary_types_allowed = True 66 | 67 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, 
cdn_autonomous_judgment=False, 68 | cdn_apexdomain_threshold=50, 69 | cdn_subdomain_threshold=3): 70 | super().__init__() 71 | self.db = db 72 | self.task_id = task_id 73 | self.llm = llm 74 | self.verbose = verbose 75 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 76 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 77 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 78 | logger.info("初始化工具 FOFA") 79 | 80 | def _run( 81 | self, 82 | **kwargs: Any, 83 | ) -> Any: 84 | fofaapi = FofaApi(os.environ.get('FOFA_EMAIL'), os.environ.get('FOFA_API_KEY'), 85 | os.environ.get('FOFA_VERSION', 'base')) 86 | fuzzy = kwargs.pop('fuzzy', False) 87 | target = "" 88 | if kwargs.get('domain') is not None: 89 | target = kwargs.get('domain') 90 | elif kwargs.get('ip') is not None: 91 | target = kwargs.get('ip') 92 | if valid_ip_address(target) is False: 93 | return "IP地址格式错误" 94 | results = [] 95 | try: 96 | logger.info("FOFA查询: {}", kwargs) 97 | results = fofaapi.search(fuzzy=fuzzy, **kwargs) 98 | except Exception as e: 99 | logger.error("fofa查询失败: {}", e) 100 | return f"查询失败: {e}" 101 | if len(results) == 0: 102 | return "未找到任何资产" 103 | 104 | if valid_ip_address(target): 105 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 106 | autonomous_judgment=self.cdn_autonomous_judgment, 107 | apexdomain_threshold=self.cdn_apexdomain_threshold, 108 | subdomain_threshold=self.cdn_subdomain_threshold) 109 | for result in results: 110 | if result.host is not None and result.host != "": 111 | hostobj = get_tld(result.host, fail_silently=True, as_object=True, fix_protocol=True) 112 | if hostobj is not None: 113 | cdn_check.add(hostobj.fld, hostobj.subdomain) 114 | 115 | if cdn_check.check(): 116 | with self.db.DBSession() as session: 117 | update_assets_associate_cdn(session, target, cdn_check.get_name()) 118 | return "CDN服务器" 119 | try: 120 | cdns = {} 121 | with self.db.DBSession() as session: 122 | for data in results: 123 | pdb = Port() 124 | pdb.target = target 125 | pdb.task_id = self.task_id 126 | pdb.ip = ip_address(data.ip) 127 | 128 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(pdb.ip.exploded)).first() 129 | if ipcdn is not None: 130 | pdb.ip_cdn = ipcdn.organization 131 | 132 | pdb.protocol = data.base_protocol 133 | pdb.port = data.port 134 | pdb.service = data.protocol 135 | pdb.product = data.product 136 | pdb.version = data.version.rstrip("/") 137 | if data.lastupdatetime is not None and data.lastupdatetime != "": 138 | pdb.checked_time = datetime.strptime(data.lastupdatetime, "%Y-%m-%d %H:%M:%S") 139 | pdb.is_passive = True 140 | extra_info = {} 141 | domaindb = None 142 | if data.host is not None and data.host != "": 143 | extra_info["host"] = data.host 144 | hostobj = get_tld(data.host, fail_silently=True, as_object=True, fix_protocol=True) 145 | if hostobj is not None: 146 | domaindb = Domain() 147 | domaindb.target = target 148 | domaindb.task_id = self.task_id 149 | if hostobj.subdomain == "": 150 | domaindb.host = hostobj.fld 151 | else: 152 | domaindb.host = hostobj.subdomain + "." 
+ hostobj.fld 153 | domaindb.apex_domain = hostobj.fld 154 | domaindb.subdomain = hostobj.subdomain 155 | 156 | hostcdn = session.query(Cdn).filter( 157 | and_( 158 | Cdn.cname != None, 159 | or_( 160 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 161 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 162 | ) 163 | ) 164 | ).first() 165 | if hostcdn is not None: 166 | domaindb.host_cdn = hostcdn.organization 167 | 168 | if pdb.checked_time is not None: 169 | domaindb.checked_time = pdb.checked_time 170 | domaindb.source = self.name 171 | domaindb.cname = [] 172 | domaindb.cname_cdn = [] 173 | domaindb.a = [] 174 | domaindb.a_cdn = [] 175 | domaindb.aaaa = [] 176 | domaindb.aaaa_cdn = [] 177 | domaindb.mx = [] 178 | domaindb.ns = [] 179 | domaindb.soa = [] 180 | domaindb.txt = [] 181 | if get_ip_type(data.ip) == "ipv4": 182 | ipobj = ip_address(data.ip) 183 | domaindb.a.append(ipobj) 184 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 185 | if ipcdn is not None: 186 | domaindb.a_cdn.append(ipcdn.organization) 187 | elif domaindb.host_cdn is not None: 188 | domaindb.a_cdn.append(domaindb.host_cdn) 189 | else: 190 | domaindb.a_cdn.append(None) 191 | else: 192 | ipobj = ip_address(data.ip) 193 | domaindb.aaaa.append(ipobj) 194 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 195 | if ipcdn is not None: 196 | domaindb.aaaa_cdn.append(ipcdn.organization) 197 | elif domaindb.host_cdn is not None: 198 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 199 | else: 200 | domaindb.aaaa_cdn.append(None) 201 | 202 | if data.as_organization is not None and data.as_organization != "": 203 | extra_info["as_organization"] = data.as_organization 204 | if data.cname is not None and data.cname != "": 205 | cnametld = get_tld(data.cname, fail_silently=True, as_object=True, fix_protocol=True) 206 | extra_info["cname"] = cnametld.parsed_url.hostname 207 | if domaindb is not None: 208 | domaindb.cname = cnametld.parsed_url.hostname.split(',') 209 | for cn in domaindb.cname: 210 | cnamecdn = session.query(Cdn).filter( 211 | and_( 212 | Cdn.cname != None, 213 | func.lower(cn).ilike(func.concat('%', Cdn.cname)) 214 | ) 215 | ).first() 216 | if cnamecdn is not None: 217 | domaindb.cname_cdn.append(cnamecdn.organization) 218 | else: 219 | domaindb.cname_cdn.append(None) 220 | 221 | if data.domain is not None and data.domain != "": 222 | extra_info["domain"] = data.domain 223 | if data.server is not None and data.server != "": 224 | extra_info["server"] = data.server 225 | if data.os is not None and data.os != "": 226 | extra_info["os"] = data.os 227 | if data.icp is not None and data.icp != "": 228 | extra_info["icp"] = data.icp 229 | if data.title is not None and data.title != "": 230 | extra_info["title"] = data.title 231 | if data.cert is not None and data.cert != "": 232 | extra_info["cert"] = { 233 | "data": data.cert, 234 | "issuer_org": data.certs_issuer_org, 235 | "issuer_cn": data.certs_issuer_cn, 236 | "subject_org": data.certs_subject_org, 237 | "subject_cn": data.certs_subject_cn, 238 | } 239 | pdb.extra_info = extra_info 240 | pdb.source = self.name 241 | try: 242 | session.add(pdb) 243 | session.commit() 244 | 245 | acdns = pdb.associate_cdn() 246 | cdns.update(acdns) 247 | except DuplicateException as e: 248 | session.rollback() 249 | except Exception as e: 250 | raise 251 | 252 | if domaindb is not None: 253 | try: 254 | session.add(domaindb) 255 | session.commit() 256 | 257 | acdns = domaindb.associate_cdn() 
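                            # 补充说明(示意):此处采用 "循环内收集、循环外回写" 的模式,
                            # associate_cdn() 返回 {ip: cdn组织} 映射,先汇总进 cdns 字典,
                            # 待本 DBSession 中的逐条 add/commit 全部完成后,再在方法结尾统一
                            # 调用 update_assets_associate_cdn 回写资产与 CDN 的关联,
                            # 推测是为了避免在提交循环中交叉更新其他资产行。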
258 | cdns.update(acdns) 259 | 260 | except DuplicateException as e: 261 | session.rollback() 262 | except Exception as e: 263 | raise 264 | if len(cdns) > 0: 265 | with self.db.DBSession() as session: 266 | for ip, cdn in cdns.items(): 267 | update_assets_associate_cdn(session, ip, cdn) 268 | 269 | except exc.SQLAlchemyError as e: 270 | logger.error("数据库错误: {}", e) 271 | return "数据库错误" 272 | except Exception as e: 273 | logger.error("其他错误: {}", e) 274 | return f"其他错误: {e}" 275 | 276 | return f"共发现{len(results)}个资产" 277 | -------------------------------------------------------------------------------- /recon/passive/security_trails_search_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from ipaddress import ip_address 4 | from typing import Type, Any 5 | from pydantic.v1 import BaseModel, Field 6 | from crewai_tools.tools.base_tool import BaseTool 7 | from requests import HTTPError 8 | from sqlalchemy import exc, and_, or_, func 9 | 10 | from helpers.security_trails_api import SecurityTrailsApi 11 | from helpers.utils import get_ip_type, valid_ip_address 12 | from persistence.database import DB 13 | from persistence.orm import Domain, Cdn, DuplicateException, update_assets_associate_cdn 14 | from tld import get_tld 15 | from config import logger 16 | from recon.passive.cdn_check import CdnCheck 17 | 18 | 19 | class SecurityTrailsSearchToolSchema(BaseModel): 20 | """SecurityTrailsSearchTool 的查询参数""" 21 | domain: str = Field( 22 | None, 23 | description="域名,用于搜索包含此关键字的域名资产,支持精确和模糊搜索。例如:`example.com`") 24 | ip: str = Field(None, description="IP地址,支持单一IPv4地址。例如:`1.1.1.1`。使用此参数时,不能携带其他参数。") 25 | history: bool = Field( 26 | default=False, 27 | description="是否查询域名解析历史,仅对domain有效,默认为False。只能与domain参数单独使用。") 28 | fuzzy: bool = Field( 29 | default=False, 30 | description="是否模糊搜索,用于拓展资产,但会降低准确性,默认为False。只能与domain参数单独使用。") 31 | 32 | 33 | class SecurityTrailsSearchTool(BaseTool): 34 | name: str = "SecurityTrails" 35 | description: str = "网络资产搜索引擎,不直接接触目标资产,对目标无副作用。支持搜索IP地址、域名、子域名以及域名的解析记录,不适用于内网ip。短时间内大量查询可能会被限制。同一个目标在短时间内也不应当重复查询。" 36 | args_schema: Type[BaseModel] = SecurityTrailsSearchToolSchema 37 | db: DB | None = None 38 | task_id: int | None = None 39 | llm: Any = None 40 | verbose: bool = False 41 | cdn_autonomous_judgment: bool = False 42 | cdn_apexdomain_threshold: int = 1 43 | cdn_subdomain_threshold: int = 1 44 | 45 | class Config: 46 | arbitrary_types_allowed = True 47 | 48 | def __init__(self, db: DB, task_id: int, llm=None, verbose=False, cdn_autonomous_judgment=False, 49 | cdn_apexdomain_threshold=50, 50 | cdn_subdomain_threshold=3): 51 | super().__init__() 52 | self.db = db 53 | self.task_id = task_id 54 | self.llm = llm 55 | self.verbose = verbose 56 | self.cdn_autonomous_judgment = cdn_autonomous_judgment 57 | self.cdn_apexdomain_threshold = cdn_apexdomain_threshold 58 | self.cdn_subdomain_threshold = cdn_subdomain_threshold 59 | logger.info("初始化工具 SecurityTrails") 60 | 61 | def _run( 62 | self, 63 | **kwargs: Any, 64 | ) -> Any: 65 | stapi = SecurityTrailsApi(os.environ.get('SECURITYTRAILS_API_KEY')) 66 | fuzzy = kwargs.pop('fuzzy', False) 67 | history = kwargs.pop('history', False) 68 | domain = kwargs.pop('domain', "") 69 | ip = kwargs.pop('ip', "") 70 | results = [] 71 | 72 | if history: 73 | if domain == "": 74 | return "domain为空。history参数仅对domain有效" 75 | try: 76 | logger.info("SecurityTrails查询历史解析: {}", domain) 77 | results = stapi.get_history(domain) 78 | except HTTPError as e: 79 | 
logger.error("SecurityTrails查询失败: {}", e) 80 | return f"查询失败: {e}" 81 | try: 82 | cdns = {} 83 | with self.db.DBSession() as session: 84 | for result in results: 85 | hostobj = get_tld(result.hostname, fail_silently=True, as_object=True, fix_protocol=True) 86 | domaindb = Domain() 87 | domaindb.target = domain 88 | domaindb.task_id = self.task_id 89 | domaindb.apex_domain = hostobj.fld 90 | domaindb.host = result.hostname 91 | domaindb.subdomain = hostobj.subdomain 92 | 93 | hostcdn = session.query(Cdn).filter( 94 | and_( 95 | Cdn.cname != None, 96 | or_( 97 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 98 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 99 | ) 100 | ) 101 | ).first() 102 | if hostcdn is not None: 103 | domaindb.host_cdn = hostcdn.organization 104 | 105 | domaindb.source = self.name 106 | domaindb.cname = [] 107 | domaindb.cname_cdn = [] 108 | domaindb.a = [] 109 | domaindb.a_cdn = [] 110 | domaindb.aaaa = [] 111 | domaindb.aaaa_cdn = [] 112 | domaindb.mx = [] 113 | domaindb.ns = [] 114 | domaindb.soa = [] 115 | domaindb.txt = [] 116 | 117 | if result.first_seen != "": 118 | domaindb.first_seen = datetime.strptime(result.first_seen, "%Y-%m-%d") 119 | if result.last_seen != "": 120 | domaindb.last_seen = datetime.strptime(result.last_seen, "%Y-%m-%d") 121 | 122 | if result.ip != "": 123 | ipobj = ip_address(result.ip) 124 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 125 | 126 | ip_type = get_ip_type(result.ip) 127 | if ip_type == "ipv4": 128 | domaindb.a.append(ipobj) 129 | if ipcdn is not None: 130 | domaindb.a_cdn.append(ipcdn.organization) 131 | elif domaindb.host_cdn is not None: 132 | domaindb.a_cdn.append(domaindb.host_cdn) 133 | else: 134 | domaindb.a_cdn.append(None) 135 | else: 136 | domaindb.aaaa.append(ipobj) 137 | if ipcdn is not None: 138 | domaindb.aaaa_cdn.append(ipcdn.organization) 139 | elif domaindb.host_cdn is not None: 140 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 141 | else: 142 | domaindb.aaaa_cdn.append(None) 143 | try: 144 | session.add(domaindb) 145 | session.commit() 146 | 147 | acdns = domaindb.associate_cdn() 148 | cdns.update(acdns) 149 | 150 | except DuplicateException: 151 | session.rollback() 152 | except Exception: 153 | raise 154 | 155 | if len(cdns) > 0: 156 | with self.db.DBSession() as session: 157 | for ip, cdn in cdns.items(): 158 | update_assets_associate_cdn(session, ip, cdn) 159 | 160 | except exc.SQLAlchemyError as e: 161 | logger.error("数据库错误: {}", e) 162 | return "数据库错误" 163 | except Exception as e: 164 | logger.error("其他错误: {}", e) 165 | return f"其他错误: {e}" 166 | return f"共发现{len(results)}个资产" 167 | try: 168 | target = domain 169 | if fuzzy: 170 | if domain == "": 171 | return "domain为空。fuzzy参数仅对domain有效" 172 | logger.info("SecurityTrails模糊查询: {}", domain) 173 | results = stapi.search_domain_fuzzy(domain) 174 | elif domain != "": 175 | logger.info("SecurityTrails查询: {}", domain) 176 | results = stapi.search_domain(domain) 177 | elif ip != "": 178 | if valid_ip_address(ip) is False: 179 | return "IP地址格式错误" 180 | target = ip 181 | logger.info("SecurityTrails查询: {}", ip) 182 | results = stapi.search_ip(ip) 183 | else: 184 | return "domain和ip不能同时为空" 185 | except HTTPError as e: 186 | logger.error("SecurityTrails查询失败: {}", e) 187 | return f"查询失败: {e}" 188 | 189 | if len(results) == 0: 190 | return "未发现资产" 191 | 192 | if valid_ip_address(target): 193 | cdn_check = CdnCheck(self.db, target, llm=self.llm, verbose=self.verbose, 194 | 
autonomous_judgment=self.cdn_autonomous_judgment, 195 | apexdomain_threshold=self.cdn_apexdomain_threshold, 196 | subdomain_threshold=self.cdn_subdomain_threshold) 197 | for result in results: 198 | cdn_check.add(result.apex_domain, result.subdomain) 199 | 200 | if cdn_check.check(): 201 | with self.db.DBSession() as session: 202 | update_assets_associate_cdn(session, target, cdn_check.get_name()) 203 | return "CDN服务器" 204 | 205 | try: 206 | cdns = {} 207 | with self.db.DBSession() as session: 208 | for result in results: 209 | domaindb = Domain() 210 | domaindb.target = target 211 | domaindb.task_id = self.task_id 212 | domaindb.apex_domain = result.apex_domain 213 | domaindb.host = result.hostname 214 | domaindb.subdomain = result.subdomain 215 | 216 | hostcdn = session.query(Cdn).filter( 217 | and_( 218 | Cdn.cname != None, 219 | or_( 220 | func.lower(domaindb.host).ilike(func.concat('%', Cdn.cname)), 221 | func.lower(domaindb.apex_domain).ilike(func.concat('%', Cdn.cname)) 222 | ) 223 | ) 224 | ).first() 225 | if hostcdn is not None: 226 | domaindb.host_cdn = hostcdn.organization 227 | 228 | domaindb.source = self.name 229 | domaindb.cname = [] 230 | domaindb.cname_cdn = [] 231 | domaindb.a = [] 232 | domaindb.a_cdn = [] 233 | domaindb.aaaa = [] 234 | domaindb.aaaa_cdn = [] 235 | domaindb.mx = [] 236 | domaindb.ns = [] 237 | domaindb.soa = [] 238 | domaindb.txt = [] 239 | for ip in result.ips: 240 | ipobj = ip_address(ip) 241 | ipcdn = session.query(Cdn).filter(Cdn.cidr.op('>>')(ipobj.exploded)).first() 242 | ip_type = get_ip_type(ip) 243 | if ip_type == "ipv4": 244 | domaindb.a.append(ipobj) 245 | if ipcdn is not None: 246 | domaindb.a_cdn.append(ipcdn.organization) 247 | elif domaindb.host_cdn is not None: 248 | domaindb.a_cdn.append(domaindb.host_cdn) 249 | else: 250 | domaindb.a_cdn.append(None) 251 | else: 252 | domaindb.aaaa.append(ipobj) 253 | if ipcdn is not None: 254 | domaindb.aaaa_cdn.append(ipcdn.organization) 255 | elif domaindb.host_cdn is not None: 256 | domaindb.aaaa_cdn.append(domaindb.host_cdn) 257 | else: 258 | domaindb.aaaa_cdn.append(None) 259 | try: 260 | session.add(domaindb) 261 | session.commit() 262 | 263 | acdns = domaindb.associate_cdn() 264 | cdns.update(acdns) 265 | except DuplicateException: 266 | session.rollback() 267 | except Exception: 268 | raise 269 | if len(cdns) > 0: 270 | with self.db.DBSession() as session: 271 | for ip, cdn in cdns.items(): 272 | update_assets_associate_cdn(session, ip, cdn) 273 | 274 | except exc.SQLAlchemyError as e: 275 | logger.error("数据库错误: {}", e) 276 | return "数据库错误" 277 | except Exception as e: 278 | logger.error("其他错误: {}", e) 279 | return f"其他错误: {e}" 280 | return f"共发现{len(results)}个资产" 281 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.12.3 2 | crewai==0.36.0 3 | crewai_tools==0.4.8 4 | embedchain==0.1.116 5 | fake_useragent==1.5.1 6 | FOFA_py==2.0.3 7 | httpx==0.27.0 8 | langchain_openai==0.1.15 9 | langgraph==0.1.7 10 | loguru==0.7.2 11 | mmh3==4.1.0 12 | psycopg2_binary==2.9.9 13 | pydantic==2.8.2 14 | python3_nmap==1.6.0 15 | PyYAML==6.0.1 16 | Requests==2.32.3 17 | SQLAlchemy==2.0.30 18 | tiktoken==0.7.0 19 | tld==0.13 20 | validators==0.28.3 21 | -------------------------------------------------------------------------------- /team.py: -------------------------------------------------------------------------------- 1 | from typing
import Optional 2 | 3 | from exploits.attack_surface_research import AttackSurfaceResearch 4 | from exploits.deploy_attack import DeployAttack 5 | from exploits.vul_scan_expert import VulScanExpert 6 | from persistence.database import DB 7 | from rag.rag import RAG 8 | from recon.cyber_assets_researcher import CyberAssetsResearchers 9 | 10 | 11 | class Team: 12 | """团队""" 13 | 14 | cyberAssetsResearchers: CyberAssetsResearchers | None = None 15 | vulScanExpert: VulScanExpert | None = None 16 | attackSurfaceResearch: AttackSurfaceResearch | None = None 17 | deployAttack: DeployAttack | None = None 18 | 19 | def __init__(self, 20 | db: DB, 21 | rag: RAG, 22 | llm, 23 | debug: Optional[bool] = None, 24 | masscan_path: Optional[str] = None, 25 | nmap_path: Optional[str] = None, 26 | gobuster_path: str = None, 27 | gobuster_wordlist_path: str = None, 28 | nuclei_path: Optional[str] = None, 29 | nuclei_templates_path: Optional[str] = None): 30 | self.cyberAssetsResearchers = CyberAssetsResearchers( 31 | db=db, 32 | llm=llm, 33 | masscan_path=masscan_path, 34 | nmap_path=nmap_path, 35 | verbose=debug 36 | ) 37 | 38 | self.vulScanExpert = VulScanExpert( 39 | db=db, 40 | llm=llm, 41 | gobuster_path=gobuster_path, 42 | gobuster_wordlist_path=gobuster_wordlist_path, 43 | nuclei_path=nuclei_path, 44 | nuclei_templates_path=nuclei_templates_path, 45 | verbose=debug 46 | ) 47 | 48 | self.attackSurfaceResearch = AttackSurfaceResearch( 49 | db=db, 50 | rag=rag, 51 | llm=llm, 52 | verbose=debug 53 | ) 54 | 55 | self.deployAttack = DeployAttack( 56 | db=db, 57 | rag=rag, 58 | llm=llm, 59 | verbose=debug 60 | ) 61 | 62 | def get_recon_crew(self, task_id: int, target: str): 63 | """ 64 | 获取侦察队伍 65 | """ 66 | return self.cyberAssetsResearchers.reconCrew(task_id, target) 67 | 68 | def get_mapping_crew(self, task_id: int, target: str): 69 | """ 70 | 获取测绘队伍 71 | """ 72 | return self.vulScanExpert.fingerprintingCrew(task_id, target) 73 | 74 | def get_vulscan_crew(self, task_id: int, target: str): 75 | """ 76 | 获取漏扫队伍 77 | """ 78 | return self.vulScanExpert.vulScanCrew(task_id, target) 79 | 80 | def get_intelligence_analysis_crew(self, target: str): 81 | """ 82 | 获取情报分析队伍 83 | """ 84 | return self.attackSurfaceResearch.intelligenceAnalysisCrew(target) 85 | 86 | def get_establishing_foothold_research_crew(self, target: str, intelligence: str): 87 | """ 88 | 获取攻击面研究队伍 89 | """ 90 | return self.attackSurfaceResearch.establishingFootholdResearchCrew(target, intelligence) 91 | 92 | def get_attack_plan_review_crew(self, assets: str, intelligence: str, plan: str, review: str | None = None): 93 | """ 94 | 获取攻击计划审核队伍 95 | """ 96 | return self.attackSurfaceResearch.attackPlanReviewCrew(assets, intelligence, plan, review) 97 | 98 | def get_establishing_foothold_deploy_attack_crew(self, asset: str, plan: str): 99 | """ 100 | 获取攻击队伍 101 | """ 102 | return self.deployAttack.establishingFootholdAttackCrew(asset, plan) 103 | -------------------------------------------------------------------------------- /workflow_attack_plan.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from langgraph.graph import StateGraph 3 | from typing import TypedDict, Optional 4 | from langgraph.graph.graph import CompiledGraph 5 | from sqlalchemy import and_, func 6 | 7 | from persistence.database import DB 8 | from persistence.orm import Port, WebInfo, Vul, Workflow, UrlEnum 9 | from team import Team 10 | from config import logger 11 | 12 | WORK = 'attack_plan' 13 | 14 | 15 | class 
StatePlanReview(IntEnum): 16 | INIT = 0 # 初始化 17 | PREPARATION = 1 # 准备工作 18 | MAKE_PLAN = 2 # 制定计划 19 | REVIEW = 3 # 审核 20 | PASS = 4 # 通过 21 | FAIL = 5 # 否决 22 | REWORK = 6 # 重做 23 | 24 | 25 | class Target: 26 | target: str 27 | asset: str 28 | plan: str | None 29 | review: str | None 30 | status: StatePlanReview 31 | 32 | def __init__(self, db: DB, task_id: int, target: str, asset: str, intelligence: str | None, plan: str | None, 33 | review: str | None, 34 | workflow_id=0, status=StatePlanReview.INIT): 35 | self.db = db 36 | self.task_id = task_id 37 | self.workflow_id = workflow_id 38 | self.target = target 39 | self.asset = asset 40 | self.intelligence = intelligence 41 | self.plan = plan 42 | self.review = review 43 | self.status = status 44 | 45 | def init(self): 46 | with self.db.DBSession() as session: 47 | if self.workflow_id == 0: 48 | wf = Workflow() 49 | wf.work = WORK 50 | wf.task_id = self.task_id 51 | wf.status = int(self.status) 52 | wf.data = {'asset': self.asset, 'intelligence': self.intelligence, 'plan': self.plan, 53 | 'review': self.review, 'target': self.target} 54 | session.add(wf) 55 | session.flush() 56 | session.commit() 57 | self.workflow_id = wf.id 58 | 59 | def update_status(self): 60 | with self.db.DBSession() as session: 61 | wf = session.query(Workflow).filter( 62 | and_( 63 | Workflow.id == self.workflow_id 64 | ) 65 | ).first() 66 | wf.status = int(self.status) 67 | wf.data = {'asset': self.asset, 'intelligence': self.intelligence, 'plan': self.plan, 'review': self.review, 68 | 'target': self.target} 69 | session.commit() 70 | 71 | 72 | class TaskStateAttackPlan(TypedDict): 73 | task_id: int 74 | targets: list[Target] 75 | 76 | 77 | class TaskNodesAttackPlan: 78 | team: Team | None = None 79 | db: DB | None = None 80 | 81 | def __init__(self, db: DB, team: Team): 82 | self.db = db 83 | self.team = team 84 | 85 | def init_task(self, state: TaskStateAttackPlan): 86 | """ 87 | 初始化任务 88 | """ 89 | if len(state['targets']) == 0: 90 | # 必须处理完所有遗留任务 91 | state['targets'] = self._assets_intelligence(state['task_id']) 92 | for target in state['targets']: 93 | target.init() 94 | 95 | return state 96 | 97 | def preparation(self, state: TaskStateAttackPlan): 98 | """ 99 | 准备工作 100 | """ 101 | for target in state['targets']: 102 | if target.status in [StatePlanReview.INIT, StatePlanReview.PREPARATION, StatePlanReview.REWORK]: 103 | try: 104 | crew = self.team.get_intelligence_analysis_crew(target.asset) 105 | out = crew.kickoff() 106 | target.intelligence = out 107 | target.status = StatePlanReview.MAKE_PLAN 108 | target.update_status() 109 | logger.info("[preparation {}] {}\n{}", state['task_id'], target.target, out) 110 | except ValueError as e: 111 | logger.debug("[preparation {}] {}\n{}", state['task_id'], target.target, e) 112 | except Exception as e: 113 | logger.error("[preparation {}] {}\n{}", state['task_id'], target.target, e) 114 | return state 115 | 116 | def make_plan(self, state: TaskStateAttackPlan): 117 | """ 118 | 制定计划 119 | """ 120 | for target in state['targets']: 121 | if target.status in [StatePlanReview.MAKE_PLAN, StatePlanReview.REWORK]: 122 | try: 123 | if target.status == StatePlanReview.REWORK and target.review is not None: 124 | crew = self.team.get_attack_plan_review_crew(target.asset, target.intelligence, target.plan, 125 | target.review) 126 | else: 127 | crew = self.team.get_establishing_foothold_research_crew(target.asset, target.intelligence) 128 | out = crew.kickoff() 129 | if out.startswith("FAIL") is False: 130 | target.plan = out 
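                    # 补充说明(示意):计划生成后置为 REVIEW,交由 attack_plan_review 中的
                    # 审核 Crew 裁决;两者之间约定的文本协议为:
                    #   以 PASS 开头 -> StatePlanReview.PASS,review 取 "PASS " 之后的正文
                    #   以 FAIL 开头 -> StatePlanReview.FAIL
                    #   其他任意内容 -> 视为整改意见,置为 REWORK,经条件边回到 preparation 重做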
131 | target.status = StatePlanReview.REVIEW 132 | target.update_status() 133 | logger.info("[make_plan {}] {}\n{}", state['task_id'], target.target, out) 134 | except ValueError as e: 135 | logger.debug("[make_plan {}] {}\n{}", state['task_id'], target.target, e) 136 | except Exception as e: 137 | logger.error("[make_plan {}] {}\n{}", state['task_id'], target.target, e) 138 | return state 139 | 140 | def attack_plan_review(self, state: TaskStateAttackPlan): 141 | """ 142 | 检查计划 143 | """ 144 | for target in state['targets']: 145 | if target.status in [StatePlanReview.REVIEW]: 146 | try: 147 | crew = self.team.get_attack_plan_review_crew(target.asset, target.intelligence, target.plan) 148 | out = crew.kickoff().strip() 149 | if out.startswith("PASS"): 150 | target.status = StatePlanReview.PASS 151 | target.review = out[5:] 152 | elif out.startswith("FAIL"): 153 | target.status = StatePlanReview.FAIL 154 | target.review = out[5:] 155 | else: 156 | target.status = StatePlanReview.REWORK 157 | target.review = out 158 | target.update_status() 159 | logger.info("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, out) 160 | except ValueError as e: 161 | logger.debug("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, e) 162 | except Exception as e: 163 | logger.error("[attack_plan_review {}] {}\n{}", state['task_id'], target.target, e) 164 | return state 165 | 166 | def finish(self, state: TaskStateAttackPlan): 167 | """ 168 | 结束任务 169 | """ 170 | # TODO 171 | # for target in state['targets']: 172 | # print('finish', target.target) 173 | # print(target.plan) 174 | return state 175 | 176 | def edge_shuld_finish(self, state: TaskStateAttackPlan): 177 | """ 178 | 条件边 179 | """ 180 | 181 | for target in state['targets']: 182 | if target.status in [StatePlanReview.REWORK]: 183 | return 'rework' 184 | return 'pass' 185 | 186 | def _assets_intelligence(self, task_id: int) -> [Target]: 187 | """ 188 | 获取已探明的资产 189 | """ 190 | datas = {} 191 | with self.db.DBSession() as session: 192 | infos = session.query(WebInfo).filter( 193 | and_( 194 | WebInfo.task_id == task_id, 195 | WebInfo.finger_prints != None, 196 | func.jsonb_array_length(WebInfo.finger_prints) >= 1 197 | ) 198 | ).all() 199 | urlenums = session.query(UrlEnum).filter( 200 | and_( 201 | UrlEnum.task_id == task_id, 202 | UrlEnum.finger_prints != None, 203 | func.jsonb_array_length(UrlEnum.finger_prints) >= 1 204 | ) 205 | ).all() 206 | webs = infos + urlenums 207 | if len(webs) == 0: 208 | infos = session.query(WebInfo).filter(WebInfo.task_id == task_id).all() 209 | urlenums = session.query(UrlEnum).filter(UrlEnum.task_id == task_id).all() 210 | webs = infos + urlenums 211 | for web in webs: 212 | if isinstance(web, WebInfo): 213 | host = web.host 214 | target = web.target 215 | else: 216 | host = web.web_info.host 217 | target = web.web_info.target 218 | if target not in datas: 219 | datas[host] = [] 220 | datas[host].append(web.to_prompt_template()) 221 | 222 | vuls = session.query(Vul).filter(Vul.task_id == task_id).all() 223 | for vul in vuls: 224 | if vul.target not in datas: 225 | datas[vul.target] = [] 226 | datas[vul.target].append(vul.to_prompt_template()) 227 | 228 | ports = session.query(Port).filter( 229 | and_( 230 | Port.task_id == task_id, 231 | Port.ip_cdn == None 232 | ) 233 | ).all() 234 | for port in ports: 235 | if port.ip not in datas: 236 | datas[port.ip] = [] 237 | datas[port.ip].append(port.to_prompt_template()) 238 | 239 | targets = [] 240 | for target, data in datas.items(): 241 | 
targets.append( 242 | Target( 243 | db=self.db, 244 | task_id=task_id, 245 | target=target, 246 | asset=f"目标: {target}\n{'\n\n---------------\n\n'.join(data)}", 247 | intelligence=None, 248 | plan=None, 249 | review=None 250 | )) 251 | return targets 252 | 253 | 254 | class WorkFlowAttackPlan: 255 | app: CompiledGraph | None = None 256 | debug: bool = False 257 | team: Team | None = None 258 | db: DB | None = None 259 | 260 | def __init__(self, db: DB, team: Team, debug: Optional[bool] = None): 261 | self.db = db 262 | self.team = team 263 | self.debug = debug 264 | 265 | nodes = TaskNodesAttackPlan(db, team) 266 | workflow = StateGraph(TaskStateAttackPlan) 267 | workflow.add_node('init_task', nodes.init_task) 268 | workflow.add_node('preparation', nodes.preparation) 269 | workflow.add_node('make_plan', nodes.make_plan) 270 | workflow.add_node('attack_plan_review', nodes.attack_plan_review) 271 | workflow.add_node('finish', nodes.finish) 272 | 273 | workflow.set_entry_point('init_task') 274 | workflow.set_finish_point('finish') 275 | 276 | workflow.add_edge('init_task', 'preparation') 277 | workflow.add_edge('preparation', 'make_plan') 278 | workflow.add_edge('make_plan', 'attack_plan_review') 279 | workflow.add_conditional_edges( 280 | source='attack_plan_review', 281 | path=nodes.edge_shuld_finish, 282 | path_map={ 283 | 'rework': 'preparation', 284 | 'pass': 'finish' 285 | } 286 | ) 287 | 288 | self.app = workflow.compile(debug=debug) 289 | 290 | def run(self, taskid: int): 291 | state = { 292 | 'task_id': taskid, 293 | 'targets': [] 294 | } 295 | 296 | with self.db.DBSession() as session: 297 | wfs = session.query(Workflow).filter( 298 | and_( 299 | Workflow.task_id == taskid, 300 | Workflow.work == WORK, 301 | Workflow.status.notin_([StatePlanReview.PASS, StatePlanReview.FAIL]) 302 | ) 303 | ).all() 304 | for wf in wfs: 305 | state['targets'].append( 306 | Target( 307 | db=self.db, 308 | task_id=wf.task_id, 309 | target=wf.data['target'], 310 | asset=wf.data['asset'], 311 | intelligence=wf.data['intelligence'], 312 | plan=wf.data['plan'], 313 | review=wf.data['review'], 314 | workflow_id=wf.id, 315 | status=StatePlanReview(wf.status) 316 | ) 317 | ) 318 | 319 | self.app.invoke(state) 320 | -------------------------------------------------------------------------------- /workflow_deploy_attack.py: -------------------------------------------------------------------------------- 1 | from enum import IntEnum 2 | from langgraph.graph import StateGraph 3 | from typing import TypedDict, Optional 4 | from langgraph.graph.graph import CompiledGraph 5 | from sqlalchemy import and_ 6 | 7 | import workflow_attack_plan 8 | from persistence.database import DB 9 | from persistence.orm import Workflow 10 | from team import Team 11 | from config import logger 12 | 13 | WORK = 'deploy_attack' 14 | 15 | 16 | class StateDeployAttack(IntEnum): 17 | INIT = 0 # 初始化 18 | DEPLOY = 1 # 部署 19 | ATTACK = 2 # 攻击 20 | REWORK = 3 # 重试 21 | FINISH = 99 # 结束 22 | 23 | 24 | class Target: 25 | target: str 26 | asset: str 27 | plan: str 28 | status: StateDeployAttack 29 | 30 | def __init__(self, db: DB, task_id: int, target: str, asset: str, plan: str, workflow_id=0, status=StateDeployAttack.INIT): 31 | self.db = db 32 | self.task_id = task_id 33 | self.workflow_id = workflow_id 34 | self.target = target 35 | self.asset = asset 36 | self.plan = plan 37 | self.status = status 38 | 39 | def init(self): 40 | with self.db.DBSession() as session: 41 | if self.workflow_id == 0: 42 | wf = Workflow() 43 | wf.work = WORK 44 | 
wf.task_id = self.task_id 45 | wf.status = int(self.status) 46 | wf.data = {'target': self.target, 'asset': self.asset, 'plan': self.plan} 47 | session.add(wf) 48 | session.flush() 49 | session.commit() 50 | self.workflow_id = wf.id 51 | 52 | def update_status(self): 53 | with self.db.DBSession() as session: 54 | wf = session.query(Workflow).filter( 55 | and_( 56 | Workflow.id == self.workflow_id 57 | ) 58 | ).first() 59 | wf.status = int(self.status) 60 | wf.data = {'target': self.target, 'asset': self.asset, 'plan': self.plan} 61 | session.commit() 62 | 63 | 64 | class TaskStateDeployAttack(TypedDict): 65 | task_id: int 66 | targets: list[Target] 67 | 68 | 69 | class TaskNodesDeployAttack: 70 | team: Team | None = None 71 | db: DB | None = None 72 | 73 | def __init__(self, db: DB, team: Team): 74 | self.db = db 75 | self.team = team 76 | 77 | def init_task(self, state: TaskStateDeployAttack): 78 | """ 79 | 初始化任务 80 | """ 81 | if len(state['targets']) == 0: 82 | # 必须处理完所有遗留任务 83 | state['targets'] = self._load_attack_plan(state['task_id']) 84 | for target in state['targets']: 85 | target.init() 86 | 87 | return state 88 | 89 | def deploy(self, state: TaskStateDeployAttack): 90 | for target in state['targets']: 91 | if target.status in [StateDeployAttack.INIT, StateDeployAttack.REWORK, StateDeployAttack.DEPLOY]: 92 | # TODO 93 | target.status = StateDeployAttack.ATTACK 94 | target.update_status() 95 | 96 | return state 97 | 98 | def attack(self, state: TaskStateDeployAttack): 99 | for target in state['targets']: 100 | if target.status in [StateDeployAttack.ATTACK]: 101 | try: 102 | crew = self.team.get_establishing_foothold_deploy_attack_crew(target.asset, target.plan) 103 | out = crew.kickoff() 104 | # TODO 判定攻击是否成功 105 | target.status = StateDeployAttack.FINISH 106 | target.update_status() 107 | 108 | logger.info("[attack {}] {}\n{}", state['task_id'], target.target, out) 109 | except Exception as e: 110 | logger.error("[attack {}] {}\n{}", state['task_id'], target.target, e) 111 | 112 | return state 113 | 114 | def finish(self, state: TaskStateDeployAttack): 115 | """ 116 | 结束任务 TODO 117 | """ 118 | return state 119 | 120 | def edge_shuld_finish(self, state: TaskStateDeployAttack): 121 | """ 122 | 条件边 123 | """ 124 | 125 | for target in state['targets']: 126 | if target.status in [StateDeployAttack.REWORK]: 127 | return 'rework' 128 | return 'pass' 129 | 130 | def _load_attack_plan(self, task_id: int) -> [Target]: 131 | targets = [] 132 | with self.db.DBSession() as session: 133 | wfs = session.query(Workflow).filter( 134 | and_( 135 | Workflow.task_id == task_id, 136 | Workflow.work == workflow_attack_plan.WORK, 137 | Workflow.status == workflow_attack_plan.StatePlanReview.PASS 138 | ) 139 | ).all() 140 | for wf in wfs: 141 | targets.append( 142 | Target( 143 | db=self.db, 144 | task_id=wf.task_id, 145 | target=wf.data['target'], 146 | asset=wf.data['asset'], 147 | plan=wf.data['plan'] 148 | ) 149 | ) 150 | return targets 151 | 152 | 153 | class WorkFlowDeployAttack: 154 | app: CompiledGraph | None = None 155 | debug: bool = False 156 | team: Team | None = None 157 | db: DB | None = None 158 | 159 | def __init__(self, db: DB, team: Team, debug: Optional[bool] = None): 160 | self.db = db 161 | self.team = team 162 | self.debug = debug 163 | 164 | nodes = TaskNodesDeployAttack(db, team) 165 | workflow = StateGraph(TaskStateDeployAttack) 166 | workflow.add_node('init_task', nodes.init_task) 167 | workflow.add_node('deploy', nodes.deploy) 168 | workflow.add_node('attack', nodes.attack) 169 
| workflow.add_node('finish', nodes.finish) 170 | 171 | workflow.set_entry_point('init_task') 172 | workflow.set_finish_point('finish') 173 | 174 | workflow.add_edge('init_task', 'deploy') 175 | workflow.add_edge('deploy', 'attack') 176 | 177 | workflow.add_conditional_edges( 178 | source='attack', 179 | path=nodes.edge_shuld_finish, 180 | path_map={ 181 | 'rework': 'deploy', 182 | 'pass': 'finish' 183 | } 184 | ) 185 | 186 | self.app = workflow.compile(debug=debug) 187 | 188 | def run(self, taskid: int): 189 | state = { 190 | 'task_id': taskid, 191 | 'targets': [] 192 | } 193 | 194 | with self.db.DBSession() as session: 195 | wfs = session.query(Workflow).filter( 196 | and_( 197 | Workflow.task_id == taskid, 198 | Workflow.work == WORK, 199 | Workflow.status != StateDeployAttack.FINISH 200 | ) 201 | ).all() 202 | for wf in wfs: 203 | state['targets'].append( 204 | Target( 205 | db=self.db, 206 | task_id=wf.task_id, 207 | target=wf.data['target'], 208 | asset=wf.data['asset'], 209 | plan=wf.data['plan'], 210 | workflow_id=wf.id, 211 | status=StateDeployAttack(wf.status) 212 | ) 213 | ) 214 | 215 | self.app.invoke(state) 216 | --------------------------------------------------------------------------------
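附:最小串联示例(示意草图,非仓库源码)。下面的代码展示 team.py 与两个 workflow 如何组合成 "制定攻击计划 -> 部署攻击" 的流水线;其中 DB、RAG 与 llm 的具体构造参数未在本节源码中给出,均为假设占位,Team 与两个 WorkFlow 的调用方式则以上文源码为准。

# demo_workflow.py  示意草图(假设 DB/RAG/llm 的构造方式)
from persistence.database import DB
from rag.rag import RAG
from team import Team
from workflow_attack_plan import WorkFlowAttackPlan
from workflow_deploy_attack import WorkFlowDeployAttack

db = DB(...)    # 假设:按 persistence/database.py 的实际签名构造
rag = RAG(...)  # 假设:按 rag/rag.py 的实际签名构造
llm = None      # 假设:任意 langchain 兼容的 LLM 实例

# 工具路径参数(masscan_path、nmap_path、nuclei_path 等)均为可选,此处省略
team = Team(db=db, rag=rag, llm=llm, debug=True)

task_id = 1  # 假设:侦察 / 漏扫流程已为该任务写入数据
WorkFlowAttackPlan(db, team, debug=True).run(task_id)    # 生成并审核攻击计划(PASS/FAIL/REWORK)
WorkFlowDeployAttack(db, team, debug=True).run(task_id)  # 仅对审核为 PASS 的计划执行部署与攻击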