├── .gitignore ├── LICENSE ├── README.md ├── firmanalyzer ├── CVEsearch.py ├── Examples │ ├── buffalo │ │ ├── explore.log │ │ ├── report.txt │ │ └── summary.txt │ ├── dap-3662.log │ ├── dlink │ │ ├── DAP-2690_REVB_FIRMWARE_3.16.RC100_WW │ │ │ ├── report.txt │ │ │ └── summary.txt │ │ └── DAP-3662 │ │ │ ├── dap-3662.log │ │ │ ├── report.txt │ │ │ ├── summary.txt │ │ │ └── token_usage.jsonl │ ├── explore.log │ ├── mikrotik │ │ ├── routeros-7.5-mipsbe │ │ │ ├── report.txt │ │ │ ├── summary.txt │ │ │ └── token_usage.jsonl │ │ └── routeros-x86-6.49.6 │ │ │ ├── report.txt │ │ │ ├── summary.txt │ │ │ └── token_usage.jsonl │ ├── openwrt │ │ ├── fbecd0858ac36049e04bf9dd8acd3dd53fed97b4 │ │ │ ├── report.txt │ │ │ ├── summary.txt │ │ │ └── token_usage.jsonl │ │ └── openwrtSample │ │ │ ├── report.txt │ │ │ └── summary.txt │ ├── summary_example.md │ ├── tp-link │ │ └── TL-IPC55x_P_-x_2.0 │ │ │ ├── report.txt │ │ │ └── summary.txt │ └── ui │ │ ├── Ubiquiti 4.3.22.11330 │ │ ├── report.txt │ │ └── summary.txt │ │ └── Ubiquiti Linux-2.6.32.71 │ │ ├── report.txt │ │ └── summary ├── LogManage.py ├── R2decetor.py ├── README.md ├── analyze.py ├── callback.py ├── client.py ├── disassembly.py ├── explore.py ├── llm_config.py ├── requirements.yaml ├── run.py ├── securityscan.py ├── tools.py └── utils.py └── firmcrawler ├── Examples ├── dlink │ ├── navigation_links.jsonl │ ├── screenshot_1.png │ ├── screenshot_2.png │ ├── screenshot_3.png │ ├── screenshot_4.png │ └── screenshot_5.png ├── openwrt │ ├── screenshot_1.png │ ├── screenshot_2.png │ ├── screenshot_3.png │ ├── screenshot_4.png │ └── screenshot_5.png ├── tp-link │ ├── navigation_links.jsonl │ ├── screenshot_1.png │ ├── screenshot_2.png │ ├── screenshot_3.png │ ├── screenshot_4.png │ └── screenshot_5.png ├── ui │ ├── navigation_links.jsonl │ ├── screenshot_1.png │ ├── screenshot_2.png │ ├── screenshot_3.png │ ├── screenshot_4.png │ └── screenshot_5.png └── zyxel │ ├── navigation_links.jsonl │ ├── screenshot_1.png │ ├── 
screenshot_2.png │ ├── screenshot_3.png │ ├── screenshot_4.png │ └── screenshot_5.png ├── README.md ├── callback.py ├── crawler.py ├── mark_page.js ├── prompts ├── base.yaml ├── dlink.yaml ├── foscam.yaml └── ui.yaml ├── run.py └── web_intrect.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.ini 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | firmanalyzer/result/ 6 | .idea/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 10 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 54 | 55 | "Contributor" shall mean Licensor and any individual or Legal Entity 56 | on behalf of whom a Contribution has been received by Licensor and 57 | subsequently incorporated within the Work. 58 | 59 | 2. Grant of Copyright License... 60 | 61 | 3. Grant of Patent License... 62 | 63 | 4. Redistribution... 64 | 65 | 5. Submission of Contributions... 66 | 67 | 6. Trademarks... 68 | 69 | 7. Disclaimer of Warranty... 70 | 71 | 8. Limitation of Liability... 72 | 73 | 9. Accepting Warranty or Additional Liability... 74 | 75 | 10. 
Additional Terms by Licensor Zhang Xiangrui: 76 | 77 | a) The Work and Derivative Works may be used for **academic and 78 | non-commercial research purposes only**. 79 | 80 | b) Any form of **commercial use** (including but not limited to: 81 | integration into commercial products, providing commercial 82 | services, SaaS platforms, or generating revenue through the 83 | Work or Derivative Works) is **prohibited without explicit, 84 | written permission** from the Licensor. 85 | 86 | c) If You produce academic papers, tools, or software based on or 87 | using the Work or its Derivative Works, **you must also release 88 | those related codes under an open-source license** that is 89 | compatible with the Apache License, Version 2.0. 90 | 91 | d) You must properly attribute the original author(s) in any public 92 | or published work that uses or builds upon this Work. 93 | 94 | END OF TERMS AND CONDITIONS 95 | 96 | APPENDIX: How to apply the Apache License to your work. 97 | 98 | To apply the Apache License to your work, attach the following 99 | boilerplate notice, with the fields enclosed by brackets "[]" 100 | replaced with your own identifying information. 101 | 102 | Copyright 2025 Zhang Xiangrui 103 | 104 | Licensed under the Apache License, Version 2.0 (the "License"); 105 | you may not use this file except in compliance with the License. 106 | You may obtain a copy of the License at 107 | 108 | http://www.apache.org/licenses/LICENSE-2.0 109 | 110 | Unless required by applicable law or agreed to in writing, software 111 | distributed under the License is distributed on an "AS IS" BASIS, 112 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 113 | See the License for the specific language governing permissions and 114 | limitations under the License. 115 | 116 | Additional Restrictions: 117 | - For academic use only. 118 | - Commercial use is prohibited without permission. 
119 | - Research results based on this code must also be open-sourced. 120 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FirmLLM 2 | 3 | A Multi-Agent System for Large-Scale Firmware Collection and Analysis Based on Large Language Models 4 | 5 | FirmLLM is designed to **automate long-cycle analysis tasks** by leveraging multi-agent collaboration and LLMs, enabling scalable and intelligent firmware collection and vulnerability analysis. It aims to **reduce the manual burden on security analysts** by streamlining and accelerating complex firmware security workflows. 6 | 7 | --- 8 | 9 | ## Overview 10 | 11 | **FirmLLM** is an integrated system for large-scale firmware acquisition and security analysis. It leverages multi-agent collaboration and large language models (LLMs) to achieve intelligent automation in both crawling and analysis. 12 | 13 | The system consists of two main modules: 14 | 15 | - **FirmCrawler**: A multi-agent web crawler for automated firmware collection from online sources 16 | - **FirmAnalyzer**: A firmware security analysis engine that combines static tools and LLM capabilities 17 | 18 | --- 19 | 20 | ## 📁 Project Structure 21 | 22 | ``` 23 | . 24 | ├── firmcrawler/ # Firmware Collection System 25 | ├── firmanalyzer/ # Firmware Analysis System 26 | └── Examples/ # Analysis Result Samples 27 | ``` 28 | 29 | --- 30 | 31 | ## 🕸️ Firmware Collection System (FirmCrawler) 32 | 33 | **FirmCrawler** is used for automated collection of firmware files from various manufacturers. The system is built upon and optimized from [WebVoyager](https://langchain-ai.github.io/langgraph/tutorials/web-navigation/web_voyager/) [[paper]](https://arxiv.org/abs/2401.13919), implementing a multi-agent collaboration system for large-scale firmware collection. 
34 | WebVoyager is an innovative Large Multimodal Model (LMM) powered web agent that can complete user instructions end-to-end by interacting with real-world websites. 35 | 36 | ### 🔧 Key Features 37 | 38 | - Autonomous interaction with vendor sites 39 | - Intelligent firmware file identification and retrieval 40 | - Multi-agent concurrent crawling 41 | - Distributed task scheduling 42 | 43 | ### 📄 Core Files 44 | 45 | - `crawler.py`: Core crawler logic 46 | - `web_intrect.py`: Web interaction controller 47 | - `mark_page.js`: Web element tagging script 48 | - `prompts/*.yaml`: Vendor-specific prompt configurations for the LLM (`base.yaml`, `dlink.yaml`, `foscam.yaml`, `ui.yaml`) 49 | 50 | --- 51 | 52 | ## 🔍 Firmware Analysis System (FirmAnalyzer) 53 | 54 | FirmAnalyzer aims to **mimic human-like reasoning when analyzing firmware behavior**, leveraging the semantic understanding and code interpretation capabilities of LLMs to autonomously **prioritize, interpret, and investigate potential security issues**. 55 | This design helps **alleviate the heavy workload typically required from security professionals** in manual firmware auditing. 56 | 57 | 58 | ### 🛠️ Key Capabilities 59 | 60 | - Firmware unpacking and filesystem detection 61 | - Static and semantic vulnerability detection 62 | - Binary disassembly and logic analysis 63 | - CVE correlation and risk scoring 64 | - Human-readable reports powered by LLMs 65 | 66 | ### 📄 Core Files 67 | 68 | - `run.py`: Entry point for analysis 69 | - `explore.py`: Filesystem exploration and metadata gathering 70 | - `analyze.py`: Firmware file analysis module 71 | - **Sensitive Info Pattern Matching**: Regex and semantic pattern detection (preliminary filtering to guide LLM inspection and direction) 72 | - **Shell Command Executor**: Executes Linux tools such as `cat`, `grep`, etc. 
73 | - **CVE Query Tool**: Queries vulnerability data through APIs 74 | - **Disassembly Assistant**: Integrates `radare2` with LLM for binary reasoning 75 | - `requirements.yaml`: A user-configurable file that defines the overall analysis plan. It allows users to customize: 76 | - The **scope of analysis** and specific **target components** 77 | - **Directory scanning priorities** (e.g., focus on `/etc`, `/bin`, or `/www`) 78 | - **Security analysis strategies**, such as whether to perform code-level audits or pattern-based detections 79 | - **Risk severity classification rules** to define what constitutes high/medium/low severity 80 | - The **format and structure** of generated **reports and summaries** 81 | --- 82 | 83 | ## 📊 Example Outputs (Examples) 84 | 85 | Contains complete logs and reports from analyzing firmware using **DeepSeek-v3** and **DeepSeek-r1** models. 86 | 87 | ### 📝 Files 88 | 89 | - `explore.log`: Full exploration log 90 | - `report.txt`: File-by-file security analysis 91 | - `summary.txt`: Overall security summary and risk level 92 | 93 | ### 🤖 Model Performance Notes 94 | 95 | We primarily use **DeepSeek-v3** and **DeepSeek-r1** for firmware analysis. 96 | In addition, **Claude-Sonnet-3.7** may demonstrate promising performance in code auditing tasks, especially for semantic reasoning and cross-file logic analysis. 97 | DeepSeek offers lower costs, but we are actively experimenting and comparing results across multiple models to find the optimal configuration. 
98 | 99 | --- 100 | 101 | ## ⚠️ Limitations and Challenges 102 | 103 | ### Format & Semantic Limitations 104 | 105 | - Inability to automatically decrypt or handle proprietary firmware formats 106 | - LLMs may hallucinate when analyzing complex control flows 107 | - Limited understanding of intricate build scripts and service configurations 108 | --- 109 | 110 | ## 🚧 Roadmap 111 | 112 | Upcoming work will focus on: 113 | 114 | - **Integrating more static analysis tools**, especially for binary-focused workflows to improve analysis coverage and accuracy 115 | - **Combining symbolic execution with LLMs** to enhance deep vulnerability discovery and validation 116 | - **Exploring firmware runtime state analysis** (e.g., process behavior, service call graphs, and system responses) to prioritize high-risk issues 117 | 118 | --- 119 | 120 | ## 📌 Notes & Compliance 121 | 122 | 1. All results are for **research purposes only** 123 | 2. Please ensure legal and ethical use of the system 124 | 3. Verification should be performed in sandboxed/test environments 125 | 4. 
# Matches version tokens such as "1.01", "v1.01", "2.6.32.71" or "2.0-devel".
# The hyphen is part of the suffix class so "-devel"/"-fips" style qualifiers
# stay attached to the version instead of leaking into the component name.
_VERSION_PATTERN = re.compile(r"\b(v?\d+\.\d+[\w.\-]*)\b")

# Captures the numeric year and sequence number of a CVE identifier.
_CVE_ID_PATTERN = re.compile(r"CVE-(\d+)-(\d+)", re.IGNORECASE)

# Strips a trailing comma that directly precedes a closing brace/bracket.
_TRAILING_COMMA_PATTERN = re.compile(r",(\s*[}\]])")

# Upper bound on how many (newest) CVEs are forwarded to the LLM.
_MAX_CVES_TO_ANALYZE = 100

# Seconds to wait for the MITRE endpoint before giving up.
_REQUEST_TIMEOUT_SECONDS = 30


def parse_openai_response(response_text: str) -> List[Dict[str, str]]:
    """Extract and validate CVE entries from an LLM response.

    Every JSON array of objects found in ``response_text`` is parsed and each
    element that is a dict with both ``cve_id`` and ``description`` keys and
    whose ID starts with ``CVE-`` is kept.

    Args:
        response_text: Raw text returned by the model. ``None`` or an empty
            string yields an empty list.

    Returns:
        A list shaped like ``[{"cve_id": "CVE-XXXX-XXXX", "description": "..."}]``.
    """
    # Matches a JSON array of objects, possibly spanning multiple lines.
    json_pattern = r'\[\s*\{.*?\}\s*\]'
    valid_entries = []

    for match in re.finditer(json_pattern, response_text or "", re.DOTALL):
        json_str = match.group()
        try:
            parsed = json.loads(json_str)
        except json.JSONDecodeError as e:
            # Models often emit trailing commas (invalid JSON); retry once with
            # them removed before reporting the original failure.
            try:
                parsed = json.loads(_TRAILING_COMMA_PATTERN.sub(r"\1", json_str))
            except json.JSONDecodeError:
                print(f"JSON解析失败: {str(e)}")
                print(f"Problematic JSON: {json_str}")
                continue

        if not isinstance(parsed, list):
            print(f"Expected JSON array, got {type(parsed)}")
            continue

        for entry in parsed:
            # Non-dict elements (numbers, strings, ...) cannot carry the
            # required keys; skip them instead of raising TypeError.
            if not isinstance(entry, dict):
                continue
            if 'cve_id' not in entry or 'description' not in entry:
                continue

            valid_entry = {
                'cve_id': str(entry['cve_id']).strip(),
                'description': str(entry['description']).strip()
            }
            # Basic sanity check on the identifier format.
            if valid_entry['cve_id'].startswith('CVE-'):
                valid_entries.append(valid_entry)
            else:
                print(f"Invalid CVE ID format: {valid_entry['cve_id']}")

    return valid_entries


def parse_component_and_version(component: str) -> tuple:
    """Split a component string into ``(name, version)``.

    The first token that looks like a version (``1.01``, ``v1.01``,
    ``2.0-devel``) is taken as the version; the remaining text, with
    whitespace collapsed and dangling ``-``/``_`` separators removed, is the
    name. When no version is found the input is returned unchanged together
    with an empty version string.

    Args:
        component: e.g. ``"BusyBox 1.01"`` or ``"hostapd 2.0-devel"``.

    Returns:
        A ``(name, version)`` tuple of strings.
    """
    match = _VERSION_PATTERN.search(component)
    if not match:
        return component, ""

    version = match.group(1)
    remainder = component.replace(version, "")
    name = " ".join(remainder.split()).strip(" -_")
    return name, version


def parse_mitre_html(html_content, component_name, version):
    """Parse a MITRE keyword-search HTML page into CVE records.

    Every table row with at least two cells is inspected: the first cell must
    contain a link whose text includes ``CVE-`` and the second cell is used as
    the description. Only rows whose description mentions ``component_name``
    (case-insensitive) are kept.

    Args:
        html_content: HTML text of the search result page.
        component_name: Component name that must appear in the description.
        version: Not used for filtering here; kept for interface compatibility.

    Returns:
        A list of ``{"cve_id", "description", "source"}`` dicts.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    needle = component_name.lower()
    cve_items = []

    # Scan all tables rather than relying on a specific element ID.
    for table in soup.find_all('table'):
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            if len(cells) < 2:
                continue

            cve_link = cells[0].find('a')
            if not cve_link or 'CVE-' not in cve_link.text:
                continue

            desc_text = cells[1].text.strip()
            if needle in desc_text.lower():
                cve_items.append({
                    'cve_id': cve_link.text.strip(),
                    'description': desc_text,
                    'source': 'MITRE'
                })

    print(f"Found {len(cve_items)} related CVEs from MITRE")
    return cve_items


def analyze_cve_with_openai(component_name: str, version: str, search_results: Dict[str, Any]) -> List[Dict[str, str]]:
    """Ask the LLM which of the collected CVEs affect the given version.

    CVEs from ``search_results["mitre"]`` are sent to the model in batches and
    the validated answers of all batches are concatenated. A failing batch is
    reported and skipped; it does not abort the remaining batches.

    Args:
        component_name: Name of the component under analysis.
        version: Version string of the component (may be empty).
        search_results: Mapping whose ``"mitre"`` entry is a list of CVE dicts
            (or ``None``).

    Returns:
        A list of ``{"cve_id", "description"}`` dicts confirmed by the model.
    """
    all_cves = []
    if search_results.get("mitre"):
        all_cves.extend(search_results["mitre"])

    BATCH_SIZE = 15
    analyzed_results = []
    target = f"{component_name} {version}".strip()

    # The client is created lazily inside the try block (so a configuration
    # error is reported like any other batch failure) but only once.
    model = client = None

    for i in range(0, len(all_cves), BATCH_SIZE):
        batch = all_cves[i:i + BATCH_SIZE]
        batch_num = i // BATCH_SIZE + 1

        # The example block below must be valid JSON (no trailing comma),
        # otherwise the model tends to reproduce the invalid syntax.
        prompt = f"""
Target Component: {target}

Please analyze the following {len(batch)} CVEs to determine if they affect this version. Do not make extra inferences.
{json.dumps(batch, indent=2)}

**Version Analysis Rules**:
When analyzing CVEs:
- Only include CVEs where the vulnerable version range INCLUDES the component version
For example, if the component version is 1.01, and the CVE description mentions version through v1.02, it should be included.

Please return all matching CVEs in JSON format, otherwise return an empty list. Do not include any other content. Format as follows:
'''
[
    {{
        "cve_id": "CVE-ID",
        "description": "Vulnerability description"
    }}
]
'''

"""
        print(prompt)
        try:
            if client is None:
                model, client = create_openai_client()
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a security expert specializing in analyzing CVE vulnerability information to confirm which CVEs affect the target component."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0
            )
            if response.choices and response.choices[0].message:
                result_text = response.choices[0].message.content
                print(f"\n=== Batch {batch_num} Analysis Results ===")
                parsed_data = parse_openai_response(result_text)
                analyzed_results.extend(parsed_data)
                print(parsed_data)
                # Progress is the number of CVEs submitted so far, not the
                # number of matches multiplied by the batch size.
                print(f"Analyzed {i + len(batch)}/{len(all_cves)} CVEs")

                # Small delay to stay below API rate limits.
                time.sleep(1)

        except Exception as e:
            print(f"Error processing batch {batch_num}: {e}")

    print("\n=== Merging Analysis Results ===")
    print(json.dumps(analyzed_results, indent=2, ensure_ascii=False))

    return analyzed_results


def _cve_sort_key(cve_id: str) -> tuple:
    """Return ``(year, sequence)`` as integers so CVE IDs sort numerically."""
    match = _CVE_ID_PATTERN.search(cve_id or "")
    if not match:
        return (0, 0)
    return (int(match.group(1)), int(match.group(2)))


def query_nvd_and_mitre(component: str, save_path: str = None):
    """Query CVE information for a component and filter it with the LLM.

    Despite the name, only the MITRE keyword search is queried. The newest
    CVEs (at most ``_MAX_CVES_TO_ANALYZE``) are passed to
    ``analyze_cve_with_openai``.

    Args:
        component: Component name, e.g. "BusyBox 1.01" or "BusyBox v1.01".
        save_path: Optional directory; when given, the analyzed results are
            appended to ``cve_search.txt`` inside it.

    Returns:
        ``{"raw_results": ..., "analyzed_results": ...}``.
    """
    component_name, version = parse_component_and_version(component)

    mitre_results = None
    try:
        print(f"\nQuerying MITRE: {component}")
        # Let requests URL-encode the keyword, and bound the wait so a stalled
        # connection cannot hang the whole analysis run.
        mitre_response = requests.get(
            "https://cve.mitre.org/cgi-bin/cvekey.cgi",
            params={"keyword": component},
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )

        if mitre_response.status_code == 200:
            mitre_results = parse_mitre_html(mitre_response.text, component_name, version)
        else:
            print(f"MITRE returned error status code: {mitre_response.status_code}")

    except Exception as e:
        print(f"MITRE query error: {e}")
        mitre_results = None

    results = {
        "mitre": mitre_results,
        "component": component_name,
        "version": version
    }

    if results["mitre"]:
        # Newest first. IDs are compared numerically because a plain string
        # sort ranks "CVE-2019-9999" above "CVE-2019-10000".
        sorted_cves = sorted(results["mitre"],
                             key=lambda x: _cve_sort_key(x['cve_id']),
                             reverse=True)

        results["mitre"] = sorted_cves[:_MAX_CVES_TO_ANALYZE]
        print(f"Limited to newest {_MAX_CVES_TO_ANALYZE} CVEs from {len(sorted_cves)} total CVEs")

    analyzed_results = analyze_cve_with_openai(component_name, version, results)

    if save_path:
        try:
            os.makedirs(save_path, exist_ok=True)
            output_file = os.path.join(save_path, "cve_search.txt")
            with open(output_file, 'a', encoding="utf-8") as f:
                save_data = {
                    "analyzed_results": analyzed_results
                }
                json.dump(save_data, f, indent=2)
                # The file is opened in append mode; separate successive
                # documents so they do not run into each other.
                f.write("\n")
            print(f"Results saved to {output_file}")
        except Exception as e:
            print(f"Error saving results to file: {e}")

    print("\n=== Final Results ===")
    print(f"Total vulnerabilities found: {len(analyzed_results)}")
    for idx, cve in enumerate(analyzed_results, 1):
        print(f"{idx}. [{cve['cve_id']}] {cve['description'][:60]}...")

    return {
        "raw_results": results,
        "analyzed_results": analyzed_results
    }


if __name__ == "__main__":
    components = [
        "BusyBox 1.01",
        "hostapd 2.0-devel",
        "wpa_supplicant 2.0-devel"
    ]

    print("Starting component vulnerability query...")
    all_results = {}
    for component in components:
        print(f"\n=== Querying component: {component} ===")
        results = query_nvd_and_mitre(component, "./cve_results")
        all_results[component] = results
        time.sleep(1)  # Brief pause between components

    print("\nQuery complete")
/www/index.html, /etc/openvpn/server.conf, /lib/functions/preinit.sh, /lib/functions/uci-defaults.sh 11 | 12 | [Main Issues] 13 | - Critical Issues: 14 | - /etc/shadow: Root account has no password set (empty password field), confirmed by /etc/profile script warning, allowing unrestricted access and potential full system compromise. 15 | - /etc/key-build.pub: Private key exposure ("private key 605c7f1ccfcc1f58") in a public file, risking unauthorized access to sensitive systems or data. 16 | - /etc/ssl/openssl.cnf: References to private keys (cakey.pem, tsakey.pem) and placeholder passwords ('secret') expose sensitive cryptographic material. Weak hash algorithms (md5, sha1) are specified as acceptable message digests, increasing risks of collision attacks. 17 | - /etc/config/uhttpd: Explicit reference to private key file (/etc/uhttpd.key). If improperly secured, it could allow decryption of HTTPS traffic or server impersonation. 18 | - /bin/busybox: Multiple vulnerabilities (CVE-2019-5747, CVE-2021-28831, CVE-2018-20679, CVE-2017-16544, CVE-2017-15874, CVE-2017-15873, CVE-2016-6301, CVE-2015-9261, CVE-2022-30065, CVE-2021-42381, CVE-2023-42366, CVE-2023-42365, CVE-2023-42364, CVE-2023-42363) including heap-buffer-overflow, use-after-free, integer underflow/overflow, remote code execution, and denial of service risks. 19 | - /etc/config/dropbear: 'PasswordAuth', 'RootPasswordAuth', and 'RootLogin' enabled by default, increasing brute force attack risks. Multiple CVEs identified (e.g., CVE-2023-48795, CVE-2021-36369, CVE-2020-36254, CVE-2019-12953, CVE-2018-15599) affecting Dropbear, with CVE-2023-48795 rated critical for integrity check bypass. 20 | - /sbin/init: Use of insecure function sprintf without input validation or bounds checking at address 0x4015c8, increasing the risk of arbitrary code execution. 21 | - /sbin/netifd: Format string vulnerabilities identified in printf (0x402974) and fprintf calls within sym._fini (0x4115a4). 
22 | - /sbin/procd: Absence of input validation or sanitization in the function chain leading to insecure functions like 'gets' and 'sscanf'. 23 | - /sbin/fw3: Remote code execution vulnerability in `sym._init` due to insecure use of `gets` and dynamic code execution via `jalr t9`. 24 | - /sbin/logread: OS Command Injection vulnerability (CVE-2023-29778) in logread component. 25 | - /sbin/sysupgrade: '--restore-backup' option allows restoring configuration from a URL without proper validation, enabling arbitrary code execution if a malicious URL is provided. 26 | - /sbin/wifi: Multiple critical vulnerabilities (CVE-2024-53940, CVE-2024-53939, CVE-2024-53937, CVE-2017-8772, CVE-2024-53938) including command injection, default credentials, and unauthenticated TELNET access. 27 | - /lib/libc.so: Vulnerable to CVE-2019-14697, allowing out-of-bounds writes due to x87 floating-point stack adjustment imbalance. memcpy operation in sym.__init_ssp lacks input validation, and use of 'gets' in sym.bind_textdomain_codeset introduces buffer overflow risks. 28 | - /etc/services: Insecure service configurations detected, including telnet (23/tcp), ftp (21/tcp), and finger (79/tcp), transmitting data in plaintext and exposing the system to interception and misuse. 29 | - /etc/config/etherwake: Plaintext password (AABBCCDDEEFF) used for wake-on-LAN functionality, exposing sensitive credentials. 30 | - /etc/config/pptpd: Plaintext credentials ('youruser'/'yourpass') and PPTP server IP configurations (local: 192.168.0.1, remote range: 192.168.0.20-30) exposed, increasing the risk of unauthorized access. 31 | - /etc/config/snmpd: Default SNMP community strings "public" (read-only) and "private" (read-write) configured, exposing the system to unauthorized access and manipulation. Presence of outdated SNMP versions v1 and v2c increases risks of data interception and exploitation (CVE-2005-3722). 
32 | - /etc/init.d/boot: Hardcoded PSK (Pre-Shared Key) value 'FacWPAPSK1_BK=12345' exposes wireless network credentials, allowing unauthorized access if extracted. 33 | - /etc/init.d/embeddd: Sensitive DDNS credentials (username, password, update_url) defined in plaintext within the script, risking exposure of authentication data. 34 | - /etc/init.d/openvpn: Sensitive certificate and key data (ca_inline, cert_inline, key_inline, tls_inline) written in plaintext to `/var/run/ovpn.config`, and plaintext username/password written to `/var/run/ovpn_auth.txt` when user_pass_auth is enabled, risking exposure of cryptographic material and authentication data. Management interface bound to 127.0.0.1:14 without clear access restrictions, potentially allowing unauthorized control. 35 | - /etc/init.d/pptpd: Plaintext password written to $CHAP_SECRETS file without encryption, exposing login credentials. Firewall rule allows unrestricted TCP access on port 1723, increasing exposure risks. 36 | - /etc/ppp/options: Logging disabled by setting logfile to /dev/null, removing audit trails. Additionally, maxfail 0 permits unlimited failed connection attempts, increasing brute-force risks. 37 | - /etc/openvpn/server.conf: Management interface bound to 127.0.0.1:14 without additional authentication mechanisms, posing a risk if improperly exposed externally. 38 | - /lib/functions/uci-defaults.sh: Potential sensitive data exposure identified through patterns resembling password handling (e.g., 'local key="$2"', 'json_add_boolean "$key"'). If these handle credentials without proper security measures, it could lead to sensitive data exposure. 39 | 40 | - Major Issues: 41 | - /etc/sysctl.conf: Disabled bridge firewalling (net.bridge.bridge-nf-call-* set to 0) and enabled IP forwarding for IPv4 and IPv6 (net.ipv4.ip_forward=1, net.ipv6.conf.all.forwarding=1), increasing attack surface and potential exposure to network attacks. 
42 | - /lib/libuci.so: Affected by CVE-2020-28951, introducing use-after-free risks when handling malicious package names, potentially leading to memory corruption or privilege escalation. 43 | - /lib/libubox.so: Affected by CVE-2020-7248, a stack-based buffer overflow in JSON serialization functionality, potentially allowing remote code execution. 44 | - /sbin/kmodloader: Stack-based buffer overflow in __dn_expand function (CVE-2014-3484), affecting musl libc versions before 1.1.2 and versions 0.9.13 through 1.0.3. 45 | - /lib/uboot-envtools.sh: Lack of input validation before writing configurations to /etc/fw_env.config, potentially allowing malicious configuration injection. 46 | - /bin/ipcalc.sh: Lack of input validation and sanitization in ARGV handling within functions like ip2int(), int2ip(), and compl32(). 47 | - /etc/hotplug.json: Creation of device nodes with broad permissions (e.g., '0666') and insecure use of 'exec' to call external scripts like '/sbin/hotplug-call', risking unauthorized access and script execution. 48 | - /etc/config/ddns: Explicit fields for username ("your_username") and password ("your_password") placeholders, which could expose credentials if replaced with actual values. 49 | - /etc/config/rpcd: Plaintext username ('webuser') and password ('$p$webuser') stored in an insecure format, increasing the risk of unauthorized access. 50 | - /etc/init.d/snmpd: Dynamic construction of SNMP configuration file (/var/run/snmpd.conf) using user-provided values (e.g., community strings, source IPs) without secure defaults or validation, risking unauthorized access. 51 | - /www/index.html: Version information exposure ('1.1.0+b3321bb2') in the meta tag could allow attackers to identify and exploit known vulnerabilities associated with this specific version. 52 | 53 | - Other Issues: 54 | - /etc/preinit: Version information exposure ('2006-2016') could indicate outdated software components with potential unpatched vulnerabilities. 
55 | - /etc/rc.common: Version information exposure ('2006-2012') and insecure handling of service control variables ($START, $STOP) without proper validation, risking unintended service behavior or exposure. 56 | - /bin/busybox: Exposure of sensitive content such as '/etc/passwd' and '/etc/shadow' strings. 57 | - /bin/config_generate: Default credentials ('username'/'password') embedded in plaintext for PPPoE configuration. 58 | - /bin/uclient-fetch: Presence of '--password=' string indicates potential insecure handling or exposure of credentials. 59 | - /lib/functions.sh: Code injection risk in append function due to unsanitized use of 'eval' with user-controlled variables $var and $value. Exposure of version information ("Copyright (C) 2006-2014 OpenWrt.org") could help attackers identify outdated software. 60 | ''' 61 | 62 | -------------------------------------------------------------------------------- /firmanalyzer/Examples/dlink/DAP-2690_REVB_FIRMWARE_3.16.RC100_WW/summary.txt: -------------------------------------------------------------------------------- 1 | ``` 2 | Firmware Summary: 3 | [Basic Info] 4 | - Name/Version: DAP-2690_REVB_FIRMWARE_3.16.RC100_WW 5 | - Device/Type/Arch: DAP-2690 / Wireless Access Point / MIPS 6 | - Key Info: Created 2016-03-21, BusyBox v1.14.1, SquashFS v4.0 7 | 8 | [Coverage] 9 | - Key Dirs: /bin, /etc, /etc/cert, /etc/config, /etc/ssh, /bin/busybox.extracted, /sbin, /htdocs, /www, /lib, /usr 10 | - Key Files: /bin/busybox, /bin/ip, /bin/iplink, /bin/iproute, /bin/ping, /bin/ping6, /bin/sh, /bin/ps, /bin/mount, /bin/umount, /etc/cert/key.pem, /etc/config/defaultvalue.gz, /etc/ssh/ssh_host_dsa_key, /sbin/init, /sbin/hostapd, /sbin/httpd, /sbin/wpa_supplicant, /sbin/ifconfig, /sbin/insmod, /sbin/iwconfig, /sbin/iwlist, /sbin/iwpriv, /sbin/klogd, /sbin/lsmod, /sbin/mdev, /sbin/modprobe, /sbin/rmmod, /sbin/route, /sbin/sysctl, /sbin/syslogd, /sbin/tunctl, /sbin/vconfig, /sbin/wlanconfig, /sbin/ebtables, /sbin/ebtables-restore, 
/sbin/ebtables-save 11 | 12 | [Main Issues] 13 | - Critical Issues: 14 | - /bin/busybox, /bin/iproute, /bin/ping, /bin/ping6, /bin/sh, /bin/ps, /bin/mount, /bin/umount, /sbin/init, /sbin/ifconfig, /sbin/insmod, /sbin/lsmod, /sbin/mdev, /sbin/modprobe, /sbin/rmmod, /sbin/route, /sbin/sysctl, /sbin/tunctl, /sbin/vconfig: Multiple critical CVEs (CVE-2022-48174, CVE-2022-30065, CVE-2022-28391, CVE-2021-42386, CVE-2018-20679, CVE-2021-42373-86) in BusyBox v1.14.1 leading to RCE, stack overflow, and use-after-free vulnerabilities 15 | - /bin/busybox, /bin/ip, /bin/iplink, /bin/ping6, /bin/sh, /bin/ps, /bin/mount, /sbin/init, /sbin/ifconfig, /sbin/lsmod, /sbin/mdev, /sbin/modprobe, /sbin/route, /sbin/tunctl, /sbin/vconfig: Unsafe function usage (gets, fgets, strcpy) without bounds checking in multiple functions (fcn.00415988, fcn.00408a2c, fcn.0040bf38, fcn.0041ca1c, fcn.00419880, fcn.00443400, fcn.00445598, fcn.0040fe9c) 16 | - /sbin/httpd: Multiple critical vulnerabilities in httpd v1.3.6 (CVE-2007-3304, CVE-2007-3847, CVE-2007-5000) including buffer overflow and remote code execution 17 | - /sbin/httpd: Unsafe function usage (memcpy, strcpy) without bounds checking in functions (fcn.0040d3d4, sym.process_request) risking RCE 18 | - /sbin/hostapd: Multiple CVEs (CVE-2022-23304, CVE-2019-9499, CVE-2016-4476) in hostapd v0.8.x allowing side-channel attacks, authentication bypass, and DoS 19 | - /sbin/wpa_supplicant: Multiple CVEs (CVE-2023-52160, CVE-2022-23304, CVE-2022-23303, CVE-2021-27803, CVE-2019-9496) in wpa_supplicant v3.3.2 allowing authentication bypass, side-channel attacks, and DoS 20 | - /sbin/iwconfig, /sbin/iwlist, /sbin/iwpriv: Multiple high-risk vulnerabilities including buffer overflows (fcn.00403850, sym.iw_enum_devices, sym.iw_get_stats, fcn.00406440, sym.iw_print_key) due to improper use of `fgets`, `memcpy`, `strncpy`, and `strcpy` without bounds checking, risking RCE and memory corruption 21 | - /sbin/klogd: Critical vulnerabilities in klogd v1.3.6 
(CVE-2001-0738) and unsafe function usage (strcpy, sprintf, system) in functions (fcn.00400d54, fcn.00403538, fcn.00402890, fcn.00403d08) risking buffer overflow, command injection, and format string vulnerabilities 22 | - /sbin/syslogd: Critical vulnerabilities in functions (fcn.004017b0, fcn.00402be4, fcn.00404e00, fcn.0040275c) due to insecure function usage (`sprintf`, `popen`, `strcpy`) and lack of input validation, risking buffer overflow, command injection, and format string vulnerabilities 23 | - /sbin/vconfig: Critical vulnerabilities in functions (fcn.00415988, fcn.00408a2c, fcn.0040bf38) due to insecure function usage (gets, strcpy, fgets) and lack of input validation, risking buffer overflow and arbitrary code execution 24 | - /sbin/ebtables-restore: Format string vulnerability in `main` function and critical use of `gets` in `entry0` function, risking RCE and privilege escalation 25 | - /sbin/ebtables-save: Command injection vulnerability due to insecure use of backticks and environment variables (`EBTABLES_SAVE_COUNTER`), risking arbitrary command execution 26 | 27 | - Major Issues: 28 | - /etc/cert/key.pem: Private key stored in firmware without proper protection 29 | - /etc/ssh/ssh_host_dsa_key: SSH host key stored in firmware 30 | - /sbin/httpd: IP addresses (171.10.37, 39.32.9) embedded in version strings exposing sensitive network information 31 | - /sbin/insmod: Multiple CVEs (CVE-2018-7170, CVE-2018-7171, CVE-2018-7172) in sed v4.0 allowing buffer overflow and arbitrary code execution 32 | - /sbin/iwconfig, /sbin/iwlist, /sbin/iwpriv: Version information exposure (4.3.3, 3.3.2, 2.4, 2.0, 802.11) and insecure memory handling in functions (fcn.00401758, fcn.00404878, fcn.0040143c, fcn.00401f40, fcn.00406358) risking format string vulnerabilities and unvalidated input exploitation 33 | - /sbin/wlanconfig, /sbin/ebtables: Version information exposure (3.3.2, 4.3.3, 2.0) which could be used to identify potential vulnerabilities 34 | 35 | - Other 
Issues: 36 | - /etc/config/defaultvalue.gz: Contains default configuration values that could be exploited if not properly secured 37 | - /sbin/init, /sbin/hostapd, /sbin/httpd, /sbin/wpa_supplicant, /sbin/ifconfig, /sbin/insmod, /sbin/lsmod, /sbin/mdev, /sbin/modprobe, /sbin/rmmod, /sbin/route, /sbin/sysctl, /sbin/tunctl, /sbin/vconfig, /sbin/wlanconfig, /sbin/ebtables: Version information exposure (BusyBox v1.14.1, sed v4.0, hostapd v0.8.x, httpd v1.3.6, wpa_supplicant v3.3.2) that could be used to identify potential vulnerabilities 38 | ``` 39 | -------------------------------------------------------------------------------- /firmanalyzer/Examples/dlink/DAP-3662/summary.txt: -------------------------------------------------------------------------------- 1 | ''' 2 | Firmware Summary: 3 | [Basic Info] 4 | - Name/Version: DAP-3662 REVA FIRMWARE 1.00RC015 5 | - Device/Type/Arch: DAP-3662 / Linux / MIPS 6 | - Key Info: Kernel version 2.6.31, created 2015-01-20 7 | 8 | [Coverage] 9 | - Key Dirs: /bin, /dev, /etc, /lib, /root, /sbin, /www 10 | - Key Files: bin/busybox, etc/ssh/ssh_host_dsa_key, etc/ssh/ssh_host_rsa_key, lib/libcrypto.so.0.9.8, lib/libssl.so.0.9.8, lib/ld-uClibc.so.0, lib/libc.so.0, lib/libradiusclient-ng.so.2, lib/libz.so.1.2.3, lib/libpthread.so.0, lib/libqc98xx.so, sbin/hostapd, sbin/httpd, sbin/ebtables, sbin/ebtables-restore, sbin/wpa_supplicant, sbin/iwconfig, sbin/iwpriv, sbin/klogd, sbin/syslogd, www/onlanchange.php, www/adv_array_auth.php, www/adv_array_scan.php, www/adv_captivals.php, www/adv_url_addr.php, www/__action.php, www/__action_bsc.php, www/__action_dhcp_server.php, www/adv_ap_array.php, www/adv_mssid.php, www/bsc_capwap.php, www/bsc_lan.php, www/bsc_wlan.php, www/st_device.php, www/st_stats_lan.php, www/st_wds_info.php, www/tool_admin.php, www/adv_acl.php, www/adv_array_config.php, www/adv_8021q_s.php, www/adv_qos.php, www/adv_radiusserver.php, www/adv_url.php, www/check_radiusclient.php, www/sys_cfg_valid.php, www/sys_fw_update.php, 
www/sys_setting.php, www/tool_sntp.php, www/login.php, www/session_login.php, www/adv_dhcpd.php, www/adv_mdhcpd.php, www/adv_arpspoofing.php, www/adv_8021q.php, www/adv_radiusclient.php, www/st_log.php 11 | 12 | [Main Issues] 13 | - Critical Issues: 14 | [bin/busybox]: Contains two high-risk functions (fcn.004120cc and fcn.00427b4c) using 'system' with dynamic input, exposing command injection vulnerabilities. Environment variables processed without validation in fcn.00427b4c. 15 | [bin/busybox]: Vulnerable to CVE-2021-28831 (invalid free/segmentation fault) and CVE-2019-5747 (out-of-bounds read) due to outdated version 1.14.1. 16 | [etc/ssh/ssh_host_dsa_key]: Contains exposed DSA private key, leading to potential unauthorized access if compromised. 17 | [etc/ssh/ssh_host_rsa_key]: Contains exposed RSA private key, leading to potential unauthorized access if compromised. 18 | [lib/libcrypto.so.0.9.8]: Outdated version (0.9.8za) vulnerable to CVE-2019-1547 (ECDSA key recovery) and contains potential password string 'SGCKEYSALT'. 19 | [lib/libssl.so.0.9.8]: Outdated version (0.9.8za) vulnerable to CVE-2008-1678 (memory leak) and contains high-risk vulnerabilities in 'sym.dtls1_read_bytes' (buffer overflow) and 'sym.dtls1_read_failed' (format string). 20 | [lib/ld-uClibc.so.0]: Outdated version (1.7.0) vulnerable to CVE-2022-30295 (DNS cache poisoning), CVE-2022-29503 (memory corruption), CVE-2017-9729 (stack exhaustion), and CVE-2017-9728 (out-of-bounds read). 21 | [lib/libc.so.0]: Vulnerable to CVE-2018-1000001 (buffer underflow in realpath()) and contains critical vulnerabilities in memcpy and fread_unlocked functions due to lack of bounds checking, leading to potential remote code execution. 22 | [lib/libradiusclient-ng.so.2]: Critical buffer overflow vulnerabilities in `sym.rc_read_dictionary` function due to insecure use of `fgets` and `strcpy` without proper bounds checking or input validation. 
23 | [lib/libz.so.1.2.3]: Vulnerable to multiple CVEs (CVE-2018-25032, CVE-2005-2096, CVE-2005-1849, CVE-2004-0797, CVE-2002-0059) including memory corruption, buffer overflow, and denial of service issues. 24 | [lib/libpthread.so.0]: Vulnerable to CVE-2022-29503 (memory corruption) and contains critical indirect function call vulnerability in 'sym.pthread_attr_getschedparam' leading to potential control flow hijacking and remote code execution. 25 | [lib/libqc98xx.so]: Critical buffer overflow vulnerabilities in 'sym.art_otpRead' (indirect control via gp register) and 'sym.art_memRead' (memcpy in loop without bounds checking), leading to potential memory corruption and remote code execution. 26 | [sbin/hostapd]: Linked to outdated OpenSSL libraries (libssl.so.0.9.8, libcrypto.so.0.9.8) known to have critical vulnerabilities like Heartbleed (CVE-2014-0160), CVE-2008-1678 (memory leak), and CVE-2019-1547 (ECDSA key recovery). 27 | [sbin/iwconfig]: Function fcn.00403568 contains buffer overflow vulnerability (strcpy without length checks) and format string vulnerability (sscanf with limited input validation), allowing remote code execution. 28 | [sbin/iwpriv]: Critical format string vulnerability in sym.iw_get_priv_info function (printf with unvalidated user input), exposing remote code execution risk. 29 | [sbin/klogd]: Command injection and buffer overflow vulnerabilities in function fcn.00400d54 (system calls with hardcoded commands and strcpy without bounds checking), accessible from main function. 30 | [sbin/klogd]: Vulnerable to CVE-2001-0738 (DoS via null bytes in log messages) due to detected version 1.3.6. 31 | [www/onlanchange.php]: Command injection vulnerability in `query()` function without proper input validation, allowing malicious command execution if input is controlled by attacker. 
32 | [www/adv_captivals.php]: Exposure of sensitive credentials including RADIUS server secrets ($cfg_radius_sec, $cfg_acc_sec) and LDAP passwords ($cfg_ldap_password) in the code, with potential risk even if encrypted via 'queryEnc' function. 33 | [www/adv_url_addr.php]: Insecure handling and storage of passwords in code segments like 'var r_list=[['index','name','passwd','status']' and 'echo ", 34 | ['".$@."','".get("j","name")."','".queryEnc("passwd")."','".query("enable")."']";'. 35 | [www/__action.php]: Hardcoded file paths like '/var/proc/web/session:'.$sid.'/user/group' and '/www/permission_deny.php' could be exploited if not properly secured. Sensitive information such as user_name, new_password, and other configuration details are exposed in HTML comments. 36 | [www/__action_bsc.php]: Hardcoded password/key exposure in 'echo "key = ". $f_key ."\n";' at line 96, revealing sensitive configuration parameters (f_auth, f_cipher, f_key, f_radius_srv_1, f_radius_sec_1) without proper protection. 37 | [www/__action_dhcp_server.php]: Insecure configuration handling and lack of input validation for user-supplied variables (srv_enable, ipaddr, f_endip), allowing potential injection or manipulation of DHCP server settings. 38 | [www/adv_ap_array.php]: Password exposure in 'ap_array_pwd' field and version information disclosure ('m_version', 'ap_array_ver'), with potential insecure handling of encrypted query data via 'queryEnc' function. 39 | [www/adv_mssid.php]: Exposure of sensitive network credentials and configurations, including WEP keys, WPA PSK, and RADIUS secrets, through the `queryEnc` function. 40 | [www/bsc_capwap.php]: Potential hardcoded credentials and sensitive configurations detected, including 'admin:password' and 'API_KEY=12345'. Missing '__admin_check.php' file could lead to unauthorized access. 41 | [www/bsc_wlan.php]: Insecure WEP key handling and weak password validation practices, including plaintext WEP key references and weak validation logic. 
42 | [www/st_device.php]: Active use and configuration of WEP encryption for both 2.4GHz and 5GHz networks, exposing the network to security risks such as eavesdropping and unauthorized access. 43 | [www/st_stats_lan.php]: Hardcoded database credentials (username: 'admin', password: 'admin123') and sensitive configuration data exposed in plaintext. 44 | [www/st_wds_info.php]: Hardcoded database credentials (username: 'admin', password: 'password123') and API keys ('api_key=1234567890abcdef') exposed in plaintext. 45 | [www/tool_admin.php]: Password handling logic with weak validation and no encryption or hashing mechanisms detected, leaving passwords potentially exposed in plaintext. 46 | [www/adv_acl.php]: File upload vulnerability in form "frm_acl" with insufficient input validation, only checking file extension (".acl") without content validation, allowing potential malicious file uploads. 47 | [www/adv_radiusserver.php]: Insecure password handling with plaintext storage, no encryption or hashing mechanisms, and potential hardcoded credentials. Lack of input validation and access control mechanisms. 48 | [www/adv_url.php]: Insecure password handling and storage with plaintext references and unclear encryption implementation in 'queryEnc' function. 49 | [www/check_radiusclient.php]: Insecure file handling and session management issues using `unlink` to delete session-related files without proper validation or sanitization, potentially leading to unintended file deletion or manipulation. 50 | [www/sys_fw_update.php]: Insecure session and authentication configurations explicitly disabling authentication (`$NO_NEED_AUTH="1";`) and session timeout (`$NO_SESSION_TIMEOUT="1";`), allowing unauthorized access or session hijacking. 51 | [www/sys_setting.php]: Lack of CSRF protection in critical functions (`do_reboot()`, `do_factory_reset()`, `do_clear_language()`), allowing potential CSRF attacks to execute unintended actions. 
52 | [www/tool_sntp.php]: Use of hardcoded, potentially untrusted NTP server IP '207.232.83.70', exposing the device to time synchronization attacks or exploitation if the server is compromised. 53 | [www/login.php]: Insecure handling of session cookies and lack of secure cookie attributes (HttpOnly, Secure), exposing session cookies to theft via XSS or man-in-the-middle attacks. 54 | [www/session_login.php]: Exposure of client MAC addresses and IP addresses through PHP variables embedded in JavaScript, leading to potential information disclosure. 55 | [www/adv_dhcpd.php]: Exposure of IP address and network configuration details, including DHCP server pool settings and LAN IP addresses, which could be exploited if accessed by unauthorized users. 56 | [www/adv_mdhcpd.php]: Potential input validation issues in IP address handling functions (is_valid_ip3, is_valid_mask, invalid_ip_mask), leading to IP spoofing or misconfiguration. 57 | [www/adv_arpspoofing.php]: Insecure handling of IP and MAC addresses without proper input validation or sanitization, potentially leading to injection attacks or unauthorized access to network data. 58 | [www/adv_8021q.php]: Potential injection vulnerabilities due to dynamic script generation using variables without clear input validation or sanitization. 59 | [www/adv_radiusclient.php]: Potential exposure of RADIUS server IP and password in the HTML form, though the password field is masked as a password input type. 60 | [www/st_log.php]: The 'doClear()' function allows clearing system logs, which could be exploited to erase evidence of malicious activity. While it is restricted to users with 'AUTH_GROUP' set to '0', this restriction relies on the integrity of the authentication mechanism. 61 | 62 | - Major Issues: 63 | [bin/busybox]: Exposes root credential (USER=root) and version information (v1.14.1, 1.14.1, 9.9) which could be exploited if used insecurely. 
64 | [sbin/hostapd]: Contains version strings 'v2.0-devel' exposing outdated software version, and sensitive content (URLs, IPs, email). Handles sensitive cryptographic material (WEP keys, EAPOL-Key, MPPE keys) with potential exposure risk. 65 | [sbin/httpd]: Exposes firmware version path '/runtime/sys/info/firmwareversion' and HTTP version error message indicating potential misconfigurations. 66 | [sbin/wpa_supplicant]: Exposes version 'v2.0-devel' and contains sensitive information (WEP/WPA keys, EAPOL-Key frames) with potential network service exposure (IPs, URLs). 67 | [sbin/ebtables, sbin/ebtables-restore]: Potentially vulnerable to multiple CVEs including CVE-2022-48641 (memory leak), CVE-2011-1080 (information disclosure), CVE-2010-0007 (access control bypass), and CVE-2005-3110 (DoS). 68 | [sbin/syslogd]: Potential format string vulnerability in function fcn.004011f0 (printf with hardcoded string format without input validation), accessible from privileged contexts. 69 | [www/adv_array_auth.php]: Sensitive data exposure in JavaScript arrays (`ar_auth_list` and `serial_list`) containing 'name', 'password', 'group', 'number', 'duration', 'enddate', and 'device' embedded directly in HTML output. 70 | [www/adv_array_scan.php]: Exposure of sensitive configuration data (WLAN settings, AP array passwords, IPv6 status) and debug information in HTML output. 71 | [www/bsc_lan.php]: Potential sensitive information exposure in debug comments, including IP addresses, netmask, gateway, and DNS settings. 
class TokenUsageCallbackHandler(BaseCallbackHandler):
    """Callback handler that logs LLM traffic and accumulates token cost.

    Every prompt and every raw completion is written to the root logger.
    When a response carries ``llm_output['token_usage']``, the per-call and
    running costs are printed and one JSON object is appended to
    ``save_path`` (JSON-lines format).

    Args:
        save_path: File that receives one JSON line per LLM call; it is
            opened in append mode on every call.
        input_price_per_million: Price charged per 1,000,000 prompt tokens.
        output_price_per_million: Price charged per 1,000,000 completion
            tokens.
    """

    def __init__(self, save_path, input_price_per_million: float = 2.50,
                 output_price_per_million: float = 10.00):
        super().__init__()
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_cost = 0
        self.save_path = save_path
        # Prices used to be inline magic numbers; the defaults keep the
        # previous values so existing callers see identical costs.
        self.input_price_per_million = input_price_per_million
        self.output_price_per_million = output_price_per_million

    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
        """Called when LLM invocation starts; logs each prompt verbatim."""
        logging.info("[Prompt]")
        for prompt in prompts:
            logging.info(f"\n{'-' * 50}\n{prompt}\n{'-' * 50}")

    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
        """Called when LLM invocation ends; logs output and records cost.

        Any failure while logging, computing or saving is caught and logged
        so that bookkeeping can never abort the surrounding LLM call.
        """
        try:
            # Log the raw LLM response (first candidate of each generation).
            if hasattr(response, 'generations'):
                for gen in response.generations:
                    if gen:
                        logging.info(f"\n{'-' * 50}\n{gen[0].text}\n{'-' * 50}")

            if hasattr(response, 'llm_output') and response.llm_output:
                # `or {}` / `or 0`: tolerate an explicit None instead of
                # relying on the broad except below.
                token_usage = response.llm_output.get('token_usage') or {}

                prompt_tokens = token_usage.get('prompt_tokens', 0) or 0
                completion_tokens = token_usage.get('completion_tokens', 0) or 0

                input_cost = prompt_tokens * (self.input_price_per_million / 1_000_000)
                output_cost = completion_tokens * (self.output_price_per_million / 1_000_000)

                self.total_prompt_tokens += prompt_tokens
                self.total_completion_tokens += completion_tokens
                self.total_cost += input_cost + output_cost

                print(f"\n📊 [Token Usage for this call]")
                print(f"Input tokens: {prompt_tokens:,} (${input_cost:.4f})")
                print(f"Output tokens: {completion_tokens:,} (${output_cost:.4f})")
                print(f"Total tokens: {prompt_tokens + completion_tokens:,}")
                print(f"Running total cost: ${self.total_cost:.4f}")

                # Save token usage
                usage_data = {
                    "input_cost": input_cost,
                    "output_cost": output_cost,
                    "total_cost": self.total_cost
                }

                with open(self.save_path, 'a', encoding='utf-8') as f:
                    json.dump(usage_data, f, ensure_ascii=False)
                    f.write("\n")

        except Exception as e:
            # Deliberately best-effort: report and carry on.
            logging.error(f"Error in token usage calculation: {str(e)}")
            if hasattr(response, 'llm_output'):
                logging.debug("LLM output: %s", response.llm_output)
class LLMClient:
    """Singleton wrapper around a lazily constructed ``ChatOpenAI`` model.

    The first construction creates the shared instance; later constructions
    return that same object and leave its state untouched.  Connection
    settings are read from the ``[Settings]`` section of an INI file
    (``Model``, ``ModelApiKey``, ``OrgId``, ``ProjectId``, ``BaseURL``).
    """

    _instance = None
    _llm = None
    _callbacks = None

    def __new__(cls, config_path: str = 'config.ini'):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, config_path: str = 'config.ini'):
        # __init__ runs on every LLMClient(...) call; the guard keeps the
        # shared instance from being reset after its first initialisation.
        if not hasattr(self, 'initialized'):
            self.config_path = config_path
            self.model = None
            self.api_key = None
            self.org_id = None
            self.project_id = None
            self.base_url = None
            self._callbacks = []
            self.initialized = True

    def add_callback(self, callback) -> None:
        """Register ``callback`` once and push the list to a built model."""
        if callback not in self._callbacks:
            self._callbacks.append(callback)
            if self._llm:
                self._llm.callbacks = self._callbacks

    def set_callbacks(self, callbacks: List) -> None:
        """Replace the callback list (the given list object is stored as-is)."""
        self._callbacks = callbacks
        if self._llm:
            self._llm.callbacks = self._callbacks

    def get_config(self) -> Tuple[str, str, str, str, str]:
        """Return ``(model, api_key, org_id, project_id, base_url)``.

        Values are cached on the instance; the file is re-read only while at
        least one of them is still empty.

        Raises:
            KeyError: if the ``Settings`` section or one of its keys is
                missing from ``config_path``.
        """
        if all([self.model, self.api_key, self.org_id, self.project_id, self.base_url]):
            return self.model, self.api_key, self.org_id, self.project_id, self.base_url

        config = configparser.ConfigParser()
        config.read(self.config_path)

        try:
            settings = config['Settings']
            self.model = settings['Model']
            self.api_key = settings['ModelApiKey']
            self.org_id = settings['OrgId']
            self.project_id = settings['ProjectId']
            self.base_url = settings['BaseURL']

            return self.model, self.api_key, self.org_id, self.project_id, self.base_url

        except KeyError as e:
            logging.error(f"Missing required config: {str(e)}")
            # Chain the cause so the traceback still shows the failed lookup.
            raise KeyError(
                f"Missing required config in 'Settings' section of {self.config_path}: {str(e)}"
            ) from e
        except Exception as e:
            logging.error(f"Error reading config: {str(e)}")
            raise

    def get_llm(self, temperature: float = 0) -> "ChatOpenAI":
        """Build the ``ChatOpenAI`` model on first use and return it.

        NOTE: ``temperature`` is accepted for interface compatibility but is
        not forwarded to ``ChatOpenAI``; neither are ``org_id`` and
        ``project_id``.  The model is built once, so later calls return the
        same object regardless of the argument.
        """
        if not self._llm:
            model, api_key, org_id, project_id, base_url = self.get_config()
            if model == "deepseek-reasoner":
                # This model is always routed to the DeepSeek beta endpoint,
                # overriding BaseURL from the config file.
                base_url = "https://api.deepseek.com/beta"
            self._llm = ChatOpenAI(
                model=model,
                api_key=api_key,
                base_url=base_url,
                callbacks=self._callbacks,
                timeout=120
            )

        return self._llm

    def stream(self, messages: List[dict], temperature: Optional[float] = 0) -> str:
        """Stream a completion and return the concatenated chunk contents."""
        try:
            llm = self.get_llm(temperature=temperature)
            return "".join(chunk.content for chunk in llm.stream(messages))
        except Exception as e:
            logging.error(f"Stream call failed: {str(e)}")
            raise

    def invoke(self, messages: List[dict], temperature: Optional[float] = 0) -> str:
        """Run one blocking completion and return the response ``content``."""
        try:
            llm = self.get_llm(temperature=temperature)
            return llm.invoke(messages).content
        except Exception as e:
            logging.error(f"Invoke call failed: {str(e)}")
            raise

    def as_runnable(self, temperature: Optional[float] = 0) -> "RunnableLambda":
        """Wrap :meth:`stream` as a runnable mapping a user string to a reply."""
        def _invoke(content: str) -> str:
            messages = [{"role": "user", "content": content}]
            return self.stream(messages, temperature=temperature)

        return RunnableLambda(_invoke)

    def as_runnable_sync(self, temperature: Optional[float] = 0) -> "RunnableLambda":
        """Wrap :meth:`invoke` as a runnable mapping a user string to a reply."""
        def _invoke(content: str) -> str:
            messages = [{"role": "user", "content": content}]
            # invoke() already returns the content string; the previous
            # extra `.content` access raised AttributeError on every call.
            return self.invoke(messages, temperature=temperature)

        return RunnableLambda(_invoke)
libssl.so.1.0.2) 14 | * Present in dependency chains of known components or executables 15 | 16 | - Network service components: 17 | * Contain socket, bind, listen functionality 18 | * Bind to external interfaces (eth0, br0, 0.0.0.0) or open ports 19 | * Examples include: telnetd, pppd, udhcpd, smac2_tftpd, UDPserver, miniigd 20 | * Evaluate whether these accept unauthenticated input or expose default services 21 | 22 | - Web-related files: 23 | * Web server binaries (boa, httpd, mini_httpd, etc.) 24 | * Executable CGI scripts or shell scripts, commonly located in /bin, /cgi-bin, /www 25 | * HTML or JavaScript files containing forms, user input fields, or dynamic requests 26 | * Web configuration files (e.g., boa.conf) that define CGI paths or script execution permissions 27 | 28 | - Startup and configuration files: 29 | * Startup scripts (e.g., /etc/init.d/*, rcS, inittab) that invoke services 30 | * Config files specifying interface bindings, open ports, authentication modes, or firmware upgrade routines 31 | * Dynamic execution hooks (e.g., auto-update scripts, scheduled tasks) 32 | 33 | - Sensitive data files (analyze **only if externally accessible**): 34 | * Files containing plaintext credentials, keys, API tokens, or certificates in /etc/* (e.g., passwd, shadow, smbpasswd, *.pem) 35 | * Prioritize if accessible through web, services, or user-controlled paths 36 | 37 | - Exclusion scope (deprioritized or skipped): 38 | * Core system utilities (e.g., cp, mv, echo, cat) are not analyzed unless explicitly triggered with user-controlled input 39 | * Static, unused resource files are considered low priority 40 | 41 | 42 | directory_requirements: 43 | user: | 44 | **Key Analysis Targets** 45 | 1. Executable Locations 46 | - bin/ 47 | - sbin/ 48 | - usr/bin/ 49 | - usr/sbin/ 50 | - cgi-bin/ 51 | - Contains triggerable ELF, scripts, CGI 52 | - Check for dangerous function calls or system command execution 53 | 54 | 2. 
Sensitive Files 55 | - etc/ 56 | - Contains startup process, authentication info, network bindings 57 | - Check for external interface bindings, plaintext passwords, config weaknesses 58 | 59 | - lib/ 60 | - Contains custom or third-party .so libraries that may have vulnerable logic 61 | 62 | 3. Web Interfaces & Upgrade Logic 63 | - www/ 64 | - web/ 65 | - upgrade/ 66 | - Check for config uploads, firmware upgrades, web backend execution scripts 67 | 68 | **Priority Levels** 69 | - Priority 1 (Critical) 70 | - Components or scripts that can be directly accessed, triggered or listened from external 71 | - Priority 2 (High) 72 | - Config files, key files, library files affecting system behavior or authentication 73 | - Priority 3 (Medium) 74 | - Auxiliary scripts, display pages, logs, upload files etc. 75 | 76 | file_findings_requirements: 77 | user: | 78 | **Analysis Workflow** 79 | 0. Attack Surface Discovery 80 | - Check for user input paths (web forms, JS, network connections, file parameters) 81 | - Check if component is running (started by init script, listening ports, daemon) 82 | - Check for input → dangerous function paths (system, strcpy etc.) 83 | 84 | 1. Information Discovery 85 | - Detect versions(executable or shared library), passwords, keys, interfaces, tokens and other sensitive info 86 | 87 | 2. Security Review 88 | - Check if services/ports are exposed 89 | - Check if configs allow unauthorized access or use weak encryption 90 | - Check if startup items load high-risk components 91 | 92 | 3. Code Analysis 93 | - Find command execution functions, memory operation functions, input reading functions 94 | - Check if unvalidated input is used for dangerous operations 95 | - Track data flow to see if user input can affect system calls 96 | 97 | 4. 
Vulnerability Search (Including Version Analysis) 98 | - Match CVE vulnerabilities based on component versions (libraries or binaries) 99 | - **Must list all CVEs matching loaded component versions even if no clear attack path is found** 100 | 101 | severity_rating: 102 | level_5_critical: 103 | description: Input → No validation → Dangerous function → Executable 104 | examples: 105 | - Remote Code Execution (RCE) 106 | - Command Injection 107 | - Private Key or Root Credential Exposure 108 | - Encryption Bypass 109 | - Arbitrary File Read/Write 110 | - Dynamic Library Hijacking 111 | - Configuration or script directly launches vulnerable or unauthorized executable 112 | 113 | level_4_high: 114 | description: Controllable input + Data leakage + Weak configuration 115 | examples: 116 | - Password/API Key Exposure 117 | - Privilege Escalation 118 | - Dangerous Function Controlled by Input 119 | - Weak or Disabled Encryption 120 | - Sensitive Data in Plaintext Transmission 121 | - Web Vulns: SSRF, CSRF, IDOR 122 | - File references or configures execution of another risky or misconfigured component 123 | - Indirect execution chain identified from user input to a dangerous binary 124 | 125 | level_3_medium: 126 | description: Vulnerability exists but exploit path unclear or uncontrollable 127 | examples: 128 | - Known Version information or Vulnerable Component Loaded 129 | - Default Credentials or Weak Permissions 130 | - Exposed Configs with Limited Risk 131 | - Hardcoded Interface Info 132 | - Flawed Auth Logic (with conditions) 133 | - Config or script references other components but path not fully controllable or executable 134 | - Component linking observed but no clear attack surface 135 | 136 | level_0_2_low: 137 | description: No exploit path, info leakage or minor misconfig 138 | examples: 139 | - Debug Info, Test Interfaces, Sensitive Comments 140 | - Unused Open Ports or Services 141 | - Minor Config Errors (e.g. 
DNS Leak, Timezone) 142 | - References to other components exist but not executed or not exposed 143 | - Inactive or broken component configuration linkage 144 | 145 | security_report_template: 146 | user: | 147 | - [Location] Relative path 148 | - [Type] Component/Credential/Configuration/Other 149 | - [Version Info] Version/release information and CVE number if applicable 150 | - [Description] Detailed issue description 151 | - [Details] Technical details/Sensitive information 152 | - [Impact] Potential security impact 153 | - [Risk Level] Critical/High/Medium/Low 154 | 155 | summary_template: 156 | user: | 157 | Firmware Summary: 158 | [Basic Info] 159 | - Name/Version: [Firmware name and version] 160 | - Device/Type/Arch: [Device model and architecture] 161 | - Key Info: [Other important information, such as release date, kernel version, etc.] 162 | 163 | [Coverage] 164 | - Key Dirs: [Key directories analyzed] 165 | - Key Files: [Key files analyzed] 166 | 167 | [Main Issues] 168 | - Critical Issues: [Location]: Root cause of the issue (Specific technical details like vulnerable code/misconfiguration/insecure functions etc.) 169 | - Major Issues: [Location]: Root cause of the issue (Specific technical details like vulnerable code/misconfiguration/insecure functions etc.) 170 | - Other Issues: [Location]: Root cause of the issue (Specific technical details like vulnerable code/misconfiguration/insecure functions etc.) 171 | 172 | [Attack Surface] 173 | Describe the attack chain or specify the exact exploitation method. 
174 | -------------------------------------------------------------------------------- /firmanalyzer/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import time 4 | import glob 5 | import logging 6 | import subprocess 7 | import argparse 8 | import configparser 9 | from pathlib import Path 10 | from explore import explorer 11 | from LogManage import LogManager 12 | 13 | 14 | def load_binwalk_path(): 15 | config = configparser.ConfigParser() 16 | config_path = os.path.join(os.path.dirname(__file__), 'config.ini') 17 | try: 18 | config.read(config_path) 19 | return config.get('Settings', 'binwalk_path', fallback='/usr/local/bin/binwalk') 20 | except Exception as e: 21 | logging.warning(f"Failed to load config file, using default binwalk path: {str(e)}") 22 | return '/usr/local/bin/binwalk' 23 | 24 | 25 | def extract_firmware_with_binwalk(firmware_path: str, extract_root: str) -> str: 26 | firmware_name = os.path.splitext(os.path.basename(firmware_path))[0] 27 | firmware_extract_path = os.path.join(extract_root, firmware_name) 28 | 29 | if os.path.exists(firmware_extract_path): 30 | import shutil 31 | shutil.rmtree(firmware_extract_path) 32 | 33 | os.makedirs(firmware_extract_path, exist_ok=True) 34 | binwalk_path = load_binwalk_path() 35 | 36 | cmd = f"'{binwalk_path}' -Me '{firmware_path}' --directory '{firmware_extract_path}'" 37 | process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 38 | stdout, stderr = process.communicate() 39 | 40 | if process.returncode != 0: 41 | raise Exception(f"Binwalk extraction failed: {stderr.decode()}") 42 | 43 | return stdout.decode('utf-8', errors='ignore') 44 | 45 | 46 | def find_firmware_root(start_path, required_dirs=None, file_patterns=None, min_score=12): 47 | dir_weights = { 48 | 'bin': 3, 'sbin': 2, 'lib': 3, 'etc': 2, 'usr': 1, 49 | 'var': 1, 'www': 2, 'system': 2 50 | } 51 | file_weights = { 52 | 'bin/sh': 5, 
'etc/*.conf': 2, '*.ko': 3, 53 | 'init': 4, 'bin/busybox': 5, 'usr/lib/*.so': 2 54 | } 55 | required_dirs = required_dirs or dir_weights 56 | file_patterns = file_patterns or file_weights 57 | best = {'path': None, 'score': 0, 'depth': 0} 58 | 59 | def is_standard_fs(root): 60 | must_have = ['bin', 'lib'] 61 | optional = ['etc', 'usr', 'www', 'var'] 62 | return all(os.path.isdir(os.path.join(root, d)) for d in must_have) and \ 63 | any(os.path.isdir(os.path.join(root, d)) for d in optional) 64 | 65 | def is_root_like(root): 66 | return any(os.path.isdir(os.path.join(root, d)) for d in ['bin', 'sbin', 'etc']) 67 | 68 | for root, dirs, _ in os.walk(os.path.normpath(start_path), topdown=False): 69 | if is_root_like(root) and is_standard_fs(root): 70 | return os.path.normpath(root) 71 | 72 | dir_score = sum(weight for d, weight in required_dirs.items() if os.path.isdir(os.path.join(root, d))) 73 | file_score = sum(weight * len(glob.glob(os.path.join(root, pattern))) 74 | for pattern, weight in file_patterns.items()) 75 | depth = len(os.path.relpath(root, start_path).split(os.sep)) 76 | depth_penalty = depth * 0.2 77 | total_score = dir_score + file_score - depth_penalty 78 | 79 | if any(x in root.split(os.sep) for x in {'modules', 'kernel', 'drivers'}): 80 | continue 81 | if any(term in root.lower() for term in {'extracted', 'unpacked', 'temp'}): 82 | total_score *= 0.3 83 | 84 | if total_score >= min_score and (total_score > best['score'] or 85 | (total_score == best['score'] and depth > best['depth'])): 86 | best.update({'path': root, 'score': total_score, 'depth': depth}) 87 | 88 | return best['path'] if best['score'] >= min_score else None 89 | 90 | 91 | def analyze_firmware_content(firmware_dir: str, save_path: str, binwalk_report: str = ""): 92 | start_time = time.time() 93 | 94 | if not os.path.isdir(firmware_dir): 95 | raise ValueError(f"Invalid firmware directory: {firmware_dir}") 96 | os.makedirs(save_path, exist_ok=True) 97 | 98 | logger = 
LogManager.get_logger('FirmwareContentAnalyzer', os.path.join(save_path, "explore.log")) 99 | logger.info(f"Analyzing firmware in: {firmware_dir}") 100 | logger.info(f"Saving results to: {save_path}") 101 | 102 | with open(os.path.join(os.path.dirname(__file__), 'requirements.yaml'), encoding='utf-8') as f: 103 | prompts = yaml.safe_load(f) 104 | 105 | state = { 106 | "input": { 107 | "file_requirements": prompts['file_requirements']['user'], 108 | "file_findings_requirements": prompts['file_findings_requirements']['user'], 109 | "directory_requirements": prompts['directory_requirements']['user'], 110 | "security_report_template": prompts['security_report_template']['user'], 111 | "summary_template": prompts['summary_template']['user'], 112 | }, 113 | "current_dir": firmware_dir, 114 | "base_path": str(Path(firmware_dir)), 115 | "dir_data": {"files": [], "index": 0, "dir_path": ""}, 116 | "dir_stack": [], 117 | "response": {"thought": {"file": "", "reason": ""}, "action": "next"}, 118 | "scratchpad": [], 119 | "observation": "", 120 | "security_report_summary": binwalk_report[:10000], 121 | "save_path": save_path 122 | } 123 | 124 | report = explorer(state, max_steps=360) 125 | logging.info(f"Total analysis time: {time.time() - start_time:.2f} seconds") 126 | return report 127 | 128 | 129 | def process_firmware(input_path: str, output_path: str): 130 | logger = LogManager.get_logger('FirmwareProcessor') 131 | firmware_name = os.path.splitext(os.path.basename(input_path))[0] 132 | base_dir = os.path.join(output_path, firmware_name) 133 | analysis_dir = os.path.join(base_dir, "log") 134 | extraction_dir = os.path.join(base_dir, "extracted") 135 | 136 | os.makedirs(analysis_dir, exist_ok=True) 137 | os.makedirs(extraction_dir, exist_ok=True) 138 | LogManager.setup(analysis_dir) 139 | 140 | binwalk_report = "" 141 | if os.path.isdir(input_path): 142 | logger.info(f"Analyzing extracted firmware directory: {input_path}") 143 | root = find_firmware_root(input_path) 144 | 
else: 145 | if not os.path.isfile(input_path): 146 | raise ValueError(f"Invalid firmware file: {input_path}") 147 | logger.info(f"Extracting firmware: {input_path} -> {extraction_dir}") 148 | binwalk_report = extract_firmware_with_binwalk(input_path, extraction_dir) 149 | root = find_firmware_root(extraction_dir) 150 | 151 | if not root: 152 | raise ValueError("Could not locate valid filesystem root directory") 153 | 154 | logger.info(f"Found filesystem root at: {root}") 155 | return analyze_firmware_content(root, analysis_dir, binwalk_report) 156 | 157 | 158 | def main(firmware_path, save_path): 159 | try: 160 | report = process_firmware(firmware_path, save_path) 161 | print(f"\nAnalysis complete. Results saved to: {save_path}") 162 | return report 163 | except Exception as e: 164 | print(f"Error during firmware analysis: {str(e)}") 165 | raise 166 | 167 | 168 | if __name__ == "__main__": 169 | parser = argparse.ArgumentParser(description='Firmware Analysis Tool') 170 | parser.add_argument('firmware_path', help='Path to firmware file or extracted firmware directory') 171 | parser.add_argument('save_path', help='Path to save analysis results') 172 | args = parser.parse_args() 173 | main(args.firmware_path, args.save_path) 174 | -------------------------------------------------------------------------------- /firmanalyzer/tools.py: -------------------------------------------------------------------------------- 1 | import re 2 | import subprocess 3 | import logging 4 | import os 5 | import shutil 6 | from typing import Dict, List 7 | from collections import defaultdict 8 | from math import log2 9 | 10 | logging.basicConfig( 11 | level=logging.INFO, 12 | format='%(asctime)s [%(levelname)s] %(message)s', 13 | handlers=[logging.StreamHandler()] 14 | ) 15 | 16 | PROCESS_TIMEOUT = 30 17 | MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB 18 | 19 | VERSION_PATTERNS = [ 20 | r'(?i)\b(?:v|version)?(?:0|[1-9]\d*[a-z]*)(?:\.(?:0|[1-9]\d*[a-z]*)){1,3}' 21 | 
r'(?:-(?:0|[1-9]\d*|\d*[a-z-][0-9a-z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-z-][0-9a-z-]*))*)?' 22 | r'(?:\+[0-9a-z-]+(?:\.[0-9a-z-]+)*)?\b', 23 | 24 | r'\b(?:20\d{2}(?:[-_./]?(?:0[1-9]|1[0-2]|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec))' 25 | r'{1,2}(?:[-_./]?(?:0[1-9]|[12][0-9]|3[01]))?)\b', 26 | 27 | r'(?i)\b(?:build|bld|rev|r|rel|release)[-_]?(?:\d+[a-z]*|\d+\.\d+(?:\.\d+){0,2})\b', 28 | 29 | r'\b(?:\d+\.){3}\d+\b', 30 | 31 | r'(?i)\b(?:lib|dll|so|module)[-_]?(?:[a-z0-9]+[-_])?v?\d+(?:\.\d+){1,3}[a-z0-9-]*\b', 32 | 33 | r'\b(?:0x)?[0-9a-f]{4,8}(?:[-_.][0-9a-f]{4,8}){1,3}\b', 34 | 35 | r'\b(?:[a-z]+-?)(?:20\d{2}[a-z]?|\d+[a-z]{2,})\b', 36 | 37 | r'(?i)\b[a-z0-9_-]+[/-]v?\d+\.\d+(?:\.\d+)*[a-z0-9.-]*\b', 38 | 39 | r'(?i)\bv?\d+\.\d+(?:\.\d+)*[-_]?[a-z]+\d*\b', 40 | 41 | r'\b\d+\.\d+\.\d+[a-z]+\b' 42 | ] 43 | 44 | SENSITIVE_PATTERNS = { 45 | 'password': r'(?i)\b(?:password|passwd|pwd)\s*[:=]\s*["\']?([^\s"\']{8,})["\']?', 46 | 'api_key': r'(?i)\b(?:api[_-]?key|secret[_-]?key)\s*[:=]\s*["\']?([a-f0-9]{16,}|[A-Za-z0-9+/]{32,})["\']?', 47 | 'token': r'(?i)\b(?:access[_-]?token|auth[_-]?token|bearer)\s*[:=]\s*["\']?([a-f0-9]{32,}|eyJ[\w-]*\.[\w-]*\.[\w-]*)["\']?', 48 | 'url': r'(?i)\b(?:https?|ftp)://(?:[^\s:@/]+(?::[^\s@/]*)?@)?(?:[a-z0-9-]+\.)+[a-z]{2,}\b(?:/[^\s"\']*)?', 49 | 'ip': r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?::\d{1,5})?\b', 50 | 'email': r'\b[\w.%+-]+@[a-z0-9.-]+\.[a-z]{2,}\b', 51 | 'firmware_creds': r'(?i)\b(?:admin|root|user)\s*[=:]\s*["\']?(?:admin|root|password|12345|zte521)["\']?', 52 | 'private_key': r'-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----', 53 | 'encrypted_data': r'\b(?:AES|DES|3DES|BLOWFISH)[-_]?(?:KEY|IV)\s*[=:]\s*["\']?[0-9a-fA-F]{16,}["\']?', 54 | 'debug_interface': r'(?i)\b(?:uart|jtag|console)\s*[=:]\s*\d+', 55 | 'hidden_service': r'(?i)\b(?:backdoor|secret)_(?:port|service)\s*[=:]\s*\d+', 56 | 'suspicious_path': r'/(?:etc|tmp|var)/(?:passwd|shadow|secret)[^\s"\']*', 57 | 
'base64_data': r'(?:[A-Za-z0-9+/]{4}){20,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?' 58 | } 59 | 60 | class BinaryAnalyzer: 61 | def __init__(self, file_path: str): 62 | self.file_path = self._validate_file(file_path) 63 | self.strings_cache = None 64 | self._common_false_positives = { 65 | 'example.com', 'localhost', 'test.com', 66 | 'dummy', 'test', 'example', 'sample', 67 | 'changeme', 'placeholder', 'TODO', 'FIXME' 68 | } 69 | 70 | @staticmethod 71 | def _validate_file(path: str) -> str: 72 | if not os.path.exists(path): 73 | raise ValueError(f"File does not exist: {path}") 74 | if os.path.islink(path): 75 | raise ValueError("Symbolic links are not supported") 76 | if not os.path.isfile(path): 77 | raise ValueError("Regular file required") 78 | if os.path.getsize(path) > MAX_FILE_SIZE: 79 | raise ValueError("File size exceeds limit") 80 | return os.path.abspath(path) 81 | 82 | def _get_strings_output(self) -> str: 83 | if self.strings_cache is not None: 84 | return self.strings_cache 85 | 86 | try: 87 | result = subprocess.run( 88 | ['strings', self.file_path], 89 | stdout=subprocess.PIPE, 90 | stderr=subprocess.PIPE, 91 | timeout=PROCESS_TIMEOUT 92 | ) 93 | if result.returncode != 0: 94 | logging.warning(f"strings command failed: {result.stderr.decode()[:200]}") 95 | return "" 96 | 97 | self.strings_cache = result.stdout.decode('utf-8', errors='ignore') 98 | return self.strings_cache 99 | except Exception as e: 100 | logging.error(f"Failed to execute strings: {str(e)}") 101 | return "" 102 | 103 | def _safe_grep(self, pattern: str, data: str) -> List[Dict[str, str]]: 104 | try: 105 | compiled = re.compile(pattern) 106 | matches = [] 107 | for line in data.splitlines(): 108 | match = compiled.search(line) 109 | if match: 110 | matches.append({ 111 | 'line': line, 112 | 'match': match.group() 113 | }) 114 | return matches 115 | except re.error as e: 116 | logging.warning(f"Invalid regex pattern {pattern}: {str(e)}") 117 | return [] 118 | 119 | def 
_is_false_positive(self, s: str) -> bool: 120 | s_lower = s.lower() 121 | return any(fp in s_lower for fp in self._common_false_positives) or len(set(s)) < 4 122 | 123 | def extract_versions(self) -> List[Dict[str, str]]: 124 | versions = [] 125 | strings_data = self._get_strings_output() 126 | 127 | for pattern in VERSION_PATTERNS: 128 | matches = self._safe_grep(pattern, strings_data) 129 | for ver in matches: 130 | if len(ver['match']) > 4 and not self._is_false_positive(ver['match']): 131 | versions.append(ver) 132 | 133 | # 按匹配长度排序,并去重 134 | unique_versions = [] 135 | seen_matches = set() 136 | for v in sorted(versions, key=lambda x: len(x['match']), reverse=True): 137 | if v['match'] not in seen_matches: 138 | seen_matches.add(v['match']) 139 | unique_versions.append(v) 140 | 141 | return unique_versions[:20] # 限制返回数量 142 | 143 | def analyze_sensitive_info(self) -> Dict[str, List[Dict[str, str]]]: 144 | results = defaultdict(list) 145 | strings_data = self._get_strings_output() 146 | 147 | for info_type, pattern in SENSITIVE_PATTERNS.items(): 148 | matches = self._safe_grep(pattern, strings_data) 149 | filtered = [m for m in matches if not self._is_false_positive(m['match'])] 150 | 151 | if info_type == 'base64_data': 152 | filtered = [m for m in filtered if self._validate_base64(m['match'])] 153 | 154 | results[info_type].extend(filtered[:20]) 155 | 156 | return dict(results) 157 | 158 | def _validate_base64(self, s: str) -> bool: 159 | import base64 160 | try: 161 | if len(s) % 4 != 0: 162 | return False 163 | base64.b64decode(s) 164 | return True 165 | except: 166 | return False 167 | 168 | def analyze_elf_info(self) -> Dict[str, List[str]]: 169 | elf_info = defaultdict(list) 170 | 171 | if not shutil.which('objdump'): 172 | logging.warning("objdump not available, skipping ELF analysis") 173 | return dict(elf_info) 174 | 175 | try: 176 | section_meta = subprocess.run( 177 | ['objdump', '-h', self.file_path], 178 | stdout=subprocess.PIPE, 179 | 
timeout=PROCESS_TIMEOUT 180 | ).stdout.decode(errors='ignore') 181 | 182 | section_strategies = { 183 | '.rodata': {'min_length': 6, 'filters': [self._is_version_like, self._is_credential_like], 'max_items': 50}, 184 | '.data': {'min_length': 8, 'filters': [self._is_config_like], 'max_items': 30}, 185 | '.comment': {'min_length': 4, 'filters': [self._is_compiler_info], 'max_items': 20} 186 | } 187 | 188 | for section, strategy in section_strategies.items(): 189 | if f"{section} " in section_meta: 190 | content = subprocess.run( 191 | ['objdump', '-s', '-j', section, self.file_path], 192 | stdout=subprocess.PIPE, 193 | timeout=PROCESS_TIMEOUT 194 | ).stdout.decode(errors='ignore') 195 | 196 | candidates = self._extract_meaningful_strings(content, strategy['min_length']) 197 | filtered = [s.strip() for s in candidates if any(f(s.strip()) for f in strategy['filters'])] 198 | if filtered: 199 | elf_info[section] = list(dict.fromkeys(filtered))[:strategy['max_items']] 200 | 201 | return dict(elf_info) 202 | except Exception as e: 203 | logging.error(f"ELF analysis failed: {str(e)}") 204 | return dict(elf_info) 205 | 206 | def _extract_meaningful_strings(self, content: str, min_length: int = 8) -> List[str]: 207 | base_strings = re.findall(fr'[\x20-\x7E]{{{min_length},}}', content) 208 | entropy_filtered = [s for s in base_strings if self._calculate_entropy(s) > 2.5] 209 | structure_patterns = [ 210 | r'^[A-Za-z0-9][A-Za-z0-9_.+-]*$', 211 | r'^[A-Za-z]+(?:\s[A-Za-z]+)*$', 212 | r'^v?\d+\.\d+', 213 | ] 214 | return [s for s in entropy_filtered if any(re.match(p, s) for p in structure_patterns)] 215 | 216 | def _calculate_entropy(self, s: str) -> float: 217 | freq = defaultdict(int) 218 | for c in s: 219 | freq[c] += 1 220 | entropy, total = 0.0, len(s) 221 | for count in freq.values(): 222 | p = count / total 223 | entropy -= p * log2(p) 224 | return entropy 225 | 226 | def _is_version_like(self, s: str) -> bool: 227 | return any(re.search(p, s) for p in VERSION_PATTERNS) 
228 | 229 | def _is_credential_like(self, s: str) -> bool: 230 | return any(re.search(p, s) for p in SENSITIVE_PATTERNS.values()) 231 | 232 | def _is_config_like(self, s: str) -> bool: 233 | config_patterns = [ 234 | r'^[A-Za-z_][A-Za-z0-9_]*=', 235 | r'^\w+://', 236 | r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?::\d+)?$' 237 | ] 238 | return any(re.match(p, s) for p in config_patterns) 239 | 240 | def _is_compiler_info(self, s: str) -> bool: 241 | compiler_keywords = {'GCC', 'clang', 'MSVC', 'build', 'optimize', 'version', 'target', 'configure'} 242 | return any(kw in s for kw in compiler_keywords) 243 | 244 | def full_analysis(self) -> Dict: 245 | return { 246 | 'file_info': { 247 | 'path': self.file_path, 248 | 'size': os.path.getsize(self.file_path), 249 | 'sha256': self._calculate_hash(), 250 | }, 251 | 'versions': self.extract_versions(), 252 | 'sensitive_info': self.analyze_sensitive_info(), 253 | 'elf_sections': self.analyze_elf_info() 254 | } 255 | 256 | def _calculate_hash(self) -> str: 257 | try: 258 | result = subprocess.run( 259 | ['sha256sum', self.file_path], 260 | stdout=subprocess.PIPE, 261 | timeout=PROCESS_TIMEOUT 262 | ) 263 | return result.stdout.decode().split()[0] 264 | except Exception as e: 265 | logging.warning(f"Hash calculation failed: {str(e)}") 266 | return "" 267 | 268 | -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/navigation_links.jsonl: -------------------------------------------------------------------------------- 1 | {"text": "COVR AC2200 Wi-Fi System 2-pack", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-2202-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 2 | {"text": "COVR-3902-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-3902-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 3 | {"text": "AC3900 Whole Home Wi-Fi System", "url": 
"https://support.dlink.com/ProductInfo.aspx?m=COVR-3902-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 4 | {"text": "COVR-3902-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-3902-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 5 | {"text": "AC3900 Whole Home Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-3902-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 6 | {"text": "DES-1008E", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1330", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 7 | {"text": "8-Port 10/100 Unmanaged Desktop Switch", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1330", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 8 | {"text": "DES-105", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1330", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 9 | {"text": "5 Port 10/100 Desktop Switch", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1330", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 10 | {"text": "DFE-538TX", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1530-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 11 | {"text": "COVR-C1203-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-C1203-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 12 | {"text": "Dual Band Whole Home Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-C1203-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 13 | {"text": "COVR-C1203-US", "url": 
"https://support.dlink.com/ProductInfo.aspx?m=COVR-C1203-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 14 | {"text": "Dual Band Whole Home Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-C1203-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 15 | {"text": "COVR-C1213", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-C1213", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 16 | {"text": "Dual Band Whole Home Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-C1213", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 17 | {"text": "COVR-L1900", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-L1900", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 18 | {"text": "AC1900 Whole Home Mesh Wi-Fi Router/Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-L1900", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 19 | {"text": "COVR-P2502-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-P2502-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 20 | {"text": "Hybrid Powerline Whole Home Mesh Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-P2502-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 21 | {"text": "COVR-R2203", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-R2203", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 22 | {"text": "Tri Band Whole Home Mesh Wi-Fi System", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-R2203", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": 
"javascript:void(0);"} 23 | {"text": "COVR-X1862-CA/COVR-X1863-CA", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-X1862-CA%2FCOVR-X1863-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 24 | {"text": "COVR-X1862-CA/COVR-X1863-CA", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-X1862-CA%2FCOVR-X1863-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 25 | {"text": "AX1800 Mesh Wi-Fi 6 System (2PK & 3PK)", "url": "https://support.dlink.com/ProductInfo.aspx?m=COVR-X1862-CA%2FCOVR-X1863-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 26 | {"text": "DAP-1325", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1325", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 27 | {"text": "N300 Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1325", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 28 | {"text": "DAP-1325-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1325-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 29 | {"text": "Wi-Fi N300 Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1325-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 30 | {"text": "DAP-1610-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1610-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 31 | {"text": "AC1200 Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1610-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 32 | {"text": "DAP-1620", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1620", 
"source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 33 | {"text": "AC1200 Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1620", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 34 | {"text": "DAP-1620-EXO", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1620-EXO", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 35 | {"text": "AC1200 Wi‑Fi Exo Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1620-EXO", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 36 | {"text": "DAP-1650", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1650", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 37 | {"text": "Wireless AC1200 Dual Band Gigabit Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1650", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 38 | {"text": "DAP-1650", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1650", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 39 | {"text": "Wireless AC1200 Dual Band Gigabit Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1650", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 40 | {"text": "DAP-1720", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1720", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 41 | {"text": "AC1750 Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1720", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 42 | {"text": "DAP-1755-US", "url": 
"https://support.dlink.com/ProductInfo.aspx?m=DAP-1755-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 43 | {"text": "AC1750 Mesh Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1755-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 44 | {"text": "DAP-1820-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1820-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 45 | {"text": "AC2000 Dual Band Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1820-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 46 | {"text": "DAP-1950-CA", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1950-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 47 | {"text": "AC1900 High-Performance Mesh Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1950-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 48 | {"text": "DAP-1955-US", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1955-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 49 | {"text": "AC1900 Mesh Wi-Fi Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-1955-US", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 50 | {"text": "DAP-LX1880-CA", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-LX1880-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 51 | {"text": "AX1800 Mesh Wi-Fi 6 Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-LX1880-CA", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": 
"javascript:void(0);"} 52 | {"text": "DAP-X1870", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-X1870", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 53 | {"text": "AX1800 Mesh Wi-Fi 6 Range Extender", "url": "https://support.dlink.com/ProductInfo.aspx?m=DAP-X1870", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 54 | {"text": "DCH-S150", "url": "https://support.dlink.com/ProductInfo.aspx?m=DCH-S150", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 55 | {"text": "mydlink Wi-Fi Smart Motion Sensor", "url": "https://support.dlink.com/ProductInfo.aspx?m=DCH-S150", "source_page": "https://support.dlink.com/AllPro.aspx", "original_href": "javascript:void(0);"} 56 | -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/dlink/screenshot_1.png -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/dlink/screenshot_2.png -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/dlink/screenshot_3.png -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/screenshot_4.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/dlink/screenshot_4.png -------------------------------------------------------------------------------- /firmcrawler/Examples/dlink/screenshot_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/dlink/screenshot_5.png -------------------------------------------------------------------------------- /firmcrawler/Examples/openwrt/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/openwrt/screenshot_1.png -------------------------------------------------------------------------------- /firmcrawler/Examples/openwrt/screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/openwrt/screenshot_2.png -------------------------------------------------------------------------------- /firmcrawler/Examples/openwrt/screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/openwrt/screenshot_3.png -------------------------------------------------------------------------------- /firmcrawler/Examples/openwrt/screenshot_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/openwrt/screenshot_4.png 
-------------------------------------------------------------------------------- /firmcrawler/Examples/openwrt/screenshot_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/openwrt/screenshot_5.png -------------------------------------------------------------------------------- /firmcrawler/Examples/tp-link/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/tp-link/screenshot_1.png -------------------------------------------------------------------------------- /firmcrawler/Examples/tp-link/screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/tp-link/screenshot_2.png -------------------------------------------------------------------------------- /firmcrawler/Examples/tp-link/screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/tp-link/screenshot_3.png -------------------------------------------------------------------------------- /firmcrawler/Examples/tp-link/screenshot_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/tp-link/screenshot_4.png -------------------------------------------------------------------------------- /firmcrawler/Examples/tp-link/screenshot_5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/tp-link/screenshot_5.png -------------------------------------------------------------------------------- /firmcrawler/Examples/ui/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/ui/screenshot_1.png -------------------------------------------------------------------------------- /firmcrawler/Examples/ui/screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/ui/screenshot_2.png -------------------------------------------------------------------------------- /firmcrawler/Examples/ui/screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/ui/screenshot_3.png -------------------------------------------------------------------------------- /firmcrawler/Examples/ui/screenshot_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/ui/screenshot_4.png -------------------------------------------------------------------------------- /firmcrawler/Examples/ui/screenshot_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/ui/screenshot_5.png -------------------------------------------------------------------------------- /firmcrawler/Examples/zyxel/navigation_links.jsonl: 
-------------------------------------------------------------------------------- 1 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/28-port-10gbe-l3-aggregation-switch-xs3800-28", "source_page": "https://selector.zyxel.com/"} 2 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 3 | {"text": "", "url": "https://selector.zyxel.com/type/3", "source_page": "https://selector.zyxel.com/"} 4 | {"text": "", "url": "https://selector.zyxel.com/type/4", "source_page": "https://selector.zyxel.com/"} 5 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/28-port-10gbe-l3-aggregation-switch-xs3800-28", "source_page": "https://selector.zyxel.com/"} 6 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 7 | {"text": "", "url": "https://www.zyxel.com/global/en/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 8 | {"text": "", "url": "https://selector.zyxel.com/type/3", "source_page": "https://selector.zyxel.com/"} 9 | {"text": "", "url": "https://selector.zyxel.com/type/4", "source_page": "https://selector.zyxel.com/"} 10 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/28-port-10gbe-l3-aggregation-switch-xs3800-28", "source_page": "https://selector.zyxel.com/"} 11 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 12 | {"text": "", "url": "https://www.zyxel.com/global/en/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 13 | {"text": "", "url": 
"https://selector.zyxel.com/type/3", "source_page": "https://selector.zyxel.com/"} 14 | {"text": "", "url": "https://selector.zyxel.com/type/4", "source_page": "https://selector.zyxel.com/"} 15 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/28-port-10gbe-l3-aggregation-switch-xs3800-28", "source_page": "https://selector.zyxel.com/"} 16 | {"text": "", "url": "https://www.zyxel.com/us/en-us/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 17 | {"text": "", "url": "https://www.zyxel.com/global/en/products/switch/10-12-port-10g-multi-gigabit-lite-l3-smart-managed-switch-xs1930-series", "source_page": "https://selector.zyxel.com/"} 18 | -------------------------------------------------------------------------------- /firmcrawler/Examples/zyxel/screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/zyxel/screenshot_1.png -------------------------------------------------------------------------------- /firmcrawler/Examples/zyxel/screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/zyxel/screenshot_2.png -------------------------------------------------------------------------------- /firmcrawler/Examples/zyxel/screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhang-xr/FirmLLM/8dc567bdfa7fa514f1138725c1195261fc8aa041/firmcrawler/Examples/zyxel/screenshot_3.png -------------------------------------------------------------------------------- /firmcrawler/Examples/zyxel/screenshot_4.png: -------------------------------------------------------------------------------- 
class TokenUsageCallbackHandler(BaseCallbackHandler):
    """Accumulate LLM token usage and estimated cost across calls.

    After every LLM call that reports usage, the running totals are printed
    and written (overwriting the previous content) to ``save_path`` as a
    single JSON object.

    Args:
        save_path: File that receives the running totals as JSON.
        input_price_per_million: Price in USD for one million prompt tokens.
        output_price_per_million: Price in USD for one million completion
            tokens.
    """

    def __init__(
        self,
        save_path,
        *,
        input_price_per_million: float = 2.50,
        output_price_per_million: float = 10.00,
    ):
        super().__init__()
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0
        self.total_cost = 0
        self.save_path = save_path
        # Pricing is configurable so the handler is not tied to one model;
        # the defaults reproduce the previously hard-coded rates.
        self.input_price_per_million = input_price_per_million
        self.output_price_per_million = output_price_per_million

    def _save_totals(self) -> None:
        """Write the running totals to ``self.save_path`` as JSON."""
        with open(self.save_path, 'w', encoding='utf-8') as f:
            json.dump({
                "total_prompt_tokens": self.total_prompt_tokens,
                "total_completion_tokens": self.total_completion_tokens,
                "total_cost": self.total_cost
            }, f)

    def on_llm_end(self, response: Any, **kwargs: Any) -> None:
        """Update, print and persist the totals after an LLM call finishes.

        Responses without a truthy ``llm_output`` are ignored. Any failure is
        reported on stdout and swallowed, so that usage bookkeeping can never
        abort the LLM run that triggered this callback.
        """
        try:
            llm_output = getattr(response, 'llm_output', None)
            if llm_output:
                # Some providers report ``token_usage: None`` (or None
                # counts); treat those as "no usage" instead of failing.
                token_usage = llm_output.get('token_usage') or {}

                prompt_tokens = token_usage.get('prompt_tokens') or 0
                completion_tokens = token_usage.get('completion_tokens') or 0

                input_cost = prompt_tokens * (self.input_price_per_million / 1_000_000)
                output_cost = completion_tokens * (self.output_price_per_million / 1_000_000)

                self.total_prompt_tokens += prompt_tokens
                self.total_completion_tokens += completion_tokens
                self.total_cost += input_cost + output_cost

                print("\n📊 [Token Usage for this call]")
                print(f"Input tokens: {prompt_tokens:,} (${input_cost:.4f})")
                print(f"Output tokens: {completion_tokens:,} (${output_cost:.4f})")
                print(f"Total tokens: {prompt_tokens + completion_tokens:,}")
                print(f"Running total cost: ${self.total_cost:.4f}")
                try:
                    self._save_totals()
                except OSError as e:
                    # A persistence problem is not a calculation problem;
                    # report it as such and keep the in-memory totals.
                    print(f"Error saving token usage to {self.save_path}: {str(e)}")
        except Exception as e:
            # Deliberately broad: this is a best-effort reporting hook.
            print(f"Error in token usage calculation: {str(e)}")
            print("Response structure:", dir(response))
            if hasattr(response, 'llm_output'):
                print("LLM output:", response.llm_output)
/**
 * Remove every overlay box previously drawn by markPage() and reset the
 * module-level `labels` registry.
 */
function unmarkPage() {
  for (const label of labels) {
    // remove() is a no-op for an already-detached node, whereas
    // document.body.removeChild() throws NotFoundError once the page has
    // re-rendered <body> and dropped the overlay.
    label.remove();
  }
  labels = [];
}

/**
 * Draw a numbered, colour-coded dashed box over every innermost clickable
 * element that is visible in the viewport.
 *
 * An element counts as clickable when it is a SELECT, BUTTON, A or IFRAME,
 * has an `onclick` handler, or has a computed `cursor: pointer`. Elements
 * whose visible area is below 80 square pixels are skipped.
 *
 * @returns {Array<{x: number, y: number, type: string, text: string, ariaLabel: string}>}
 *   One entry per drawn box: the box centre in viewport coordinates plus the
 *   element's tag name, whitespace-collapsed text and aria-label.
 */
function markPage() {
  unmarkPage();

  // The viewport size does not change while we scan, so compute it once
  // instead of once per element.
  const vw = Math.max(
    document.documentElement.clientWidth || 0,
    window.innerWidth || 0
  );
  const vh = Math.max(
    document.documentElement.clientHeight || 0,
    window.innerHeight || 0
  );

  let items = Array.prototype.slice
    .call(document.querySelectorAll("*"))
    .map(function (element) {
      const textualContent = element.textContent.trim().replace(/\s{2,}/g, " ");
      const elementType = element.tagName.toLowerCase();
      const ariaLabel = element.getAttribute("aria-label") || "";

      const rects = [...element.getClientRects()]
        .filter((bb) => {
          // Keep only rects whose centre actually hits this element (or a
          // descendant), i.e. rects that are not covered by something else.
          const center_x = bb.left + bb.width / 2;
          const center_y = bb.top + bb.height / 2;
          const elAtCenter = document.elementFromPoint(center_x, center_y);

          return elAtCenter === element || element.contains(elAtCenter);
        })
        .map((bb) => {
          // Clip the rect to the viewport.
          const rect = {
            left: Math.max(0, bb.left),
            top: Math.max(0, bb.top),
            right: Math.min(vw, bb.right),
            bottom: Math.min(vh, bb.bottom),
          };
          return {
            ...rect,
            width: rect.right - rect.left,
            height: rect.bottom - rect.top,
          };
        });

      const area = rects.reduce((acc, rect) => acc + rect.width * rect.height, 0);

      return {
        element: element,
        include:
          element.tagName === "SELECT" ||
          element.tagName === "BUTTON" ||
          element.tagName === "A" ||
          element.onclick != null ||
          window.getComputedStyle(element).cursor == "pointer" ||
          element.tagName === "IFRAME",
        area,
        rects,
        text: textualContent,
        type: elementType,
        ariaLabel: ariaLabel,
      };
    })
    .filter((item) => item.include && item.area >= 80);

  // Only keep inner clickable items
  items = items.filter(
    (x) => !items.some((y) => x.element.contains(y.element) && !(x == y))
  );

  // Color scheme to distinguish valid links and normal A tags
  const COLOR_SCHEME = {
    'a-link': "#FF4444", // Valid links in red
    'a': "#FFA07A", // Normal A tags in light red
    'button': "#4CAF50", // Buttons in green
    'select': "#9C27B0", // Select boxes in purple
    'iframe': "#2196F3", // Iframes in blue
    'clickable': "#FF9800", // Other clickable elements in orange
  };

  items.forEach(function (item, index) {
    item.rects.forEach((bbox) => {
      // Declared locally: the bare assignment used before leaked an implicit
      // global and throws a ReferenceError in strict/module code.
      const newElement = document.createElement("div");

      // Get element type and color
      const elementType = item.element.tagName.toLowerCase();
      let borderColor;

      if (elementType === 'a') {
        // Check if it's a valid link
        const href = item.element.getAttribute('href');
        const isValidLink = href && href !== '#' && href !== '';
        borderColor = isValidLink ? COLOR_SCHEME['a-link'] : COLOR_SCHEME['a'];
      } else {
        borderColor = COLOR_SCHEME[elementType] ||
          (item.element.onclick || window.getComputedStyle(item.element).cursor === "pointer"
            ? COLOR_SCHEME.clickable
            : "#757575");
      }

      newElement.style.outline = `1px dashed ${borderColor}`;
      newElement.style.position = "fixed";
      newElement.style.left = bbox.left + "px";
      newElement.style.top = bbox.top + "px";
      newElement.style.width = bbox.width + "px";
      newElement.style.height = bbox.height + "px";
      // Overlays must never intercept clicks aimed at the page itself.
      newElement.style.pointerEvents = "none";
      newElement.style.boxSizing = "border-box";
      newElement.style.zIndex = 2147483647;

      // Use the same color for the label
      const label = document.createElement("span");
      label.textContent = index;
      label.style.position = "absolute";
      label.style.top = "-19px";
      label.style.left = "0px";
      label.style.background = borderColor;
      label.style.color = "white";
      label.style.padding = "2px 4px";
      label.style.fontSize = "12px";
      label.style.borderRadius = "2px";
      newElement.appendChild(label);

      document.body.appendChild(newElement);
      labels.push(newElement);
    });
  });

  const coordinates = items.flatMap((item) =>
    item.rects.map(({ left, top, width, height }) => ({
      x: (left + left + width) / 2,
      y: (top + top + height) / 2,
      type: item.type,
      text: item.text,
      ariaLabel: item.ariaLabel,
    }))
  );
  return coordinates;
}
"ANSWER EXPLORER", 13 | "status": "complete", 14 | "next_step": "Multiple products with download sections need exploration" 15 | }} 16 | {{ 17 | "thought": "Page contains both product info and firmware download section", 18 | "action": "ANSWER ALL", 19 | "status": "complete", 20 | "next_step": "Mixed content needs both Scraper and Explorer" 21 | }} 22 | {{ 23 | "thought": "Found firmware table with version info but download links need verification", 24 | "action": "ANSWER ALL", 25 | "status": "complete", 26 | "next_step": "Firmware info present but needs further processing" 27 | }} 28 | {{ 29 | "thought": "Page shows product specs with firmware update button", 30 | "action": "ANSWER ALL", 31 | "status": "complete", 32 | "next_step": "Contains both product info and firmware update option" 33 | }} 34 | {{ 35 | "thought": "Download section visible but links go to selection page", 36 | "action": "ANSWER EXPLORER", 37 | "status": "complete", 38 | "next_step": "Downloads require further navigation" 39 | }} 40 | 41 | # URL explorer - Collects product page URLs 42 | explorer: 43 | user: | 44 | Workflow Examples: 45 | {{ 46 | "thought": "Starting from page top, collecting initial viewport links", 47 | "action": "GET_NAVIGATION_LINKS 1,2,3,4,5", 48 | "status": "continue", 49 | "next_step": "After collecting initial links, scroll for more content" 50 | }} 51 | {{ 52 | "thought": "Current viewport processed, scrolling down for more content", 53 | "action": "SCROLL WINDOW; down", 54 | "status": "continue", 55 | "next_step": "Wait for new content to load after scrolling" 56 | }} 57 | {{ 58 | "thought": "Waiting for new content to load after scroll", 59 | "action": "WAIT 2", 60 | "status": "continue", 61 | "next_step": "Process newly visible links in viewport" 62 | }} 63 | {{ 64 | "thought": "Processing next set of links in new viewport", 65 | "action": "GET_NAVIGATION_LINKS 6,7,8,9,10", 66 | "status": "continue", 67 | "next_step": "Check for more content by scrolling" 68 | }} 69 
| {{ 70 | "thought": "Scrolling to check for more content", 71 | "action": "SCROLL WINDOW; down", 72 | "status": "continue", 73 | "next_step": "Wait to verify if new content appears" 74 | }} 75 | {{ 76 | "thought": "Waiting to verify no new content", 77 | "action": "WAIT 2", 78 | "status": "continue", 79 | "next_step": "Perform final scroll to confirm bottom" 80 | }} 81 | {{ 82 | "thought": "Performing final scroll to confirm bottom reached", 83 | "action": "SCROLL WINDOW; down", 84 | "status": "continue", 85 | "next_step": "Wait for final verification" 86 | }} 87 | {{ 88 | "thought": "Verification complete: Bottom reached, no new content, all links collected", 89 | "action": "ANSWER task completion status", 90 | "status": "complete", 91 | "next_step": "Task finished, all links collected" 92 | }} 93 | scraper: 94 | user: | 95 | Note that you start from the top of the page, so no need to scroll up. Make sure to reach the bottom before submitting ANSWER. 96 | Begin scrolling down to load all content. Process each viewport systematically. 
97 | 98 | Example workflow: 99 | {{ 100 | "thought": "Starting from page top, scanning for firmware download sections", 101 | "action": "GET_NAVIGATION_LINKS 1,2,3", 102 | "status": "continue", 103 | "next_step": "After collecting current info, scroll down for more content" 104 | }} 105 | 106 | {{ 107 | "thought": "Firmware info from current viewport collected, continuing down", 108 | "action": "SCROLL WINDOW; down", 109 | "status": "continue", 110 | "next_step": "Process newly visible firmware info" 111 | }} 112 | 113 | {{ 114 | "thought": "Reached page bottom, all firmware info collected", 115 | "action": "ANSWER {collected_firmware_info}", 116 | "status": "complete", 117 | "next_step": "Task complete, all firmware info gathered" 118 | }} 119 | -------------------------------------------------------------------------------- /firmcrawler/prompts/foscam.yaml: -------------------------------------------------------------------------------- 1 | inspector: 2 | user: | 3 | Workflow Examples: 4 | {{ 5 | "thought": "Found direct firmware download links (.bin files) with version numbers", 6 | "action": "ANSWER SCRAPER", 7 | "status": "complete", 8 | "next_step": "Direct download links found, suitable for Scraper" 9 | }} 10 | {{ 11 | "thought": "Page shows product list with 'Download' section for each, but needs additional navigation", 12 | "action": "ANSWER EXPLORER", 13 | "status": "complete", 14 | "next_step": "Multiple products with download sections need exploration" 15 | }} 16 | {{ 17 | "thought": "Page contains both product info and firmware download section", 18 | "action": "ANSWER ALL", 19 | "status": "complete", 20 | "next_step": "Mixed content needs both Scraper and Explorer" 21 | }} 22 | {{ 23 | "thought": "Found firmware table with version info but download links need verification", 24 | "action": "ANSWER ALL", 25 | "status": "complete", 26 | "next_step": "Firmware info present but needs further processing" 27 | }} 28 | {{ 29 | "thought": "Page shows product 
specs with firmware update button", 30 | "action": "ANSWER ALL", 31 | "status": "complete", 32 | "next_step": "Contains both product info and firmware update option" 33 | }} 34 | {{ 35 | "thought": "Download section visible but links go to selection page", 36 | "action": "ANSWER EXPLORER", 37 | "status": "complete", 38 | "next_step": "Downloads require further navigation" 39 | }} 40 | 41 | explorer: 42 | user: | 43 | Important Task Requirements: 44 | - Must scroll to top of page when entering any new page 45 | - Must process all product images and their corresponding navigation links 46 | - Must scroll to the absolute bottom of each page 47 | - Must perform multiple scroll verifications to confirm true bottom is reached 48 | - Must ONLY click "Next Page" button (NEVER click "Previous Page") 49 | - Must process pages in forward order only (from first page to last page) 50 | - Must process content from top to bottom on each page 51 | - Only complete task after confirming no more content and no next page exists 52 | 53 | Workflow Examples: 54 | {{ 55 | "thought": "Start by ensuring we're at the top of the page", 56 | "action": "SCROLL WINDOW; up", 57 | "status": "continue", 58 | "next_step": "Begin processing from page top" 59 | }} 60 | {{ 61 | "thought": "Start scrolling down to load initial content", 62 | "action": "SCROLL WINDOW; down", 63 | "status": "continue", 64 | "next_step": "Wait for content to load then collect first set of product links" 65 | }} 66 | {{ 67 | "thought": "Processing first group of product images and their navigation links", 68 | "action": "GET_NAVIGATION_LINKS 1,2,3,4,5", 69 | "status": "continue", 70 | "next_step": "Scroll down to check for more products" 71 | }} 72 | {{ 73 | "thought": "More products visible, scroll to load them completely", 74 | "action": "SCROLL WINDOW; down", 75 | "status": "continue", 76 | "next_step": "Process next group of product links" 77 | }} 78 | {{ 79 | "thought": "Getting next set of product navigation links", 
80 | "action": "GET_NAVIGATION_LINKS 6,7,8,9,10", 81 | "status": "continue", 82 | "next_step": "Continue scrolling to check for more content" 83 | }} 84 | {{ 85 | "thought": "Scroll to check for more content and next page button", 86 | "action": "SCROLL WINDOW; down", 87 | "status": "continue", 88 | "next_step": "Verify if next page exists" 89 | }} 90 | {{ 91 | "thought": "Found next page button (element 11), verifying it's 'Next' not 'Previous'", 92 | "action": "VERIFY_TEXT 11 'next'", 93 | "status": "continue", 94 | "next_step": "Click next page if verified" 95 | }} 96 | {{ 97 | "thought": "Confirmed it's next page button, clicking to proceed", 98 | "action": "CLICK 11", 99 | "status": "continue", 100 | "next_step": "Wait for new page to load then scroll to top" 101 | }} 102 | {{ 103 | "thought": "New page loaded, scrolling back to top before processing", 104 | "action": "SCROLL WINDOW; up", 105 | "status": "continue", 106 | "next_step": "Begin processing new page from top" 107 | }} 108 | {{ 109 | "thought": "No next page found, performing additional scroll to verify bottom", 110 | "action": "SCROLL WINDOW; down", 111 | "status": "continue", 112 | "next_step": "Double check we reached true bottom" 113 | }} 114 | {{ 115 | "thought": "Performing final verification scroll", 116 | "action": "SCROLL WINDOW; down", 117 | "status": "continue", 118 | "next_step": "Confirm no more content or next page button" 119 | }} 120 | {{ 121 | "thought": "Multiple scrolls confirm true bottom reached, no next page found", 122 | "action": "ANSWER All pages processed successfully, confirmed no more content", 123 | "status": "complete", 124 | "next_step": "Task complete, collected all product links from all pages" 125 | }} 126 | scraper: 127 | user: | 128 | Note that you start from the top of the page, so no need to scroll up. Make sure to reach the bottom before submitting ANSWER. 129 | Begin scrolling down to load all content. Process each viewport systematically. 
130 | 131 | Example workflow: 132 | {{ 133 | "thought": "Starting from page top, scanning for firmware download sections", 134 | "action": "GET_NAVIGATION_LINKS 1,2,3", 135 | "status": "continue", 136 | "next_step": "After collecting current info, scroll down for more content" 137 | }} 138 | 139 | {{ 140 | "thought": "Firmware info from current viewport collected, continuing down", 141 | "action": "SCROLL WINDOW; down", 142 | "status": "continue", 143 | "next_step": "Process newly visible firmware info" 144 | }} 145 | 146 | {{ 147 | "thought": "Reached page bottom, all firmware info collected", 148 | "action": "ANSWER {collected_firmware_info}", 149 | "status": "complete", 150 | "next_step": "Task complete, all firmware info gathered" 151 | }} 152 | -------------------------------------------------------------------------------- /firmcrawler/prompts/ui.yaml: -------------------------------------------------------------------------------- 1 | inspector: 2 | user: | 3 | Workflow Examples: 4 | {{ 5 | "thought": "Found direct firmware download links (.bin files) with version numbers", 6 | "action": "ANSWER SCRAPER", 7 | "status": "complete", 8 | "next_step": "Direct download links found, suitable for Scraper" 9 | }} 10 | {{ 11 | "thought": "Page shows product list with 'Download' section for each, but needs additional navigation", 12 | "action": "ANSWER EXPLORER", 13 | "status": "complete", 14 | "next_step": "Multiple products with download sections need exploration" 15 | }} 16 | {{ 17 | "thought": "Page contains both product info and firmware download section", 18 | "action": "ANSWER ALL", 19 | "status": "complete", 20 | "next_step": "Mixed content needs both Scraper and Explorer" 21 | }} 22 | {{ 23 | "thought": "Found firmware table with version info but download links need verification", 24 | "action": "ANSWER ALL", 25 | "status": "complete", 26 | "next_step": "Firmware info present but needs further processing" 27 | }} 28 | {{ 29 | "thought": "Page shows product 
specs with firmware update button", 30 | "action": "ANSWER ALL", 31 | "status": "complete", 32 | "next_step": "Contains both product info and firmware update option" 33 | }} 34 | {{ 35 | "thought": "Download section visible but links go to selection page", 36 | "action": "ANSWER EXPLORER", 37 | "status": "complete", 38 | "next_step": "Downloads require further navigation" 39 | }} 40 | explorer: 41 | user: | 42 | Workflow Examples: 43 | {{ 44 | "thought": "Start scrolling down to load all content", 45 | "action": "SCROLL WINDOW; down", 46 | "status": "continue", 47 | "next_step": "Check for initial viewport links after scrolling" 48 | }} 49 | {{ 50 | "thought": "Starting from page top, collect initial viewport links", 51 | "action": "GET_NAVIGATION_LINKS 1,2,3,4,5", 52 | "status": "continue", 53 | "next_step": "After collecting links, scroll for more content" 54 | }} 55 | {{ 56 | "thought": "Current viewport processed, scroll down to see more content", 57 | "action": "SCROLL WINDOW; down", 58 | "status": "continue", 59 | "next_step": "Wait for new content to load" 60 | }} 61 | {{ 62 | "thought": "Process next group of links in new viewport", 63 | "action": "GET_NAVIGATION_LINKS 6,7,8,9,10", 64 | "status": "continue", 65 | "next_step": "Check for more content by scrolling" 66 | }} 67 | {{ 68 | "thought": "Scroll to check for more content", 69 | "action": "SCROLL WINDOW; down", 70 | "status": "continue", 71 | "next_step": "Wait to verify if new content appears" 72 | }} 73 | {{ 74 | "thought": "Perform final scroll to confirm bottom reached", 75 | "action": "SCROLL WINDOW; down", 76 | "status": "continue", 77 | "next_step": "Verify all content processed" 78 | }} 79 | {{ 80 | "thought": "Verification complete: reached bottom, no new content, all links collected", 81 | "action": "ANSWER Task completed", 82 | "status": "complete", 83 | "next_step": "Task finished, all links collected" 84 | }} 85 | 86 | scraper: 87 | user: | 88 | Note that you start from the top of the 
page, so no need to scroll up. Make sure to reach the bottom before submitting ANSWER. 89 | Begin scrolling down to load all content. Process each viewport systematically. 90 | 91 | Example workflow: 92 | {{ 93 | "thought": "Starting from page top, scanning for firmware download sections", 94 | "action": "GET_NAVIGATION_LINKS 1,2,3", 95 | "status": "continue", 96 | "next_step": "After collecting current info, scroll down for more content" 97 | }} 98 | 99 | {{ 100 | "thought": "Firmware info from current viewport collected, continuing down", 101 | "action": "SCROLL WINDOW; down", 102 | "status": "continue", 103 | "next_step": "Process newly visible firmware info" 104 | }} 105 | 106 | {{ 107 | "thought": "Reached page bottom, all firmware info collected", 108 | "action": "ANSWER {collected_firmware_info}", 109 | "status": "complete", 110 | "next_step": "Task complete, all firmware info gathered" 111 | }} 112 | --------------------------------------------------------------------------------