├── .github
│   └── workflows
│       └── rust.yml
├── .gitignore
├── Cargo.toml
├── README.assets
│   ├── image-20250310152754822.png
│   └── image-20250310155141048.png
├── README.md
└── src
    └── main.rs

/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
on:
  push:
    tags:
      - 'v*.*.*' # match version tags that start with 'v', e.g. v1.0.0

jobs:
  release:
    name: release ${{ matrix.target }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - target: x86_64-pc-windows-gnu
            archive: zip
          - target: x86_64-unknown-linux-musl
            archive: tar.gz tar.xz tar.zst
          - target: x86_64-apple-darwin
            archive: zip
    steps:
      - uses: actions/checkout@master

      # Install OpenSSL
      - name: Install OpenSSL
        run: |
          sudo apt-get update
          sudo apt-get install -y libssl-dev

      # Get the version number from the tag
      - name: Get version from tag
        id: get_version
        run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}"

      - name: Compile and release
        uses: rust-build/rust-build.action@v1.4.5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          RUSTTARGET: ${{ matrix.target }}
          ARCHIVE_TYPES: ${{ matrix.archive }}
          # Use the version number as part of the archive name
          ARCHIVE_NAME: my-project-${{ steps.get_version.outputs.version }}-${{ matrix.target }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/target
.DS_Store
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "ICPSearch"
version = "0.1.2"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = "3.0.0-beta.1"
reqwest = { version = "0.11.22", features = ["blocking"] }
tokio = { version = "1.33.0", features = ["full"] }
base64 = "0.21.4"
futures = "0.3.28"
scraper = "0.17.1"
kuchiki = "0.8.1"
url = "2.4.1"
psl = "2.1.4"
rand = "0.8.5"
ahash = "0.8.7"
calamine = "0.19"
rust_xlsxwriter = "0.40"

[profile.release]
opt-level = "z"
lto = true
codegen-units = 1
debug = false
panic = "abort"
--------------------------------------------------------------------------------
/README.assets/image-20250310152754822.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/A10ha/ICPSearch/4dae05777e52f67116e8e64df84ae37fa8f0388d/README.assets/image-20250310152754822.png
--------------------------------------------------------------------------------
/README.assets/image-20250310155141048.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/A10ha/ICPSearch/4dae05777e52f67116e8e64df84ae37fa8f0388d/README.assets/image-20250310155141048.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ICPSearch
![image-20250310152754822](./README.assets/image-20250310152754822.png)

**ICP Lookup Tool** is a command-line tool written in Rust for looking up the ICP filing (备案) information of website domains.

## Features

1. Look up ICP filing information by domain, URL, or full company name. Enter a domain or a full company name and the tool returns the corresponding filing record.

```bash
ICPSearch.exe -d yourdomain.com
```
![image](https://github.com/A10ha/ICPSearch/assets/60035496/ab51e053-fc2c-4736-9ddc-a59fa87ae734)

2. Batch-process multiple domains, URLs, and full company names. List the targets in a text file, then let the tool process them all in one run and return the filing record for each.

```bash
ICPSearch.exe -f domains.txt
```
![image](https://github.com/A10ha/ICPSearch/assets/60035496/b4237cf1-af88-40cf-9b42-e96d23ee6e37)
![image](https://github.com/A10ha/ICPSearch/assets/60035496/f83b1206-4da1-43fd-9109-a6e3361fc7f6)

3. Parse a specified column of an Excel file and query each value; the results are inserted after the corresponding data rows and saved into a copy of the Excel file.

```bash
./ICPSearch -e <EXCEL> -c <COLUMN>
```

![image-20250310155141048](./README.assets/image-20250310155141048.png)

## Output (default)

All results are printed to the console and also written to a file named result.txt.

## Notes

The preset concurrency is 50 requests. Adjust this value to your situation and to what the target server can handle; an overly large number of concurrent requests may put pressure on it.
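For reference, the limit is the value passed to `buffer_unordered` in `process_file` in `src/main.rs` (the Excel mode uses 10 in `process_excel`); there is no CLI flag for it, so changing it means editing the source and rebuilding. Below is a minimal, self-contained sketch of the same pattern, with a hypothetical `CONCURRENCY` constant standing in for the hard-coded value:

```rust
use futures::stream::{self, StreamExt};

// Hypothetical constant; in ICPSearch the value 50 is hard-coded in process_file.
const CONCURRENCY: usize = 50;

#[tokio::main]
async fn main() {
    let targets = vec!["example.com", "example.org"];
    stream::iter(targets)
        // Turn each target into a future; this closure stands in for the real HTTP lookup.
        .map(|t| async move { format!("queried {}", t) })
        // At most CONCURRENCY lookups are in flight at any one time.
        .buffer_unordered(CONCURRENCY)
        .for_each(|line| async move { println!("{}", line) })
        .await;
}
```

Lower values are gentler on the target server; higher values finish large batches faster.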
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
#![allow(non_snake_case)]
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::error::Error;
use std::fs::OpenOptions;
use std::io::Write;
use std::str::from_utf8;
use std::collections::{HashSet, HashMap};

use reqwest::Client;
use clap::{Arg, AppSettings, App};
use tokio::runtime;
use futures::stream::{self, StreamExt};
use kuchiki::traits::*;
use url::Url;
use psl::{List, Psl};
use calamine::{Reader, open_workbook, Xlsx};
use std::path::PathBuf;
use rust_xlsxwriter::Workbook;

struct DomainResult {
    unit: String,
    type_: String,
    icp_code: String,
    domain: String,
    pass_time: String,
}

fn main() {
    create_file_if_not_exists("result.txt");
    let matches = App::new("ICP Lookup Tool")
        .setting(AppSettings::ArgRequiredElseHelp)
        .author("Author: A10ha")
        .about("Tool for querying ICP filings by domain name or company name or url")
        .arg(Arg::with_name("domain")
            .short('d')
            .long("domain")
            .value_name("DOMAIN")
            .takes_value(true)
            .help("Domain name Or Company name Or URL to lookup"))
        .arg(Arg::with_name("file")
            .short('f')
            .long("file")
            .value_name("FILE")
            .takes_value(true)
            .help("A file containing the domains or business names or urls to be looked up"))
        .arg(Arg::with_name("excel")
            .short('e')
            .long("excel")
            .value_name("EXCEL")
            .takes_value(true)
            .help("Excel file path to process"))
        .arg(Arg::with_name("column")
            .short('c')
            .long("column")
            .value_name("COLUMN")
            .takes_value(true)
            .help("Column name to read from Excel"))
        .get_matches();

    let runtime = runtime::Runtime::new().unwrap();
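    // Dispatch on the CLI flags: Excel mode (-e/-c) is handled first,
    // then a single lookup (-d), then batch processing from a file (-f).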
    if let Some(excel_path) = matches.value_of("excel") {
        let column_name = matches.value_of("column").expect("Column name is required for Excel processing");
        match runtime.block_on(process_excel(excel_path, column_name)) {
            Ok(_) => println!("Excel processing completed."),
            Err(err) => println!("Error processing Excel: {}", err)
        };
    } else if let Some(domain) = matches.value_of("domain") {
        let url = build_url_xpath(domain);
        // println!("{}", url);
        match runtime.block_on(fetch_and_handle_data_xpath(&url)) {
            Ok(_) => println!("Data processing completed."),
            Err(err) => println!("Error: {}", err)
        };
    } else if let Some(filename) = matches.value_of("file") {
        match runtime.block_on(process_file(filename)) {
            Ok(_) => println!("Data processing completed."),
            Err(err) => println!("Error: {}", err)
        };
    } else {
        eprintln!("Invalid option.");
    }
}

async fn process_excel(excel_path: &str, column_name: &str) -> Result<(), Box<dyn Error>> {
    let path = PathBuf::from(excel_path);
    let mut workbook: Xlsx<_> = open_workbook(&path)?;

    println!("Processing Excel file: {}", excel_path);

    if let Some(Ok(range)) = workbook.worksheet_range("Sheet1") {
        let headers: Vec<String> = range.rows()
            .next()
            .unwrap()
            .iter()
            .map(|cell| cell.to_string())
            .collect();

        let column_index = headers.iter()
            .position(|h| h == column_name)
            .ok_or("Column not found")?;

        // Collect all unique domains and their positions in the Excel sheet
        let mut domain_positions: HashMap<String, Vec<(u32, u16)>> = HashMap::new();
        for (row_idx, row) in range.rows().enumerate().skip(1) {
            if let Some(cell) = row.get(column_index) {
                let domain = cell.to_string();
                if !domain.is_empty() {
                    domain_positions
                        .entry(domain)
                        .or_default()
                        .push((row_idx as u32, column_index as u16));
                }
            }
        }

        // Query all unique domains concurrently
        let domains: Vec<String> = domain_positions.keys().cloned().collect();
        let mut results: HashMap<String, Option<DomainResult>> = HashMap::new();

        let fetches = stream::iter(domains)
            .map(|domain| async {
                let url = build_url_xpath(&domain);
                let result = match fetch_data(&url).await {
                    Ok(data) => parse_icp_data(&data),
                    Err(_) => None,
                };
                (domain, result)
            })
            .buffer_unordered(10); // limit the number of concurrent requests

        results.extend(fetches.collect::<Vec<_>>().await);

        // Create a new workbook and write the data
        let mut new_workbook = Workbook::new();
        let worksheet = new_workbook.add_worksheet();

        // Copy the original data
        for (row_idx, row) in range.rows().enumerate() {
            for (col_idx, cell) in row.iter().enumerate() {
                worksheet.write_string(row_idx as u32, col_idx as u16, cell.to_string())?;
            }
        }

        // Add the new column headers
        let start_col = headers.len();
        worksheet.write_string(0, start_col as u16, "Unit")?;
        worksheet.write_string(0, (start_col + 1) as u16, "Type")?;
        worksheet.write_string(0, (start_col + 2) as u16, "ICP Code")?;
        worksheet.write_string(0, (start_col + 3) as u16, "Domain")?;
        worksheet.write_string(0, (start_col + 4) as u16, "Pass Time")?;

        // Write the lookup results
        for (domain, positions) in domain_positions {
            if let Some(Some(result)) = results.get(&domain) {
                for (row_idx, _) in positions {
                    worksheet.write_string(row_idx, start_col as u16, &result.unit)?;
                    worksheet.write_string(row_idx, (start_col + 1) as u16, &result.type_)?;
                    worksheet.write_string(row_idx, (start_col + 2) as u16, &result.icp_code)?;
                    worksheet.write_string(row_idx, (start_col + 3) as u16, &result.domain)?;
                    worksheet.write_string(row_idx, (start_col + 4) as u16, &result.pass_time)?;
                }
            }
            println!("Processed domain: {}", domain);
        }

        // Save the result
        let result_path = path.with_file_name(format!("{}_result.xlsx",
            path.file_stem().unwrap().to_string_lossy()));
        new_workbook.save(result_path)?;
    }

    Ok(())
}
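// Parse the first row of the beianx.cn result table into a DomainResult.
// Rows with fewer than 8 whitespace-separated fields are treated as "no result".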
fn parse_icp_data(html: &str) -> Option<DomainResult> {
    let document = kuchiki::parse_html().one(html);
    let css_selector = "table tbody tr";

    if let Ok(mut selections) = document.select(css_selector) {
        if let Some(tr) = selections.next() {
            let data_in_row: Vec<_> = tr.text_contents()
                .split_whitespace()
                .map(|s| s.to_owned())
                .collect();

            if data_in_row.len() >= 8 {
                return Some(DomainResult {
                    unit: data_in_row[1].clone(),
                    type_: data_in_row[2].clone(),
                    icp_code: data_in_row[3].clone(),
                    domain: data_in_row[data_in_row.len() - 4].clone(),
                    pass_time: data_in_row[data_in_row.len() - 3].clone(),
                });
            }
        }
    }
    None
}

fn get_uuid() -> String {
    let uuid_template = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
    let mut uuid = String::new();
    for c in uuid_template.chars() {
        let v = match c {
            'x' => rand::random::<u32>() % 16,
            'y' => ((rand::random::<u32>() % 16) & 0x3) | 0x8,
            '4' => 4,
            '-' => u32::MAX, // sentinel value that marks the hyphen
            _ => {
                panic!("Unexpected character in UUID template: {}", c);
            }
        };
        if v == u32::MAX {
            uuid.push('-');
        } else {
            uuid.push_str(&format!("{:x}", v));
        }
    }
    uuid
}

fn get_root_domain(input: &str) -> Option<String> {
    let domain_str = if input.starts_with("http://") || input.starts_with("https://") {
        input.to_string()
    } else {
        format!("http://{}", input)
    };

    let parsed_url = Url::parse(&domain_str).ok()?;
    let host = parsed_url.host_str()?;

    let list = List;
    let suffix = list.suffix(host.as_bytes())?;

    let suffix_byte = suffix.as_bytes();
    let suffix_str = from_utf8(suffix_byte).unwrap();

    let domain = host.trim_end_matches(suffix_str).trim_end_matches('.');
    let parts: Vec<&str> = domain.rsplitn(2, '.').collect();

    parts.first().map(|last| format!("{}.{}", last, suffix_str))
}

fn contains_chinese(s: &str) -> bool {
    for ch in s.chars() {
        if !ch.is_ascii() {
            return true;
        }
    }
    false
}

fn build_url_xpath(input: &str) -> String {
    let index = if !contains_chinese(input) {
        get_root_domain(input).expect("Failed to get root domain")
    } else {
        input.to_string()
    };
    format!(
        "https://www.beianx.cn/search/{}",
        index
    )
}

async fn fetch_and_handle_data_xpath(url: &str) -> Result<(), Box<dyn Error>> {
    let data = fetch_data(url).await?;
    handle_data_xpath(&data);
    Ok(())
}

async fn fetch_data(url: &str) -> Result<String, Box<dyn Error>> {
    let uuid = get_uuid();
    let cookie_str = format!("machine_str={}", uuid);
    let client = Client::new();
    let response = client.get(url).header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36").header("Cookie", cookie_str).send().await?;
    let body = response.text().await?;
    Ok(body)
}

fn create_file_if_not_exists(file_path: &str) {
    let path = Path::new(file_path);
    if !path.exists() {
        File::create(path).expect("Failed to create file");
    }
}
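// Format one parsed result row and write it to stdout and to result.txt.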
fn process_domain_result(data_in_row: &[String], file: &mut File) {
    let result = DomainResult {
        unit: data_in_row[1].clone(),
        type_: data_in_row[2].clone(),
        icp_code: data_in_row[3].clone(),
        domain: data_in_row[data_in_row.len() - 4].clone(),
        pass_time: data_in_row[data_in_row.len() - 3].clone(),
    };

    let output = format!("[Unit]: {} [Type]: {} [icpCode]: {} [Domain]: {} [passTime]: {}", &result.unit, &result.type_, &result.icp_code, &result.domain, &result.pass_time);

    println!("{}", output);

    if let Err(e) = writeln!(file, "{}", output) {
        eprintln!("Couldn't write to file: {}", e);
    }
}

fn handle_data_xpath(data: &str) {
    let document = kuchiki::parse_html().one(data);
    let css_selector = "table tbody tr";

    let selections: Vec<_> = document.select(css_selector).unwrap().collect();

    let mut file = OpenOptions::new()
        .append(true)
        .open("result.txt")
        .unwrap();

    for tr in selections {
        let data_in_row: Vec<_> = tr.text_contents().split_whitespace().map(|s| s.to_owned()).collect();
        // println!("{:?}", data_in_row);
        if data_in_row.len() >= 8 {
            process_domain_result(&data_in_row, &mut file);
        } else {
            eprintln!("[Error] ICP filing query failed! Skipping!");
        }
    }
}

async fn process_file(filename: &str) -> Result<(), Box<dyn Error>> {
    let path = Path::new(filename);
    let file = File::open(path)?;
    let reader = io::BufReader::new(file);

    let urls: Vec<String> = reader.lines()
        .map_while(Result::ok)
        .map(|line| build_url_xpath(&line))
        .collect();

    let unique_urls: HashSet<String> = urls.into_iter().collect();
    let urls_set: Vec<String> = unique_urls.into_iter().collect();

    let fetches = urls_set.iter()
        .map(|url| fetch_and_handle_data_xpath(url));

    stream::iter(fetches)
        .buffer_unordered(50)
        .for_each(|result| async {
            if let Err(e) = result {
                // print the error
                println!("Error: {}", e);
            }
        })
        .await;

    Ok(())
}
--------------------------------------------------------------------------------