├── .github
│   └── workflows
│       └── rust.yml
├── .gitignore
├── Cargo.toml
├── README.assets
│   ├── image-20250310152754822.png
│   └── image-20250310155141048.png
├── README.md
└── src
    └── main.rs

/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
on:
  push:
    tags:
      - 'v*.*.*' # match version tags that start with 'v', e.g. v1.0.0

jobs:
  release:
    name: release ${{ matrix.target }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - target: x86_64-pc-windows-gnu
            archive: zip
          - target: x86_64-unknown-linux-musl
            archive: tar.gz tar.xz tar.zst
          - target: x86_64-apple-darwin
            archive: zip
    steps:
      - uses: actions/checkout@master

      # Install OpenSSL
      - name: Install OpenSSL
        run: |
          sudo apt-get update
          sudo apt-get install -y libssl-dev

      # Get the version number from the tag
      - name: Get version from tag
        id: get_version
        run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}"

      - name: Compile and release
        uses: rust-build/rust-build.action@v1.4.5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          RUSTTARGET: ${{ matrix.target }}
          ARCHIVE_TYPES: ${{ matrix.archive }}
          # Use the version number as part of the archive name
          ARCHIVE_NAME: my-project-${{ steps.get_version.outputs.version }}-${{ matrix.target }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
/target
.DS_Store
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "ICPSearch"
version = "0.1.2"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = "3.0.0-beta.1"
reqwest = { version = "0.11.22", features = ["blocking"] }
tokio = { version = "1.33.0", features = ["full"] }
base64 = "0.21.4"
futures = "0.3.28"
scraper = "0.17.1"
kuchiki = "0.8.1"
url = "2.4.1"
psl = "2.1.4"
rand = "0.8.5"
ahash = "0.8.7"
calamine = "0.19"
rust_xlsxwriter = "0.40"

[profile.release]
opt-level = "z"
lto = true
codegen-units = 1
debug = false
panic = "abort"
--------------------------------------------------------------------------------
/README.assets/image-20250310152754822.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/A10ha/ICPSearch/4dae05777e52f67116e8e64df84ae37fa8f0388d/README.assets/image-20250310152754822.png
--------------------------------------------------------------------------------
/README.assets/image-20250310155141048.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/A10ha/ICPSearch/4dae05777e52f67116e8e64df84ae37fa8f0388d/README.assets/image-20250310155141048.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ICPSearch
![image-20250310152754822](./README.assets/image-20250310152754822.png)

**ICP Lookup Tool** is a command-line tool written in Rust for looking up the ICP filing (备案) information of website domains.

## Features

1. Look up ICP filing information by domain, URL, or full company name. Enter a domain or a full company name and the tool returns the corresponding filing record.

```bash
ICPSearch.exe -d yourdomain.com
```
![image](https://github.com/A10ha/ICPSearch/assets/60035496/ab51e053-fc2c-4736-9ddc-a59fa87ae734)

2. Batch-process multiple domains, URLs, and full company names. List the targets in a text file, then let the tool process them all in one run and return the filing record for each.

```bash
ICPSearch.exe -f domains.txt
```
![image](https://github.com/A10ha/ICPSearch/assets/60035496/b4237cf1-af88-40cf-9b42-e96d23ee6e37)
![image](https://github.com/A10ha/ICPSearch/assets/60035496/f83b1206-4da1-43fd-9109-a6e3361fc7f6)

3. Parse a specified column of an Excel file and query each value; the results are inserted after the corresponding data rows and saved into a copy of the Excel file.

```bash
./ICPSearch -e <EXCEL> -c <COLUMN>
```

![image-20250310155141048](./README.assets/image-20250310155141048.png)

## Output (default)

All results are printed to the console and also written to a file named result.txt.

## Notes

The preset concurrency is 50 requests. Adjust this value to your situation and to what the target server can handle; an overly large number of concurrent requests may put pressure on it.
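For reference, the limit is the value passed to `buffer_unordered` in `process_file` in `src/main.rs` (the Excel mode uses 10 in `process_excel`); there is no CLI flag for it, so changing it means editing the source and rebuilding. Below is a minimal, self-contained sketch of the same pattern, with a hypothetical `CONCURRENCY` constant standing in for the hard-coded value:

```rust
use futures::stream::{self, StreamExt};

// Hypothetical constant; in ICPSearch the value 50 is hard-coded in process_file.
const CONCURRENCY: usize = 50;

#[tokio::main]
async fn main() {
    let targets = vec!["example.com", "example.org"];
    stream::iter(targets)
        // Turn each target into a future; this closure stands in for the real HTTP lookup.
        .map(|t| async move { format!("queried {}", t) })
        // At most CONCURRENCY lookups are in flight at any one time.
        .buffer_unordered(CONCURRENCY)
        .for_each(|line| async move { println!("{}", line) })
        .await;
}
```

Lower values are gentler on the target server; higher values finish large batches faster.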
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
#![allow(non_snake_case)]
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use std::error::Error;
use std::fs::OpenOptions;
use std::io::Write;
use std::str::from_utf8;
use std::collections::{HashSet, HashMap};

use reqwest::Client;
use clap::{Arg, AppSettings, App};
use tokio::runtime;
use futures::stream::{self, StreamExt};
use kuchiki::traits::*;
use url::Url;
use psl::{List, Psl};
use calamine::{Reader, open_workbook, Xlsx};
use std::path::PathBuf;
use rust_xlsxwriter::Workbook;

struct DomainResult {
    unit: String,
    type_: String,
    icp_code: String,
    domain: String,
    pass_time: String,
}

fn main() {
    create_file_if_not_exists("result.txt");
    let matches = App::new("ICP Lookup Tool")
        .setting(AppSettings::ArgRequiredElseHelp)
        .author("Author: A10ha")
        .about("Tool for querying ICP filings by domain name or company name or url")
        .arg(Arg::with_name("domain")
            .short('d')
            .long("domain")
            .value_name("DOMAIN")
            .takes_value(true)
            .help("Domain name Or Company name Or URL to lookup"))
        .arg(Arg::with_name("file")
            .short('f')
            .long("file")
            .value_name("FILE")
            .takes_value(true)
            .help("A file containing the domains or business names or urls to be looked up"))
        .arg(Arg::with_name("excel")
            .short('e')
            .long("excel")
            .value_name("EXCEL")
            .takes_value(true)
            .help("Excel file path to process"))
        .arg(Arg::with_name("column")
            .short('c')
            .long("column")
            .value_name("COLUMN")
            .takes_value(true)
            .help("Column name to read from Excel"))
        .get_matches();

    let runtime = runtime::Runtime::new().unwrap();
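    // Dispatch on the CLI flags: Excel mode (-e/-c) is handled first,
    // then a single lookup (-d), then batch processing from a file (-f).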
    if let Some(excel_path) = matches.value_of("excel") {
        let column_name = matches.value_of("column").expect("Column name is required for Excel processing");
        match runtime.block_on(process_excel(excel_path, column_name)) {
            Ok(_) => println!("Excel processing completed."),
            Err(err) => println!("Error processing Excel: {}", err)
        };
    } else if let Some(domain) = matches.value_of("domain") {
        let url = build_url_xpath(domain);
        // println!("{}", url);
        match runtime.block_on(fetch_and_handle_data_xpath(&url)) {
            Ok(_) => println!("Data processing completed."),
            Err(err) => println!("Error: {}", err)
        };
    } else if let Some(filename) = matches.value_of("file") {
        match runtime.block_on(process_file(filename)) {
            Ok(_) => println!("Data processing completed."),
            Err(err) => println!("Error: {}", err)
        };
    } else {
        eprintln!("Invalid option.");
    }
}

async fn process_excel(excel_path: &str, column_name: &str) -> Result<(), Box<dyn Error>> {
    let path = PathBuf::from(excel_path);
    let mut workbook: Xlsx<_> = open_workbook(&path)?;

    println!("Processing Excel file: {}", excel_path);

    if let Some(Ok(range)) = workbook.worksheet_range("Sheet1") {
        let headers: Vec<String> = range.rows()
            .next()
            .unwrap()
            .iter()
            .map(|cell| cell.to_string())
            .collect();

        let column_index = headers.iter()
            .position(|h| h == column_name)
            .ok_or("Column not found")?;

        // Collect all unique domains and their positions in the Excel sheet
        let mut domain_positions: HashMap<String, Vec<(u32, u16)>> = HashMap::new();
        for (row_idx, row) in range.rows().enumerate().skip(1) {
            if let Some(cell) = row.get(column_index) {
                let domain = cell.to_string();
                if !domain.is_empty() {
                    domain_positions
                        .entry(domain)
                        .or_default()
                        .push((row_idx as u32, column_index as u16));
                }
            }
        }

        // Query all unique domains concurrently
        let domains: Vec<String> = domain_positions.keys().cloned().collect();
        let mut results: HashMap<String, Option<DomainResult>> = HashMap::new();

        let fetches = stream::iter(domains)
            .map(|domain| async {
                let url = build_url_xpath(&domain);
                let result = match fetch_data(&url).await {
                    Ok(data) => parse_icp_data(&data),
                    Err(_) => None,
                };
                (domain, result)
            })
            .buffer_unordered(10); // limit the number of concurrent requests

        results.extend(fetches.collect::<Vec<_>>().await);

        // Create a new workbook and write the data
        let mut new_workbook = Workbook::new();
        let worksheet = new_workbook.add_worksheet();

        // Copy the original data
        for (row_idx, row) in range.rows().enumerate() {
            for (col_idx, cell) in row.iter().enumerate() {
                worksheet.write_string(row_idx as u32, col_idx as u16, cell.to_string())?;
            }
        }

        // Add the new column headers
        let start_col = headers.len();
        worksheet.write_string(0, start_col as u16, "Unit")?;
        worksheet.write_string(0, (start_col + 1) as u16, "Type")?;
        worksheet.write_string(0, (start_col + 2) as u16, "ICP Code")?;
        worksheet.write_string(0, (start_col + 3) as u16, "Domain")?;
        worksheet.write_string(0, (start_col + 4) as u16, "Pass Time")?;

        // Write the lookup results
        for (domain, positions) in domain_positions {
            if let Some(Some(result)) = results.get(&domain) {
                for (row_idx, _) in positions {
                    worksheet.write_string(row_idx, start_col as u16, &result.unit)?;
                    worksheet.write_string(row_idx, (start_col + 1) as u16, &result.type_)?;
                    worksheet.write_string(row_idx, (start_col + 2) as u16, &result.icp_code)?;
                    worksheet.write_string(row_idx, (start_col + 3) as u16, &result.domain)?;
                    worksheet.write_string(row_idx, (start_col + 4) as u16, &result.pass_time)?;
                }
            }
            println!("Processed domain: {}", domain);
        }

        // Save the result
        let result_path = path.with_file_name(format!("{}_result.xlsx",
            path.file_stem().unwrap().to_string_lossy()));
        new_workbook.save(result_path)?;
    }

    Ok(())
}
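// Parse the first row of the beianx.cn result table into a DomainResult.
// Rows with fewer than 8 whitespace-separated fields are treated as "no result".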
fn parse_icp_data(html: &str) -> Option<DomainResult> {
    let document = kuchiki::parse_html().one(html);
    let css_selector = "table tbody tr";

    if let Ok(mut selections) = document.select(css_selector) {
        if let Some(tr) = selections.next() {
            let data_in_row: Vec<_> = tr.text_contents()
                .split_whitespace()
                .map(|s| s.to_owned())
                .collect();

            if data_in_row.len() >= 8 {
                return Some(DomainResult {
                    unit: data_in_row[1].clone(),
                    type_: data_in_row[2].clone(),
                    icp_code: data_in_row[3].clone(),
                    domain: data_in_row[data_in_row.len() - 4].clone(),
                    pass_time: data_in_row[data_in_row.len() - 3].clone(),
                });
            }
        }
    }
    None
}

fn get_uuid() -> String {
    let uuid_template = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
    let mut uuid = String::new();
    for c in uuid_template.chars() {
        let v = match c {
            'x' => rand::random::<u32>() % 16,
            'y' => ((rand::random::<u32>() % 16) & 0x3) | 0x8,
            '4' => 4,
            '-' => u32::MAX, // sentinel value that marks the hyphen
            _ => {
                panic!("Unexpected character in UUID template: {}", c);
            }
        };
        if v == u32::MAX {
            uuid.push('-');
        } else {
            uuid.push_str(&format!("{:x}", v));
        }
    }
    uuid
}

fn get_root_domain(input: &str) -> Option<String> {
    let domain_str = if input.starts_with("http://") || input.starts_with("https://") {
        input.to_string()
    } else {
        format!("http://{}", input)
    };

    let parsed_url = Url::parse(&domain_str).ok()?;
    let host = parsed_url.host_str()?;

    let list = List;
    let suffix = list.suffix(host.as_bytes())?;

    let suffix_byte = suffix.as_bytes();
    let suffix_str = from_utf8(suffix_byte).unwrap();

    let domain = host.trim_end_matches(suffix_str).trim_end_matches('.');
    let parts: Vec<&str> = domain.rsplitn(2, '.').collect();

    parts.first().map(|last| format!("{}.{}", last, suffix_str))
}

fn contains_chinese(s: &str) -> bool {
    for ch in s.chars() {
        if !ch.is_ascii() {
            return true;
        }
    }
    false
}

fn build_url_xpath(input: &str) -> String {
    let index = if !contains_chinese(input) {
        get_root_domain(input).expect("Failed to get root domain")
    } else {
        input.to_string()
    };
    format!(
        "https://www.beianx.cn/search/{}",
        index
    )
}

async fn fetch_and_handle_data_xpath(url: &str) -> Result<(), Box<dyn Error>> {
    let data = fetch_data(url).await?;
    handle_data_xpath(&data);
    Ok(())
}

async fn fetch_data(url: &str) -> Result<String, Box<dyn Error>> {
    let uuid = get_uuid();
    let cookie_str = format!("machine_str={}", uuid);
    let client = Client::new();
    let response = client.get(url).header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36").header("Cookie", cookie_str).send().await?;
    let body = response.text().await?;
    Ok(body)
}

fn create_file_if_not_exists(file_path: &str) {
    let path = Path::new(file_path);
    if !path.exists() {
        File::create(path).expect("Failed to create file");
    }
}
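// Format one parsed result row and write it to stdout and to result.txt.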
fn process_domain_result(data_in_row: &[String], file: &mut File) {
    let result = DomainResult {
        unit: data_in_row[1].clone(),
        type_: data_in_row[2].clone(),
        icp_code: data_in_row[3].clone(),
        domain: data_in_row[data_in_row.len() - 4].clone(),
        pass_time: data_in_row[data_in_row.len() - 3].clone(),
    };

    let output = format!("[Unit]: {} [Type]: {} [icpCode]: {} [Domain]: {} [passTime]: {}", &result.unit, &result.type_, &result.icp_code, &result.domain, &result.pass_time);

    println!("{}", output);

    if let Err(e) = writeln!(file, "{}", output) {
        eprintln!("Couldn't write to file: {}", e);
    }
}

fn handle_data_xpath(data: &str) {
    let document = kuchiki::parse_html().one(data);
    let css_selector = "table tbody tr";

    let selections: Vec<_> = document.select(css_selector).unwrap().collect();

    let mut file = OpenOptions::new()
        .append(true)
        .open("result.txt")
        .unwrap();

    for tr in selections {
        let data_in_row: Vec<_> = tr.text_contents().split_whitespace().map(|s| s.to_owned()).collect();
        // println!("{:?}", data_in_row);
        if data_in_row.len() >= 8 {
            process_domain_result(&data_in_row, &mut file);
        } else {
            eprintln!("[Error] ICP filing query failed! Skipping!");
        }
    }
}

async fn process_file(filename: &str) -> Result<(), Box<dyn Error>> {
    let path = Path::new(filename);
    let file = File::open(path)?;
    let reader = io::BufReader::new(file);

    let urls: Vec<String> = reader.lines()
        .map_while(Result::ok)
        .map(|line| build_url_xpath(&line))
        .collect();

    let unique_urls: HashSet<String> = urls.into_iter().collect();
    let urls_set: Vec<String> = unique_urls.into_iter().collect();

    let fetches = urls_set.iter()
        .map(|url| fetch_and_handle_data_xpath(url));

    stream::iter(fetches)
        .buffer_unordered(50)
        .for_each(|result| async {
            if let Err(e) = result {
                // print the error
                println!("Error: {}", e);
            }
        })
        .await;

    Ok(())
}
--------------------------------------------------------------------------------