├── 0.icp_query.py ├── 1.icp_query_result_processor.py ├── 2.quchong.py ├── 2.url_checker.py ├── config ├── config.yaml └── domain.txt ├── icpApi ├── icpApi_socks_v1.py ├── icpApi_socks_v2.1.py ├── icpApi_socks_v2.2.py ├── ip_analyzer.py ├── logger.py ├── readme.md ├── ymicp_socks_v1.py ├── ymicp_socks_v2.1.py └── ymicp_socks_v2.2.py ├── lib ├── Requests_func.py ├── hander_random.py ├── log_functions.py └── logo.py ├── log └── readme.md ├── readme.md └── update.md /0.icp_query.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author : S1g0day 3 | Creat time : 2024/3/15 17:27 4 | Modification time: 2025/2/8 15:00 5 | Introduce : 通过接口查询域名或公司备案 6 | ''' 7 | import yaml 8 | from datetime import datetime 9 | from argparse import ArgumentParser 10 | from lib.logo import logo 11 | from lib.Requests_func import make_request 12 | from lib.log_functions import api_logger 13 | 14 | def Page_traversal_temporary(id, total, params, query_url, req_list): 15 | # 一页显示所有数据 16 | domainId_list = [] 17 | params['pageSize'] = total 18 | req_page_unitName = make_request(query_url, params, req_list[0]['unitName']) 19 | 20 | if req_page_unitName: 21 | unitName_list = req_page_unitName['params']['list'] 22 | for item in unitName_list: 23 | if item.get('domain') and item.get('unitName'): 24 | success_output = f"id:{id}\tdomainId:{item['domainId']}\tunitName:{item['unitName']}\tnatureName:{item['natureName']}\tdomain:{item['domain']}\tmainLicence:{item['mainLicence']}\tserviceLicence:{item['serviceLicence']}\tupdateRecordTime:{item['updateRecordTime']}" 25 | 26 | if item['domainId'] and item['domainId'] not in domainId_list: 27 | 28 | domainId_list.append(item['domainId']) 29 | api_logger.success(success_output) 30 | else: 31 | api_logger.warning("unitName or domain is None...") 32 | else: 33 | api_logger.warning(f"No unitName_list found for {req_list}. 
def query_from(query_url, search_data, id):
    """Query ICP registration records for a single domain or company name.

    First looks up `search_data`, then re-queries using the unit name of
    the first hit so that every record registered to that unit can be
    enumerated by Page_traversal_temporary.

    :param query_url: base URL of the query API
    :param search_data: domain or company name to search for
    :param id: sequence number, used only in log output
    :return: total record count on success, otherwise None
    """
    params = {
        'search': search_data,
        'pageNum': 1,
        'pageSize': 10,
    }

    req = make_request(query_url, params, search_data)

    # make_request may return None or a malformed payload; check defensively.
    if req and isinstance(req, dict) and 'params' in req:
        try:
            req_list = req['params']['list']
            if req_list and isinstance(req_list, list) and len(req_list) > 0:
                # Re-query by unit name so all records of the company
                # (not just the searched domain) are returned.
                params['search'] = req_list[0]['unitName']
                req_unitName = make_request(query_url, params, params['search'])
                if req_unitName and isinstance(req_unitName, dict) and 'params' in req_unitName:
                    total = req_unitName['params']['total']
                    domain_list = Page_traversal_temporary(id, total, params, query_url, req_list)

                    # Record a discrepancy between the advertised total and
                    # what was actually extracted, then report the total.
                    if domain_list and isinstance(domain_list, list) and total != len(domain_list):
                        error_icp_output = f"{search_data} 应提取出 {total} 条信息,实际为 {len(domain_list)} 条"
                        api_logger.write_log_error('log/error_icp.log', error_icp_output)
                    return total

        except Exception as e:
            error_occurred_output = f"{search_data} an error occurred: {str(e)}"
            api_logger.write_log_error('log/error_occurred.log', error_occurred_output, search_data)
            # Bug fix: previously execution fell through to the
            # "no req_list" logging below, recording a second, misleading
            # error for the same failure.
            return None

    # Reached only when the response had no usable result list.
    no_req_list_output = f"Does not have req_list {search_data}"
    api_logger.write_log_error('log/no_req_list.log', no_req_list_output, search_data)
    return None
start_index 重置为 {total_domains}") 85 | 86 | for index in range(start_index-1, total_domains): 87 | data = data_list[index].strip() 88 | 89 | if data: 90 | current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S:%f') 91 | Processing_Domain_output = f'Time: {current_time}, Schedule: {index+1}/{total_domains}, Domain: {data}' 92 | print("\n") 93 | api_logger.warning(f"Processing {Processing_Domain_output}") 94 | print("\n") 95 | total = query_from(query_url, data, index+1) 96 | if total is not None: 97 | Processing_Domain_output += f', Total: {total}' 98 | api_logger.success(Processing_Domain_output, 'processing_Domain.log') 99 | 100 | if __name__ == '__main__': 101 | logo() 102 | parser = ArgumentParser() 103 | parser.add_argument("-d", dest="query_url", help="请输入测试平台地址") 104 | parser.add_argument("-u", dest="domain", help="请输入目标") 105 | parser.add_argument("-uf", dest="domains_file", help="请输入目标文件") 106 | parser.add_argument("-s", dest="start_index", type=int, default="1", help="请输入起始位置,第一个数据的下标为0") 107 | args = parser.parse_args() 108 | 109 | # Load YAML configuration 110 | try: 111 | with open("config/config.yaml", "r", encoding="utf-8") as f: 112 | push_config = yaml.safe_load(f) 113 | except Exception as e: 114 | api_logger.error(f"Failed to load config file: {str(e)}") 115 | exit(1) 116 | 117 | # 确定查询URL和目标文件 118 | query_url = args.query_url or push_config.get('query_url') 119 | if not query_url: 120 | api_logger.error("No query URL provided. Please specify using -d or in config.yaml") 121 | exit(1) 122 | 123 | # 处理单个域名查询 124 | if args.domain: 125 | api_logger.info(f"Starting single domain query for: {args.domain}") 126 | query_from(query_url, args.domain, args.start_index) 127 | 128 | # 处理文件批量查询 129 | else: 130 | domains_file = args.domains_file or push_config.get('domains_file') 131 | if not domains_file: 132 | api_logger.error("No domains file specified. 
Please provide using -uf or in config.yaml") 133 | exit(1) 134 | 135 | api_logger.info(f"Query URL: {query_url}") 136 | api_logger.info(f"Domains file: {domains_file}") 137 | 138 | try: 139 | query_from_file(query_url, domains_file, args.start_index) 140 | except FileNotFoundError: 141 | api_logger.error(f"Domains file not found: {domains_file}") 142 | exit(1) 143 | except Exception as e: 144 | api_logger.error(f"Error during file processing: {str(e)}") 145 | exit(1) -------------------------------------------------------------------------------- /1.icp_query_result_processor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author : S1g0day 3 | Creat time : 2024/3/15 17:27 4 | Modification time: 2024/8/8 14:00 5 | Introduce : 处理0.icq_query.py 的结果 success.log,导出为xlsx 6 | ''' 7 | 8 | import sys 9 | import openpyxl 10 | 11 | # 创建一个新的Excel工作簿 12 | workbook = openpyxl.Workbook() 13 | # 获取默认的活动工作表 14 | worksheet = workbook.active 15 | 16 | with open(sys.argv[1], 'r', encoding='utf-8') as fileread: 17 | fileread = fileread.readlines() 18 | for i in fileread: 19 | if i != "\n": 20 | task_parts = i.strip().split('\t') 21 | # 创建空字典 22 | task_dict = {} 23 | 24 | # 遍历任务信息部分 25 | for part in task_parts: 26 | # 分割键值对 27 | key, value = part.split(':', 1) 28 | # 添加到字典 29 | task_dict[key] = value 30 | 31 | # 输出到Excel行 32 | worksheet.append([task_dict['domainId'], task_dict['unitName'], task_dict['natureName'], task_dict['domain'], task_dict['mainLicence'], task_dict['serviceLicence'], task_dict['updateRecordTime']]) 33 | 34 | # 保存Excel文件 35 | workbook.save('output.xlsx') 36 | -------------------------------------------------------------------------------- /2.quchong.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import sys 4 | import time 5 | import idna 6 | import socket 7 | 8 | # 中文域名转ASCII 9 | def convert_to_ascii(domain): 10 | try: 11 | ascii_domain = 
# IP排序
def IP_sort(ip_addresses):
    """Deduplicate IPv4 address strings and sort them numerically.

    socket.inet_aton packs each dotted-quad into 4 big-endian bytes, so
    ordering the packed forms is equivalent to ordering by integer value
    (plain string sorting would put "10.0.0.2" after "9.0.0.1").
    inet_ntoa converts the packed bytes back to dotted-quad text.
    """
    unique_addresses = set(ip_addresses)
    packed = sorted(socket.inet_aton(addr) for addr in unique_addresses)
    return [socket.inet_ntoa(raw) for raw in packed]
def save(output, data):
    """Write an iterable of strings to a file, one entry per line.

    Blank/falsy entries are skipped, and duplicates are written only once.
    Bug fix: the original tested `i not in savedata` but never added
    anything to `savedata`, so deduplication silently never happened.

    :param output: path of the file to write (overwritten)
    :param data: iterable of strings to save
    """
    seen = set()  # O(1) membership test instead of the old O(n) list scan
    with open(output, 'w', encoding='utf-8') as fs:
        for item in data:
            if item and item not in seen:
                seen.add(item)
                fs.write(item + '\n')
def export_available_url(url):
    """
    导出可用URL到文件 (append an available URL to the log, deduplicated)
    :param url: 要导出的URL
    """
    filename = 'log/available_urls.log'
    try:
        # Skip if the URL has already been recorded
        if os.path.exists(filename):
            with open(filename, 'r', encoding='utf-8') as f:
                if url in f.read().splitlines():
                    api_logger.info(f"URL already exists in {filename}: {url}")
                    return

        # Make sure the log directory exists before appending
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        # Append the new URL (utf-8 for consistency with the rest of the file)
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(f"{url}\n")
        api_logger.success(f"Exported URL to {filename}: {url}")
    except Exception as e:
        api_logger.error(f"Failed to export URL {url}: {str(e)}")
| :return: 如果URL可用返回True,否则返回False 40 | """ 41 | max_retries = 3 42 | retries = 0 43 | 44 | try: 45 | # 验证URL格式 46 | parsed_url = urlparse(url) 47 | if not all([parsed_url.scheme, parsed_url.netloc]): 48 | api_logger.error(f"Invalid URL format: {url}") 49 | return False 50 | 51 | while retries < max_retries: 52 | try: 53 | # 构造测试请求URL 54 | test_url = f"{url.rstrip('/')}/query/web?search=baidu.com" 55 | 56 | # 发送请求 57 | response = req_get(test_url, params=None) 58 | 59 | # 检查响应 60 | if response and response.status_code == 200: 61 | try: 62 | json_data = response.json() 63 | if isinstance(json_data, dict) and 'params' in json_data: 64 | api_logger.success(f"API is working properly: {url}") 65 | export_available_url(url) 66 | return True 67 | except ValueError: 68 | api_logger.warning(f"Invalid JSON response from {url}") 69 | else: 70 | api_logger.warning(f"API returned status code {response.status_code if response else 'No response'}: {url}") 71 | 72 | retries += 1 73 | time.sleep(random.randint(1, 3)) 74 | 75 | except Exception as e: 76 | api_logger.error(f"Failed to connect to {url}: {str(e)}") 77 | retries += 1 78 | time.sleep(random.randint(1, 3)) 79 | 80 | api_logger.error(f"Max retries ({max_retries}) exceeded for URL: {url}") 81 | return False 82 | 83 | except Exception as e: 84 | api_logger.error(f"Unexpected error checking URL {url}: {str(e)}") 85 | return False 86 | 87 | def validate_query_url(query_url): 88 | """ 89 | 验证query_url的可用性 90 | :param query_url: 要验证的URL 91 | :return: 如果URL可用返回True,否则返回False 92 | """ 93 | if not query_url: 94 | api_logger.error("No query URL provided") 95 | return False 96 | return check_url_availability(query_url) 97 | 98 | if __name__ == "__main__": 99 | with open('config/config.yaml', 'r', encoding='utf-8') as file: 100 | config = yaml.safe_load(file) 101 | for url in config['query_url']: 102 | if not validate_query_url(url): 103 | api_logger.error(f"Invalid query URL: {url}") 104 | 
# 异步json序列化
def jsondump(func):
    """Decorator: serialize an async handler's result to a JSON string.

    Results that json cannot serialize are returned unchanged.
    Bug fix: was a bare `except:`, which also swallowed SystemExit and
    KeyboardInterrupt; only serialization failures should fall back.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        result = await func(*args, **kwargs)
        try:
            return json.dumps(result, ensure_ascii=False)
        except (TypeError, ValueError):
            return result
    return wrapper
@jsondump
@routes.view(r'/query/{path}')
async def geturl(request):
    """Dispatch /query/{path} to the matching beian query method.

    GET passes parameters via the query string, POST via a JSON body.
    Bug fixes: `pageNum if str(pageNum) else ''` never defaulted, because
    str(None) == 'None' is truthy — None leaked through to the query
    methods; and `appname` was unbound for verbs other than GET/POST
    (routes.view accepts them all), raising UnboundLocalError.
    """
    path = request.match_info['path']

    if path not in appth and path not in bappth:
        return wj({"code":102,"msg":"不是支持的查询类型"})

    # Initialize so unsupported verbs fall through to the parameter-error
    # response instead of crashing.
    appname = pageNum = pageSize = None
    if request.method == "GET":
        appname = request.query.get("search")
        pageNum = request.query.get("pageNum")
        pageSize = request.query.get("pageSize")
    elif request.method == "POST":
        data = await request.json()
        appname = data.get("search")
        pageNum = data.get("pageNum")
        pageSize = data.get("pageSize")

    if not appname:
        return wj({"code":101,"msg":"参数错误,请指定search参数"})

    if path in appth:
        # Paged query types take optional pageNum/pageSize ('' = default)
        return wj(await appth.get(path)(
            appname,
            pageNum if pageNum is not None else '',
            pageSize if pageSize is not None else ''
        ))
    else:
        # 违法违规 query types do not support paging
        return wj(await bappth.get(path)(appname))
| 117 | ''') 118 | web.run_app( 119 | app, 120 | host = "0.0.0.0", 121 | port = 16183 122 | ) -------------------------------------------------------------------------------- /icpApi/icpApi_socks_v2.1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2025/2/8 14:30 5 | Remark : 指定socks文件,无认证,配置日志系统 6 | ''' 7 | 8 | from functools import wraps 9 | from aiohttp import web 10 | import json 11 | from ymicp_socks import beian 12 | from logger import api_logger 13 | from ip_analyzer import ip_analyzer 14 | 15 | # 跨域参数 16 | corscode = { 17 | 'Access-Control-Allow-Origin': '*', 18 | 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS', # 需要限制请求就在这里增删 19 | 'Access-Control-Allow-Headers': '*', 20 | 'Server':'Welcome to api.wer.plus', 21 | } 22 | 23 | # 实例化路由 24 | routes = web.RouteTableDef() 25 | 26 | # 异步json序列化 27 | def jsondump(func): 28 | @wraps(func) 29 | async def wrapper(*args,**kwargs): 30 | result = await func(*args,**kwargs) 31 | try: 32 | return json.dumps(result ,ensure_ascii=False) 33 | except: 34 | return result 35 | return wrapper 36 | 37 | # 封装一下web.json_resp 38 | wj = lambda *args,**kwargs: web.json_response(*args,**kwargs) 39 | 40 | # 处理OPTIONS和跨域的中间件 41 | @jsondump 42 | async def options_middleware(app, handler): 43 | async def middleware(request): 44 | # 记录IP访问 45 | ip_analyzer.record_ip(request.remote) 46 | 47 | # 处理 OPTIONS 请求 48 | if request.method == 'OPTIONS': 49 | api_logger.info(f"OPTIONS请求: {request.remote} - {request.path}") 50 | return wj(headers=corscode) 51 | 52 | try: 53 | response = await handler(request) 54 | response.headers.update(corscode) 55 | if response.status == 200: 56 | api_logger.log_request(request, 200, "请求成功") 57 | return response 58 | except web.HTTPException as ex: 59 | api_logger.error(f"请求异常: {request.remote} - {request.path} - {ex.status} - {ex.reason}") 60 | if ex.status == 404: 61 | return wj({'code': 
ex.status,"msg":"查询请访问http://0.0.0.0:16181/query/{name}"},headers=corscode) 62 | return wj({'code': ex.status,"msg":ex.reason},headers=corscode) 63 | 64 | return response 65 | return middleware 66 | 67 | # 添加新的路由处理IP统计 68 | @routes.get('/ip_stats') 69 | @jsondump 70 | async def get_ip_stats(request): 71 | """获取IP访问统计信息""" 72 | try: 73 | # 获取查询参数 74 | top_n = request.query.get('top', None) 75 | if top_n: 76 | try: 77 | top_n = int(top_n) 78 | except ValueError: 79 | return wj({'code': 400, 'msg': 'Invalid top parameter'}, status=400, headers=corscode) 80 | 81 | # 获取统计信息 82 | stats = ip_analyzer.get_formatted_stats(top_n=top_n) 83 | return wj({ 84 | 'code': 200, 85 | 'msg': 'success', 86 | 'data': stats 87 | }, headers=corscode) 88 | except Exception as e: 89 | api_logger.error(f"获取IP统计失败: {str(e)}") 90 | return wj({'code': 500, 'msg': 'Internal server error'}, status=500, headers=corscode) 91 | 92 | @routes.get('/ip_stats/clear') 93 | @jsondump 94 | async def clear_ip_stats(request): 95 | """清除IP统计数据""" 96 | try: 97 | if ip_analyzer.clear_stats(): 98 | return wj({ 99 | 'code': 200, 100 | 'msg': 'IP statistics cleared successfully' 101 | }, headers=corscode) 102 | else: 103 | return wj({ 104 | 'code': 500, 105 | 'msg': 'Failed to clear IP statistics' 106 | }, status=500, headers=corscode) 107 | except Exception as e: 108 | api_logger.error(f"清除IP统计失败: {str(e)}") 109 | return wj({'code': 500, 'msg': 'Internal server error'}, status=500, headers=corscode) 110 | 111 | @jsondump 112 | @routes.view(r'/query/{path}') 113 | async def geturl(request): 114 | path = request.match_info['path'] 115 | 116 | if path not in appth and path not in bappth: 117 | return wj({"code":102,"msg":"不是支持的查询类型"}) 118 | 119 | if request.method == "GET": 120 | appname = request.query.get("search") 121 | pageNum = request.query.get("pageNum") 122 | pageSize = request.query.get("pageSize") 123 | if request.method == "POST": 124 | data = await request.json() 125 | appname = data.get("search") 126 | 
pageNum = data.get("pageNum") 127 | pageSize = data.get("pageSize") 128 | 129 | if not appname: 130 | return wj({"code":101,"msg":"参数错误,请指定search参数"}) 131 | 132 | if path in appth: 133 | return wj(await appth.get(path)( 134 | appname, 135 | pageNum if str(pageNum) else '', 136 | pageSize if str(pageSize) else '' 137 | )) 138 | else: 139 | return wj(await bappth.get(path)(appname)) 140 | 141 | if __name__ == '__main__': 142 | 143 | myicp = beian() 144 | appth = { 145 | "web": myicp.ymWeb, # 网站 146 | "app": myicp.ymApp, # APP 147 | "mapp": myicp.ymMiniApp, # 小程序 148 | "kapp": myicp.ymKuaiApp, # 快应用 149 | } 150 | 151 | # 违法违规应用不支持翻页 152 | bappth = { 153 | "bweb": myicp.bymWeb, # 违法违规网站 154 | "bapp": myicp.bymApp, # 违法违规APP 155 | "bmapp": myicp.bymMiniApp, # 违法违规小程序 156 | "bkapp": myicp.bymKuaiApp # 违法违规快应用 157 | } 158 | app = web.Application() 159 | app.add_routes(routes) 160 | 161 | app.middlewares.append(options_middleware) 162 | print(''' 163 | 164 | Welcome to the Yiming API : https://api.wer.plus 165 | Github : https://github.com/HG-ha 166 | Document : https://github.com/HG-ha/ICP_Query 167 | 168 | ''') 169 | web.run_app( 170 | app, 171 | host = "0.0.0.0", 172 | port = 16183 173 | ) -------------------------------------------------------------------------------- /icpApi/icpApi_socks_v2.2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2025/2/8 9:30 5 | Remark : 指定socks文件,有认证,配置日志系统 6 | ''' 7 | 8 | from functools import wraps 9 | from aiohttp import web 10 | import json 11 | from ymicp_socks import beian 12 | from logger import api_logger 13 | from ip_analyzer import ip_analyzer 14 | 15 | # 跨域参数 16 | corscode = { 17 | 'Access-Control-Allow-Origin': '*', 18 | 'Access-Control-Allow-Methods': 'GET, POST, OPTIONS', # 需要限制请求就在这里增删 19 | 'Access-Control-Allow-Headers': '*', 20 | 'Server':'Welcome to api.wer.plus', 21 | } 22 | 23 | # 实例化路由 24 | routes 
# 处理OPTIONS和跨域的中间件
# NOTE(review): json.dumps on this factory's return value always raises
# TypeError and falls back to the raw middleware, so @jsondump is
# effectively a no-op here — confirm before removing it.
@jsondump
async def options_middleware(app, handler):
    async def middleware(request):
        # 记录IP访问 (every request, including preflights)
        ip_analyzer.record_ip(request.remote)

        # Bug fix: answer CORS preflights BEFORE the auth check.
        # Browsers do not attach the Authorization header to OPTIONS
        # preflight requests, so checking auth first rejected every
        # cross-origin caller.
        if request.method == 'OPTIONS':
            api_logger.info(f"OPTIONS请求: {request.remote} - {request.path}")
            return wj(headers=corscode)

        # Bearer-token check. Responding 404 instead of 401 hides the
        # endpoint from unauthenticated probes — kept as-is for
        # compatibility with existing clients.
        auth_header = request.headers.get('Authorization')
        if not auth_header or auth_header != 'Bearer your-secret-token':
            api_logger.warning(f"未授权访问: {request.remote} - {request.path}")
            return wj({'code': 404, "msg": "Unauthorized"}, status=404, headers=corscode)

        try:
            response = await handler(request)
            response.headers.update(corscode)
            if response.status == 200:
                api_logger.log_request(request, 200, "请求成功")
                return response
        except web.HTTPException as ex:
            api_logger.error(f"请求异常: {request.remote} - {request.path} - {ex.status} - {ex.reason}")
            if ex.status == 404:
                return wj({'code': ex.status,"msg":"查询请访问http://0.0.0.0:16181/query/{name}"},headers=corscode)
            return wj({'code': ex.status,"msg":ex.reason},headers=corscode)

        # Non-200 responses pass through with CORS headers applied.
        return response
    return middleware
ip_analyzer.get_formatted_stats(top_n=top_n) 89 | return wj({ 90 | 'code': 200, 91 | 'msg': 'success', 92 | 'data': stats 93 | }, headers=corscode) 94 | except Exception as e: 95 | api_logger.error(f"获取IP统计失败: {str(e)}") 96 | return wj({'code': 500, 'msg': 'Internal server error'}, status=500, headers=corscode) 97 | 98 | @routes.get('/ip_stats/clear') 99 | @jsondump 100 | async def clear_ip_stats(request): 101 | """清除IP统计数据""" 102 | try: 103 | if ip_analyzer.clear_stats(): 104 | return wj({ 105 | 'code': 200, 106 | 'msg': 'IP statistics cleared successfully' 107 | }, headers=corscode) 108 | else: 109 | return wj({ 110 | 'code': 500, 111 | 'msg': 'Failed to clear IP statistics' 112 | }, status=500, headers=corscode) 113 | except Exception as e: 114 | api_logger.error(f"清除IP统计失败: {str(e)}") 115 | return wj({'code': 500, 'msg': 'Internal server error'}, status=500, headers=corscode) 116 | 117 | @jsondump 118 | @routes.view(r'/query/{path}') 119 | async def geturl(request): 120 | path = request.match_info['path'] 121 | 122 | if path not in appth and path not in bappth: 123 | return wj({"code":102,"msg":"不是支持的查询类型"}) 124 | 125 | if request.method == "GET": 126 | appname = request.query.get("search") 127 | pageNum = request.query.get("pageNum") 128 | pageSize = request.query.get("pageSize") 129 | if request.method == "POST": 130 | data = await request.json() 131 | appname = data.get("search") 132 | pageNum = data.get("pageNum") 133 | pageSize = data.get("pageSize") 134 | 135 | if not appname: 136 | return wj({"code":101,"msg":"参数错误,请指定search参数"}) 137 | 138 | if path in appth: 139 | return wj(await appth.get(path)( 140 | appname, 141 | pageNum if str(pageNum) else '', 142 | pageSize if str(pageSize) else '' 143 | )) 144 | else: 145 | return wj(await bappth.get(path)(appname)) 146 | 147 | if __name__ == '__main__': 148 | 149 | myicp = beian() 150 | appth = { 151 | "web": myicp.ymWeb, # 网站 152 | "app": myicp.ymApp, # APP 153 | "mapp": myicp.ymMiniApp, # 小程序 154 | "kapp": 
myicp.ymKuaiApp, # 快应用 155 | } 156 | 157 | # 违法违规应用不支持翻页 158 | bappth = { 159 | "bweb": myicp.bymWeb, # 违法违规网站 160 | "bapp": myicp.bymApp, # 违法违规APP 161 | "bmapp": myicp.bymMiniApp, # 违法违规小程序 162 | "bkapp": myicp.bymKuaiApp # 违法违规快应用 163 | } 164 | app = web.Application() 165 | app.add_routes(routes) 166 | 167 | app.middlewares.append(options_middleware) 168 | print(''' 169 | 170 | Welcome to the Yiming API : https://api.wer.plus 171 | Github : https://github.com/HG-ha 172 | Document : https://github.com/HG-ha/ICP_Query 173 | 174 | ''') 175 | web.run_app( 176 | app, 177 | host = "0.0.0.0", 178 | port = 16183 179 | ) -------------------------------------------------------------------------------- /icpApi/ip_analyzer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2025/2/8 9:30 5 | Remark : Powered by cursor AI 6 | ''' 7 | 8 | import os 9 | import json 10 | from datetime import datetime 11 | from collections import defaultdict 12 | 13 | 14 | 15 | 16 | class IPAnalyzer: 17 | def __init__(self, log_dir='logs'): 18 | self.log_dir = log_dir 19 | self.current_date = datetime.now().strftime('%Y-%m-%d') 20 | self.stats_file = os.path.join(log_dir, f'ip_stats_{self.current_date}.json') 21 | self.ip_counts = defaultdict(int) 22 | self._ensure_stats_file() 23 | self.load_stats() 24 | 25 | def _ensure_log_directory(self): 26 | """确保日志目录存在""" 27 | try: 28 | if not os.path.exists(self.log_dir): 29 | os.makedirs(self.log_dir, exist_ok=True) 30 | print(f"Created log directory: {self.log_dir}") 31 | return True 32 | except Exception as e: 33 | print(f"Error creating log directory: {str(e)}") 34 | return False 35 | 36 | def _ensure_stats_file(self): 37 | """确保统计文件存在且可写""" 38 | try: 39 | # 确保目录存在 40 | self._ensure_log_directory() 41 | 42 | # 检查文件是否存在 43 | if not os.path.exists(self.stats_file): 44 | # 创建新文件 45 | with open(self.stats_file, 'w', encoding='utf-8') as 
f: 46 | json.dump({}, f) 47 | print(f"Created stats file: {self.stats_file}") 48 | return True 49 | except Exception as e: 50 | print(f"Error handling stats file: {str(e)}") 51 | return False 52 | 53 | def _check_date(self): 54 | """检查是否需要更新统计文件(新的一天)""" 55 | current_date = datetime.now().strftime('%Y-%m-%d') 56 | if current_date != self.current_date: 57 | # 保存当前统计数据 58 | self.save_stats() 59 | 60 | # 更新日期和文件路径 61 | self.current_date = current_date 62 | self.stats_file = os.path.join(self.log_dir, f'ip_stats_{self.current_date}.json') 63 | 64 | # 重置统计数据 65 | self.ip_counts.clear() 66 | 67 | # 确保新的统计文件存在 68 | self._ensure_stats_file() 69 | return True 70 | return False 71 | 72 | def load_stats(self): 73 | """加载统计数据""" 74 | try: 75 | if os.path.exists(self.stats_file): 76 | with open(self.stats_file, 'r', encoding='utf-8') as f: 77 | data = json.load(f) 78 | self.ip_counts = defaultdict(int, data) 79 | except Exception as e: 80 | print(f"Error loading IP stats: {str(e)}") 81 | self.ip_counts = defaultdict(int) 82 | 83 | def save_stats(self): 84 | """保存统计数据""" 85 | try: 86 | self._ensure_stats_file() 87 | with open(self.stats_file, 'w', encoding='utf-8') as f: 88 | json.dump(dict(self.ip_counts), f, indent=2, ensure_ascii=False) 89 | except Exception as e: 90 | print(f"Error saving IP stats: {str(e)}") 91 | 92 | def record_ip(self, ip): 93 | """记录IP访问""" 94 | try: 95 | # 检查日期变更 96 | self._check_date() 97 | 98 | # 确保文件存在 99 | self._ensure_stats_file() 100 | 101 | # 更新计数 102 | self.ip_counts[ip] += 1 103 | 104 | # 保存统计 105 | self.save_stats() 106 | return True 107 | except Exception as e: 108 | print(f"Error recording IP {ip}: {str(e)}") 109 | return False 110 | 111 | def get_ip_stats(self, top_n=None): 112 | """获取IP统计信息""" 113 | try: 114 | # 检查文件和加载数据 115 | self._ensure_stats_file() 116 | self.load_stats() 117 | 118 | # 排序统计数据 119 | sorted_ips = sorted(self.ip_counts.items(), key=lambda x: x[1], reverse=True) 120 | if top_n: 121 | sorted_ips = sorted_ips[:top_n] 122 | 
return dict(sorted_ips) 123 | except Exception as e: 124 | print(f"Error getting IP stats: {str(e)}") 125 | return {} 126 | 127 | def get_formatted_stats(self, top_n=None): 128 | """获取格式化的统计信息,返回JSON格式""" 129 | try: 130 | stats = self.get_ip_stats(top_n) 131 | if not stats: 132 | return { 133 | "date": self.current_date, 134 | "total_ips": 0, 135 | "statistics": [] 136 | } 137 | 138 | # 计算总访问次数 139 | total_visits = sum(stats.values()) 140 | 141 | # 构建JSON格式的统计数据 142 | result = { 143 | "date": self.current_date, 144 | "total_ips": len(stats), 145 | "total_visits": total_visits, 146 | "statistics": [ 147 | { 148 | "ip": ip, 149 | "count": count, 150 | "percentage": f"{round((count / total_visits) * 100, 2)}%" 151 | } 152 | for ip, count in stats.items() 153 | ] 154 | } 155 | return result 156 | 157 | except Exception as e: 158 | print(f"Error formatting IP stats: {str(e)}") 159 | return { 160 | "date": self.current_date, 161 | "total_ips": 0, 162 | "statistics": [], 163 | "error": str(e) 164 | } 165 | 166 | def clear_stats(self): 167 | """清除统计数据""" 168 | try: 169 | self.ip_counts.clear() 170 | self.save_stats() 171 | return True 172 | except Exception as e: 173 | print(f"Error clearing IP stats: {str(e)}") 174 | return False 175 | 176 | # 创建全局实例 177 | ip_analyzer = IPAnalyzer() 178 | 179 | # 使用示例 180 | if __name__ == '__main__': 181 | # 测试代码 182 | test_ips = ['192.168.1.1', '192.168.1.2', '192.168.1.1', '192.168.1.3'] 183 | 184 | print("记录测试IP...") 185 | for ip in test_ips: 186 | ip_analyzer.record_ip(ip) 187 | 188 | print("\n所有IP统计:") 189 | print(ip_analyzer.get_formatted_stats()) 190 | 191 | print("\n访问最多的2个IP:") 192 | print(ip_analyzer.get_formatted_stats(top_n=2)) -------------------------------------------------------------------------------- /icpApi/logger.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2025/2/7 17:30 5 | Remark : Powered by 
import logging
import os
from datetime import datetime
from logging.handlers import RotatingFileHandler

class Logger:
    """Console + daily rolling file logger for the ICP API.

    A fresh RotatingFileHandler is attached for every record and detached
    afterwards, so the log file can be rotated or removed externally at any
    time without holding a stale file descriptor.
    """

    def __init__(self, log_dir='logs'):
        self.log_dir = log_dir
        self.current_date = datetime.now().strftime('%Y-%m-%d')
        self.log_file = os.path.join(log_dir, f'api_{self.current_date}.log')
        self.logger = None
        self.formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        self._setup_logger()

    def _ensure_log_directory(self):
        """Create the log directory if missing; True on success."""
        try:
            if not os.path.exists(self.log_dir):
                os.makedirs(self.log_dir, exist_ok=True)
                print(f"Created log directory: {self.log_dir}")
            return True
        except Exception as e:
            print(f"Error creating log directory: {str(e)}")
            return False

    def _ensure_log_file(self):
        """Create the log file if needed (append mode) and write a header if empty."""
        try:
            self._ensure_log_directory()
            # Open in append mode: creates the file without truncating it.
            with open(self.log_file, 'a', encoding='utf-8') as f:
                if os.path.getsize(self.log_file) == 0:
                    f.write(f"Log file created at {datetime.now()}\n")
            return True
        except Exception as e:
            print(f"Error handling log file: {str(e)}")
            return False

    def _setup_logger(self):
        """(Re)build the shared 'APILogger' with exactly one console handler."""
        if self.logger is None:
            self.logger = logging.getLogger('APILogger')
            self.logger.setLevel(logging.INFO)
        # Drop stale handlers; file handlers are attached per write instead.
        self.logger.handlers = []
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(self.formatter)
        self.logger.addHandler(console_handler)

    def _get_file_handler(self):
        """Build a RotatingFileHandler for the current file, or None on failure."""
        try:
            if not self._ensure_log_file():
                return None
            file_handler = RotatingFileHandler(
                self.log_file,
                maxBytes=10*1024*1024,  # 10MB per file
                backupCount=10,
                encoding='utf-8'
            )
            file_handler.setFormatter(self.formatter)
            return file_handler
        except Exception as e:
            print(f"Error creating file handler: {str(e)}")
            return None

    def _write_log(self, level, message):
        """Core write path: refresh date/file, attach a file handler, log, detach.

        level is one of 'INFO', 'ERROR', 'WARNING', 'DEBUG'.
        """
        file_handler = None
        try:
            # Day rollover: switch to a new api_<date>.log file.
            current_date = datetime.now().strftime('%Y-%m-%d')
            if current_date != self.current_date:
                self.current_date = current_date
                self.log_file = os.path.join(self.log_dir, f'api_{self.current_date}.log')

            self._setup_logger()

            file_handler = self._get_file_handler()
            if file_handler:
                self.logger.addHandler(file_handler)

            # Fix: dispatch through logging's numeric levels instead of an
            # if/elif chain that silently dropped unknown level names.
            self.logger.log(getattr(logging, level, logging.INFO), message)

        except Exception as e:
            print(f"Error writing log: {str(e)}")
            # Guarantee at least console output for the record.
            print(f"{datetime.now()} - {level} - {message}")
        finally:
            # Fix: detach/close in finally so a failed emit cannot leak the handler.
            if file_handler:
                self.logger.removeHandler(file_handler)
                file_handler.close()

    def info(self, message):
        """Log an INFO-level message."""
        self._write_log('INFO', message)

    def error(self, message):
        """Log an ERROR-level message."""
        self._write_log('ERROR', message)

    def warning(self, message):
        """Log a WARNING-level message."""
        self._write_log('WARNING', message)

    def debug(self, message):
        """Log a DEBUG-level message (filtered out: logger level is INFO)."""
        self._write_log('DEBUG', message)

    def log_request(self, request, status_code, message=''):
        """Log one aiohttp request (remote, method, path, status, UA) at INFO."""
        log_message = (
            f"{message}: {request.remote} | "
            f"Method: {request.method} | "
            f"Path: {request.path} | "
            f"Status: {status_code} | "
            f"User-Agent: {request.headers.get('User-Agent', 'Unknown')} | "
        )
        self._write_log('INFO', log_message)

#
创建全局logger实例 149 | api_logger = Logger() 150 | 151 | # 使用示例 152 | if __name__ == '__main__': 153 | # 测试日志记录 154 | api_logger.info("测试信息日志") 155 | api_logger.error("测试错误日志") 156 | api_logger.warning("测试警告日志") 157 | api_logger.debug("测试调试日志") -------------------------------------------------------------------------------- /icpApi/readme.md: -------------------------------------------------------------------------------- 1 | ## 1、添加socks代理 2 | 3 | **安装`aiohttp_socks`**: 首先,确保你已经安装了`aiohttp_socks`库。如果没有安装,可以通过以下命令安装: 4 | 5 | ``` 6 | pip install aiohttp_socks 7 | ``` 8 | 9 | **修改`_init_session`方法**: 在`beian`类的`_init_session`方法中,你需要设置代理。这里假设你使用的是Socks5代理,并且代理的地址是`127.0.0.1`,端口是`7890`。 修改后的`_init_session`方法如下: 10 | 11 | ```` 12 | cp ymicp.py ymicp_socks_v2.py 13 | vi ymicp_socks_v2.py 14 | ```` 15 | 16 | `ymicp_socks_v2.py` 17 | 18 | ``` 19 | from aiohttp_socks import SocksConnector 20 | 21 | async def _init_session(self): 22 | self.session = aiohttp.ClientSession(connector=SocksConnector.from_url('socks5://127.0.0.1:8443')) 23 | ``` 24 | 25 | **测试代码**: 确保你的代理设置正确,并且代理服务器正在运行。你可以通过运行你的代码来测试代理是否生效。 26 | 27 | ``` 28 | root@b5158010562d:/icpApi_20240221_yolo8# python3 ymicp_socks_v2.py 29 | Loading weights into state dict... 30 | model_data/best_epoch_weights.pth model loaded. 31 | Configurations: 32 | ---------------------------------------------------------------------- 33 | | keys | values| 34 | ---------------------------------------------------------------------- 35 | | model_path | model_data/best_epoch_weights.pth| 36 | | input_shape | [32, 32]| 37 | | letterbox_image | False| 38 | | cuda | False| 39 | ---------------------------------------------------------------------- 40 | ymicp_socks_v2.py:75: DeprecationWarning: SocksConnector is deprecated. Use ProxyConnector instead. 41 | self.session = aiohttp.ClientSession(connector=SocksConnector.from_url('socks5://127.0.0.1:8443')) 42 | Loading model_data/best.onnx for ONNX Runtime inference... 
43 | 44 | 0: 320x320 5 texts, 23.9ms 45 | Speed: 4.2ms preprocess, 23.9ms inference, 2.6ms postprocess per image at shape (1, 3, 320, 320) 46 | [W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware. 47 | 查询结果: 48 | {'code': 200, 'msg': '操作成功', 'params': {'endRow': 0, 'firstPage': 1, 'hasNextPage': False, 'hasPreviousPage': False, 'isFirstPage': True, 'isLastPage': True, 'lastPage': 1, 'list': [{'contentTypeName': '出版、出版、文化、文化、宗教、宗教、出版、文化、宗教、出版', 'domain': 'qq.com', 'domainId': 190000203203, 'leaderName': '', 'limitAccess': '否', 'mainId': 547280, 'mainLicence': '粤B2-20090059', 'natureName': '企业', 'serviceId': 4134047, 'serviceLicence': '粤B2-20090059-5', 'unitName': '深圳市腾讯计算机系统有限公司', 'updateRecordTime': '2022-09-06 15:51:52'}], 'navigatePages': 8, 'navigatepageNums': [1], 'nextPage': 1, 'pageNum': 1, 'pageSize': 10, 'pages': 1, 'prePage': 1, 'size': 1, 'startRow': 0, 'total': 1}, 'success': True} 49 | ``` 50 | 51 | **配置API代码** 52 | 53 | ``` 54 | cp icpApi.py icpApi-socks.py 55 | vi icpApi-socks.py 56 | ``` 57 | 58 | 修改导入包 59 | 60 | ``` 61 | from ymicp_socks_proxy import beian 62 | ``` 63 | 64 | 修改端口 65 | 66 | ``` 67 | web.run_app( 68 | app, 69 | host = "0.0.0.0", 70 | port = 16182 71 | ) 72 | ``` 73 | 74 | 我的程序运行在ymicp docker环境内,所有无所谓端口是多少,也不需要映射到宿主机 75 | 76 | ## 2、添加header认证 77 | 78 | ``` 79 | async def options_middleware(app, handler): 80 | async def middleware(request): 81 | # 验证header 82 | auth_header = request.headers.get('Authorization') 83 | if not auth_header or auth_header != 'Bearer your-secret-token': 84 | api_logger.warning(f"未授权访问: {request.remote} - {request.path}") 85 | return wj({'code': 404, "msg": "Unauthorized"}, status=404, headers=corscode) 86 | 87 | # 处理 OPTIONS 请求 88 | if request.method == 'OPTIONS': 89 | api_logger.info(f"OPTIONS请求: {request.remote} - {request.path}") 90 | return wj(headers=corscode) 91 | 92 | try: 93 | response = await handler(request) 94 | response.headers.update(corscode) 95 | if response.status == 
200: 96 | api_logger.log_request(request, 200, "请求成功") 97 | return response 98 | except web.HTTPException as ex: 99 | api_logger.error(f"请求异常: {request.remote} - {request.path} - {ex.status} - {ex.reason}") 100 | if ex.status == 404: 101 | return wj({'code': ex.status,"msg":"查询请访问http://0.0.0.0:16181/query/{name}"},headers=corscode) 102 | return wj({'code': ex.status,"msg":ex.reason},headers=corscode) 103 | 104 | return response 105 | return middleware 106 | ``` 107 | 108 | ## 3、添加日志系统 109 | 110 | `logger.py` 111 | 112 | `ip_analyzer.py` 113 | -------------------------------------------------------------------------------- /icpApi/ymicp_socks_v1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2024/8/12 14:58 5 | Remark : 设置快代理 6 | ''' 7 | 8 | import asyncio 9 | import aiohttp 10 | import cv2 11 | import time 12 | import hashlib 13 | import re 14 | import base64 15 | import numpy as np 16 | import ujson 17 | import random 18 | import datetime 19 | from Crypto.Cipher import AES 20 | from Crypto.Util.Padding import pad 21 | import string 22 | import os 23 | from detnate import detnate 24 | 25 | class beian(): 26 | def __init__(self): 27 | self.typj = { 28 | 0:ujson.dumps( 29 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":1} 30 | ), # 网站 31 | 1:ujson.dumps( 32 | {"pageNum":"","pageSize":"","unitName":'',"serviceType":6} 33 | ), # APP 34 | 2:ujson.dumps( 35 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":7} 36 | ), # 小程序 37 | 3:ujson.dumps( 38 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":8} 39 | ) # 快应用 40 | } 41 | self.btypj = { 42 | 0: ujson.dumps({"domainName":""}), 43 | 1: ujson.dumps({"serviceName":"","serviceType":6}), 44 | 2: ujson.dumps({"serviceName":"","serviceType":7}), 45 | 3: ujson.dumps({"serviceName":"","serviceType":8}) 46 | } 47 | self.cookie_headers = {'User-Agent': 'Mozilla/5.0 
(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'} 48 | self.home = 'https://beian.miit.gov.cn/' 49 | self.url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth' 50 | # self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage' 51 | self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImagePoint' 52 | self.checkImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage' 53 | # 正常查询 54 | self.queryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition' 55 | # 违法违规域名查询 56 | self.blackqueryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition' 57 | # 违法违规APP,小程序,快应用 58 | self.blackappAndMiniByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition_appAndMini' 59 | self.det = detnate() 60 | self.p_uuid = '' 61 | 62 | # 设置代理池 https://www.kuaidaili.com/ 63 | self.page_url = "https://dev.kdlapi.com/testproxy" 64 | # 隧道域名:端口号 65 | self.tunnel = "xxxxx" 66 | # 用户名和密码方式 67 | self.username = "xxxxx" 68 | self.password = "xxxxx" 69 | self.proxy_auth = aiohttp.BasicAuth(self.username, self.password) 70 | 71 | def generate_random_filename(self, length=8, extension=None): 72 | if not os.path.exists('temp'): 73 | os.makedirs('temp') 74 | letters = string.ascii_lowercase 75 | random_filename = "temp/" +''.join(random.choice(letters) for _ in range(length)) 76 | if extension != None: 77 | random_filename += '.' 
+ extension 78 | return random_filename 79 | 80 | async def _init_session(self): 81 | self.session = aiohttp.ClientSession() 82 | 83 | async def _close_session(self): 84 | if self.session is not None: 85 | await self.session.close() 86 | 87 | async def get_token(self): 88 | timeStamp = round(time.time()*1000) 89 | authSecret = 'testtest' + str(timeStamp) 90 | authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest() 91 | self.auth_data = {'authKey': authKey, 'timeStamp': timeStamp} 92 | self.cookie = await self.get_cookie() 93 | self.base_header = { 94 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32', 95 | 'Origin': 'https://beian.miit.gov.cn', 96 | 'Referer': 'https://beian.miit.gov.cn/', 97 | 'Cookie': f'__jsluid_s={self.cookie}', 98 | 'Accept': 'application/json, text/plain, */*' 99 | } 100 | try: 101 | async with self.session.post(self.url,data=self.auth_data,headers=self.base_header,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 102 | req = await req.text() 103 | t = ujson.loads(req) 104 | return t['params']['bussiness'] 105 | except Exception as e: 106 | return e 107 | 108 | async def get_cookie(self): 109 | async with self.session.get(self.home,headers=self.cookie_headers,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 110 | jsluid_s = re.compile('[0-9a-z]{32}').search(str(req.cookies))[0] 111 | return jsluid_s 112 | 113 | # 进行aes加密 114 | def get_pointJson(self,value,key): 115 | cipher = AES.new(key.encode(), AES.MODE_ECB) 116 | ciphertext = cipher.encrypt(pad(ujson.dumps(value).encode(), AES.block_size)) 117 | ciphertext_base64 = base64.b64encode(ciphertext) 118 | return ciphertext_base64.decode('utf-8') 119 | 120 | 121 | # 新增的UID加密生成算法 122 | def get_clientUid(self): 123 | characters = "0123456789abcdef" 124 | unique_id = ['0'] * 36 125 | 126 | for i in range(36): 127 | unique_id[i] = 
random.choice(characters) 128 | 129 | unique_id[14] = '4' 130 | unique_id[19] = characters[(3 & int(unique_id[19], 16)) | 8] 131 | unique_id[8] = unique_id[13] = unique_id[18] = unique_id[23] = "-" 132 | 133 | point_id = "point-" + ''.join(unique_id) 134 | 135 | return ujson.dumps({"clientUid":point_id}) 136 | 137 | async def check_img(self): 138 | self.token = await self.get_token() 139 | try: 140 | data = self.get_clientUid() 141 | clientUid = ujson.loads(data)["clientUid"] 142 | length = str(len(str(data).encode('utf-8'))) 143 | self.base_header.update({'Content-Length': length, 'Token': self.token}) 144 | self.base_header['Content-Type'] = 'application/json' 145 | 146 | async with self.session.post(self.getCheckImage,data=data,headers=self.base_header,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 147 | res = await req.json() 148 | self.p_uuid = res['params']['uuid'] 149 | big_image = res['params']['bigImage'] 150 | small_image = res['params']['smallImage'] 151 | self.secretKey = res['params']['secretKey'] 152 | self.wordCount = res['params']['wordCount'] 153 | selice_small = await self.small_selice(small_image,big_image) 154 | 155 | pointJson = self.get_pointJson(selice_small,self.secretKey) 156 | data = ujson.loads(ujson.dumps({"token":self.p_uuid, 157 | "secretKey":self.secretKey, 158 | "clientUid":clientUid, 159 | "pointJson":pointJson})) 160 | length = str(len(str(data).encode('utf-8'))) 161 | self.base_header.update({'Content-Length': length}) 162 | async with self.session.post(self.checkImage, 163 | json=data,headers=self.base_header,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 164 | res = await req.text() 165 | data = ujson.loads(res) 166 | if data["success"] == False: 167 | return 'verf error' 168 | else: 169 | return data["params"]["sign"] 170 | except Exception as e: 171 | return False 172 | 173 | async def small_selice(self,small_image,big_image): 174 | isma = 
cv2.imdecode(np.frombuffer(base64.b64decode(small_image),np.uint8), cv2.COLOR_GRAY2RGB) 175 | isma = cv2.cvtColor(isma, cv2.COLOR_BGRA2BGR) 176 | ibig = cv2.imdecode(np.frombuffer(base64.b64decode(big_image),np.uint8), cv2.COLOR_GRAY2RGB) 177 | data = self.det.check_target(ibig,isma) 178 | 179 | # 生成时间戳 180 | timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") 181 | 182 | # 创建保存图像的目录 183 | output_dir = "output" 184 | if not os.path.exists(output_dir): 185 | os.makedirs(output_dir) 186 | isma_dir = os.path.join(output_dir, "isma") 187 | if not os.path.exists(isma_dir): 188 | os.makedirs(isma_dir) 189 | ibig_dir = os.path.join(output_dir, "ibig") 190 | if not os.path.exists(ibig_dir): 191 | os.makedirs(ibig_dir) 192 | 193 | # 保存图像 194 | isma_filename = f"{timestamp}_small.jpg" 195 | isma_filepath = os.path.join(isma_dir, isma_filename) 196 | cv2.imwrite(isma_filepath, isma) # 保存带时间戳的小图像 197 | ibig_filename = f"{timestamp}_big.jpg" 198 | ibig_filepath = os.path.join(ibig_dir, ibig_filename) 199 | cv2.imwrite(ibig_filepath, ibig) # 保存带时间戳的大图像 200 | return data 201 | 202 | 203 | async def getbeian(self,name,sp,pageNum,pageSize,): 204 | info = ujson.loads(self.typj.get(sp)) 205 | info['pageNum'] = pageNum 206 | info['pageSize'] = pageSize 207 | info['unitName'] = name 208 | sign = await self.check_img() 209 | if sign == 'verf error': 210 | return {'code':201,'error':'验证码识别失败'} 211 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 212 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 213 | async with self.session.post(self.queryByCondition, data=ujson.dumps(info,ensure_ascii=False), headers=self.base_header,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 214 | res = await req.text() 215 | return ujson.loads(res) 216 | 217 | async def getblackbeian(self,name,sp): 218 | info = ujson.loads(self.btypj.get(sp)) 219 | if sp == 0: 220 | info['domainName'] = name 221 
| else: 222 | info['serviceName'] = name 223 | sign = await self.check_img() 224 | if sign == 'verf error': 225 | return {'code':201,'error':'验证码识别失败'} 226 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 227 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 228 | async with self.session.post( 229 | self.blackqueryByCondition if sp == 0 else self.blackappAndMiniByCondition, 230 | data=ujson.dumps(info,ensure_ascii=False), 231 | headers=self.base_header,proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as req: 232 | res = await req.text() 233 | return ujson.loads(res) 234 | 235 | async def fetch(self): 236 | async with self.session.get(self.page_url, proxy="http://"+self.tunnel, proxy_auth=self.proxy_auth) as response: 237 | return await response.text() 238 | 239 | async def autoget(self,name,sp,pageNum='',pageSize='',b=1): 240 | await self._init_session() 241 | html = await self.fetch() 242 | print(html) 243 | try: 244 | data = await self.getbeian(name,sp,pageNum,pageSize) if b == 1 else await self.getblackbeian(name,sp) 245 | except Exception as e: 246 | return {"code":122,"msg":"查询失败"} 247 | finally: 248 | await self._close_session() 249 | 250 | if data['code'] == 500: 251 | return {"code":122,"msg":"工信部服务器异常"} 252 | return data 253 | 254 | # APP备案查询 255 | async def ymApp(self,name,pageNum='',pageSize=''): 256 | return await self.autoget(name,1,pageNum,pageSize) 257 | 258 | # 网站备案查询 259 | async def ymWeb(self,name,pageNum='',pageSize=''): 260 | return await self.autoget(name,0,pageNum,pageSize) 261 | 262 | # 小程序备案查询 263 | async def ymMiniApp(self,name,pageNum='',pageSize=''): 264 | return await self.autoget(name,2,pageNum,pageSize) 265 | 266 | # 快应用备案查询 267 | async def ymKuaiApp(self,name,pageNum='',pageSize=''): 268 | return await self.autoget(name,3,pageNum,pageSize) 269 | 270 | # 违法违规APP查询 271 | async def bymApp(self,name): 272 | return await 
self.autoget(name,1,b=0) 273 | 274 | # 违法违规网站查询 275 | async def bymWeb(self,name): 276 | return await self.autoget(name,0,b=0) 277 | 278 | # 违法违规小程序查询 279 | async def bymMiniApp(self,name): 280 | return await self.autoget(name,2,b=0) 281 | 282 | # 违法违规快应用查询 283 | async def bymKuaiApp(self,name): 284 | return await self.autoget(name,3,b=0) 285 | 286 | if __name__ == '__main__': 287 | async def main(): 288 | a = beian() 289 | # 官方单页查询pageSize最大支持26 290 | # 页面索引pageNum从1开始,第一页可以不写 291 | data = await a.ymWeb("qq.com") 292 | print(f"查询结果:\n{data}") 293 | return data 294 | loop = asyncio.get_event_loop() 295 | loop.run_until_complete(main()) 296 | 297 | ''' 298 | 在其他代码模块中调用(异步) 299 | 300 | from ymicp import beian 301 | 302 | icp = beian() 303 | data = await icp.ymApp("微信") 304 | 305 | ''' -------------------------------------------------------------------------------- /icpApi/ymicp_socks_v2.1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2024/8/12 14:58 5 | Remark : 设置本地代理池 6 | ''' 7 | 8 | import asyncio 9 | import aiohttp 10 | import cv2 11 | import time 12 | import hashlib 13 | import re 14 | import base64 15 | import numpy as np 16 | import ujson 17 | import random 18 | from Crypto.Cipher import AES 19 | from Crypto.Util.Padding import pad 20 | import string 21 | import os 22 | from detnate import detnate 23 | from aiohttp_socks import SocksConnector 24 | 25 | class beian(): 26 | def __init__(self): 27 | self.typj = { 28 | 0:ujson.dumps( 29 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":1} 30 | ), # 网站 31 | 1:ujson.dumps( 32 | {"pageNum":"","pageSize":"","unitName":'',"serviceType":6} 33 | ), # APP 34 | 2:ujson.dumps( 35 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":7} 36 | ), # 小程序 37 | 3:ujson.dumps( 38 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":8} 39 | ) # 快应用 40 | } 41 | self.btypj = { 42 | 
0: ujson.dumps({"domainName":""}), 43 | 1: ujson.dumps({"serviceName":"","serviceType":6}), 44 | 2: ujson.dumps({"serviceName":"","serviceType":7}), 45 | 3: ujson.dumps({"serviceName":"","serviceType":8}) 46 | } 47 | self.cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'} 48 | self.home = 'https://beian.miit.gov.cn/' 49 | self.url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth' 50 | # self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage' 51 | self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImagePoint' 52 | self.checkImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage' 53 | # 正常查询 54 | self.queryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition' 55 | # 违法违规域名查询 56 | self.blackqueryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition' 57 | # 违法违规APP,小程序,快应用 58 | self.blackappAndMiniByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition_appAndMini' 59 | self.det = detnate() 60 | self.p_uuid = '' 61 | 62 | def generate_random_filename(self, length=8, extension=None): 63 | if not os.path.exists('temp'): 64 | os.makedirs('temp') 65 | letters = string.ascii_lowercase 66 | random_filename = "temp/" +''.join(random.choice(letters) for _ in range(length)) 67 | if extension != None: 68 | random_filename += '.' 
+ extension 69 | return random_filename 70 | 71 | async def _init_session(self): 72 | self.session = aiohttp.ClientSession(connector=SocksConnector.from_url('socks5://127.0.0.1:8443')) 73 | 74 | async def _close_session(self): 75 | if self.session is not None: 76 | await self.session.close() 77 | 78 | async def get_token(self): 79 | timeStamp = round(time.time()*1000) 80 | authSecret = 'testtest' + str(timeStamp) 81 | authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest() 82 | self.auth_data = {'authKey': authKey, 'timeStamp': timeStamp} 83 | self.cookie = await self.get_cookie() 84 | self.base_header = { 85 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32', 86 | 'Origin': 'https://beian.miit.gov.cn', 87 | 'Referer': 'https://beian.miit.gov.cn/', 88 | 'Cookie': f'__jsluid_s={self.cookie}', 89 | 'Accept': 'application/json, text/plain, */*' 90 | } 91 | try: 92 | async with self.session.post(self.url,data=self.auth_data,headers=self.base_header) as req: 93 | req = await req.text() 94 | t = ujson.loads(req) 95 | return t['params']['bussiness'] 96 | except Exception as e: 97 | return e 98 | 99 | async def get_cookie(self): 100 | async with self.session.get(self.home,headers=self.cookie_headers) as req: 101 | jsluid_s = re.compile('[0-9a-z]{32}').search(str(req.cookies))[0] 102 | return jsluid_s 103 | 104 | # 进行aes加密 105 | def get_pointJson(self,value,key): 106 | cipher = AES.new(key.encode(), AES.MODE_ECB) 107 | ciphertext = cipher.encrypt(pad(ujson.dumps(value).encode(), AES.block_size)) 108 | ciphertext_base64 = base64.b64encode(ciphertext) 109 | return ciphertext_base64.decode('utf-8') 110 | 111 | 112 | # 新增的UID加密生成算法 113 | def get_clientUid(self): 114 | characters = "0123456789abcdef" 115 | unique_id = ['0'] * 36 116 | 117 | for i in range(36): 118 | unique_id[i] = random.choice(characters) 119 | 120 | unique_id[14] = '4' 121 | unique_id[19] = 
characters[(3 & int(unique_id[19], 16)) | 8] 122 | unique_id[8] = unique_id[13] = unique_id[18] = unique_id[23] = "-" 123 | 124 | point_id = "point-" + ''.join(unique_id) 125 | 126 | return ujson.dumps({"clientUid":point_id}) 127 | 128 | async def check_img(self): 129 | self.token = await self.get_token() 130 | try: 131 | data = self.get_clientUid() 132 | clientUid = ujson.loads(data)["clientUid"] 133 | length = str(len(str(data).encode('utf-8'))) 134 | self.base_header.update({'Content-Length': length, 'Token': self.token}) 135 | self.base_header['Content-Type'] = 'application/json' 136 | 137 | async with self.session.post(self.getCheckImage,data=data,headers=self.base_header) as req: 138 | res = await req.json() 139 | self.p_uuid = res['params']['uuid'] 140 | big_image = res['params']['bigImage'] 141 | small_image = res['params']['smallImage'] 142 | self.secretKey = res['params']['secretKey'] 143 | self.wordCount = res['params']['wordCount'] 144 | selice_small = await self.small_selice(small_image,big_image) 145 | 146 | pointJson = self.get_pointJson(selice_small,self.secretKey) 147 | data = ujson.loads(ujson.dumps({"token":self.p_uuid, 148 | "secretKey":self.secretKey, 149 | "clientUid":clientUid, 150 | "pointJson":pointJson})) 151 | length = str(len(str(data).encode('utf-8'))) 152 | self.base_header.update({'Content-Length': length}) 153 | async with self.session.post(self.checkImage, 154 | json=data,headers=self.base_header) as req: 155 | res = await req.text() 156 | data = ujson.loads(res) 157 | if data["success"] == False: 158 | return 'verf error' 159 | else: 160 | return data["params"]["sign"] 161 | except Exception as e: 162 | return False 163 | 164 | async def small_selice(self,small_image,big_image): 165 | isma = cv2.imdecode(np.frombuffer(base64.b64decode(small_image),np.uint8), cv2.COLOR_GRAY2RGB) 166 | isma = cv2.cvtColor(isma, cv2.COLOR_BGRA2BGR) 167 | ibig = cv2.imdecode(np.frombuffer(base64.b64decode(big_image),np.uint8), cv2.COLOR_GRAY2RGB) 168 | 
data = self.det.check_target(ibig,isma) 169 | return data 170 | 171 | 172 | async def getbeian(self,name,sp,pageNum,pageSize,): 173 | info = ujson.loads(self.typj.get(sp)) 174 | info['pageNum'] = pageNum 175 | info['pageSize'] = pageSize 176 | info['unitName'] = name 177 | sign = await self.check_img() 178 | if sign == 'verf error': 179 | return {'code':201,'error':'验证码识别失败'} 180 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 181 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 182 | async with self.session.post(self.queryByCondition, data=ujson.dumps(info,ensure_ascii=False), headers=self.base_header) as req: 183 | res = await req.text() 184 | return ujson.loads(res) 185 | 186 | async def getblackbeian(self,name,sp): 187 | info = ujson.loads(self.btypj.get(sp)) 188 | if sp == 0: 189 | info['domainName'] = name 190 | else: 191 | info['serviceName'] = name 192 | sign = await self.check_img() 193 | if sign == 'verf error': 194 | return {'code':201,'error':'验证码识别失败'} 195 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 196 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 197 | async with self.session.post( 198 | self.blackqueryByCondition if sp == 0 else self.blackappAndMiniByCondition, 199 | data=ujson.dumps(info,ensure_ascii=False), 200 | headers=self.base_header) as req: 201 | res = await req.text() 202 | return ujson.loads(res) 203 | 204 | async def autoget(self,name,sp,pageNum='',pageSize='',b=1): 205 | await self._init_session() 206 | try: 207 | data = await self.getbeian(name,sp,pageNum,pageSize) if b == 1 else await self.getblackbeian(name,sp) 208 | except Exception as e: 209 | return {"code":122,"msg":"查询失败"} 210 | finally: 211 | await self._close_session() 212 | 213 | if data['code'] == 500: 214 | return {"code":122,"msg":"工信部服务器异常"} 215 | return data 216 | 217 | # APP备案查询 218 
| async def ymApp(self,name,pageNum='',pageSize=''): 219 | return await self.autoget(name,1,pageNum,pageSize) 220 | 221 | # 网站备案查询 222 | async def ymWeb(self,name,pageNum='',pageSize=''): 223 | return await self.autoget(name,0,pageNum,pageSize) 224 | 225 | # 小程序备案查询 226 | async def ymMiniApp(self,name,pageNum='',pageSize=''): 227 | return await self.autoget(name,2,pageNum,pageSize) 228 | 229 | # 快应用备案查询 230 | async def ymKuaiApp(self,name,pageNum='',pageSize=''): 231 | return await self.autoget(name,3,pageNum,pageSize) 232 | 233 | # 违法违规APP查询 234 | async def bymApp(self,name): 235 | return await self.autoget(name,1,b=0) 236 | 237 | # 违法违规网站查询 238 | async def bymWeb(self,name): 239 | return await self.autoget(name,0,b=0) 240 | 241 | # 违法违规小程序查询 242 | async def bymMiniApp(self,name): 243 | return await self.autoget(name,2,b=0) 244 | 245 | # 违法违规快应用查询 246 | async def bymKuaiApp(self,name): 247 | return await self.autoget(name,3,b=0) 248 | 249 | if __name__ == '__main__': 250 | async def main(): 251 | a = beian() 252 | # 官方单页查询pageSize最大支持26 253 | # 页面索引pageNum从1开始,第一页可以不写 254 | data = await a.ymWeb("qq.com") 255 | print(f"查询结果:\n{data}") 256 | return data 257 | loop = asyncio.get_event_loop() 258 | loop.run_until_complete(main()) 259 | 260 | ''' 261 | 在其他代码模块中调用(异步) 262 | 263 | from ymicp import beian 264 | 265 | icp = beian() 266 | data = await icp.ymApp("微信") 267 | 268 | ''' -------------------------------------------------------------------------------- /icpApi/ymicp_socks_v2.2.py: -------------------------------------------------------------------------------- 1 | ''' 2 | author : s1g0day 3 | Creat time : 2024/2/21 14:52 4 | modification time: 2024/8/12 14:58 5 | Remark : 设置本地代理池,保存验证码图片 6 | ''' 7 | 8 | import asyncio 9 | import aiohttp 10 | import cv2 11 | import time 12 | import hashlib 13 | import re 14 | import base64 15 | import numpy as np 16 | import ujson 17 | import random 18 | import datetime 19 | from Crypto.Cipher import AES 20 | from 
Crypto.Util.Padding import pad 21 | import string 22 | import os 23 | from detnate import detnate 24 | from aiohttp_socks import SocksConnector 25 | 26 | class beian(): 27 | def __init__(self): 28 | self.typj = { 29 | 0:ujson.dumps( 30 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":1} 31 | ), # 网站 32 | 1:ujson.dumps( 33 | {"pageNum":"","pageSize":"","unitName":'',"serviceType":6} 34 | ), # APP 35 | 2:ujson.dumps( 36 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":7} 37 | ), # 小程序 38 | 3:ujson.dumps( 39 | {'pageNum': '', 'pageSize': '', 'unitName': '',"serviceType":8} 40 | ) # 快应用 41 | } 42 | self.btypj = { 43 | 0: ujson.dumps({"domainName":""}), 44 | 1: ujson.dumps({"serviceName":"","serviceType":6}), 45 | 2: ujson.dumps({"serviceName":"","serviceType":7}), 46 | 3: ujson.dumps({"serviceName":"","serviceType":8}) 47 | } 48 | self.cookie_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32'} 49 | self.home = 'https://beian.miit.gov.cn/' 50 | self.url = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/auth' 51 | # self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImage' 52 | self.getCheckImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/getCheckImagePoint' 53 | self.checkImage = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/image/checkImage' 54 | # 正常查询 55 | self.queryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/icpAbbreviateInfo/queryByCondition' 56 | # 违法违规域名查询 57 | self.blackqueryByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition' 58 | # 违法违规APP,小程序,快应用 59 | self.blackappAndMiniByCondition = 'https://hlwicpfwc.miit.gov.cn/icpproject_query/api/blackListDomain/queryByCondition_appAndMini' 60 | self.det = detnate() 61 | self.p_uuid = '' 62 | 63 | def generate_random_filename(self, length=8, 
extension=None): 64 | if not os.path.exists('temp'): 65 | os.makedirs('temp') 66 | letters = string.ascii_lowercase 67 | random_filename = "temp/" +''.join(random.choice(letters) for _ in range(length)) 68 | if extension != None: 69 | random_filename += '.' + extension 70 | return random_filename 71 | 72 | async def _init_session(self): 73 | self.session = aiohttp.ClientSession(connector=SocksConnector.from_url('socks5://127.0.0.1:8443')) 74 | 75 | async def _close_session(self): 76 | if self.session is not None: 77 | await self.session.close() 78 | 79 | async def get_token(self): 80 | timeStamp = round(time.time()*1000) 81 | authSecret = 'testtest' + str(timeStamp) 82 | authKey = hashlib.md5(authSecret.encode(encoding='UTF-8')).hexdigest() 83 | self.auth_data = {'authKey': authKey, 'timeStamp': timeStamp} 84 | self.cookie = await self.get_cookie() 85 | self.base_header = { 86 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32', 87 | 'Origin': 'https://beian.miit.gov.cn', 88 | 'Referer': 'https://beian.miit.gov.cn/', 89 | 'Cookie': f'__jsluid_s={self.cookie}', 90 | 'Accept': 'application/json, text/plain, */*' 91 | } 92 | try: 93 | async with self.session.post(self.url,data=self.auth_data,headers=self.base_header) as req: 94 | req = await req.text() 95 | t = ujson.loads(req) 96 | return t['params']['bussiness'] 97 | except Exception as e: 98 | return e 99 | 100 | async def get_cookie(self): 101 | async with self.session.get(self.home,headers=self.cookie_headers) as req: 102 | jsluid_s = re.compile('[0-9a-z]{32}').search(str(req.cookies))[0] 103 | return jsluid_s 104 | 105 | # 进行aes加密 106 | def get_pointJson(self,value,key): 107 | cipher = AES.new(key.encode(), AES.MODE_ECB) 108 | ciphertext = cipher.encrypt(pad(ujson.dumps(value).encode(), AES.block_size)) 109 | ciphertext_base64 = base64.b64encode(ciphertext) 110 | return ciphertext_base64.decode('utf-8') 111 | 112 
| 113 | # 新增的UID加密生成算法 114 | def get_clientUid(self): 115 | characters = "0123456789abcdef" 116 | unique_id = ['0'] * 36 117 | 118 | for i in range(36): 119 | unique_id[i] = random.choice(characters) 120 | 121 | unique_id[14] = '4' 122 | unique_id[19] = characters[(3 & int(unique_id[19], 16)) | 8] 123 | unique_id[8] = unique_id[13] = unique_id[18] = unique_id[23] = "-" 124 | 125 | point_id = "point-" + ''.join(unique_id) 126 | 127 | return ujson.dumps({"clientUid":point_id}) 128 | 129 | async def check_img(self): 130 | self.token = await self.get_token() 131 | try: 132 | data = self.get_clientUid() 133 | clientUid = ujson.loads(data)["clientUid"] 134 | length = str(len(str(data).encode('utf-8'))) 135 | self.base_header.update({'Content-Length': length, 'Token': self.token}) 136 | self.base_header['Content-Type'] = 'application/json' 137 | 138 | async with self.session.post(self.getCheckImage,data=data,headers=self.base_header) as req: 139 | res = await req.json() 140 | self.p_uuid = res['params']['uuid'] 141 | big_image = res['params']['bigImage'] 142 | small_image = res['params']['smallImage'] 143 | self.secretKey = res['params']['secretKey'] 144 | self.wordCount = res['params']['wordCount'] 145 | selice_small = await self.small_selice(small_image,big_image) 146 | 147 | pointJson = self.get_pointJson(selice_small,self.secretKey) 148 | data = ujson.loads(ujson.dumps({"token":self.p_uuid, 149 | "secretKey":self.secretKey, 150 | "clientUid":clientUid, 151 | "pointJson":pointJson})) 152 | length = str(len(str(data).encode('utf-8'))) 153 | self.base_header.update({'Content-Length': length}) 154 | async with self.session.post(self.checkImage, 155 | json=data,headers=self.base_header) as req: 156 | res = await req.text() 157 | data = ujson.loads(res) 158 | if data["success"] == False: 159 | return 'verf error' 160 | else: 161 | return data["params"]["sign"] 162 | except Exception as e: 163 | return False 164 | 165 | async def small_selice(self,small_image,big_image): 166 
| isma = cv2.imdecode(np.frombuffer(base64.b64decode(small_image),np.uint8), cv2.COLOR_GRAY2RGB) 167 | isma = cv2.cvtColor(isma, cv2.COLOR_BGRA2BGR) 168 | ibig = cv2.imdecode(np.frombuffer(base64.b64decode(big_image),np.uint8), cv2.COLOR_GRAY2RGB) 169 | data = self.det.check_target(ibig,isma) 170 | 171 | # 生成时间戳 172 | timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") 173 | 174 | # 创建保存图像的目录 175 | output_dir = "output" 176 | if not os.path.exists(output_dir): 177 | os.makedirs(output_dir) 178 | isma_dir = os.path.join(output_dir, "isma") 179 | if not os.path.exists(isma_dir): 180 | os.makedirs(isma_dir) 181 | ibig_dir = os.path.join(output_dir, "ibig") 182 | if not os.path.exists(ibig_dir): 183 | os.makedirs(ibig_dir) 184 | 185 | # 保存图像 186 | isma_filename = f"{timestamp}_small.jpg" 187 | isma_filepath = os.path.join(isma_dir, isma_filename) 188 | cv2.imwrite(isma_filepath, isma) # 保存带时间戳的小图像 189 | ibig_filename = f"{timestamp}_big.jpg" 190 | ibig_filepath = os.path.join(ibig_dir, ibig_filename) 191 | cv2.imwrite(ibig_filepath, ibig) # 保存带时间戳的大图像 192 | return data 193 | 194 | 195 | async def getbeian(self,name,sp,pageNum,pageSize,): 196 | info = ujson.loads(self.typj.get(sp)) 197 | info['pageNum'] = pageNum 198 | info['pageSize'] = pageSize 199 | info['unitName'] = name 200 | sign = await self.check_img() 201 | if sign == 'verf error': 202 | return {'code':201,'error':'验证码识别失败'} 203 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 204 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 205 | async with self.session.post(self.queryByCondition, data=ujson.dumps(info,ensure_ascii=False), headers=self.base_header) as req: 206 | res = await req.text() 207 | return ujson.loads(res) 208 | 209 | async def getblackbeian(self,name,sp): 210 | info = ujson.loads(self.btypj.get(sp)) 211 | if sp == 0: 212 | info['domainName'] = name 213 | else: 214 | info['serviceName'] = name 215 | 
sign = await self.check_img() 216 | if sign == 'verf error': 217 | return {'code':201,'error':'验证码识别失败'} 218 | length = str(len(str(ujson.dumps(info,ensure_ascii=False)).encode('utf-8'))) 219 | self.base_header.update({'Content-Length': length, 'Uuid': self.p_uuid, 'Token': self.token, 'Sign': sign}) 220 | async with self.session.post( 221 | self.blackqueryByCondition if sp == 0 else self.blackappAndMiniByCondition, 222 | data=ujson.dumps(info,ensure_ascii=False), 223 | headers=self.base_header) as req: 224 | res = await req.text() 225 | return ujson.loads(res) 226 | 227 | async def autoget(self,name,sp,pageNum='',pageSize='',b=1): 228 | await self._init_session() 229 | try: 230 | data = await self.getbeian(name,sp,pageNum,pageSize) if b == 1 else await self.getblackbeian(name,sp) 231 | except Exception as e: 232 | return {"code":122,"msg":"查询失败"} 233 | finally: 234 | await self._close_session() 235 | 236 | if data['code'] == 500: 237 | return {"code":122,"msg":"工信部服务器异常"} 238 | return data 239 | 240 | # APP备案查询 241 | async def ymApp(self,name,pageNum='',pageSize=''): 242 | return await self.autoget(name,1,pageNum,pageSize) 243 | 244 | # 网站备案查询 245 | async def ymWeb(self,name,pageNum='',pageSize=''): 246 | return await self.autoget(name,0,pageNum,pageSize) 247 | 248 | # 小程序备案查询 249 | async def ymMiniApp(self,name,pageNum='',pageSize=''): 250 | return await self.autoget(name,2,pageNum,pageSize) 251 | 252 | # 快应用备案查询 253 | async def ymKuaiApp(self,name,pageNum='',pageSize=''): 254 | return await self.autoget(name,3,pageNum,pageSize) 255 | 256 | # 违法违规APP查询 257 | async def bymApp(self,name): 258 | return await self.autoget(name,1,b=0) 259 | 260 | # 违法违规网站查询 261 | async def bymWeb(self,name): 262 | return await self.autoget(name,0,b=0) 263 | 264 | # 违法违规小程序查询 265 | async def bymMiniApp(self,name): 266 | return await self.autoget(name,2,b=0) 267 | 268 | # 违法违规快应用查询 269 | async def bymKuaiApp(self,name): 270 | return await self.autoget(name,3,b=0) 271 | 272 | if __name__ 
== '__main__': 273 | async def main(): 274 | a = beian() 275 | # 官方单页查询pageSize最大支持26 276 | # 页面索引pageNum从1开始,第一页可以不写 277 | data = await a.ymWeb("qq.com") 278 | print(f"查询结果:\n{data}") 279 | return data 280 | loop = asyncio.get_event_loop() 281 | loop.run_until_complete(main()) 282 | 283 | ''' 284 | 在其他代码模块中调用(异步) 285 | 286 | from ymicp import beian 287 | 288 | icp = beian() 289 | data = await icp.ymApp("微信") 290 | 291 | ''' -------------------------------------------------------------------------------- /lib/Requests_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | import time 4 | import json 5 | import random 6 | import requests 7 | import urllib3 8 | from lib.hander_random import requests_headers 9 | from lib.log_functions import api_logger 10 | 11 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) 12 | headers = requests_headers() 13 | 14 | def req_get(url, params): 15 | 16 | # proxies = { 17 | # # 用sock协议时只能用socks5h 不能用socks5,或者用http协议 18 | # 'http':'socks5h://127.0.0.1:8443', 19 | # 'https':'socks5h://127.0.0.1:8443' 20 | # } 21 | try: 22 | res = requests.get(url=url, headers=headers, params=params, verify=False) 23 | # res = requests.get(url=url, headers=headers, params=params, verify=False, proxies=proxies) 24 | res.encoding = res.apparent_encoding # apparent_encoding比"utf-8"错误率更低 25 | return res 26 | except Exception as e: 27 | api_logger.warning(f"req_get error: {str(e)}") 28 | pass 29 | 30 | def req_post(url, data=None, header=None): 31 | try: 32 | if header: 33 | header['Cookie'] = header 34 | res = requests.post(url=url, headers=headers, verify=False, data=data, allow_redirects=False, timeout=(4,20)) 35 | res.encoding = res.apparent_encoding # apparent_encoding比"utf-8"错误率更低 36 | return res 37 | except Exception as e: 38 | api_logger.warning(f"req_post error: {str(e)}") 39 | pass 40 | 41 | # 异常重试 42 | def make_request(urls, params, search_data): 
43 | if isinstance(urls, str): 44 | urls = [urls] # 如果urls是字符串,转换为包含该字符串的列表 45 | elif not isinstance(urls, list): 46 | raise ValueError("urls must be a string or a list of strings") 47 | 48 | max_retries = 10 # 最大重试次数 49 | retries = 0 50 | while retries < max_retries: 51 | for url in urls: 52 | url = url.strip() 53 | api_logger.warning(f"Query url: {url}") 54 | try: 55 | response = requests.get(url +'/query/web', params=params, headers=headers, allow_redirects=False) 56 | response.encoding = response.apparent_encoding # apparent_encoding比"utf-8"错误率更低 57 | if response.status_code == 200: 58 | ''' 59 | {"code": 101, "msg": "参数错误,请指定search参数"} 60 | {"code": 122, "msg": "查询失败"} 61 | {"success": false, "code": 415, "msg": "参数异常,content_type_not_supported"} 62 | {"code": 201, "error": "验证码识别失败"} 63 | {"success": false, "code": 429, "msg": "您目前访问频次过高[访问ip:55.24.61.23], 请稍后再试。"} 64 | ''' 65 | req = json.loads(response.text) 66 | if req.get('code') == 200: 67 | success_sleep = random.randint(5, 15) 68 | api_logger.warning(f"Query was successful. wait {success_sleep}s ...") 69 | time.sleep(success_sleep) # 间隔重试 70 | return req 71 | else: 72 | api_logger.warning("Request failed. Retrying...") 73 | if req.get('code') == 201: 74 | jsondumpdata = f"search_data:{search_data}, code:{req['code']}, msg:{req['error']}" 75 | else: 76 | jsondumpdata = f"search_data:{search_data}, code:{req['code']}, msg:{req['msg']}" 77 | api_logger.warning(jsondumpdata) 78 | 79 | if req.get('code') == 429: 80 | error_sleep = random.randint(60, 120) 81 | api_logger.warning(f"Frequency too high. Switching to next URL. wait {error_sleep}s ...") 82 | time.sleep(error_sleep) 83 | continue # 跳出当前URL的循环 84 | else: 85 | error_sleep = random.randint(5, 15) # 间隔重试 86 | error_msg = f"Request status_code is {response.status_code}. Retrying {error_sleep}s ..." 
87 | # api_logger.error(error_msg, extra={'search_data': search_data}) 88 | api_logger.write_log_error('log/error_status_code.log', error_msg, search_data) 89 | time.sleep(error_sleep) 90 | except Exception as e: 91 | api_logger.warning(f"Exception occurred: {str(e)}") 92 | time.sleep(random.randint(5, 10)) # 异常发生时等待一段时间再继续 93 | retries += 1 94 | error_max_msg = f"{search_data} Max retries exceeded. Failed to get successful response." 95 | # api_logger.error(error_max_msg) 96 | api_logger.write_log_error('log/error_max.log', error_max_msg, search_data) 97 | 98 | return None 99 | -------------------------------------------------------------------------------- /lib/hander_random.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | ''' 3 | 生成随机headers 4 | 引用案例 5 | from plugins.hander_random import requests_headers 6 | print(requests_headers()) 7 | ''' 8 | 9 | import random 10 | 11 | # 生成随机refener 12 | def random_referer(): 13 | dominio = ['Adzuna', 'Bixee', 'CareerBuilder', 'Craigslist', 'Dice', 'Eluta.ca', 'Hotjobs', 'JobStreet', 'Incruit', 'Indeed', 'Glassdoor', 'LinkUp', 'Monster', 'Naukri', 'Yahoo', 'Legal', 'GoogleScholar', 'Lexis', 'Manupatra', 'Quicklaw', 'WestLaw', 'Medical', 'Bing Health', 'Bioinformatic', 'CiteAb', 'EB-eye', 'Entrez', 'mtv', 'ubuntu', 'GenieKnows', 'GoPubMed', 'Healia', 'Healthline', 'Nextbio', 'PubGene', 'Quertle', 'Searchmedica', 'WebMD', 'News', 'BingNews', 'Daylife', 'GoogleNews', 'aol', 'microsoft', 'MagPortal', 'Newslookup', 'Nexis', 'Topix', 'Trapit', 'YahooNews', 'People', 'Comfibook', 'Ex.plode', 'InfoSpace', 'PeekYou', 'Spock', 'Spokeo', 'WorldwideHelpers', 'iPhone', 'Zabasearch', 'ZoomInfo', 'Fizber', 'HotPads', 'Realtor', 'Redfin', 'Rightmove', 'Trulia', 'Zillow', 'Zoopla', 'StuRents', 'globo', 'sbt', 'band', 'cnn', 'blog.inurl.com.br'] 14 | gTLD = ['aero', 'arpa', 'biz', 'com', 'coop', 'edu', 'gov', 'info', 'int', 'mil', 'museum', 'name', 'net', 'org', 'pro', 'tel'] 15 | 
arquivo = ['admin', 'index', 'wp-admin', 'info', 'shop', 'file', 'out', 'open', 'news', 'add', 'profile', 'search', 'open', 'photo', 'insert', 'view'] 16 | ext = ['exe', 'php', 'asp', 'aspx', 'jsf', 'html', 'htm', 'lua', 'log', 'cgi', 'sh', 'css', 'py', 'sql', 'xml', 'rss'] 17 | pasta = ['App_Files', 'Assets', 'CFFileServlet', 'CFIDE', 'Communication', 'Computers', 'CoreAdminHome', 'CoreHome', 'Crawler', 'Creator', 'DECOM', 'Dashboard', 'Drives', 'Dynamic', 'FCKeditor', 'Feedback', 'Files', 'Flash', 'Forms', 'Help', 'ICEcore', 'IO', 'Image', 'JPG', 'getold', 'JSP', 'KFSI', 'Laguna', 'Login', 'Motors', 'MultiSites', 'NR', 'OCodger', 'RSS', 'Safety', 'Smarty', 'Software', 'Static', 'Stress', 'getfull', 'Sugarcrm', 'Travel', 'UPLOAD', 'Urussanga', 'UserFiles', '__tpl', '_fckeditor', '_info', '_machine', '_plugins', '_sample', '_samples', 'postmost', '_source', '_testcases', 'aaa', 'abelardoluz', 'aberlardoluz', 'aborto', 'about', 'aboutus', 'abuse', 'abusers', 'ac_drives', 'acabamentos', 'mail', 'academias', 'acao', 'acartpro', 'acatalog', 'acc', 'acc_auto_del', 'acc_beep_ken', 'acc_beep_time', 'acc_ch_mail', 'acc_fc_prsc', 'accounts', 'validar', 'acc_html_mark', 'acc_html_rand', 'acc_lan_page', 'acc_pic_html', 'acc_profol', 'acc_soft_link', 'acc_ssd_page', 'acc_syun_ei', 'german', 'intranet', 'old', 'acc_time_go', 'acc_wbcreator', 'accept', 'accepted', 'acceso', 'access', 'accessibility', 'accessories', 'acciones', 'acclg', 'account', 'paste', 'paste22', 'acessorios', 'acontece', 'acougueiro', 'acoustic', 'act', 'action', 'activate', 'active', 'activeden', 'activism', 'actualit', 'actuators', 'ad', 'informatica', 'ad_division', 'ad_rate', 'adapter', 'adapters', 'adaptive', 'adaptivei', 'adatmentes', 'adbanner', 'adblock', 'adboard', 'adclick', 'add-ons', 'add', 'delete', 'added', 'addon', 'address', 'adduser', 'adfree', 'adhoc', 'adinfo', 'adios_papa', 'adlink', 'adlinks', 'acc_folder_vw', 'acc_syun_su'] 18 | locais = ['ac', 'ad', 'ae', 'af', 'ag', 'al', 'am', 'an', 
'ao', 'aq', 'ar', 'as', 'at', 'au', 'aw', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi', 'bj', 'bm', 'bn', 'bw', 'by', 'bz', 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'bo', 'br', 'ec', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr', 'bs', 'bt', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'bv', 'kh', 'ki', 'km', 'kn', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', 'mz', 'nb', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'ss', 'st', 'su', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'um', 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'ye', 'yt', 'yu', 'za', 'zm', 'zw', 'ai'] 19 | return "http://www." + random.choice(dominio).lower() + "." + random.choice(gTLD) + "." + random.choice(locais) + "/" + random.choice(pasta) + "/" + random.choice(arquivo) + "." 
+ random.choice(ext) 20 | 21 | # 生成随机useragent 22 | def random_useragent(): 23 | agentBrowser = ['Firefox', 'Safari', 'Opera', 'Flock', 'Internet Explorer', 'Seamonkey', 'Tor Browser', 'GNU IceCat', 'CriOS', 'TenFourFox', 'SeaMonkey', 'B-l-i-t-z-B-O-T', 'Konqueror', 'Mobile', 'Konqueror', 'Netscape', 'Chrome', 'Dragon', 'SeaMonkey', 'Maxthon', 'IBrowse', 'K-Meleon', 'GoogleBot', 'Konqueror', 'Minimo', 'Googlebot', 'WeltweitimnetzBrowser', 'SuperBot', 'TerrawizBot', 'YodaoBot', 'Wyzo', 'Grail', 'PycURL', 'Galaxy', 'EnigmaFox', '008', 'ABACHOBot', 'Bimbot', 'Covario IDS', 'iCab', 'KKman', 'Oregano', 'WorldWideWeb', 'Wyzo', 'GNU IceCat', 'Vimprobable', 'uzbl', 'Slim Browser', 'Flock', 'OmniWeb', 'Rockmelt', 'Shiira', 'Swift', 'Pale Moon', 'Camino', 'Flock', 'Galeon', 'Sylera'] 24 | agentSistema = ['Windows 3.1', 'Windows 95', 'Windows 98', 'Windows 2000', 'Windows NT', 'Linux 2.4.22-10mdk', 'FreeBSD', 'Windows XP', 'Windows Vista', 'Redhat Linux', 'Ubuntu', 'Fedora', 'AmigaOS', 'BackTrack Linux', 'iPad', 'BlackBerry', 'Unix', 'CentOS Linux', 'Debian Linux', 'Macintosh', 'Android', 'iPhone', 'Windows NT 6.1', 'BeOS', 'OS 10.5', 'Nokia', 'Arch Linux', 'Ark Linux', 'BitLinux', 'Conectiva (Mandriva)', 'CRUX Linux', 'Damn Small Linux', 'DeLi Linux', 'Ubuntu', 'BigLinux', 'Edubuntu', 'Fluxbuntu', 'Freespire', 'GNewSense', 'Gobuntu', 'gOS', 'Mint Linux', 'Kubuntu', 'Xubuntu', 'ZeVenOS', 'Zebuntu', 'DemoLinux', 'Dreamlinux', 'DualOS', 'eLearnix', 'Feather Linux', 'Famelix', 'FeniX', 'Gentoo', 'GoboLinux', 'GNUstep', 'Insigne Linux', 'Kalango', 'KateOS', 'Knoppix', 'Kurumin', 'Dizinha', 'TupiServer', 'Linspire', 'Litrix', 'Mandrake', 'Mandriva', 'MEPIS', 'Musix GNU Linux', 'Musix-BR', 'OneBase Go', 'openSuSE', 'pQui Linux', 'PCLinuxOS', 'Plaszma OS', 'Puppy Linux', 'QiLinux', 'Red Hat Linux', 'Red Hat Enterprise Linux', 'CentOS', 'Fedora', 'Resulinux', 'Rxart', 'Sabayon Linux', 'SAM Desktop', 'Satux', 'Slackware', 'GoblinX', 'Slax', 'Zenwalk', 'SuSE', 'Caixa Mágica', 'HP-UX', 
'IRIX', 'OSF/1', 'OS-9', 'POSYS', 'QNX', 'Solaris', 'OpenSolaris', 'SunOS', 'SCO UNIX', 'Tropix', 'EROS', 'Tru64', 'Digital UNIX', 'Ultrix', 'UniCOS', 'UNIflex', 'Microsoft Xenix', 'z/OS', 'Xinu', 'Research Unix', 'InfernoOS'] 25 | locais = ['cs-CZ', 'en-US', 'sk-SK', 'pt-BR', 'sq_AL', 'sq', 'ar_DZ', 'ar_BH', 'ar_EG', 'ar_IQ', 'ar_JO', 'ar_KW', 'ar_LB', 'ar_LY', 'ar_MA', 'ar_OM', 'ar_QA', 'ar_SA', 'ar_SD', 'ar_SY', 'ar_TN', 'ar_AE', 'ar_YE', 'ar', 'be_BY', 'be', 'bg_BG', 'bg', 'ca_ES', 'ca', 'zh_CN', 'zh_HK', 'zh_SG', 'zh_TW', 'zh', 'hr_HR', 'hr', 'cs_CZ', 'cs', 'da_DK', 'da', 'nl_BE', 'nl_NL', 'nl', 'en_AU', 'en_CA', 'en_IN', 'en_IE', 'en_MT', 'en_NZ', 'en_PH', 'en_SG', 'en_ZA', 'en_GB', 'en_US', 'en', 'et_EE', 'et', 'fi_FI', 'fi', 'fr_BE', 'fr_CA', 'fr_FR', 'fr_LU', 'fr_CH', 'fr', 'de_AT', 'de_DE', 'de_LU', 'de_CH', 'de', 'el_CY', 'el_GR', 'el', 'iw_IL', 'iw', 'hi_IN', 'hu_HU', 'hu', 'is_IS', 'is', 'in_ID', 'in', 'ga_IE', 'ga', 'it_IT', 'it_CH', 'it', 'ja_JP', 'ja_JP_JP', 'ja', 'ko_KR', 'ko', 'lv_LV', 'lv', 'lt_LT', 'lt', 'mk_MK', 'mk', 'ms_MY', 'ms', 'mt_MT', 'mt', 'no_NO', 'no_NO_NY', 'no', 'pl_PL', 'pl', 'pt_PT', 'pt', 'ro_RO', 'ro', 'ru_RU', 'ru', 'sr_BA', 'sr_ME', 'sr_CS', 'sr_RS', 'sr', 'sk_SK', 'sk', 'sl_SI', 'sl', 'es_AR', 'es_BO', 'es_CL', 'es_CO', 'es_CR', 'es_DO', 'es_EC', 'es_SV', 'es_GT', 'es_HN', 'es_MX', 'es_NI', 'es_PA', 'es_PY', 'es_PE', 'es_PR', 'es_ES', 'es_US', 'es_UY', 'es_VE', 'es', 'sv_SE', 'sv', 'th_TH', 'th_TH_TH', 'th', 'tr_TR', 'tr', 'uk_UA', 'uk', 'vi_VN', 'vi'] 26 | 27 | headers = [] 28 | for _ in range(3): 29 | header = random.choice(agentBrowser) + "/" + str(random.randint(1, 20)) + "." + str(random.randint(1, 20)) + " (" + random.choice(agentSistema) + " " + str(random.randint(1, 7)) + "." 
+ str(random.randint(0, 9)) + "; " + random.choice(locais) + ")" 30 | headers.append(header) 31 | return " ".join(headers) 32 | 33 | def generate_accept(): 34 | return "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" 35 | 36 | def generate_accept_language(): 37 | languages = ["en-US,en;q=0.9", "en-GB,en;q=0.8", "fr-FR,fr;q=0.9", "de-DE,de;q=0.8", "es-ES,es;q=0.8"] 38 | return random.choice(languages) 39 | 40 | # 生成随机的X-Forwarded-For、X-Originating-Ip、X-Remote-Addr、X-Remote-Ip 41 | def generate_random_headers_X(): 42 | # 生成随机IP地址 43 | def generate_random_ip(): 44 | ip = [] 45 | for _ in range(4): 46 | ip.append(str(random.randint(0, 255))) 47 | return '.'.join(ip) 48 | 49 | headers = { 50 | 'X-Forwarded-For': generate_random_ip(), 51 | 'X-Originating-Ip': generate_random_ip(), 52 | 'X-Remote-Addr': generate_random_ip(), 53 | 'X-Remote-Ip': generate_random_ip() 54 | } 55 | return headers 56 | 57 | # 合并上述函数生成随机headers 58 | def requests_headers(): 59 | headers = { 60 | 'User-Agent':random_useragent(), 61 | "Referer": random_referer(), 62 | 'Upgrade-Insecure-Requests':'1', 63 | 'Connection':'keep-alive', 64 | 'Cache-Control':'max-age=0', 65 | 'Accept': generate_accept(), 66 | 'Accept-Encoding':'gzip, deflate, sdch', 67 | 'Accept-Language': generate_accept_language(), 68 | 'Content-Type': 'application/x-www-form-urlencoded', 69 | 'Connection': 'close', 70 | "Pragma": "no-cache", 71 | "Cache-Control": "no-cache", 72 | } 73 | random_headers = generate_random_headers_X() 74 | headers = headers.copy() # 复制现有的请求头 75 | headers.update(random_headers) # 将随机生成的请求头追加到现有的请求头 76 | 77 | return headers -------------------------------------------------------------------------------- /lib/log_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from datetime import datetime 4 | 5 | class APILogger: 6 | def __init__(self, 
log_dir='log'): 7 | # 确保日志目录存在 8 | self.log_dir = log_dir 9 | os.makedirs(log_dir, exist_ok=True) 10 | 11 | # 配置主日志文件 12 | log_file = os.path.join(log_dir, 'application.log') 13 | self.file_handler = logging.FileHandler(log_file, encoding='utf-8') 14 | self.file_handler.setFormatter( 15 | logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 16 | ) 17 | 18 | # 配置日志记录器 19 | self.logger = logging.getLogger(__name__) 20 | self.logger.setLevel(logging.INFO) 21 | self.logger.addHandler(self.file_handler) 22 | 23 | def _write_to_specific_log(self, log_path, content): 24 | """写入特定的日志文件""" 25 | if not self._is_content_in_log(log_path, content): 26 | with open(log_path, 'a', encoding='utf-8') as f: 27 | f.write(content + '\n') 28 | 29 | def _is_content_in_log(self, log_path, content): 30 | """检查内容是否已存在于日志文件中""" 31 | if not os.path.exists(log_path): 32 | return False 33 | 34 | try: 35 | with open(log_path, 'r', encoding='utf-8') as f: 36 | return content in f.read() 37 | except Exception: 38 | return False 39 | 40 | def info(self, message): 41 | """记录信息日志""" 42 | print(message) 43 | self.logger.info(message) 44 | 45 | def error(self, message): 46 | """记录错误日志""" 47 | print(message) 48 | self.logger.error(message) 49 | 50 | def warning(self, message): 51 | """记录警告日志""" 52 | print(message) 53 | self.logger.warning(message) 54 | 55 | def success(self, message, log_file='success.log'): 56 | """记录成功日志""" 57 | if 'Schedule' not in message: 58 | print(message) 59 | log_path = os.path.join(self.log_dir, log_file) 60 | self._write_to_specific_log(log_path, message) 61 | self.logger.info(message) 62 | 63 | # 为了保持向后兼容,保留原有的函数接口 64 | def write_log_success(self, log_file_path, success_output): 65 | api_logger.success(success_output, os.path.basename(log_file_path)) 66 | 67 | def write_log_error(self, log_file_path, error_output, search_data=''): 68 | output = search_data if search_data else error_output 69 | api_logger.error(error_output) 70 | if log_file_path: 71 | 
api_logger._write_to_specific_log(log_file_path, output) 72 | 73 | def write_log_warning(self, warning_output): 74 | api_logger.warning(warning_output) 75 | 76 | # 创建全局logger实例 77 | api_logger = APILogger() 78 | 79 | -------------------------------------------------------------------------------- /lib/logo.py: -------------------------------------------------------------------------------- 1 | def logo(): 2 | print(''' 3 | ____ _ ___ ____ 4 | / ___|/ | __ _ / _ \| _ \ __ _ _ _ 5 | \___ \| |/ _` | | | | | | |/ _` | | | | 6 | ___) | | (_| | |_| | |_| | (_| | |_| | 7 | |____/|_|\__, |\___/|____/ \__,_|\__, | 8 | |___/ |___/ 9 | 10 | Powered by S1g0Day 11 | -------------------------------------- 12 | ''') -------------------------------------------------------------------------------- /log/readme.md: -------------------------------------------------------------------------------- 1 | ## 输出日志 2 | 3 | ``` 4 | . 5 | ├── Processing_Domain.log # 进度日志,如果Ctrl+c中断的话,可以从这里看到定义起始位置 6 | ├── error.log # 报错日志,代码初期调试用的(可忽略) 7 | ├── error_Max.log # 关键日志, 代码中对domains中的每行数据最多遍历10次,如果10次都查询错误,会写入到这个日志,方便对其重新测试 8 | ├── error_status_code.log # 报错日志, 测试平台地址请求失败日志(可忽略),通过error_Max.log查询失败的记录 9 | ├── error_icp.log # 查询异常日志,主要是记录总量与实际数据不符的问题 10 | ├── no_req_list.log # 关键日志,存储了查询正常但没有数据回显的情况,需要注意是否是domians信息填错,如公司名称为简写等 11 | └── success.log # 关键日志, 查询成功且获取到所有备案域名信息 12 | ``` 13 | 14 | `quchong.py`日志 15 | 16 | ``` 17 | . 
18 | ├── _output_ips.txt # 筛选出所有IP并将其排序后的结果 19 | ├── _output_ascii_domain.txt # 筛选出所有中文域名并将其转为ascii的结果 20 | └── _output_all.txt # 所有域名及IP结果,其中中文域名及转换结果已打印在输出行,并未写入当前日志 21 | ``` 22 | 23 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 3 | 从工业和信息化部政务服务平台进行的ICP备案查询,核心是`HG-ha`师傅的 [ICP_Query 项目](https://github.com/HG-ha/ICP_Query) ,虽然慢,但好用 4 | 5 | # 搭建 6 | 7 | 不折腾了,直接docker部署 20240225版本 8 | 9 | ``` 10 | # 拉取镜像 11 | docker pull yiminger/ymicp:yolo8_latest 12 | # 运行并转发容器16181端口到本地所有地址 13 | docker run -d -p 16181:16181 yiminger/ymicp:yolo8_latest 14 | ``` 15 | 16 | # 项目 17 | 18 | ## 目录 19 | 20 | ``` 21 | . 22 | ├── 0.icp_query.py 23 | ├── 1.icp_query_result_processor.py 24 | ├── 2.url_checker.py 25 | ├── config 26 | │ ├── config.yaml 27 | │ └── domain.txt 28 | ├── lib 29 | │ ├── Requests_func.py 30 | │ ├── hander_random.py 31 | │ ├── logo.py 32 | │ ├── log_functions.py 33 | ├── log 34 | 35 | │ └── application.log 36 | │ └── available_urls.log 37 | │ └── error_max.log 38 | │ └── error_occurred.log 39 | │ └── error_status_code.log 40 | │ └── error_icp.log 41 | │ └── no_req_list.log 42 | │ └── processing_Domain.log 43 | │ └── success.log 44 | ``` 45 | 46 | ## 配置 47 | 48 | `config.yaml` 49 | 50 | ``` 51 | version: "0.0.7" 52 | 53 | # 测试平台地址 54 | query_url: 55 | # - http://192.168.1.1:16181 56 | # - http://192.168.1.2:16181 57 | # - http://192.168.1.3:16181 58 | 59 | # 目标文件 60 | domains_file: "config/domain.txt" 61 | ``` 62 | 63 | 检测query_url可用性,可用url输出到logs目录`available_urls.log` 64 | 65 | ``` 66 | python3 2.url_checker.py 67 | ``` 68 | 69 | 测试平台地址使用的是多个服务器搭建组成负载均衡的效果。为什么用多个服务器搭建? 
70 | 71 | 一是因为处理报错。 72 | 73 | - 主要是解决频次问题,常见查询报错如下: 74 | 75 | ``` 76 | {"code": 101, "msg": "参数错误,请指定search参数"} 77 | {"code": 122, "msg": "查询失败"} 78 | {"success": false, "code": 415, "msg": "参数异常,content_type_not_supported"} 79 | {"code": 201, "error": "验证码识别失败"} 80 | {"success": false, "code": 429, "msg": "您目前访问频次过高[访问ip:x.x.x.x], 请稍后再试。"} 81 | ``` 82 | 83 | 二是测试拥有的几个订阅及免费代理池,代理可用但均无法正常访问到`beian.miit.gov.cn` 84 | 85 | 三是有这么多的吃灰VPS,任性。 86 | 87 | - 建议本地运行,这样就多一套运行环境 88 | 89 | - 一般本地搭建一套,然后再从两个VPS搭建后组成负载环境,基本上就能自动处理所有报错了 90 | 91 | 92 | 93 | `domain.txt` 94 | 95 | ``` 96 | 北京百度网讯科技有限公司 97 | 浙江淘宝网络有限公司 98 | ``` 99 | 100 | 当然也可以是域名 101 | 102 | ``` 103 | baidu.com 104 | taobao.com 105 | ``` 106 | 107 | 不管是域名还是公司名称,只要确认无误,都会遍历出所有备案域名 108 | 109 | ## 使用方法 110 | 111 | ``` 112 | python3 0.icp_query.py -h 113 | 114 | ____ _ ___ ____ 115 | / ___|/ | __ _ / _ \| _ \ __ _ _ _ 116 | \___ \| |/ _` | | | | | | |/ _` | | | | 117 | ___) | | (_| | |_| | |_| | (_| | |_| | 118 | |____/|_|\__, |\___/|____/ \__,_|\__, | 119 | |___/ |___/ 120 | 121 | Powered by S1g0Day 122 | -------------------------------------- 123 | 124 | usage: 0.icp_query.py [-h] [-d QUERY_URL] [-u DOMAIN] [-uf DOMAINS_FILE] [-s START_INDEX] 125 | 126 | options: 127 | -h, --help show this help message and exit 128 | -d QUERY_URL 请输入测试平台地址 129 | -u DOMAIN 请输入目标 130 | -uf DOMAINS_FILE 请输入目标文件 131 | -s START_INDEX 请输入起始位置,第一个数据的下标为0 132 | ``` 133 | 134 | 命令示例 135 | 136 | ``` 137 | # 1、使用配置文件内的平台地址及目标文件 138 | python3 0.icp_query.py 139 | 140 | # 2、使用配置文件内的平台地址,指定其他目标或目标文件 141 | python3 0.icp_query.py -u baidu.com 142 | python3 0.icp_query.py -u 浙江淘宝网络有限公司 143 | python3 0.icp_query.py -uf conf/domain.txt 144 | 145 | # 3、指定平台地址、使用配置文件内的目标文件 146 | python3 0.icp_query.py -d http://192.168.1.1:16181 147 | 148 | # 4、指定平台地址、目标或目标文件 149 | python3 0.icp_query.py -d http://192.168.1.1:16181 -u baidu.com 150 | python3 0.icp_query.py -d http://192.168.1.1:16181 -uf conf/domain.txt 151 | 152 | # 5、断点继续,以上4点都可以使用。假设 processing_Domain 
日志为:"4/10: 浙江淘宝网络有限公司",想从第4个继续,命令如下 153 | python3 0.icp_query.py -s 4 154 | ``` 155 | 156 | ## 输出日志 157 | 158 | ``` 159 | . 160 | ├── application.log # 运行日志,代码初期调试用的(可忽略) 161 | ├── available_urls.log # 可用平台地址列表 162 | ├── error_max.log # 关键日志(需重新测试), 代码中对domains中的每行数据最多遍历10次,如果10次都查询错误,会写入到这个日志,方便对其重新测试 163 | ├── error_occurred.log # 关键日志(需重新测试),查询异常,原因需要通过debug查看 164 | ├── error_status_code.log # 报错日志, 测试平台地址请求失败日志(可忽略),通过error_max.log查询失败的记录 165 | ├── error_icp.log # 查询异常日志(需重新测试),主要是记录总量与实际数据不符的问题 166 | ├── no_req_list.log # 关键日志(需重新测试),存储了查询正常但没有数据回显的情况,需要注意是否是domains信息填错,如公司名称为简写等 167 | ├── processing_Domain.log # 进度日志,如果Ctrl+c中断的话,可以从这里看到定义起始位置 168 | └── success.log # 关键日志, 查询成功且获取到所有备案域名信息 169 | ``` 170 | 171 | ## 数据处理 172 | 173 | 这里处理的是`success.log`日志 174 | 175 | ``` 176 | # 提取出域名 177 | [root@localhost icp_query_s1g0day]# python3 1.icp_query_result_processor.py log/success.log 178 | 179 | # 导出为xlsx文件 180 | # 将域名保存到 1.txt 181 | # 运行代码,结果保存到log目录下。这个代码的妙用还请亲身体验 182 | [root@localhost icp_query_s1g0day]# python3 2.quchong.py 1.txt 183 | ``` 184 | 185 | `quchong.py`日志 186 | 187 | ``` 188 | . 
189 | ├── _output_ascii_domain.txt # 筛选出所有中文域名并将其转为ascii的结果 190 | ├── _output_ips.txt # 筛选出所有IP并将其排序后的结果 191 | └── _output_all.txt # 所有域名及IP结果,其中中文域名及转换结果已打印在输出行,并未写入当前日志 192 | ``` 193 | 194 | 也可以使用 [data_processor](https://github.com/s1g0day/data_processor) 进行数据处理 195 | 196 | # Issues 197 | 198 | 该项目只适合分享、学习、交流,不得用于商业及非法用途。觉得项目不错的小伙伴,可以在右上角Star一下,后期项目会不断优化,在使用过程中有什么建议与BUG,欢迎大家提交Issues 199 | 200 | # 后续 201 | 202 | 20240621-更新 203 | 204 | 没事的时候还是要想想怎么解决代理或代理池问题,针对本地和远程服务器分别有几种方案 205 | 206 | - 本地 207 | - [x] 修改代码使aiohttp走代理 208 | - [ ] ~~linux环境走全局代理~~ 209 | - [ ] ~~docker容器启动时走socks~~ 210 | - [ ] ~~利用proxychains使python走代理~~ 211 | - 远程 212 | - [x] 修改代码使aiohttp走代理 213 | - [ ] ~~合理购买代理池~~ 214 | 215 | 本次使用的是 [Rain-kl/glider_guid41asd4asd](https://github.com/Rain-kl/glider_guid41asd4asd) 组成的代理池 216 | 217 | **协议选择**: 我的订阅有两个协议:ss和trojan,之前一直用的是ss,但存在一些未知的问题无法应用到aiohttp上面。今天稍微做了一些修改,改为使用trojan协议,发现竟然可以用,少走多少弯路。 218 | 219 | 修改`订阅转换.py`,生成`forward=trojan://pass@host:port[?serverName=SERVERNAME][&skipVerify=true][&cert=PATH]` 220 | 221 | ``` 222 | def parse_config(array: list): 223 | ss = [] 224 | # {'name': '泰国', 'type': 'ss', 'server': 'xxx.cn', 'port': 123, 'cipher': 'chacha20-ietf-poly1305', 'password': 'password', 'udp': True} 225 | vmess = [] 226 | # { name: '香港', type: vmess, server: 'xxx.cn', port: 123, uuid: ac005860, alterId: 0, cipher: auto, udp: true } 227 | trojan = [] 228 | # { name: '[trojan] 香港 01', type: trojan, server: ixxx.xxx.cn, port: 50002, password: xxxxx, udp: true, skip-cert-verify: true } 229 | # { name: 香港02, type: trojan, server: xxx.xxx.cn, port: 50002, password: xxxxx, udp: true, sni: xxxx-cert.com, skip-cert-verify: true } 230 | 231 | keywords = ['最新', '流量', '重置', '到期'] 232 | for node in array: 233 | if any(keyword in node['name'] for keyword in keywords): 234 | pass 235 | else: 236 | if node['type'] == 'ss': 237 | node = f"{node['type']}://{node['cipher']}:{node['password']}@{node['server']}:{node['port']}#{node['name']}" 238 | ss.append(node) 
239 | elif node['type'] == 'vmess': 240 | node = f"{node['type']}://none:{node['uuid']}@{node['server']}:{node['port']}?alterID={node['alterId']}" 241 | vmess.append(node) 242 | elif node['type'] == 'trojan': 243 | if 'sni'in node: 244 | node = f"{node['type']}://{node['password']}@{node['server']}:{node['port']}?serverName={node['sni']}&skipVerify={node['skip-cert-verify']}#{node['name']}" 245 | else: 246 | node = f"{node['type']}://{node['password']}@{node['server']}:{node['port']}?skipVerify={node['skip-cert-verify']}#{node['name']}" 247 | trojan.append(node) 248 | for node in ss: 249 | print(f'forward={node}') 250 | for node in vmess: 251 | print(f'forward={node}') 252 | for node in trojan: 253 | print(f'forward={node}') 254 | ``` 255 | 256 | 根据实际情况修改原本`ymicp-socks.py`代码,后续详情阅读 `icpApi/readme.md` 257 | 258 | --- 259 | 260 | 20240522-更新 261 | 262 | - 最近有师傅使用相同的技术栈重写了一个查询工具[ICP-spider](https://github.com/ravizhan/ICP-spider/),尝试了下速度和准确度确实比当前的速度快,应该是重新训练了数据模型,感兴趣的可以试用一下。 263 | - 当前项目稳定运行,近段时间也比较忙,暂时不进行合并了。下个大版本重新训练一下,搞个查询过程中自动添加数据并进行训练的功能,感兴趣的师傅可以自己二开一下。 264 | 265 | --- 266 | 267 | 20240408-补充 268 | 269 | 朋友给了一个[快代理](https://www.kuaidaili.com/)的账号,我测试了一下可以正常使用,确实比我免费的好使。 270 | 271 | 如果使用代理池的话需要将`icpApi/ymicp_socks_proxys.py`的内容替换到docker环境中的`ymicp.py`,其中需要修改以下信息 272 | 273 | ``` 274 | # 隧道域名:端口号 275 | self.tunnel = "XXX.XXX.com:15818" 276 | # 用户名和密码方式 277 | self.username = "username" 278 | self.password = "password" 279 | ``` 280 | 281 | 测试成功,可以看到使用了client ip,并且查询成功。但失败率是比较高,重试了6遍才成功 282 | 283 | ``` 284 | root@323f2fa05c9a:/icpApi_20240221_yolo8# python3 ymicp-socks.py 285 | Loading weights into state dict... 286 | model_data/best_epoch_weights.pth model loaded. 
287 | Configurations: 288 | ---------------------------------------------------------------------- 289 | | keys | values| 290 | ---------------------------------------------------------------------- 291 | | model_path | model_data/best_epoch_weights.pth| 292 | | input_shape | [32, 32]| 293 | | letterbox_image | False| 294 | | cuda | False| 295 | ---------------------------------------------------------------------- 296 | sucess! client ip: 117.90.45.134 297 | Loading model_data/best.onnx for ONNX Runtime inference... 298 | 299 | 0: 320x320 5 texts, 32.1ms 300 | Speed: 1.7ms preprocess, 32.1ms inference, 12.2ms postprocess per image at shape (1, 3, 320, 320) 301 | 查询结果: 302 | {'code': 200, 'msg': '操作成功', 'params': {'endRow': 0, 'firstPage': 1, 'hasNextPage': False, 'hasPreviousPage': False, 'isFirstPage': True, 'isLastPage': True, 'lastPage': 1, 'list': [{'contentTypeName': '出版、文化、出版、新闻、宗教、出版、宗教、文化、新闻、新闻', 'domain': 'qq.com', 'domainId': 190000203203, 'leaderName': '', 'limitAccess': '否', 'mainId': 547280, 'mainLicence': '粤B2-20090059', 'natureName': '企业', 'serviceId': 4134047, 'serviceLicence': '粤B2-20090059-5', 'unitName': '深圳市腾讯计算机系统有限公司', 'updateRecordTime': '2022-09-06 15:51:52'}], 'navigatePages': 8, 'navigatepageNums': [1], 'nextPage': 1, 'pageNum': 1, 'pageSize': 10, 'pages': 1, 'prePage': 1, 'size': 1, 'startRow': 0, 'total': 1}, 'success': True} 303 | ``` 304 | 305 | 以上只是使用示例,可以根据自己的情况做出相应的调试。 306 | 307 | --- 308 | 309 | 关于使用代理的问题 310 | 311 | - 添加代理需要在原项目上做修改,测试了几次后,由于之前的问题,遂放弃了使用。 312 | 313 | - 后测试代理节点使用浏览器正常访问到`beian.miit.gov.cn`,仅python无法使用,所以是自身的代码问题,但已经折腾完负载了,就不想再折腾了 314 | 315 | - 我这里提供一份代码文件`icpApi\ymicp-socks.py`及使用的代理池项目 316 | 317 | ``` 318 | 爱加速代理池: https://github.com/s1g0day/Aijiasu_Agent_Pool 319 | 节点转换成爬虫代理池: https://github.com/Rain-kl/glider_guid41asd4asd 320 | 免费代理IP池: https://github.com/pingc0y/go_proxy_pool 321 | ``` 322 | 323 | 如果有解决的师傅,还请给小弟一个`fork`的机会 324 | 325 | --- 326 | 327 | 328 | 329 | 再次感谢`HG-ha`师傅, [ICP_Query 
项目](https://github.com/HG-ha/ICP_Query) 好用!!! 330 | 331 | 如果本项目对你有用,还请star一下。哈哈 332 | 333 | -------------------------------------------------------------------------------- /update.md: -------------------------------------------------------------------------------- 1 | # 版本更新 2 | 3 | - v0.0.7 update 20250208 4 | 5 | ``` 6 | 1、更新本程序日志系统 7 | 2、更新icpApi代码, 添加认证及日志系统 8 | ``` 9 | 10 | - v0.0.6 update 20240808 11 | 12 | ``` 13 | 1、修改输出内容 14 | 2、解决输出文件时内容重复问题 15 | 3、添加程序运行日志 16 | ``` 17 | 18 | - v0.0.5 update 20240621 19 | 20 | ``` 21 | 1、修改sock5方案 22 | 2、修改输出格式 23 | ``` 24 | 25 | - v0.0.4 update 26 | 27 | ``` 28 | 1、修复BUG: 自动翻页后存在数据重复。测试发现为API服务的问题,手动在官网翻页查询也会存在这样的问题,只能一页全部显示。 29 | ``` 30 | 31 | - v0.0.3 update 32 | 33 | ``` 34 | 1、修复BUG: 翻页后无法自动根据公司遍历备案域名 35 | 2、补充日志描述 36 | ``` 37 | 38 | - v0.0.2 update 39 | 40 | ``` 41 | 1、确认代理池代码 42 | ``` 43 | 44 | - v0.0.1 bug修复 45 | 46 | ``` 47 | 1、修改查询公司时遇到的分页问题 48 | ``` 49 | 50 | - 首次完成项目基础功能 51 | 52 | ``` 53 | 1、手动查询备案域名 54 | 2、自动查询 55 | ``` 56 | --------------------------------------------------------------------------------