├── README.md
├── domains.txt
├── imgs
│   └── image-20200412151928878.png
└── seo.py

/README.md:
--------------------------------------------------------------------------------
# 简介

Python3编写的一个批量查询SEO的小脚本,没想到我的代码这么烂居然还有朋友 star ,国光今天看了下代码,看不下去了,于是重写了一下,这次加入了进程池,执行速度比以前快很多,而且去掉了花里胡哨的进度条和表格输出,直接用原生的 print 然后手动`ljust(30)`对齐,大道至简!

# 依赖安装

到项目下使用`pip`来安装相关依赖

```bash
pip install requests
```

# 使用方法

`-r`:手动选择包含域名列表的文件

```shell
python3 seo.py -r /Users/sqlsec/Temp/domain.txt
```

![image-20200412151928878](imgs/image-20200412151928878.png)

# 性能对比

没有进程池查询100个域名耗时:`138.7033` 秒

使用进程池查询100个域名耗时:`4.4753` 秒

--------------------------------------------------------------------------------
/domains.txt:
--------------------------------------------------------------------------------
https://www.aizhan.com/
https://www.aizhan.com/
https://www.aizhan.com
www.aizhan.com
https://www.aizhan.com/
https://www.aizhan.com/
https://www.aizhan.com
www.aizhan.com
--------------------------------------------------------------------------------
/imgs/image-20200412151928878.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sqlsec/seo/84fec1b1b0398a41270993e9961e448cdf2bffed/imgs/image-20200412151928878.png
--------------------------------------------------------------------------------
/seo.py:
--------------------------------------------------------------------------------
import re
import sys
import time
import argparse
import requests
from multiprocessing import Pool


def args():
    """Parse command-line arguments and return the domain-list file path.

    Prints the usage text and exits when no ``-r/--read`` path is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--read', dest='read', help='input domains file path')
    parse_args = parser.parse_args()

    # No file supplied: show --help and exit cleanly.
    # sys.exit (not os._exit) so interpreter cleanup still runs.
    if parse_args.read is None:
        parser.print_help()
        sys.exit(0)

    return parse_args.read


def seo(domain_url):
    """Query seo.chinaz.com for the Baidu weight and site title of *domain_url*.

    Prints one aligned result line; runs inside a worker process, so any
    unexpected failure is reported as '-' placeholders instead of raising
    (exceptions in apply_async workers would otherwise vanish silently).
    """
    url = f'http://seo.chinaz.com/?host={domain_url}'
    headers = {
        'Host': 'seo.chinaz.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    r = requests.get(url=url, headers=headers, timeout=6)
    html = r.text

    # Baidu weight appears as an image named ".../baiduapp/<weight>.gif".
    # The dot is escaped so it matches a literal '.', and a missing match
    # degrades to '-' instead of an IndexError inside the worker.
    baidu_match = re.search(r'baiduapp/(.*?)\.gif', html)
    baidu = baidu_match.group(1) if baidu_match else '-'

    # Site title lives inside the element with class="ball"; capture up to
    # the next '<'. (The previous pattern r'class="ball">(.*?)' ended with a
    # lazy group, which always matched the empty string.)
    # NOTE(review): selector assumed from the original regex — confirm against
    # the current seo.chinaz.com markup.
    title_match = re.search(r'class="ball">(.*?)<', html)
    site_name = title_match.group(1) if title_match else '-'

    print(str(domain_url).ljust(30), '\t', baidu, '\t', site_name)


def main():
    """Read the domain list, fan lookups out to a process pool, print timing."""
    start = time.time()
    file_path = args()
    try:
        # Read the domain list; keep the try narrow so only a genuine file
        # problem triggers the "file read" error message below.
        with open(file_path, "r") as f:
            lines = f.read().split("\n")
    except OSError as e:
        print('文件读取异常,请检查文件路径是否正确!')
        print(e)
        return

    print('域名'.ljust(30), '权重\t 站点标题')
    # Process pool of 10 workers for concurrent lookups.
    pool = Pool(10)

    for line in lines:
        # Strip an optional URL scheme so only the bare host is queried.
        # startswith (not substring 'in') so only a leading scheme is removed.
        if line.startswith('http://'):
            line = line[7:]
        elif line.startswith('https://'):
            line = line[8:]
        if line:
            pool.apply_async(seo, (line,))

    pool.close()
    pool.join()

    end = time.time()
    print(f'\n耗时: {end - start:.4f} 秒')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------