├── README.md
├── domains.txt
├── imgs
│   └── image-20200412151928878.png
└── seo.py

/README.md:
--------------------------------------------------------------------------------
# 简介

Python3编写的一个批量查询SEO的小脚本,没想到我的代码这么烂居然还有朋友 star ,国光今天看了下代码,看不下去了,于是重写了一下,这次加入了进程池,执行速度比以前快很多,而且去掉了花里胡哨的进度条和表格输出,直接用原生的 print 然后手动`ljust(30)`对齐,大道至简!

# 依赖安装

到项目下使用`pip`来安装相关依赖

```bash
pip install requests
```

# 使用方法

`-r`:手动选择包含域名列表的文件

```shell
python3 seo.py -r /Users/sqlsec/Temp/domain.txt
```

![image-20200412151928878](imgs/image-20200412151928878.png)

# 性能对比

没有进程池查询100个域名耗时:`138.7033` 秒

使用进程池查询100个域名耗时:`4.4753` 秒

--------------------------------------------------------------------------------
/domains.txt:
--------------------------------------------------------------------------------
https://www.aizhan.com/
https://www.aizhan.com/
https://www.aizhan.com
www.aizhan.com
https://www.aizhan.com/
https://www.aizhan.com/
https://www.aizhan.com
www.aizhan.com
--------------------------------------------------------------------------------
/imgs/image-20200412151928878.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sqlsec/seo/84fec1b1b0398a41270993e9961e448cdf2bffed/imgs/image-20200412151928878.png
--------------------------------------------------------------------------------
/seo.py:
--------------------------------------------------------------------------------
import re
import sys
import time
import argparse
import requests
from multiprocessing import Pool


def args():
    """Parse command-line arguments and return the domain-list file path.

    Prints the usage text and exits when no ``-r/--read`` path is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--read', dest='read', help='input domains file path')
    parse_args = parser.parse_args()

    # No file supplied: show --help and exit cleanly.
    # sys.exit (not os._exit) so interpreter cleanup still runs.
    if parse_args.read is None:
        parser.print_help()
        sys.exit(0)

    return parse_args.read


def seo(domain_url):
    """Query seo.chinaz.com for the Baidu weight and site title of *domain_url*.

    Prints one aligned result line; runs inside a worker process, so any
    unexpected failure is reported as '-' placeholders instead of raising
    (exceptions in apply_async workers would otherwise vanish silently).
    """
    url = f'http://seo.chinaz.com/?host={domain_url}'
    headers = {
        'Host': 'seo.chinaz.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0',
        'Content-Type': 'application/x-www-form-urlencoded',
    }
    r = requests.get(url=url, headers=headers, timeout=6)
    html = r.text

    # Baidu weight appears as an image named ".../baiduapp/<weight>.gif".
    # The dot is escaped so it matches a literal '.', and a missing match
    # degrades to '-' instead of an IndexError inside the worker.
    baidu_match = re.search(r'baiduapp/(.*?)\.gif', html)
    baidu = baidu_match.group(1) if baidu_match else '-'

    # Site title lives inside the element with class="ball"; capture up to
    # the next '<'. (The previous pattern r'class="ball">(.*?)' ended with a
    # lazy group, which always matched the empty string.)
    # NOTE(review): selector assumed from the original regex — confirm against
    # the current seo.chinaz.com markup.
    title_match = re.search(r'class="ball">(.*?)<', html)
    site_name = title_match.group(1) if title_match else '-'

    print(str(domain_url).ljust(30), '\t', baidu, '\t', site_name)


def main():
    """Read the domain list, fan lookups out to a process pool, print timing."""
    start = time.time()
    file_path = args()
    try:
        # Read the domain list; keep the try narrow so only a genuine file
        # problem triggers the "file read" error message below.
        with open(file_path, "r") as f:
            lines = f.read().split("\n")
    except OSError as e:
        print('文件读取异常,请检查文件路径是否正确!')
        print(e)
        return

    print('域名'.ljust(30), '权重\t 站点标题')
    # Process pool of 10 workers for concurrent lookups.
    pool = Pool(10)

    for line in lines:
        # Strip an optional URL scheme so only the bare host is queried.
        # startswith (not substring 'in') so only a leading scheme is removed.
        if line.startswith('http://'):
            line = line[7:]
        elif line.startswith('https://'):
            line = line[8:]
        if line:
            pool.apply_async(seo, (line,))

    pool.close()
    pool.join()

    end = time.time()
    print(f'\n耗时: {end - start:.4f} 秒')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------