├── requirements.txt
├── Pic
├── data.png
├── logo.png
├── run.png
└── zoomeye.png
├── Spiders
├── captcha.jpg
├── lianzhong_captcha.py
├── zoomeye.py
└── zoomeye_captcha.py
└── README.md
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==2.2.2
2 | requests==2.18.4
3 | PyExecJS==1.5.1
4 |
--------------------------------------------------------------------------------
/Pic/data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/data.png
--------------------------------------------------------------------------------
/Pic/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/logo.png
--------------------------------------------------------------------------------
/Pic/run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/run.png
--------------------------------------------------------------------------------
/Pic/zoomeye.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/zoomeye.png
--------------------------------------------------------------------------------
/Spiders/captcha.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Spiders/captcha.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Python实现ZoomEye钟馗之眼的登录+关键字爬取
2 | ===========================
3 |    
4 | ### ZoomEye钟馗之眼 - https://www.zoomeye.org/
5 | |Author|:sunglasses:Henryhaohao:sunglasses:|
6 | |---|---
7 | |Email|:hearts:1073064953@qq.com:hearts:
8 |
9 |
10 | ****
11 | ## :dolphin:声明
12 | ### 软件均仅用于学习交流,请勿用于任何商业用途!感谢大家!
13 | ## :dolphin:介绍
14 | ### 该项目为[ZoomEye钟馗之眼](https://www.zoomeye.org/)的模拟登录+关键字爬取
15 | - **反爬手段: 知道创宇平台 - 加速乐Cookie破解(Cloudflare升级版)**
16 | - **ZoomEye介绍:**
17 | > 参考文章: https://blog.csdn.net/Fly_hps/article/details/79406517
18 | - **项目版本:**
19 | > 版本一:手动填写验证码进行登录:Spiders/zoomeye.py
20 | > 版本二:自动识别验证码进行登录:Spiders/zoomeye_captcha.py (我这里接入的是[联众打码](https://www.jsdati.com/)平台)
21 | ## :dolphin:运行环境
22 | **Version: Python3**
23 | ## :dolphin:安装依赖库
24 | ```
25 | pip3 install -r requirements.txt
26 | ```
27 | ## :dolphin:**相关截图**
28 | > - **ZoomEye钟馗之眼官网 - https://www.zoomeye.org/**
29 | 
30 | > - **运行过程**
31 | 
32 | 
33 | ## :dolphin:**总结**
34 | **最后,如果你觉得这个项目不错或者对你有帮助,给个Star呗,也算是对我学习路上的一种鼓励!
35 | 哈哈哈,感谢大家!笔芯哟~**:cupid::cupid:
36 |
37 |
38 |
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/Spiders/lianzhong_captcha.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # time: 2018/10/15--14:39
4 | __author__ = 'Henry'
5 |
6 |
7 | import requests
8 |
9 |
def main(api_username, api_password, file_name, api_post_url, yzm_min, yzm_max, yzm_type, tools_token):
    """Upload a captcha image to the Lianzhong (jsdama) recognition API.

    Args:
        api_username: API account name (required).
        api_password: API account password (required).
        file_name: Path of the captcha image to be recognised (required).
        api_post_url: URL of the upload endpoint (required), e.g.
            ``http://v1-http-api.jsdama.com/api.php?mod=php&act=upload``.
        yzm_min: Minimum captcha length (may be empty), e.g. ``'1'``.
        yzm_max: Maximum captcha length (may be empty), e.g. ``'8'``.
        yzm_type: Captcha type code (may be empty), e.g. ``'1303'``.
        tools_token: Tool/software token (may be empty).

    Returns:
        The JSON body of the service's reply, decoded by ``Response.json()``.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
        # No explicit Content-Type: requests generates the multipart boundary itself.
        'Connection': 'keep-alive',
        # NOTE(review): Host is fixed while api_post_url is a parameter; a
        # different endpoint host would get a mismatching Host header.
        'Host': 'v1-http-api.jsdama.com',
        'Upgrade-Insecure-Requests': '1',
    }

    data = {
        'user_name': api_username,
        'user_pw': api_password,
        'yzm_minlen': yzm_min,
        'yzm_maxlen': yzm_max,
        'yzmtype_mark': yzm_type,
        'zztool_token': tools_token,
    }

    # Open the image in a context manager so the handle is closed once the
    # upload has finished (the previous version leaked it).
    with open(file_name, 'rb') as image:
        files = {
            'upload': (file_name, image, 'image/jpg'),
        }
        # verify=False disables TLS certificate verification for this request.
        r = requests.post(api_post_url, headers=headers, data=data, files=files, verify=False)
    return r.json()
59 |
60 |
def download_vcode():
    """Download a sample captcha from 12306 to ``c:/temp/lianzhong_vcode.png``.

    Any exception raised while requesting or writing the image is caught and
    printed; the function returns nothing.
    """
    captcha_url = 'https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.8126458147235742'
    request_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'}
    target_path = 'c:/temp/lianzhong_vcode.png'
    try:
        session = requests.session()
        # Certificate verification is switched off for this request.
        response = session.get(captcha_url, headers=request_headers, verify=False)
        with open(target_path, 'wb') as image_file:
            image_file.write(response.content)
    except Exception as error:
        print(error)
72 |
73 |
if __name__ == '__main__':
    # Manual run: submit a locally stored captcha image to the recognition API.
    # The first two arguments ('账号' = account, '密码' = password) appear to be
    # placeholders for real credentials. The value returned by main() is not
    # used here.
    # download_vcode()
    main('账号',
         '密码',
         'c:/temp/lianzhong_vcode.png',
         "http://v1-http-api.jsdama.com/api.php?mod=php&act=upload",
         '1',
         '8',
         '1303',
         '')
84 |
85 | '''
86 | main() 参数介绍
87 | api_username (API账号) --必须提供
88 | api_password (API账号密码) --必须提供
89 | file_name (需要打码的图片路径) --必须提供
90 | api_post_url (API接口地址) --必须提供
91 | yzm_min (验证码最小值) --可空提供
92 | yzm_max (验证码最大值) --可空提供
93 | yzm_type (验证码类型) --可空提供
94 | tools_token (工具或软件token) --可空提供
95 | '''
96 |
--------------------------------------------------------------------------------
/Spiders/zoomeye.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # time: 2018/10/7--17:42
4 | __author__ = 'Henry'
5 |
6 | '''
7 | 项目:ZoomEye钟馗之眼的登录+爬取
8 | 注意:一页20条,最多显示100页,即2000条数据(对外开放的开发者版本,只能获取总结果的 30%,同时涵盖 10,000 结果条数上限。)
9 | '''
10 |
11 | import requests, execjs, re
12 | import matplotlib.image as mpimg
13 | import matplotlib.pyplot as plt
14 | from Spiders.lianzhong_captcha import main
15 |
16 |
def _build_clearance_js(challenge):
    """Return the JS expression that yields ``__jsl_clearance``, or None.

    ``challenge`` is the de-obfuscated script text returned by the first
    stage of the anti-bot challenge. None is returned when the expected
    ``document.cookie='...__jsl_clearance=...+';Expires`` assignment is not
    present in it.
    """
    if not re.search(r"document.cookie='(.*?)\+';Expires", challenge):
        return None
    found = re.search(r"'__jsl_clearance=(.*?)\+';Expires", challenge)
    if not found:
        return None
    # The capture starts after the opening quote of the cookie string, so put
    # a quote back to obtain a complete JS expression; `window` does not exist
    # in the JS runtime used here and is replaced by an empty object.
    expr = ("'" + found.group(1)).replace('window', '{}')
    dom_var = re.search(r',\(function\(\){var (.*?)=document', expr)
    if dom_var:
        # This variant declares a variable assigned from `document` and calls
        # `.charAt` on it; drop the declaration and substitute the site
        # address string for the variable.
        expr = re.sub(r',\(function\(\){.*?return function', ',(function(){return function', expr)
        expr = expr.replace(dom_var.group(1) + '.charAt', '"www.zoomeye.org/".charAt')
    return expr


def Zoomeye():
    """Log in to ZoomEye through the telnet404 SSO and print search results.

    The captcha image is saved to ``captcha.jpg``, displayed with matplotlib
    and typed in by the user. After a successful login the function solves
    the ``__jsl_clearance`` cookie challenge, obtains the ``Cube-Authorization``
    token and prints one dict per match for pages 1-5 of the query ``666``.

    Reads the module-level names ``username`` and ``password``; they must be
    assigned before this function is called.

    Raises:
        SystemExit: when the cookie challenge has an unrecognised layout.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    req = requests.Session()

    # 1. Fetch the login page to obtain the session cookies and the form token.
    login_url = 'https://sso.telnet404.com/cas/login/'
    sso_headers = {
        'Origin': 'https://sso.telnet404.com',
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://sso.telnet404.com/cas/login/',  # without a Referer the server answers 403
        'User-Agent': user_agent,
    }
    html = req.get(login_url, headers=sso_headers)
    token = re.search(r"token' value='(.*?)'", html.text).group(1)
    print(token)

    # 2. Download the captcha, show it and let the user type it in.
    html = req.get('https://sso.telnet404.com/captcha/', headers=sso_headers)
    with open('captcha.jpg', 'wb') as f:
        f.write(html.content)
    img = mpimg.imread('captcha.jpg')
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    captcha = input('请输入验证码:')

    # 3. Submit the credentials together with the captcha.
    data = {
        'csrfmiddlewaretoken': token,
        'email': username,
        'password': password,
        'captcha': captcha,
    }
    html = req.post(login_url, data=data, headers=sso_headers)
    if 'login' in html.url:
        print('登录失败!请填写正确的账号密码验证码!')
        return
    print('登录成功!')

    # First redirect: the search page answers with a JavaScript challenge.
    search_url = 'https://www.zoomeye.org/searchResult?q=666'
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': user_agent,
    }
    html = req.get(search_url, headers=headers)
    # Strip NUL bytes and the surrounding <script> tags so the body is plain
    # JavaScript, and make the script return its payload instead of eval-ing it.
    code = (html.text.replace('\x00', '')
            .replace('<script>', '').replace('</script>', '')
            .replace('try{eval', 'try{return'))
    func = execjs.compile(code).call('f')
    clearance_js = _build_clearance_js(func)
    if clearance_js is None:
        # Requesting too quickly triggers a dynamically obfuscated challenge
        # that is not handled; retrying about an hour later is enough.
        print('Method 2-动态混淆,暂未解密...sorry~~')
        raise SystemExit
    jsl_clearance = execjs.eval(clearance_js)
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': search_url,
        'User-Agent': user_agent,
    }
    req.get(search_url, headers=headers, allow_redirects=False)
    req.cookies['__jsl_clearance'] = jsl_clearance

    # Second redirect: back to the SSO, which redirects with a service query string.
    headers = {
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/login',
        'User-Agent': user_agent,
    }
    sso_resp = req.get('https://sso.telnet404.com/cas/login?service=https%3A%2F%2Fwww.zoomeye.org%2Flogin',
                       headers=headers)

    # Third redirect: exchange that query string for the token used as
    # Cube-Authorization. Assumes the final SSO URL carries a query string.
    url = 'https://www.zoomeye.org/user/login?' + sso_resp.url.split('?')[1]
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': sso_resp.url,
        'User-Agent': user_agent,
    }
    html = req.get(url, headers=headers)
    cube_auth = html.json()['token']
    print(req.cookies)
    print(cube_auth)

    # Finally query the search API and print every match.
    api_headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/searchResult?q=666',  # adjust to the search term
        'Cube-Authorization': cube_auth,  # user authentication
        'User-Agent': user_agent,
    }
    for page in range(1, 6):
        # q: search term; p: page number
        url = 'https://www.zoomeye.org/api/search?q=666&p={}'.format(str(page))
        html = req.get(url, headers=api_headers)
        for item in html.json().get('matches') or []:
            ips = item.get('ip')
            geoinfo = item.get('geoinfo') or {}
            names = (geoinfo.get('country') or {}).get('names') or {}
            data = {
                'title': item.get('title'),
                'site': item.get('site'),
                'ip': ips[0] if ips else None,
                'type': item.get('type'),
                'timestamp': item.get('timestamp'),
                'country': names.get('zh-CN'),
            }
            print(data)
132 |
133 |
if __name__ == '__main__':
    # Zoomeye() reads username and password as module-level globals, so both
    # must be assigned here before it is called.
    username = input('请输入您的ZoomEye用户名(邮箱):')
    password = input('请输入您的ZoomEye密码:')
    Zoomeye()
138 |
--------------------------------------------------------------------------------
/Spiders/zoomeye_captcha.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # time: 2018/10/7--17:42
4 | __author__ = 'Henry'
5 |
6 | '''
7 | 项目:ZoomEye钟馗之眼的登录+爬取
8 | 注意:一页20条,最多显示100页,即2000条数据(对外开放的开发者版本,只能获取总结果的 30%,同时涵盖 10,000 结果条数上限。)
9 | '''
10 |
11 | import requests, execjs, re
12 | import matplotlib.image as mpimg
13 | import matplotlib.pyplot as plt
14 | from Spiders.lianzhong_captcha import main
15 |
16 |
def _build_clearance_js(challenge):
    """Return the JS expression that yields ``__jsl_clearance``, or None.

    ``challenge`` is the de-obfuscated script text returned by the first
    stage of the anti-bot challenge. None is returned when the expected
    ``document.cookie='...__jsl_clearance=...+';Expires`` assignment is not
    present in it.
    """
    if not re.search(r"document.cookie='(.*?)\+';Expires", challenge):
        return None
    found = re.search(r"'__jsl_clearance=(.*?)\+';Expires", challenge)
    if not found:
        return None
    # The capture starts after the opening quote of the cookie string, so put
    # a quote back to obtain a complete JS expression; `window` does not exist
    # in the JS runtime used here and is replaced by an empty object.
    expr = ("'" + found.group(1)).replace('window', '{}')
    dom_var = re.search(r',\(function\(\){var (.*?)=document', expr)
    if dom_var:
        # This variant declares a variable assigned from `document` and calls
        # `.charAt` on it; drop the declaration and substitute the site
        # address string for the variable.
        expr = re.sub(r',\(function\(\){.*?return function', ',(function(){return function', expr)
        expr = expr.replace(dom_var.group(1) + '.charAt', '"www.zoomeye.org/".charAt')
    return expr


def Zoomeye():
    """Log in to ZoomEye with automatic captcha recognition and print results.

    The captcha image is saved to ``captcha.jpg``, displayed with matplotlib
    and sent to the Lianzhong recognition service through ``main``. After a
    successful login the function solves the ``__jsl_clearance`` cookie
    challenge, obtains the ``Cube-Authorization`` token and prints one dict
    per match for pages 1-5 of the query ``666``.

    Reads the module-level names ``username`` and ``password``; they must be
    assigned before this function is called. The Lianzhong account and
    password are the placeholder strings passed to ``main`` below.

    Raises:
        SystemExit: when the cookie challenge has an unrecognised layout.
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    req = requests.Session()

    # 1. Fetch the login page to obtain the session cookies and the form token.
    login_url = 'https://sso.telnet404.com/cas/login/'
    sso_headers = {
        'Origin': 'https://sso.telnet404.com',
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://sso.telnet404.com/cas/login/',  # without a Referer the server answers 403
        'User-Agent': user_agent,
    }
    html = req.get(login_url, headers=sso_headers)
    token = re.search(r"token' value='(.*?)'", html.text).group(1)
    print(token)

    # 2. Download the captcha, display it and have it recognised remotely.
    html = req.get('https://sso.telnet404.com/captcha/', headers=sso_headers)
    with open('captcha.jpg', 'wb') as f:
        f.write(html.content)
    img = mpimg.imread('captcha.jpg')
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    print('正在识别验证码...')
    # Type '1001': four characters, letters and digits mixed.
    result = main('***请填入你的联众账号***', '***请填入你的联众密码***', 'captcha.jpg',
                  'http://v1-http-api.jsdama.com/api.php?mod=php&act=upload', '', '', '1001', '')
    # Example reply: {"data":{"val":"3752","id":21708338617},"result":true}
    print(result)
    captcha = ''
    if result.get('result'):
        captcha = (result.get('data') or {}).get('val') or ''
    if captcha:
        print('识别验证码成功!验证码为:' + captcha)
    else:
        print('验证码识别失败!')

    # 3. Submit the credentials together with the captcha.
    data = {
        'csrfmiddlewaretoken': token,
        'email': username,
        'password': password,
        'captcha': captcha,
    }
    html = req.post(login_url, data=data, headers=sso_headers)
    if 'login' in html.url:
        print('登录失败!请填写正确的账号密码验证码!')
        return
    print('登录成功!')

    # First redirect: the search page answers with a JavaScript challenge.
    search_url = 'https://www.zoomeye.org/searchResult?q=666'
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': user_agent,
    }
    html = req.get(search_url, headers=headers)
    # Strip NUL bytes and the surrounding <script> tags so the body is plain
    # JavaScript, and make the script return its payload instead of eval-ing it.
    code = (html.text.replace('\x00', '')
            .replace('<script>', '').replace('</script>', '')
            .replace('try{eval', 'try{return'))
    func = execjs.compile(code).call('f')
    clearance_js = _build_clearance_js(func)
    if clearance_js is None:
        # Requesting too quickly triggers a dynamically obfuscated challenge
        # that is not handled; retrying about an hour later is enough.
        print('Method 2-动态混淆,暂未解密...sorry~~')
        raise SystemExit
    jsl_clearance = execjs.eval(clearance_js)
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': search_url,
        'User-Agent': user_agent,
    }
    req.get(search_url, headers=headers, allow_redirects=False)
    req.cookies['__jsl_clearance'] = jsl_clearance

    # Second redirect: back to the SSO, which redirects with a service query string.
    headers = {
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/login',
        'User-Agent': user_agent,
    }
    sso_resp = req.get('https://sso.telnet404.com/cas/login?service=https%3A%2F%2Fwww.zoomeye.org%2Flogin',
                       headers=headers)

    # Third redirect: exchange that query string for the token used as
    # Cube-Authorization. Assumes the final SSO URL carries a query string.
    url = 'https://www.zoomeye.org/user/login?' + sso_resp.url.split('?')[1]
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': sso_resp.url,
        'User-Agent': user_agent,
    }
    html = req.get(url, headers=headers)
    cube_auth = html.json()['token']
    print(req.cookies)
    print(cube_auth)

    # Finally query the search API and print every match.
    api_headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/searchResult?q=666',  # adjust to the search term
        'Cube-Authorization': cube_auth,  # user authentication
        'User-Agent': user_agent,
    }
    for page in range(1, 6):
        # q: search term; p: page number
        url = 'https://www.zoomeye.org/api/search?q=666&p={}'.format(str(page))
        html = req.get(url, headers=api_headers)
        for item in html.json().get('matches') or []:
            ips = item.get('ip')
            geoinfo = item.get('geoinfo') or {}
            names = (geoinfo.get('country') or {}).get('names') or {}
            data = {
                'title': item.get('title'),
                'site': item.get('site'),
                'ip': ips[0] if ips else None,
                'type': item.get('type'),
                'timestamp': item.get('timestamp'),
                'country': names.get('zh-CN'),
            }
            print(data)
141 |
142 |
if __name__ == '__main__':
    # Zoomeye() reads username and password as module-level globals, so both
    # must be assigned here before it is called.
    username = input('请输入您的ZoomEye用户名(邮箱):')
    password = input('请输入您的ZoomEye密码:')
    Zoomeye()
147 |
--------------------------------------------------------------------------------