├── requirements.txt ├── Pic ├── data.png ├── logo.png ├── run.png └── zoomeye.png ├── Spiders ├── captcha.jpg ├── lianzhong_captcha.py ├── zoomeye.py └── zoomeye_captcha.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==2.2.2 2 | requests==2.18.4 3 | PyExecJS==1.5.1 4 | -------------------------------------------------------------------------------- /Pic/data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/data.png -------------------------------------------------------------------------------- /Pic/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/logo.png -------------------------------------------------------------------------------- /Pic/run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/run.png -------------------------------------------------------------------------------- /Pic/zoomeye.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Pic/zoomeye.png -------------------------------------------------------------------------------- /Spiders/captcha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Henryhaohao/ZoomEye_Spider/HEAD/Spiders/captcha.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python实现ZoomEye钟馗之眼的登录+关键字爬取 ![enter image description here](Pic/logo.png) 2 | =========================== 3 | ![](https://img.shields.io/badge/Python-3.6.3-green.svg) 
![](https://img.shields.io/badge/requests-2.18.4-green.svg) ![](https://img.shields.io/badge/PyExecJS-1.5.1-green.svg) ![](https://img.shields.io/badge/matplotlib-2.2.2-green.svg) 4 | ### ZoomEye钟馗之眼 - https://www.zoomeye.org/ 5 | |Author|:sunglasses:Henryhaohao:sunglasses:| 6 | |---|--- 7 | |Email|:hearts:1073064953@qq.com:hearts: 8 | 9 | 10 | **** 11 | ## :dolphin:声明 12 | ### 软件均仅用于学习交流,请勿用于任何商业用途!感谢大家! 13 | ## :dolphin:介绍 14 | ### 该项目为[ZoomEye钟馗之眼](https://www.zoomeye.org/)的模拟登录+关键字爬取 15 | - **反爬手段: 知道创宇平台 - 加速乐Cookie破解(Cloudflare升级版)** 16 | - **ZoomEye介绍:** 17 | > 参考文章: https://blog.csdn.net/Fly_hps/article/details/79406517 18 | - **项目版本:** 19 | > 版本一:手动填写验证码进行登录:Spiders/zoomeye.py
20 | > 版本二:自动识别验证码进行登录:Spiders/zoomeye_captcha.py (我这里接入的是[联众打码](https://www.jsdati.com/)平台) 21 | ## :dolphin:运行环境 22 | **Version: Python3** 23 | ## :dolphin:安装依赖库 24 | ``` 25 | pip3 install -r requirements.txt 26 | ``` 27 | ## :dolphin:**相关截图** 28 | > - **ZoomEye钟馗之眼官网 - https://www.zoomeye.org/**

29 | ![enter image description here](Pic/zoomeye.png) 30 | > - **运行过程**

31 | ![enter image description here](Pic/run.png) 32 | ![enter image description here](Pic/data.png) 33 | ## :dolphin:**总结** 34 | **最后,如果你觉得这个项目不错或者对你有帮助,给个Star呗,也算是对我学习路上的一种鼓励!
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# time: 2018/10/15--14:39
"""Minimal client for the Lianzhong (jsdama) captcha-recognition HTTP API."""
__author__ = 'Henry'


import requests


def main(api_username, api_password, file_name, api_post_url, yzm_min, yzm_max, yzm_type, tools_token):
    """Upload a captcha image to the recognition API and return its JSON reply.

    Args:
        api_username: API account name (required).
        api_password: API account password (required).
        file_name: Path of the captcha image to upload (required).
        api_post_url: Upload endpoint, e.g.
            "http://v1-http-api.jsdama.com/api.php?mod=php&act=upload" (required).
        yzm_min: Minimum captcha length, sent as ``yzm_minlen`` (may be empty).
        yzm_max: Maximum captcha length, sent as ``yzm_maxlen`` (may be empty).
        yzm_type: Captcha type code, sent as ``yzmtype_mark`` (may be empty).
        tools_token: Tool/software token, sent as ``zztool_token`` (may be empty).

    Returns:
        The response body decoded with ``Response.json()``.

    Raises:
        OSError: If ``file_name`` cannot be opened.
        requests.RequestException: If the HTTP request fails.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Accept-Encoding': 'gzip, deflate',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
        # No explicit Content-Type: requests builds the multipart header
        # (including the boundary) itself when ``files=`` is passed.
        'Connection': 'keep-alive',
        # NOTE(review): Host is hard-coded to the default endpoint; it will be
        # wrong if api_post_url points at a different host.
        'Host': 'v1-http-api.jsdama.com',
        'Upgrade-Insecure-Requests': '1',
    }
    data = {
        'user_name': api_username,
        'user_pw': api_password,
        'yzm_minlen': yzm_min,
        'yzm_maxlen': yzm_max,
        'yzmtype_mark': yzm_type,
        'zztool_token': tools_token,
    }
    # Open the image inside a ``with`` block so the handle is closed as soon
    # as the upload finishes; the original left it open.
    with open(file_name, 'rb') as image:
        files = {'upload': (file_name, image, 'image/jpg')}
        # verify=False (kept from the original) disables TLS certificate checks.
        r = requests.post(api_post_url, headers=headers, data=data, files=files, verify=False)
    return r.json()


def download_vcode():
    """Download a sample captcha from 12306 to c:/temp/lianzhong_vcode.png.

    Best-effort helper: network and file errors are printed, not raised.
    """
    url = 'https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.8126458147235742'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0'}
    file_name = 'c:/temp/lianzhong_vcode.png'
    try:
        resp = requests.get(url, headers=headers, verify=False)
        with open(file_name, 'wb') as f:
            f.write(resp.content)
    except (requests.RequestException, OSError) as e:
        # Only the failures this helper can actually hit are swallowed;
        # programming errors still surface.
        print(e)


if __name__ == '__main__':
    # Call download_vcode() first if a sample image is needed.
    main('账号',
         '密码',
         'c:/temp/lianzhong_vcode.png',
         "http://v1-http-api.jsdama.com/api.php?mod=php&act=upload",
         '1',
         '8',
         '1303',
         '')
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# time: 2018/10/7--17:42
"""ZoomEye login (captcha typed by hand) and keyword crawl.

Note from the original author: the site shows 20 results per page and at
most 100 pages, i.e. 2000 rows; the public developer edition only returns
30% of the total results, capped at 10,000 rows.
"""
__author__ = 'Henry'

import re

import execjs
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import requests
from Spiders.lianzhong_captcha import main  # noqa: F401 -- not used in this module; import kept as in the original

_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
               'Chrome/63.0.3239.132 Safari/537.36')


def _clearance_expression(func):
    """Return the JS expression yielding the ``__jsl_clearance`` value, or None.

    ``func`` is the JavaScript text returned by the challenge's ``f()``.
    None means the expected cookie assignment was not found in it.
    """
    if not re.search(r"document.cookie='(.*?)\+';Expires", func):
        return None
    found = re.search(r"'__jsl_clearance=(.*?)\+';Expires", func)
    if found is None:
        return None
    # Re-add the opening quote dropped by the capture group and neutralise
    # ``window``, which does not exist outside a browser.
    code_2 = ("'" + found.group(1)).replace('window', '{}')
    var_decl = re.search(r',\(function\(\){var (.*?)=document', code_2)
    if var_decl:
        arg = var_decl.group(1)
        # Drop the DOM lookup that fills ``arg`` and substitute the literal
        # the original author used in place of it.
        code_2 = re.sub(r',\(function\(\){.*?return function', ',(function(){return function', code_2)
        # re.escape: ``arg`` is arbitrary captured text, not a regex.
        code_2 = re.sub(re.escape(arg) + r'\.charAt', '"www.zoomeye.org/".charAt', code_2)
    return code_2


def _summarize_match(item):
    """Pick the printed fields out of one search hit, tolerating missing keys."""
    ips = item.get('ip') or [None]
    names = ((item.get('geoinfo') or {}).get('country') or {}).get('names') or {}
    return {
        'title': item.get('title'),
        'site': item.get('site'),
        'ip': ips[0],
        'type': item.get('type'),
        'timestamp': item.get('timestamp'),
        'country': names.get('zh-CN'),
    }


def Zoomeye():
    """Log in to ZoomEye and print the first five result pages for ``q=666``.

    Reads the module-level names ``username`` and ``password`` (set by the
    ``__main__`` block), shows the captcha image in a matplotlib window and
    asks for it on stdin. Returns None; every result row is printed.

    Raises:
        RuntimeError: If the login page contains no CSRF token.
        SystemExit: If the anti-bot challenge cannot be decoded.
    """
    req = requests.Session()
    # 1. Fetch the login page to obtain the session cookie and CSRF token.
    url = 'https://sso.telnet404.com/cas/login/'
    headers = {
        'Origin': 'https://sso.telnet404.com',
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        # Author's note: without a Referer the server answers 403.
        'Referer': 'https://sso.telnet404.com/cas/login/',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    token_match = re.search(r"token' value='(.*?)'", html.text)
    if token_match is None:
        raise RuntimeError('CSRF token not found on the login page')
    token = token_match.group(1)
    print(token)
    # 2. Download the captcha, display it and ask the user to type it.
    url = 'https://sso.telnet404.com/captcha/'
    html = req.get(url, headers=headers)
    with open('captcha.jpg', 'wb') as f:
        f.write(html.content)
    img = mpimg.imread('captcha.jpg')
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    captcha = input('请输入验证码:')
    # 3. Submit the credentials together with the captcha.
    url = 'https://sso.telnet404.com/cas/login/'
    data = {
        'csrfmiddlewaretoken': token,
        'email': username,
        'password': password,
        'captcha': captcha,
    }
    html = req.post(url, data=data, headers=headers)
    if 'login' in html.url:
        # Still on the login URL: stop instead of crawling unauthenticated.
        print('登录失败!请填写正确的账号密码验证码!')
        return
    print('登录成功!')
    # First hop: the search page answers with a JavaScript cookie challenge.
    url = 'https://www.zoomeye.org/searchResult?q=666'
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    # NOTE(review): the listing this was recovered from showed
    # ``.replace('', '')``, a no-op that looks like ``<script>`` /
    # ``</script>`` literals lost in extraction; restored here -- confirm
    # against the upstream repository.
    code = (html.text.replace('\x00', '')
            .replace('<script>', '').replace('</script>', '')
            .replace('try{eval', 'try{return'))
    func = execjs.compile(code).call('f')
    code_2 = _clearance_expression(func)
    if code_2 is None:
        # Author's note: requesting too fast triggers a dynamically
        # obfuscated challenge; retrying about an hour later works.
        print('Method 2-动态混淆,暂未解密...sorry~~')
        raise SystemExit
    jsl_clearance = execjs.eval(code_2)
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': url,
        'User-Agent': _USER_AGENT,
    }
    # Order kept from the original: request first, then store the cookie.
    req.get(url, headers=headers, allow_redirects=False)
    req.cookies['__jsl_clearance'] = jsl_clearance
    # Second hop: SSO login for the zoomeye.org service.
    url = 'https://sso.telnet404.com/cas/login?service=https%3A%2F%2Fwww.zoomeye.org%2Flogin'
    headers = {
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/login',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    # Third hop: forward the query string to obtain the Cube-Authorization token.
    url = 'https://www.zoomeye.org/user/login?' + html.url.split('?')[1]
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': html.url,
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    cube_auth = html.json()['token']
    print(req.cookies)
    print(cube_auth)
    # Finally query the search API. The headers do not depend on the page,
    # so they are built once.
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/searchResult?q=666',  # change together with the query
        'Cube-Authorization': cube_auth,  # user authentication
        'User-Agent': _USER_AGENT,
    }
    for page in range(1, 6):
        # q: search term; p: page number
        url = 'https://www.zoomeye.org/api/search?q=666&p={}'.format(page)
        html = req.get(url, headers=headers)
        for item in html.json().get('matches') or []:
            print(_summarize_match(item))


if __name__ == '__main__':
    username = input('请输入您的ZoomEye用户名(邮箱):')
    password = input('请输入您的ZoomEye密码:')
    Zoomeye()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# time: 2018/10/7--17:42
"""ZoomEye login (captcha solved by the Lianzhong service) and keyword crawl.

Note from the original author: the site shows 20 results per page and at
most 100 pages, i.e. 2000 rows; the public developer edition only returns
30% of the total results, capped at 10,000 rows.
"""
__author__ = 'Henry'

import re

import execjs
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import requests
from Spiders.lianzhong_captcha import main

_USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
               'Chrome/63.0.3239.132 Safari/537.36')


def _clearance_expression(func):
    """Return the JS expression yielding the ``__jsl_clearance`` value, or None.

    ``func`` is the JavaScript text returned by the challenge's ``f()``.
    None means the expected cookie assignment was not found in it.
    """
    if not re.search(r"document.cookie='(.*?)\+';Expires", func):
        return None
    found = re.search(r"'__jsl_clearance=(.*?)\+';Expires", func)
    if found is None:
        return None
    # Re-add the opening quote dropped by the capture group and neutralise
    # ``window``, which does not exist outside a browser.
    code_2 = ("'" + found.group(1)).replace('window', '{}')
    var_decl = re.search(r',\(function\(\){var (.*?)=document', code_2)
    if var_decl:
        arg = var_decl.group(1)
        # Drop the DOM lookup that fills ``arg`` and substitute the literal
        # the original author used in place of it.
        code_2 = re.sub(r',\(function\(\){.*?return function', ',(function(){return function', code_2)
        # re.escape: ``arg`` is arbitrary captured text, not a regex.
        code_2 = re.sub(re.escape(arg) + r'\.charAt', '"www.zoomeye.org/".charAt', code_2)
    return code_2


def _summarize_match(item):
    """Pick the printed fields out of one search hit, tolerating missing keys."""
    ips = item.get('ip') or [None]
    names = ((item.get('geoinfo') or {}).get('country') or {}).get('names') or {}
    return {
        'title': item.get('title'),
        'site': item.get('site'),
        'ip': ips[0],
        'type': item.get('type'),
        'timestamp': item.get('timestamp'),
        'country': names.get('zh-CN'),
    }


def Zoomeye():
    """Log in to ZoomEye and print the first five result pages for ``q=666``.

    Reads the module-level names ``username`` and ``password`` (set by the
    ``__main__`` block). The captcha is saved to ``captcha.jpg``, shown in a
    matplotlib window and sent to the Lianzhong API through ``main``.
    Returns None; every result row is printed.

    Raises:
        RuntimeError: If the login page contains no CSRF token.
        SystemExit: If the anti-bot challenge cannot be decoded.
    """
    req = requests.Session()
    # 1. Fetch the login page to obtain the session cookie and CSRF token.
    url = 'https://sso.telnet404.com/cas/login/'
    headers = {
        'Origin': 'https://sso.telnet404.com',
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        # Author's note: without a Referer the server answers 403.
        'Referer': 'https://sso.telnet404.com/cas/login/',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    token_match = re.search(r"token' value='(.*?)'", html.text)
    if token_match is None:
        raise RuntimeError('CSRF token not found on the login page')
    token = token_match.group(1)
    print(token)
    # 2. Download the captcha and have it recognised.
    url = 'https://sso.telnet404.com/captcha/'
    html = req.get(url, headers=headers)
    with open('captcha.jpg', 'wb') as f:
        f.write(html.content)
    img = mpimg.imread('captcha.jpg')
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    print('正在识别验证码...')
    # Type 1001: four-character alphanumeric captcha (author's note).
    result = main('***请填入你的联众账号***', '***请填入你的联众密码***', 'captcha.jpg',
                  'http://v1-http-api.jsdama.com/api.php?mod=php&act=upload', '', '', '1001', '')
    print(result)  # sample reply: {"data":{"val":"3752","id":21708338617},"result":true}
    captcha = ''
    if result.get('result'):
        captcha = (result.get('data') or {}).get('val') or ''
    if not captcha:
        # Do not submit a login that is known to carry an empty captcha.
        print('验证码识别失败!')
        return
    print('识别验证码成功!验证码为:' + captcha)
    # 3. Submit the credentials together with the captcha.
    url = 'https://sso.telnet404.com/cas/login/'
    data = {
        'csrfmiddlewaretoken': token,
        'email': username,
        'password': password,
        'captcha': captcha,
    }
    html = req.post(url, data=data, headers=headers)
    if 'login' in html.url:
        # Still on the login URL: stop instead of crawling unauthenticated.
        print('登录失败!请填写正确的账号密码验证码!')
        return
    print('登录成功!')
    # First hop: the search page answers with a JavaScript cookie challenge.
    url = 'https://www.zoomeye.org/searchResult?q=666'
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    # NOTE(review): the listing this was recovered from showed
    # ``.replace('', '')``, a no-op that looks like ``<script>`` /
    # ``</script>`` literals lost in extraction; restored here -- confirm
    # against the upstream repository.
    code = (html.text.replace('\x00', '')
            .replace('<script>', '').replace('</script>', '')
            .replace('try{eval', 'try{return'))
    func = execjs.compile(code).call('f')
    code_2 = _clearance_expression(func)
    if code_2 is None:
        # Author's note: requesting too fast triggers a dynamically
        # obfuscated challenge; retrying about an hour later works.
        print('Method 2-动态混淆,暂未解密...sorry~~')
        raise SystemExit
    jsl_clearance = execjs.eval(code_2)
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': url,
        'User-Agent': _USER_AGENT,
    }
    # Order kept from the original: request first, then store the cookie.
    req.get(url, headers=headers, allow_redirects=False)
    req.cookies['__jsl_clearance'] = jsl_clearance
    # Second hop: SSO login for the zoomeye.org service.
    url = 'https://sso.telnet404.com/cas/login?service=https%3A%2F%2Fwww.zoomeye.org%2Flogin'
    headers = {
        'Host': 'sso.telnet404.com',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/login',
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    # Third hop: forward the query string to obtain the Cube-Authorization token.
    url = 'https://www.zoomeye.org/user/login?' + html.url.split('?')[1]
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': html.url,
        'User-Agent': _USER_AGENT,
    }
    html = req.get(url, headers=headers)
    cube_auth = html.json()['token']
    print(req.cookies)
    print(cube_auth)
    # Finally query the search API. The headers do not depend on the page,
    # so they are built once.
    headers = {
        'Host': 'www.zoomeye.org',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.zoomeye.org/searchResult?q=666',  # change together with the query
        'Cube-Authorization': cube_auth,  # user authentication
        'User-Agent': _USER_AGENT,
    }
    for page in range(1, 6):
        # q: search term; p: page number
        url = 'https://www.zoomeye.org/api/search?q=666&p={}'.format(page)
        html = req.get(url, headers=headers)
        for item in html.json().get('matches') or []:
            print(_summarize_match(item))


if __name__ == '__main__':
    username = input('请输入您的ZoomEye用户名(邮箱):')
    password = input('请输入您的ZoomEye密码:')
    Zoomeye()