├── .idea
├── .gitignore
├── data_collection.iml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
└── modules.xml
├── Gather.py
├── Readme.md
├── __pycache__
└── config.cpython-37.pyc
├── api
├── __pycache__
│ ├── data_from_fofa.cpython-37.pyc
│ ├── data_from_shodan.cpython-37.pyc
│ └── data_from_zoomeye.cpython-37.pyc
├── data_from_fofa.py
├── data_from_shodan.py
└── data_from_zoomeye.py
├── config.py
├── lib
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-37.pyc
│ ├── cmdline.cpython-37.pyc
│ └── color.cpython-37.pyc
├── cmdline.py
└── color.py
├── output
├── Shodan_20210414210320.txt
├── Shodan_20210414210530.txt
└── Zoomeye_20210414210415.txt
└── requirements.txt
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/data_collection.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Gather.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author = EASY
# Entry point of the Gather tool: prints the ASCII banner defined in
# config and delegates all argument parsing / collector selection to
# the CLI handler in lib.cmdline.
from config import logo
from lib.cmdline import cmdline

if __name__ == '__main__':
    print(logo)   # banner first, so it shows even before argument errors
    cmdline()     # dispatches to the Fofa / Zoomeye / Shodan collectors
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | ## 前言
2 |
3 | Gather是一个数据采集工具,使用python3编写,使用的时候请确保运行环境为python3.6以上。Gather支持Fofa,钟馗之眼(Zoomeye),Shodan的数据采集。
4 |
5 | ## 使用说明
6 |
7 | Gather极力避免各种繁杂的参数,使用 -aF / -aZ / -aS 指定特定采集方式即可。
8 |
9 | ```
10 | git clone https://github.com/EASY233/Gather.git
11 | pip install -r requirements.txt
12 | python3 Gather.py -h
13 | -aF Using fofa to collect data
14 | -aZ Using Zoomeye to collect data
15 | -aS Using Shodan to collect data
16 |
17 | ```
18 |
19 | ## 配置文件
20 |
21 | 在根目录下的``config.py``进行api等配置,注意fofa不是使用api进行调用而是使用爬虫的方式,需要填入fofa_token(没有钱开通会员,没有办法使用api进行测试所以没有写api调用版本~)。
22 |
23 | ```txt
24 | fofa_token = ""
25 | # 一轮抓取结束后,休眠时间,防止被fofa拉黑
26 | time_sleep = 5
27 | time_out = (10, 10)
28 | page_host_limit = 10
29 |
30 | # Zoomeye配置文件
31 | email = ""
32 | password = ""
33 |
34 | # Shodan配置文件
35 | api_key = ""
36 | ```
37 |
38 | ## 运行效果
39 |
40 | 通过fofa进行数据采集,该脚本修改自开源项目:[fofa_spider-1.0.3](https://github.com/FightingForWhat/fofa_spider-1.0.3)
41 |
42 | 
43 |
44 | 通过Zoomeye进行数据采集:
45 |
46 | 
47 |
48 | 通过Shodan进行数据采集:
49 |
50 | 
51 |
52 |
--------------------------------------------------------------------------------
/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EASY233/Gather/b87b21bd42ad85b5648ca7730415bdd572a97528/__pycache__/config.cpython-37.pyc
--------------------------------------------------------------------------------
/api/__pycache__/data_from_fofa.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EASY233/Gather/b87b21bd42ad85b5648ca7730415bdd572a97528/api/__pycache__/data_from_fofa.cpython-37.pyc
--------------------------------------------------------------------------------
/api/__pycache__/data_from_shodan.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EASY233/Gather/b87b21bd42ad85b5648ca7730415bdd572a97528/api/__pycache__/data_from_shodan.cpython-37.pyc
--------------------------------------------------------------------------------
/api/__pycache__/data_from_zoomeye.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EASY233/Gather/b87b21bd42ad85b5648ca7730415bdd572a97528/api/__pycache__/data_from_zoomeye.cpython-37.pyc
--------------------------------------------------------------------------------
/api/data_from_fofa.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # author = EASY
4 | import requests
5 | import base64
6 | import re
7 | import config
8 | import random
9 | from urllib.parse import quote
10 | import datetime
11 | from datetime import datetime
12 | from datetime import timedelta
13 | import time
14 | import os
15 | from config import Save_Path
16 |
# Module-level mutable state shared across one crawl run.
# host_list accumulates the extracted host strings;
# timestamp_list is presumably used for rate-limit bookkeeping —
# TODO confirm: it is not referenced in the visible portion of this file.
host_list = []
timestamp_list = []
19 |
20 | class Fofa:
    def __init__(self):
        """Run the whole Fofa collection pipeline on construction.

        Side effects: prompts the user for a query, performs network
        requests, prints results, and writes them to disk via save().
        The call order below is significant.
        """
        self.check_cookie()                   # abort early if no token is configured
        search_key = self.search_key_input()  # prompt user; returns b64 + URL-quoted query
        searchbs64, headers_use = self.get_page_num(search_key)
        self.fofa_spider(search_key, searchbs64, headers_use)
        self.host_list_print()
        self.save()
28 |
29 | def check_cookie(self):
30 | if config.fofa_token == "":
31 | print("[*] 请配置config fofa_token文件")
32 | exit(0)
33 | print("[*] 检测到fafa_token,请保证token可用")
34 | return
35 |
36 | def headers(self):
37 | user_agent_use = config.user_agent[random.randint(0, len(config.user_agent) - 1)]
38 | headers_use = {
39 | 'User-Agent': user_agent_use,
40 | 'Accept': 'application/json, text/plain, */*',
41 | 'Authorization': config.fofa_token
42 | }
43 | return headers_use
44 |
45 | def search_key_input(self):
46 | search_key = input('[*] 请输入fofa搜索关键字: ')
47 | search_key = quote(str(base64.b64encode(search_key.encode()), encoding='utf-8'))
48 | return search_key
49 |
50 | def get_page_num(self,search_key):
51 | headers_use = self.headers()
52 | searchbs64 = search_key
53 |
54 | print("[*] 爬取页面为:https://fofa.so/result?&qbase64=" + searchbs64)
55 | html = requests.get(url="https://fofa.so/result?&qbase64=" + searchbs64, headers=headers_use).text
56 | pagenum = re.findall('
(\d*)