├── Common
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   ├── GetProxies.cpython-38.pyc
│   │   ├── LogOutput.cpython-38.pyc
│   │   └── CustomException.cpython-38.pyc
│   ├── CustomException.py
│   ├── ProxyPool
│   │   ├── DomesticProxyPool.txt
│   │   └── ForeignProxyPool.txt
│   ├── LogOutput.py
│   ├── Exception结构.md
│   └── GetProxies.py
├── Plugins
│   ├── __init__.py
│   ├── InfoSearch
│   │   ├── __init__.py
│   │   ├── Domain
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── domainapi.cpython-38.pyc
│   │   │   └── domainapi.py
│   │   ├── Subdomain
│   │   │   ├── __init__.py
│   │   │   ├── IsCND
│   │   │   │   ├── __init__.py
│   │   │   │   ├── GeoLite2-ASN.mmdb
│   │   │   │   ├── cdn-domain.conf
│   │   │   │   └── CheckCDN.py
│   │   │   ├── Spider
│   │   │   │   ├── __init__.py
│   │   │   │   ├── Baidu
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── baidu.cpython-38.pyc
│   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   └── baidu.py
│   │   │   │   ├── Bing
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── bing.cpython-38.pyc
│   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   └── bing.py
│   │   │   │   ├── Google
│   │   │   │   │   ├── docs
│   │   │   │   │   │   ├── .gitignore
│   │   │   │   │   │   ├── index.rst
│   │   │   │   │   │   ├── Makefile
│   │   │   │   │   │   ├── make.bat
│   │   │   │   │   │   └── conf.py
│   │   │   │   │   ├── googlesearch
│   │   │   │   │   │   ├── user_agents.txt.gz
│   │   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   │   └── __init__.py
│   │   │   │   │   ├── .google-cookie
│   │   │   │   │   ├── .travis.yml
│   │   │   │   │   ├── demo.py
│   │   │   │   │   └── google.py
│   │   │   │   └── __pycache__
│   │   │   │       └── __init__.cpython-38.pyc
│   │   │   ├── JsFinder
│   │   │   │   ├── __init__.py
│   │   │   │   └── jsfinder.py
│   │   │   ├── ESD
│   │   │   │   ├── ESD-0.0.29.dist-info
│   │   │   │   │   ├── REQUESTED
│   │   │   │   │   ├── INSTALLER
│   │   │   │   │   ├── top_level.txt
│   │   │   │   │   ├── entry_points.txt
│   │   │   │   │   ├── WHEEL
│   │   │   │   │   ├── RECORD
│   │   │   │   │   ├── METADATA
│   │   │   │   │   └── LICENSE
│   │   │   │   └── ESD
│   │   │   │       ├── __pycache__
│   │   │   │       │   └── __init__.cpython-38.pyc
│   │   │   │       ├── key.ini
│   │   │   │       ├── subs-test.esd
│   │   │   │       └── __init__.py
│   │   │   ├── ThirdPartyPlatform
│   │   │   │   ├── __init__.py
│   │   │   │   ├── certificate.py
│   │   │   │   └── netcraft.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── subdomainapi.cpython-38.pyc
│   │   │   └── subdomainapi.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── infosearchapi.cpython-38.pyc
│   │   └── infosearchapi.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   └── SaveToExcel.py
├── Reports
│   ├── googleSpider.txt
│   ├── tjzj.edu.cn-key-links
│   └── tjzj.edu.cn-subdomains
├── config.ini
├── README.md
├── requirements.txt
└── search_all.py
/Common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/IsCND/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/JsFinder/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/REQUESTED:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ThirdPartyPlatform/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/INSTALLER:
--------------------------------------------------------------------------------
1 | pip
2 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/top_level.txt:
--------------------------------------------------------------------------------
1 | ESD
2 |
--------------------------------------------------------------------------------
/Reports/googleSpider.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Reports/googleSpider.txt
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/entry_points.txt:
--------------------------------------------------------------------------------
1 | [console_scripts]
2 | esd = ESD:main
3 |
4 |
--------------------------------------------------------------------------------
/Common/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/__pycache__/GetProxies.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/GetProxies.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/__pycache__/LogOutput.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/LogOutput.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/CustomException.py:
--------------------------------------------------------------------------------
1 | '''Custom exception classes.'''
2 |
3 | class CustomException(Exception):
4 |     pass
5 |
6 | class NetworkException(ValueError):
7 |     pass
--------------------------------------------------------------------------------
/Common/__pycache__/CustomException.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/CustomException.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/IsCND/GeoLite2-ASN.mmdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/IsCND/GeoLite2-ASN.mmdb
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/WHEEL:
--------------------------------------------------------------------------------
1 | Wheel-Version: 1.0
2 | Generator: bdist_wheel (0.36.2)
3 | Root-Is-Purelib: true
4 | Tag: py3-none-any
5 |
6 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__pycache__/infosearchapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/__pycache__/infosearchapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Domain/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__pycache__/domainapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Domain/__pycache__/domainapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__pycache__/subdomainapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/__pycache__/subdomainapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/ESD/ESD/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/bing.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/bing.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/baidu.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/baidu.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/user_agents.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/user_agents.txt.gz
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | [fofa api]
2 | EMAIL = cnno.1@protonmail.com
3 | KEY = 86b1a3ae6a597782a0394041c7d1908c
4 |
5 | [shodan api]
6 | SHODAN_API_KEY =
7 |
8 | [github api]
9 | GITHUB_TOKEN =
10 |
11 | [quake api]
12 | X-QuakeToken =
13 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/key.ini:
--------------------------------------------------------------------------------
1 | [shodan]
2 | shodan_key =
3 |
4 | [fofa]
5 | fofa_key =
6 | fofa_email =
7 |
8 | [zoomeye]
9 | zoomeye_username =
10 | zoomeye_password =
11 |
12 | [censys]
13 | uid =
14 | secret =
15 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/subs-test.esd:
--------------------------------------------------------------------------------
1 | www
2 | h5
3 | wap
4 | _feei
5 | test
6 | dkim._domainkey
7 | _finger._tcp
8 | sso.cn
9 | dmarc.mail2
10 | f2.market
11 | market
12 | mail
13 | bn73
14 | passport
15 | djfowj
16 | video
17 | dkfowejf
18 | cnvhueq
19 | dhfowje
20 | clive
21 | echiu
22 | img
23 | inn
24 | et25
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | After guidance from master r, I have come to feel that reinventing the wheel with this project is pointless, so development has stopped.
2 |
3 | # SearchAll
4 | An internet-facing information gathering toolkit that collects top-level domains, subdomains, C segments, sensitive information leaks, and more.
5 |
6 |
7 | It is still under development and its features are not yet complete.
8 | When I run into design problems, I will post them to my blog: https://urdr-gungnir.github.io/post/SearchAll%E8%AE%BE%E8%AE%A1%E9%97%AE%E9%A2%98.html
9 | 
10 |
--------------------------------------------------------------------------------
/Common/ProxyPool/DomesticProxyPool.txt:
--------------------------------------------------------------------------------
1 | 121.40.185.42:1080
2 | 123.60.93.108:1080
3 | 42.122.65.112:1080
4 | 114.117.206.230:1080
5 | 171.107.184.247:1080
6 | 123.59.211.39:45554
7 | 121.42.173.167:1080
8 | 39.103.199.2:1080
9 | 124.91.134.216:1080
10 | 123.60.224.72:1080
11 | 222.64.9.47:1080
12 | 115.239.213.75:1080
13 | 111.229.21.197:1080
14 | 59.61.161.66:1080
15 | 45.43.54.177:1080
16 | 39.103.199.2:1080
17 | 101.42.94.199:1080
18 | 113.107.166.125:1080
19 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/.google-cookie:
--------------------------------------------------------------------------------
1 | #LWP-Cookies-2.0
2 | Set-Cookie3: 1P_JAR="2021-10-01-12"; path="/"; domain=".google.com"; path_spec; domain_dot; secure; expires="2021-10-31 12:54:53Z"; version=0
3 | Set-Cookie3: NID="511=JTjC9CxK8SYPY3weJg-Aej42eHSD22u3_xTVIRYcfPecmhPzNd8tmOF42QmDwh3uJFXhZjB8utDi4nhu8xjmk8_Z6UDYKiRycBK-3u4H__Fo-U1toiaFgpQwY3D6rgETpjxChNhKfUaZZiaKmOS9zeC3g4llerzYVbVE3JOQILY"; path="/"; domain=".google.com"; path_spec; domain_dot; expires="2022-04-02 12:54:52Z"; HttpOnly=None; version=0
4 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | # Supported CPython versions:
4 | # https://en.wikipedia.org/wiki/CPython#Version_history
5 | python:
6 | - pypy3
7 | - pypy
8 | - 2.7
9 | - 3.6
10 | - 3.5
11 | - 3.4
12 |
13 | # Use container-based infrastructure
14 | sudo: false
15 |
16 | install:
17 | - pip install pycodestyle pyflakes
18 |
19 | script:
20 | # Static analysis
21 | - pyflakes .
22 | - pycodestyle --statistics --count .
23 |
24 | matrix:
25 | fast_finish: true
26 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiodns==3.0.0
2 | aiohttp==3.7.4.post0
3 | async-timeout==3.0.1
4 | attrs==21.2.0
5 | backoff==1.11.1
6 | certifi==2021.5.30
7 | cffi==1.14.6
8 | chardet==4.0.0
9 | charset-normalizer==2.0.6
10 | colorama==0.4.4
11 | colorlog==6.4.1
12 | dnspython==2.1.0
13 | et-xmlfile==1.1.0
14 | idna==3.2
15 | multidict==5.1.0
16 | openpyxl==3.0.9
17 | pycares==4.0.0
18 | pycparser==2.20
19 | PySocks==1.7.1
20 | requests==2.26.0
21 | termcolor==1.1.0
22 | tqdm==4.62.3
23 | typing-extensions==3.10.0.2
24 | urllib3==1.26.7
25 | wincertstore==0.2
26 | yarl==1.6.3
27 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. googlesearch documentation master file, created by
2 | sphinx-quickstart on Tue Nov 6 12:25:12 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to googlesearch's documentation!
7 | ========================================
8 |
9 | Indices and tables
10 | ==================
11 |
12 | * :ref:`genindex`
13 | * :ref:`modindex`
14 | * :ref:`search`
15 |
16 | Reference
17 | =========
18 |
19 | .. automodule:: googlesearch
20 | :members:
21 |
22 |
--------------------------------------------------------------------------------
/Common/ProxyPool/ForeignProxyPool.txt:
--------------------------------------------------------------------------------
1 | 123.59.120.247:1080
2 | 123.59.120.207:1080
3 | 123.59.120.38:45554
4 | 123.59.120.15:45554
5 | 123.59.211.123:45554
6 | 123.59.211.192:45554
7 | 123.59.120.40:45554
8 | 123.59.211.180:45554
9 | 120.244.127.254:1080
10 | 123.59.211.193:45554
11 | 183.45.77.126:1080
12 | 113.16.158.35:1080
13 | 123.59.120.61:45554
14 | 123.59.211.161:45554
15 | 123.59.120.112:45554
16 | 123.59.120.40:45554
17 | 123.59.211.123:45554
18 | 120.244.127.254:1080
19 | 123.59.211.213:45554
20 | 183.45.77.126:1080
21 | 123.59.120.38:45554
22 | 123.59.120.171:45554
23 | 123.59.211.193:45554
24 | 123.59.120.123:45554
25 |
--------------------------------------------------------------------------------
/Reports/tjzj.edu.cn-key-links:
--------------------------------------------------------------------------------
1 | openportal服务后台 http://weixin.tjzj.edu.cn/
2 | 腾讯企业邮箱-登录入口 http://mail.tjzj.edu.cn/
3 | 同济大学浙江学院大门、公寓楼门口出入系统项目招标信息公告 |... https://www.tjzj.edu.cn/info/16183.html
4 | 浙江警官职业学院2014年招录省属监狱系统人民警察学员公告 | 同济大学浙 ... https://www.tjzj.edu.cn/info/13360.html
5 | 同济大学浙江学院安全态势感知系统和EDR招标信息公告 | 同济大学... https://www.tjzj.edu.cn/index.php/info/16194.html
6 | 同济大学浙江学院结构工程综合加载试验系统项目招标信息公告 |... https://www.tjzj.edu.cn/info/16252.html
7 | 喜报|我校代表队摘获市属教育系统红诗会比赛二等奖 | 同济大学浙江学院 https://www.tjzj.edu.cn/info/19831.html
8 | 吉讯大学生职业测评与规划系统 | 同济大学浙江学院 https://www.tjzj.edu.cn/info/13090.html
9 | 同济大学浙江学院银行系统软件测试仿真实验室招标信息公告 | 同济... https://www.tjzj.edu.cn/info/18640.html
10 |
--------------------------------------------------------------------------------
/Plugins/SaveToExcel.py:
--------------------------------------------------------------------------------
1 | from termcolor import cprint  # cprint is used below but was missing from the imports
2 |
3 | class saveToExcel:
4 |     def __init__(self, excelSavePath, excel, title):
5 |         self.excelSavePath = excelSavePath  # path the excel file is saved to
6 |         self.excel = excel  # an openpyxl.Workbook() instance
7 |         self.sheet = self.excel.create_sheet(title=title)  # create the worksheet
8 |         self.Sheet_line = 1  # current row of the sheet
9 |
10 |     # def CreatExcel(self):
11 |
12 |
13 |     def SaveSpider(self, spiderName, key_links=[], subdomains=[]):
14 |
15 |         def SaveKeyLinks():
16 |             cprint("*"*20+"saving spider key-link data"+"*"*20, color="green")
17 |
18 |         def SaveSubdomains():
19 |             cprint("*"*20+"saving spider subdomain data"+"*"*20, color="green")
--------------------------------------------------------------------------------
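
A minimal usage sketch for the saveToExcel class above, assuming it is run from the project root; the file name, sheet title and sample data are illustrative, not taken from the repository:

```python
from openpyxl import Workbook

from Plugins.SaveToExcel import saveToExcel

# saveToExcel expects a ready-made openpyxl Workbook and creates its own sheet in it.
wb = Workbook()
saver = saveToExcel('./Reports/result.xlsx', wb, title='spider')

# SaveSpider currently only defines its inner helpers, so this call is a placeholder.
saver.SaveSpider('bing',
                 key_links=[('后台', 'http://www.example.com/admin')],
                 subdomains=['www.example.com'])
wb.save('./Reports/result.xlsx')
```
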
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = googlesearch
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/demo.py:
--------------------------------------------------------------------------------
1 | from googlesearch import search
2 | import sys
3 | from sys import version_info
4 |
5 | PY2, PY3 = (True, False) if version_info[0] == 2 else (False, True)
6 |
7 | if PY2:
8 | from urlparse import urlparse
9 | else:
10 | from urllib.parse import urlparse
11 |
12 | key = 'site:hbu.edu.cn 后台'# sys.argv[1]
13 |
14 | urls = []
15 |
16 | for each_result in search(key, stop=4):
17 | parseRet = urlparse(each_result)
18 | print(each_result, parseRet)
19 | url = parseRet.scheme + '://' + parseRet.netloc
20 | if key in parseRet.netloc and url not in urls:
21 | print(url, each_result)
22 | urls.append(url)
23 |
24 | print('search {} Done!'.format(key))
25 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=googlesearch
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/Reports/tjzj.edu.cn-subdomains:
--------------------------------------------------------------------------------
1 | sk.tjzj.edu.cn 122.225.92.218
2 | cxcy.tjzj.edu.cn 122.225.92.218
3 | h5.tjzj.edu.cn 122.225.92.218
4 | tcsf.tjzj.edu.cn 122.225.92.218
5 | kjx.tjzj.edu.cn 122.225.92.218
6 | dfl.tjzj.edu.cn 122.225.92.218
7 | jtx.tjzj.edu.cn 122.225.92.218
8 | message.tjzj.edu.cn 122.225.19.18
9 | tmx.tjzj.edu.cn 122.225.92.218
10 | weixin.tjzj.edu.cn 60.190.149.38
11 | oa.tjzj.edu.cn 192.168.100.16
12 | bwc.tjzj.edu.cn 122.225.60.2
13 | tyb.tjzj.edu.cn 122.225.92.218
14 | www.tjzj.edu.cn 122.225.92.218
15 | old.tjzj.edu.cn 122.225.92.218
16 | lxb.tjzj.edu.cn 122.225.92.218
17 | job.tjzj.edu.cn 121.41.227.53
18 | ns1.tjzj.edu.cn 121.192.40.100
19 | depart.tjzj.edu.cn 122.225.92.218
20 | mail.tjzj.edu.cn 157.255.173.155,61.241.49.119
21 | dem.tjzj.edu.cn 122.225.92.218
22 | daka.tjzj.edu.cn 60.190.149.38
23 | idm.tjzj.edu.cn 122.225.19.22
24 | ei.tjzj.edu.cn 122.225.92.218
25 | cdz.tjzj.edu.cn 122.225.92.218
26 | jxx.tjzj.edu.cn 122.225.92.218
27 | jzh.tjzj.edu.cn 122.225.92.218
28 | english.tjzj.edu.cn 122.225.92.218
29 | ns.tjzj.edu.cn 121.192.40.150
30 | sso.tjzj.edu.cn 122.225.19.22
31 | uc.tjzj.edu.cn 122.225.19.19
32 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/RECORD:
--------------------------------------------------------------------------------
1 | ../../Scripts/esd.exe,sha256=MixWaXi8KM3VLhimXNWmr4Y-fN-z09IqoTNbSDozYhY,106314
2 | ESD-0.0.29.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3 | ESD-0.0.29.dist-info/LICENSE,sha256=_j7qbFmeI6AMCMX1yyMgwwrcj4aH21_OybeaZixT_2s,35120
4 | ESD-0.0.29.dist-info/METADATA,sha256=QpsUTykinvy5NmoN1Rfy0xEYUwv2csbmV-C_qiUxGDI,4297
5 | ESD-0.0.29.dist-info/RECORD,,
6 | ESD-0.0.29.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7 | ESD-0.0.29.dist-info/WHEEL,sha256=OqRkF0eY5GHssMorFjlbTIq072vpHpF60fIQA6lS9xA,92
8 | ESD-0.0.29.dist-info/entry_points.txt,sha256=jACTsjKk3gdyl9UunxTSx1fgnzxDnZV3Q0omnUqY8Js,34
9 | ESD-0.0.29.dist-info/top_level.txt,sha256=Kl4w48552EuhLMDNoXD8LUUhnEzA-VOfGQHUdAtp2ig,4
10 | ESD/__init__.py,sha256=imNtmEteBLIjgnc4mH7zvwCmP2UIY0QyMKiQm6oUxhI,40162
11 | ESD/__pycache__/__init__.cpython-38.pyc,,
12 | ESD/cacert.pem,sha256=hmlbG-kiXDz4gtKD8FyUTjqrvB32QopEJCaak-mX3GU,209309
13 | ESD/key.ini,sha256=HLCG1J_EYjuFrrhKMRDN7rViJqJt2ZOH1fdJQiZzQCM,129
14 | ESD/subs-test.esd,sha256=KnXxbaeIBrV5U3DMk_JoCC4wWR6agyhSQjkJDgdsXWE,169
15 | ESD/subs.esd,sha256=jiSYlj4rlIZTOdrWaEr-SEiQvqt5qkJ2tRT9NRR46Mw,806655
16 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/google.py:
--------------------------------------------------------------------------------
1 | from Plugins.InfoSearch.Subdomain.Spider.Google.googlesearch import search
2 | from sys import version_info
3 |
4 | PY2, PY3 = (True, False) if version_info[0] == 2 else (False, True)
5 |
6 | if PY2:
7 | from urlparse import urlparse
8 | else:
9 | from urllib.parse import urlparse
10 |
11 | # Google spider
12 | class GoogleSpider:
13 | def __init__(self, domain, save_fold_path):
14 | self.domain = domain
15 | # site:domain inurl:admin inurl:login inurl:system 后台 系统
16 | self.wds = ['inurl:admin|login|register|upload|editor', '后台|系统']
17 | # print('Please wait a few time ...')
18 | self.STOP = 50 # maximum number of Google results to crawl
19 | self.save_fold_path = save_fold_path # \result\0ca9b508e31f
20 | self.googleSubdomains = []
21 |
22 | def run(self):
23 | for wd in self.wds:
24 | with open('{}/googleSpider.txt'.format(self.save_fold_path), 'at') as f:
25 | key = 'site:*.{} {}'.format(self.domain, wd)
26 | f.writelines('[+] {} :\n'.format(key))
27 | print('\t[+] google search -> [{}]'.format(key))
28 | for each_result in search(key):
29 | f.writelines('{}\n'.format(each_result))
30 | parseRet = urlparse(each_result)
31 | subdomain = parseRet.netloc
32 | if self.domain in subdomain and subdomain not in self.googleSubdomains:
33 | self.googleSubdomains.append(subdomain)
34 |
35 | return self.googleSubdomains
36 |
--------------------------------------------------------------------------------
/Common/LogOutput.py:
--------------------------------------------------------------------------------
1 | '''
2 | log output
3 |
4 | a = LogOutput()
5 | Logger_object = a.SetModuleName("Module_name")
6 | Logger_object.warning("log_info") / Logger_object.error("log_info") / Logger_object.info("log_info")
7 | '''
8 |
9 | import colorlog
10 | import logging
11 |
12 |
13 | class LogOutput():
14 | _instance = None
15 | def __new__(cls, *args, **kwargs):
16 | if cls._instance is None:
17 | cls._instance = object.__new__(cls)  # object.__new__() must not receive extra arguments
18 | return cls._instance
19 | def __init__(self):
20 | self.mycolorlog = colorlog
21 | self.handler = self.mycolorlog.StreamHandler()
22 | formatter = self.mycolorlog.ColoredFormatter(
23 | '%(log_color)s[+] %(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s',
24 | datefmt=None,
25 | reset=True,
26 | log_colors={
27 | 'DEBUG': 'cyan',
28 | 'INFO': 'green',
29 | 'WARNING': 'yellow',
30 | 'ERROR': 'red',
31 | 'CRITICAL': 'white,bg_red',
32 | 'Module': 'purple'
33 | },
34 | secondary_log_colors={},
35 | style='%'
36 | )
37 | self.handler.setFormatter(formatter)
38 |
39 |
40 | def SetModuleName(self, module_name=''):
41 | if module_name == '':
42 | self.logger = self.mycolorlog.getLogger('Search_all')
43 | else:
44 | self.logger = self.mycolorlog.getLogger('Search_all {}'.format(module_name))
45 | self.logger.addHandler(self.handler)
46 | self.logger.setLevel(self.mycolorlog.INFO)
47 | return self.logger
--------------------------------------------------------------------------------
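
A minimal usage sketch of LogOutput, following the docstring at the top of the file; the module name and messages are illustrative:

```python
from Common.LogOutput import LogOutput

# LogOutput is a singleton, so every module shares the same colored stream handler.
logger = LogOutput().SetModuleName("Demo")
logger.info("information gathering started")
logger.warning("proxy pool is empty")
logger.error("request failed")
```
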
/Plugins/InfoSearch/infosearchapi.py:
--------------------------------------------------------------------------------
1 | '''
2 | InfoSearchApi
3 |
4 | :return
5 | type : json
6 | '''
7 | import random
8 |
9 | from Common.LogOutput import LogOutput
10 | logger_object = LogOutput()
11 | logger = logger_object.SetModuleName("InfoSearch")
12 | import sys
13 |
14 | class InfoSearchApi():
15 | def __init__(self):
16 | pass
17 | def GetSubdomain_ips(self, domain, proxies=None):
18 | if domain:
19 | from Plugins.InfoSearch.Subdomain.subdomainapi import SubdomainApi
20 | subdomainobject = SubdomainApi(proxies)
21 | return subdomainobject.Run(domain)
22 | else:
23 | logger.error("Need a target domain!")
24 | sys.exit()
25 |
26 | def GetDomains(self, domain):
27 | if domain:
28 | from Plugins.InfoSearch.Domain.domainapi import DomainApi
29 | domainobject = DomainApi()
30 | return domainobject.run_domain(domain)
31 | else:
32 | logger.error("Need a target domain!")
33 | sys.exit()
34 |
35 | def get_proxy(self):
36 | with open("./Common/ProxyPool/DomesticProxyPool.txt", "r") as f:
37 | proxies = [proxy.strip() for proxy in f.readlines()]
38 | # current_path = os.path.dirname(__file__)
39 | # f = open(current_path +"/", "r")
40 | # proxies = f.readlines()
41 | return proxies
42 |
43 | def Run(self, domain):
44 | # proxies = self.get_proxy()
45 | # proxy = random.choice(proxies)
46 | Domains, companyname = self.GetDomains(domain)
47 |
48 | for domain in Domains:
49 | subdomain_ips, key_links = self.GetSubdomain_ips(domain[1])  # domain tuples are (companyName, host, time); the host is at index 1
50 |
51 |
--------------------------------------------------------------------------------
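
A sketch of how InfoSearchApi is driven (this is essentially what search_all.py does for its -i option); it assumes the process is started from the project root so the relative config and proxy-pool paths resolve:

```python
from Plugins.InfoSearch.infosearchapi import InfoSearchApi

api = InfoSearchApi()
# Resolves the ICP-registered top-level domains first, then enumerates their subdomains.
api.Run('tjzj.edu.cn')
```
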
/Plugins/InfoSearch/Subdomain/ThirdPartyPlatform/certificate.py:
--------------------------------------------------------------------------------
1 | from Common.LogOutput import LogOutput
2 | logger_object = LogOutput()
3 | logger = logger_object.SetModuleName("certificate")
4 | import sys
5 |
6 | import requests
7 | import re
8 |
9 | # crt.sh
10 | class Certificate():
11 | def __init__(self):
12 | self.header = {
13 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
14 | }
15 | self.subdomains = []
16 |
17 | # def Get_subdomain(self):
18 |
19 | def run(self, domain):
20 | try:
21 | res = requests.get("https://crt.sh/?q={}".format(domain), headers=self.header)
22 | before_subdomains = re.findall(r"
--------------------------------------------------------------------------------
/search_all.py:
--------------------------------------------------------------------------------
3.2 to run search_all.')
11 | sys.exit()
12 |
13 |
14 |
15 | def InfoSearch(domain):
16 | from Plugins.InfoSearch.infosearchapi import InfoSearchApi
17 | infosearchapi = InfoSearchApi()
18 | infosearchapi.Run(domain)
19 |
20 | def GetPorxies():
21 | from Common import GetProxies
22 | GetProxies.run_getSocksProxy()
23 |
24 |
25 | def Banner():
26 | banner = '''
27 | ________ _______ ________ ________ ________ ___ ___ ________ ___ ___
28 | |\ ____\|\ ___ \ |\ __ \|\ __ \|\ ____\|\ \|\ \|\ __ \|\ \ |\ \
29 | \ \ \___|\ \ __/|\ \ \|\ \ \ \|\ \ \ \___|\ \ \\\\\ \ \ \|\ \ \ \ \ \ \
30 | \ \_____ \ \ \_|/_\ \ __ \ \ _ _\ \ \ \ \ __ \ \ __ \ \ \ \ \ \
31 | \|____|\ \ \ \_|\ \ \ \ \ \ \ \\\\ \\\\ \ \____\ \ \ \ \ \ \ \ \ \ \____\ \ \____
32 | ____\_\ \ \_______\ \__\ \__\ \__\\\\ _\\\\ \_______\ \__\ \__\ \__\ \__\ \_______\ \_______\\
33 | |\_________\|_______|\|__|\|__|\|__|\|__|\|_______|\|__|\|__|\|__|\|__|\|_______|\|_______|
34 | \|_________| author:Gungnir
35 | '''
36 | print('\033[35m' + banner + '\033[0m')
37 |
38 |
39 | def Init_set():
40 | Banner()
41 |
42 | global domain, WhetherRunInfoSearch, WhetherGetProxies
43 |
44 | import argparse
45 |
46 | parser = argparse.ArgumentParser(description='''
47 | (¬︿̫̿¬☆) Hmph, darn! You found me.
48 | (ˉ▽ ̄~) Well, since you found me, here you go!
49 | ''')
50 | parser.add_argument("-d", "--domain", help="Need a target domain", dest="domain")
51 | parser.add_argument("-i", "--InfoSearch", help="Conduct information collection", dest="WhetherRunInfoSearch", action="store_true")
52 | parser.add_argument("-p", "--Proxy", help="Get proxies", dest="WhetherGetProxies", action="store_true")
53 |
54 | args = parser.parse_args()
55 | options = vars(args)
56 |
57 |
58 | domain, WhetherRunInfoSearch, WhetherGetProxies = options['domain'], options['WhetherRunInfoSearch'], options['WhetherGetProxies']
59 |
60 | # GetProxies
61 | if(WhetherGetProxies):
62 | GetPorxies()
63 |
64 |
65 | # check whether the user wants to run information gathering
66 | if(WhetherRunInfoSearch):
67 | # GetSubdomains(domain)
68 | InfoSearch(domain)
69 | # else:
70 | # logger.error('At least one run command parameter is required, please use the --help or -h command for details')
71 | # sys.exit()
72 |
73 | if __name__ == '__main__':
74 |
75 |
76 |
77 | '''Initialize arguments'''
78 | Init_set()
79 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/METADATA:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: ESD
3 | Version: 0.0.29
4 | Summary: Enumeration Sub Domains(枚举子域名)
5 | Home-page: https://github.com/FeeiCN/ESD
6 | Author: Feei
7 | Author-email: feei@feei.cn
8 | License: UNKNOWN
9 | Platform: UNKNOWN
10 | Classifier: Topic :: Security
11 | Classifier: Programming Language :: Python :: 3
12 | Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
13 | Description-Content-Type: text/markdown
14 | Requires-Dist: colorlog
15 | Requires-Dist: aiodns
16 | Requires-Dist: aiohttp
17 | Requires-Dist: async-timeout
18 | Requires-Dist: requests
19 | Requires-Dist: backoff
20 | Requires-Dist: dnspython
21 | Requires-Dist: pysocks
22 | Requires-Dist: tqdm
23 | Requires-Dist: colorama
24 |
25 | # ESD(Enumeration Sub Domain)
26 |
27 | [](https://pypi.org/project/ESD/)
28 | 
29 | 
30 |
31 | [](https://asciinema.org/a/15WhUe40eEhSbwAXZdf2RQdq9)
32 |
33 | ## Advantages
34 | #### Wildcard (pan-DNS) domain support
35 | > Enumerates wildcard-resolved domains using `RSC` (Response Similarity Comparison); this is relatively slow because it depends on network quality, site bandwidth and so on.
36 |
37 | Using `aioHTTP`, a response is fetched for a subdomain that is known not to exist and compared for similarity with the responses of dictionary subdomains.
38 | If the similarity exceeds the threshold it is considered the same (wildcard) page; otherwise the subdomain is treated as usable, and the final subdomains are checked with another round of response-similarity comparison. (A minimal standalone sketch of this idea follows this file.)
39 |
40 | #### Faster speed
41 | > Enumerates domains with `AsyncIO` coroutines (scan speed fluctuates slightly with network and DNS server conditions, generally finishing within 250 seconds).
42 |
43 | `AsyncIO` + `aioDNS` is more than 50% faster than traditional multiprocess/multithread/gevent approaches.
44 | Scanning `qq.com` with `620328` dictionary entries found `3421` domains in about `15` minutes.
45 |
46 | Update, September 2021: testing showed that several DNS servers now rate-limit requests; under high concurrency, large numbers of connection timeouts and exceptions greatly increase missed results. This is currently handled by restricting the DNS servers used and the concurrency level, so it is better not to chase raw speed and instead schedule scans at more reasonable times.
47 |
48 | #### A more complete dictionary
49 | > Merges dictionaries from several sources into a deduplicated list of 620328 subdomain entries
50 |
51 | - General-purpose dictionaries
52 | - single letter, single letter + single digit, double letters, double letters + single digit, double letters + double digits, three letters, four letters
53 | - single digit, double digits, triple digits
54 | - Most-used subdomains published by DNS providers
55 | - DNSPod: dnspod-top2000-sub-domains.txt
56 | - Dictionaries from other subdomain brute-force tools
57 | - subbrute: names_small.txt
58 | - subDomainsBrute: subnames_full.txt
59 |
60 | #### More collection channels
61 | - [X] Subdomains leaked via the DNSPod API
62 | - [X] Subdomains found in page response bodies
63 | - [X] Subdomains seen during redirects
64 | - [X] Subdomains from HTTPS certificate transparency logs
65 | - [X] Subdomains from DNS zone transfers
66 |
67 | #### DNS servers
68 | - Works around inconsistent network-egress decisions across DNS providers
69 | - Works around inconsistent cache lifetimes across DNS providers
70 | - Handles randomized DNS responses, e.g. fliggy.com, plu.cn
71 | - Automatically drops unusable DNS servers based on network conditions to improve the enumeration success rate
72 |
73 | ## Usage
74 | Verified only with Python 3 on macOS and Linux
75 | ```bash
76 | # install
77 | pip install esd
78 |
79 | # upgrade
80 | pip install esd --upgrade
81 | ```
82 | **CLI usage**
83 | ```bash
84 | # scan a single domain
85 | esd -d qq.com
86 |
87 | # scan a single domain in debug mode
88 | esd=debug esd -d qq.com
89 |
90 | # scan multiple domains (comma separated)
91 | esd --domain qq.com,tencent.com
92 |
93 | # scan a single domain and filter one specific string out of subdomain responses
94 | esd --domain mogujie.com --filter 搜本店
95 |
96 | # scan a single domain and filter several specific strings out of subdomain responses
97 | esd --domain mogujie.com --filter 搜本店,收藏店铺
98 |
99 | # scan domains from a file (one domain per line)
100 | esd --file targets.txt
101 |
102 | # skip the response-similarity comparison (this option filters out all wildcard-resolved domains)
103 | esd --domain qq.com --skip-rsc
104 |
105 | # split the dictionary evenly to speed up brute forcing
106 | esd --domain qq.com --split 1/4
107 |
108 | # collect subdomains via DNS zone-transfer vulnerabilities
109 | esd --domain qq.com --dns-transfer
110 |
111 | # collect subdomains via HTTPS certificate transparency
112 | esd --domain qq.com --ca-info
113 |
114 | ```
115 |
116 | **Programmatic use**
117 | ```python
118 | from ESD import EnumSubDomain
119 | domains = EnumSubDomain('feei.cn').run()
120 | ```
121 |
122 | ## Roadmap
123 | - Improve scan speed
124 | - Support third-level subdomains and more combinations
125 |
126 | ## Documentation
127 | - https://github.com/FeeiCN/ESD/wiki
128 |
129 |
130 |
--------------------------------------------------------------------------------
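
The METADATA above describes ESD's RSC (response similarity comparison) approach to wildcard-resolved domains. Below is a minimal standalone sketch of that idea, not ESD's actual implementation: fetch the page served for a subdomain that almost certainly does not exist, then compare candidate pages against it with difflib.

```python
import difflib
import random
import string

import requests


def is_probably_wildcard_page(domain, candidate, threshold=0.8):
    """True if candidate.<domain> serves (nearly) the same page as a random, non-existent name."""
    junk = ''.join(random.choices(string.ascii_lowercase, k=12))
    baseline = requests.get('http://{}.{}'.format(junk, domain), timeout=10).text
    page = requests.get('http://{}.{}'.format(candidate, domain), timeout=10).text
    # SequenceMatcher.ratio() is 1.0 for identical pages; above the threshold we
    # assume the candidate only hit the wildcard page.
    return difflib.SequenceMatcher(None, baseline, page).ratio() >= threshold
```
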
/Plugins/InfoSearch/Subdomain/IsCND/cdn-domain.conf:
--------------------------------------------------------------------------------
1 | # ChinaCache (蓝汛)
2 | gslbsvc.net.cn
3 | chinacache.com.cn
4 | ccgslb.net
5 | gslbsvc.com.cn
6 | cdnsvc.cn
7 | ccgslb.cn
8 | cdn2cdn.net
9 | blueit.org.cn
10 | cc-1.com
11 | cdnsvc.net
12 | ccgslb.com.cn
13 | lxsvc.net
14 | lxsvc.cn
15 | chinacache.org
16 | gslbsvc.com
17 | ccgslb.com
18 | gslbsvc.cn
19 | chinacache.com
20 | igslb.net
21 | chinacache.net
22 | cdnsvc.net.cn
23 | ccgslb.net.cn
24 | hd-cdn.com
25 | cdnsvc.com.cn
26 | cdnsvc.com
27 | speedupchina.net
28 | b2r.com.cn
29 | blueit.com
30 | lxsvc.cn
31 | gslbsvc.net
32 | speedupchina.com
33 | cc-cps.com
34 | cc-cps.com.cn
35 | cc-cps.net
36 | cc-cps.mobi
37 | cc-cps.cn
38 | ccbench.com
39 | webluker.com
40 | ccindex.cn
41 | ccindex.com.cn
42 | ccmplus.com.cn
43 | ccmplus.net
44 | ccmplus.cn
45 |
46 | #Webluker
47 | xgslb.net
48 |
49 | # Wangsu / ChinaNetCenter (网宿)
50 | wscdns.com
51 | ourglb0.com
52 | wsngb.com
53 | lxdns.com
54 | lxdns.net
55 | 51cdn.com
56 | chinanetcenter.com
57 | netcenter.com.cn
58 | wangsu.com
59 | ourwebat.com
60 | ourwebcdn.com
61 |
62 | # Fastweb (快网)
63 | fastweb.com.cn
64 | fwdns.net
65 | hadns.net
66 | cachecn.net
67 | sz-dns.net
68 | cachecn.com
69 | cloudcdn.cn
70 | 1test.cn
71 | 5test.cn
72 | fsspace.com
73 | fsspace.com.cn
74 | fsspace.cn
75 | cloudcdn.net
76 | fastwebcdn.com
77 | hacdn.com
78 | fwcdn.com
79 | fwcdn.net
80 | hacdn.net
81 | cloudglb.com
82 | cloudxns.net
83 | cloudglb.net
84 | cloudxns.com
85 | cloudtcp.net
86 | myxns.cn
87 | newdefend.cn
88 | myxns.net.cn
89 | myxns.com.cn
90 | myxns.org
91 | newdefend.net
92 | newdefend.org
93 | newdefend.net.cn
94 | newdefend.com.cn
95 | newdefend.org.cn
96 | newdefend.com
97 | ffdns.net
98 | fwmob.com
99 | tlgslb.com
100 | fastcdn.com
101 |
102 | # Alibaba Cloud CDN (阿里云CDN)
103 | kunlunea.com
104 | kunlunso.com
105 | kunlunwe.com
106 | kunlunno.com
107 | kunlunaq.com
108 | kunlunpi.com
109 | kunlunra.com
110 | kunlungr.com
111 | kunlunhuf.com
112 | kunlunsl.com
113 | kunlunar.com
114 | kunlunta.com
115 | kunlungem.com
116 | kunluncan.com
117 | kunlunle.com
118 | kunlunvi.com
119 | kunlunli.com
120 | kunlunsc.com
121 | kunlunsa.com
122 | kunlunca.com
123 | alikunlun.net
124 | alikunlun.com
125 |
126 | # Tencent CDN (腾讯CDN)
127 | qcloud.com
128 | myqcloud.com
129 | tcdn.qq.com
130 | cdntip.com
131 |
132 | # Baidu Cloud CDN (百度云CDN)
133 | bdydns.net
134 | bcedns.net
135 | bcedns.com
136 | bcedns.cn
137 | bdydns.com
138 | baiduyundns.net
139 | bdydns.cn
140 | baiduyundns.com
141 | baiduyundns.cn
142 |
143 | # Baidu Yunjiasu (百度云加速)
144 | yunjiasu-cdn.net
145 |
146 | # Qiniu (七牛)
147 | qiniudn.com
148 | qbox.me
149 | clouddn.com
150 | qiniudns.com
151 |
152 | # UPYUN (又拍云)
153 | aicdn.com
154 |
155 | # 360 Website Guard (360网站卫士)
156 | dnspao.com
157 | 360wzb.cn
158 |
159 | # CDNetworks (同兴万点CDN)
160 | cdngc.net
161 | cdnetworks.net
162 | gccdn.net
163 |
164 | # CDN Union (CDN联盟)
165 | cdnudns.com
166 |
167 | # BaishanCloud (白山云)
168 | qingcdn.com
169 |
170 | #CloudFlare
171 | cdn.cloudflare.net
172 | cloudflare
173 |
174 | # Unknown (未知)
175 | hdslb.com
176 | hdslb.net
177 | tbcache.com
178 | 21okglb.cn
179 | 21vianet.com.cn
180 | 21vokglb.cn
181 | 360wzb.com
182 | acadn.com
183 | akadns.net
184 | akamai-staging.net
185 | akamai.com
186 | akamai.net
187 | akamaitech.net
188 | akamaized.net
189 | alicloudlayer.com
190 | aliyun-inc.com
191 | alicloudsec.com
192 | aliyuncs.com
193 | amazonaws.com
194 | aodianyun.com
195 | aqb.so
196 | awsdns
197 | azureedge.net
198 | bitgravity.com
199 | cachefly.net
200 | chinaidns.net
201 | cloudfront.net
202 | dnion.com
203 | edgesuite.net
204 | ewcache.com
205 | fastcache.com
206 | fastly.net
207 | footprint.net
208 | fpbns.net
209 | hichina.com
210 | hichina.net
211 | incapdns.net
212 | jiashule.com
213 | okglb.com
214 | txnetworks.cn
215 | ucloud.cn
216 | unicache.com
217 | verygslb.com
218 | vo.llnwd.net
219 | cloudfront
220 | edgekey
221 | fastly
222 | akamai
223 | edgecast
224 | cachefly
225 | fpbns
226 | footprint
227 | llnwd
228 | netdna
229 | bitgravity
230 | azureedge
231 | telefonica
232 | dnsv1
233 | ngenix
234 | incapdns
235 | clients.turbobytes.net
236 | akamaiedge.net
237 | akamaitechnologies.com
238 | gslb.tbcache.com
239 | att-dsa.net
240 | bluehatnetwork.com
241 | c3cache.net
242 | cncssr.chinacache.net
243 | cloudflare.com
244 | fastlylb.net
245 | googlesyndication.
246 | googleusercontent.com
247 | l.doubleclick.net
248 | inscname.net
249 | insnw.net
250 | llnwd.net
251 | lldns.net
252 | netdna-ssl.com
253 | netdna.com
254 | stackpathdns.com
255 | instacontent.net
256 | mirror-image.net
257 | cap-mii.net
258 | swiftserve.com
259 | gslb.taobao.com
260 | vo.msecnd.net
261 | ay1.b.yahoo.com
262 | zenedge.net
--------------------------------------------------------------------------------
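
cdn-domain.conf above is a list of CDN provider domain suffixes. CheckCDN.py itself is not part of this dump, so the following is only a hypothetical sketch of how such a suffix list can be used: resolve a hostname's CNAME with dnspython (pinned in requirements.txt) and flag the host as CDN-backed when the CNAME target matches any suffix.

```python
import dns.resolver  # dnspython


def load_cdn_suffixes(path='Plugins/InfoSearch/Subdomain/IsCND/cdn-domain.conf'):
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip() and not line.startswith('#')]


def is_cdn(hostname, suffixes):
    try:
        answers = dns.resolver.resolve(hostname, 'CNAME')
    except Exception:
        return False  # no CNAME record, or resolution failed
    for rdata in answers:
        target = rdata.target.to_text().rstrip('.')
        if any(suffix in target for suffix in suffixes):
            return True
    return False


# Example: is_cdn('www.qq.com', load_cdn_suffixes())
```
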
/Common/GetProxies.py:
--------------------------------------------------------------------------------
1 | from Common.LogOutput import LogOutput
2 | logger_object = LogOutput()
3 | logger = logger_object.SetModuleName("GetProxies")
4 |
5 | import sys
6 |
7 | import requests
8 | import base64
9 | import json
10 | import configparser
11 | from threading import Thread
12 | from queue import Queue
13 | import random
14 | import urllib3
15 | import datetime
16 |
17 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
18 |
19 | cf = configparser.ConfigParser()
20 | cf.read("./config.ini")
21 | secs = cf.sections()
22 | email = cf.get('fofa api', 'EMAIL')
23 | key = cf.get('fofa api', 'KEY')
24 |
25 | headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36', 'Connection': 'close'}
26 |
27 | size = 10000
28 | page = 1
29 | today = datetime.date.today()
30 | oneday = datetime.timedelta(days=1)
31 | yesterday = today - oneday
32 |
33 |
34 | # check each proxy by requesting Baidu and Google
35 |
36 | def curlWeb(socks5_proxys_queue, socksProxysDict):
37 | while not socks5_proxys_queue.empty():
38 | proxy = socks5_proxys_queue.get()
39 | requests_proxies = {"http": "socks5://{}".format(proxy), "https": "socks5://{}".format(proxy)}
40 | baidu_url = "https://www.baidu.com"
41 | google_url = "https://www.google.com"
42 |
43 | try:
44 | res2 = requests.get(url=google_url, headers=headers, timeout=10, verify=False, proxies=requests_proxies)
45 | if res2.status_code == 200:
46 | logger.info("{} reached Google successfully [{}]".format(proxy, res2.status_code))
47 | socksProxysDict["google"].append(proxy)
48 | continue
49 | except Exception as e:
50 | pass
51 |
52 | try:
53 | res = requests.get(url=baidu_url, headers=headers, timeout=10, verify=False, proxies=requests_proxies)
54 | if res.status_code == 200:
55 | logger.info("{} reached Baidu successfully [{}]".format(proxy, res.status_code))
56 | socksProxysDict["baidu"].append(proxy)
57 | except Exception as e:
58 | pass
59 |
60 |
61 | def query_socks5(yesterday):
62 | query_str = r'protocol=="socks5" && "Version:5 Method:No Authentication(0x00)" && after="{}" && country="CN"'.format(yesterday)
63 | qbase64 = str(base64.b64encode(query_str.encode(encoding='utf-8')), 'utf-8')
64 | url = r'https://fofa.so/api/v1/search/all?email={}&key={}&qbase64={}&size={}&page={}&fields=host,title,ip,domain,port,country,city,server,protocol'.format(email, key, qbase64, size, page)
65 | print(url)
66 | socks5_proxys = []
67 | try:
68 | ret = json.loads(requests.get(url=url, headers=headers, timeout=10, verify=False).text)
69 | fofa_Results = ret['results']
70 | for result in fofa_Results:
71 | host, title, ip, domain, port, country, city, server, protocol = result
72 | proxy = ip + ":" + port
73 | socks5_proxys.append(proxy)
74 | except Exception as e:
75 | logger.error('fofa inquire {} : {}'.format(query_str, e.args))
76 | return socks5_proxys
77 |
78 | def SaveToProxyPool(baidu_proxies, google_proxies):
79 | try:
80 | with open("./Common/ProxyPool/DomesticProxyPool.txt", "a") as f:
81 | for baidu_proxy in baidu_proxies:
82 | f.write(baidu_proxy+'\n')
83 | logger.info("Domestic proxies are stored in Common/ProxyPool/DomesticProxyPool.txt")
84 | with open("./Common/ProxyPool/ForeignProxyPool.txt", "a") as f:
85 | for google_proxy in google_proxies:
86 | f.write(google_proxy+'\n')
87 | logger.info("Foreign proxies are stored in Common/ProxyPool/ForeignProxyPool.txt")
88 | except:
89 | logger.error("Error opening file")
90 | sys.exit()
91 | def run_getSocksProxy():
92 | logger.info("Start Searching Proxies")
93 | socksProxysDict = {"baidu": [], "google": []}
94 | socks5_proxys = query_socks5(yesterday)
95 | socks5_proxys_queue = Queue(-1)
96 | if socks5_proxys:
97 | # randomly sample up to 50 proxy IPs to check
98 | for eachSocks5 in random.sample(socks5_proxys, min(50, len(socks5_proxys))):
99 | socks5_proxys_queue.put(eachSocks5)
100 |
101 | threads = []
102 | for num in range(100):
103 | t = Thread(target=curlWeb, args=(socks5_proxys_queue, socksProxysDict))
104 | threads.append(t)
105 | t.start()
106 | for t in threads:
107 | t.join()
108 |
109 | baidu_proxies = socksProxysDict.get('baidu')
110 | google_proxies = socksProxysDict.get('google')
111 | SaveToProxyPool(baidu_proxies, google_proxies)
112 | logger.info("Find {} DomesticProxies and {} ForeignProxies".format(len(baidu_proxies), len(google_proxies)))
113 | logger.info("End Searching Proxies")
114 | # return baidu_proxies, google_proxies
115 |
116 |
117 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/bing.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import threading
3 | import re
4 | from termcolor import cprint  # cprint is used for error reporting below
5 | from urllib.parse import urlparse
6 |
7 | class BingSpider():
8 | def __init__(self, page):
9 | self.header = {
10 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
11 | # "Cookie": "MUID=0D37497B3A146A9009A459B93B2C6B63; _EDGE_V=1; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=BBE8EF5D85544AFFABC19E151D05B49F&dmnchg=1; _SS=SID=14FD12E54B646094309102274AC8612F; MUIDB=0D37497B3A146A9009A459B93B2C6B63; _EDGE_S=SID=14FD12E54B646094309102274AC8612F&mkt=zh-cn&ui=zh-cn; SRCHUSR=DOB=20210930&T=1632976149000&TPC=1632961354000; ipv6=hit=1632979751088&t=4; SNRHOP=I=&TS=; SRCHHPGUSR=SRCHLANG=zh-Hans&BZA=0&BRW=NOTP&BRH=M&CW=724&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&WTS=63768572949&HV=1632976747",
12 | }
13 | self.timeout = 5
14 | self.PAGES = page # number of result pages to crawl for subdomains
15 | self.KEY_PAGES = 1 # number of result pages to crawl for keywords
16 | self.subdomains = []
17 | self.key_links = []
18 | self.keywords = ['inurl:admin', 'inurl:login', 'inurl:system', 'inurl:register', 'inurl:upload', 'intitle:后台', 'intitle:系统', 'intitle:登录']
19 | self.errorurls = {}  # maps a failing URL to its exception
20 |
21 | def info_processing(self, text):
22 | return re.findall(r'', text)
23 |
24 |
25 | def get_info(self, domain, page ,real_page):
26 | url = r"https://cn.bing.com/search?q=site:{}&first={}".format(domain, page)
27 | print('[+] page {} keyword [site:{}] Requesting: [{}]'.format(real_page, domain, url))
28 | try:
29 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
30 | tmp_subdomains = self.info_processing(res.text)
31 | for tmp_subdomain in tmp_subdomains:
32 | self.subdomains += [urlparse(tmp_subdomain[0]).netloc]
33 | except Exception as e:
34 | self.subdomains += []
35 | self.errorurls[url] = e
36 |
37 | def get_key_info(self, domain, keyword='', page=1, real_page=1):
38 | #https://cn.bing.com/search?q=site%3Atjut.edu.cn+inurl:upload&qs=n&form=QBRE&sp=-1&pq=sitetjut.edu.cn+inurl:upload&sc=1-16&sk=&cvid=6734767D90664B77800EA8092B6BB8DD&first=1
39 | #https://cn.bing.com/search?q=site%3Abaidu.com&qs=n&form=QBRE&sp=-1&pq=site%3Atjut.edu.cn&sc=1-16&sk=&cvid=6734767D90664B77800EA8092B6BB8DD
40 | url = r"https://cn.bing.com/search?q=site:{}{}&first={}".format(domain, '+'+keyword, page)
41 | print('[+] page {} keyword [site:{} {}] Requesting: [{}]'.format(real_page, domain, keyword, url))
42 | try:
43 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
44 | tmp_key_links = self.info_processing(res.text)
45 | for tmp_key_link in tmp_key_links:
46 | self.key_links += [(tmp_key_link[1], tmp_key_link[0])]
47 | self.subdomains.append(urlparse(tmp_key_link[0]).netloc)
48 | except Exception as e:
49 | self.key_links += []
50 | self.errorurls[url] = e
51 |
52 | def run(self, domain):
53 |
54 | threads = []
55 | # tmp_page = 1
56 | # self.get_key_info(domain, self.keyword, tmp_page)
57 | # self.get_info(domain, tmp_page)
58 | for keyword in self.keywords:
59 | for page in range(1, self.KEY_PAGES+1):
60 | if page == 1:
61 | tmp_page = 1
62 | elif page == 2:
63 | tmp_page = 2
64 | else:
65 | tmp_page = (page-2)*10+2
66 | t = threading.Thread(target=self.get_key_info, args=(domain, keyword, tmp_page, page))
67 | t.start()
68 | threads.append(t)
69 | if(len(threads)>5):
70 | for t in threads:
71 | t.join()
72 | threads = []
73 |
74 | for page in range(1, self.PAGES+1):
75 | if page == 1:
76 | tmp_page = 1
77 | elif page == 2:
78 | tmp_page = 2
79 | else:
80 | tmp_page = (page - 2) * 10 + 2
81 | t = threading.Thread(target=self.get_info, args=(domain, tmp_page, page))
82 | t.start()
83 | threads.append(t)
84 | if (len(threads) > 5):
85 | for t in threads:
86 | t.join()
87 | threads = []
88 | for t in threads:
89 | t.join()
90 |
91 | if len(self.errorurls) >= 3:
92 | cprint("[+]There are too many exceptions requested, you need to check.", "red")
93 | cprint("[+]The exception information is", "red")
94 | for key in self.errorurls.keys():
95 | cprint("[+]url:{}\n>>Err:{}".format(key, self.errorurls[key]), "red")
96 |
97 | return list(set(self.subdomains)), list(set(self.key_links))
98 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/domainapi.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import re
3 | from urllib.parse import quote
4 | import json
5 | import math
6 |
7 | from Common.LogOutput import LogOutput
8 | logger_object = LogOutput()
9 |
10 | logger = logger_object.SetModuleName('Domain')
11 |
12 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0'}
13 |
14 | class DomainApi():
15 | def __init__(self):
16 | # self.proxy = {"http": "http://{}".format(proxy), "https": "https://{}".format(proxy)}
17 | pass
18 | def chinazApi(self, domain):
19 | # parse the JSON data returned by chinaz
20 | def parse_json(json_ret):
21 | chinazNewDomains = []
22 | results = json_ret['data']
23 | for result in results:
24 | companyName = result['webName']
25 | newDomain = result['host']
26 | time = result['verifyTime']
27 | chinazNewDomains.append((companyName, newDomain, time))
28 | chinazNewDomains = list(set(chinazNewDomains))
29 | return chinazNewDomains
30 |
31 |
32 | chinazNewDomains = []
33 | tempDict = {}
34 | tempList = []
35 |
36 | # get the company name behind the domain
37 | url = r'http://icp.chinaz.com/{}'.format(domain)
38 | try:
39 | res = requests.get(url=url, headers=headers, allow_redirects=False, verify=False, timeout=10)
40 | except Exception as e:
41 | logger.error('{} {}'.format(url, e.args))
42 | return [], []
43 | text = res.text
44 |
45 | companyName = re.search("var kw = '([\S]*)'", text)
46 | if companyName:
47 | companyName = companyName.group(1)
48 | logger.info('Company name: {}'.format(companyName))
49 | companyNameUrlEncode = quote(str(companyName))
50 | else:
51 | logger.warning('No company name matched')
52 | return [], []
53 |
54 | # reverse lookup of domains from the ICP filing record
55 | headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
56 | url = 'http://icp.chinaz.com/Home/PageData'
57 | data = 'pageNo=1&pageSize=20&Kw=' + companyNameUrlEncode
58 | try:
59 | res = requests.post(url=url, headers=headers, data=data, allow_redirects=False, verify=False, timeout=10)
60 | except Exception as e:
61 | logger.error('{} {}'.format(url, e.args))
62 | return [], []
63 |
64 | json_ret = json.loads(res.text)
65 | if 'amount' not in json_ret.keys():
66 | return chinazNewDomains, []
67 | amount = json_ret['amount']
68 | pages = math.ceil(amount / 20)
69 | logger.info('Pages: {}'.format(pages))
70 | tempList.extend(parse_json(json_ret))
71 |
72 | # fetch the remaining pages
73 | for page in range(2, pages+1):
74 | logger.info('Requesting page {}'.format(page))
75 | data = 'pageNo={}&pageSize=20&Kw='.format(page) + companyNameUrlEncode
76 | try:
77 | res = requests.post(url=url, headers=headers, data=data, allow_redirects=False, verify=False, timeout=10)
78 | json_ret = json.loads(res.text)
79 | tempList.extend(parse_json(json_ret))
80 | except Exception as e:
81 | logger.error('{} {}'.format(url, e.args))
82 |
83 | for each in tempList:
84 | if each[1] not in tempDict:
85 | tempDict[each[1]] = each
86 | chinazNewDomains.append(each)
87 |
88 | return chinazNewDomains, companyName
89 |
90 | def run_domain(self, domain):
91 | beianNewDomains = []
92 | chinazNewDomains, companyName = self.chinazApi(domain)
93 |
94 | tempDict = {}
95 | for each in chinazNewDomains:
96 | if each[1] not in tempDict:
97 | tempDict[each[1]] = each
98 | beianNewDomains.append(each)
99 |
100 | logger.info("{} top-level domains in total after deduplication".format(len(beianNewDomains)))
101 | print("\033[33m"+"The top-level domain name is shown below"+"\033[0m")
102 |
103 | for _ in beianNewDomains:
104 | print(_)
105 |
106 | p = re.compile("[^0-9a-zA-Z.]+")
107 | judge = 'y'
108 | for _ in beianNewDomains[:]:  # iterate over a copy because items may be removed below
109 | if p.match(_[1]):
110 | logger.critical("I’m not sure if [{}] is a top-level domain name, you need to judge. (y/n)".format(_[1]))
111 | judge = input()
112 | while(judge != 'y' and judge !='n'):
113 | logger.critical("I’m not sure if [{}] is a top-level domain name, you need to judge. (y/n)".format(_[1]))
114 | judge = input()
115 | if(judge == 'y'):
116 | continue
117 | else:
118 | beianNewDomains.remove(_)
119 | else:
120 | continue
121 | logger.info("A total of {} top-level domain name after screening".format(len(beianNewDomains)))
122 | for _ in beianNewDomains:
123 | print(_[1])
124 |
125 | return beianNewDomains, companyName
126 | # [('同济大学浙江学院', 'tjzj.edu.cn', '2021-01-14')] 同济大学浙江学院
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # googlesearch documentation build configuration file, created by
4 | # sphinx-quickstart on Tue Nov 6 12:25:12 2018.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import os
20 | import sys
21 | sys.path.insert(0, os.path.abspath('..'))
22 |
23 |
24 | # -- General configuration ------------------------------------------------
25 |
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #
28 | # needs_sphinx = '1.0'
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = ['sphinx.ext.autodoc',
34 | 'sphinx.ext.viewcode',
35 | 'sphinx.ext.githubpages']
36 |
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ['_templates']
39 |
40 | # The suffix(es) of source filenames.
41 | # You can specify multiple suffix as a list of string:
42 | #
43 | # source_suffix = ['.rst', '.md']
44 | source_suffix = '.rst'
45 |
46 | # The master toctree document.
47 | master_doc = 'index'
48 |
49 | # General information about the project.
50 | project = u'googlesearch'
51 | copyright = u'2018, Mario Vilas'
52 | author = u'Mario Vilas'
53 |
54 | # The version info for the project you're documenting, acts as replacement for
55 | # |version| and |release|, also used in various other places throughout the
56 | # built documents.
57 | #
58 | # The short X.Y version.
59 | version = u''
60 | # The full version, including alpha/beta/rc tags.
61 | release = u''
62 |
63 | # The language for content autogenerated by Sphinx. Refer to documentation
64 | # for a list of supported languages.
65 | #
66 | # This is also used if you do content translation via gettext catalogs.
67 | # Usually you set "language" from the command line for these cases.
68 | language = None
69 |
70 | # List of patterns, relative to source directory, that match files and
71 | # directories to ignore when looking for source files.
72 | # This patterns also effect to html_static_path and html_extra_path
73 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
74 |
75 | # The name of the Pygments (syntax highlighting) style to use.
76 | pygments_style = 'sphinx'
77 |
78 | # If true, `todo` and `todoList` produce output, else they produce nothing.
79 | todo_include_todos = False
80 |
81 |
82 | # -- Options for HTML output ----------------------------------------------
83 |
84 | # The theme to use for HTML and HTML Help pages. See the documentation for
85 | # a list of builtin themes.
86 | #
87 | html_theme = 'alabaster'
88 |
89 | # Theme options are theme-specific and customize the look and feel of a theme
90 | # further. For a list of options available for each theme, see the
91 | # documentation.
92 | #
93 | # html_theme_options = {}
94 |
95 | # Add any paths that contain custom static files (such as style sheets) here,
96 | # relative to this directory. They are copied after the builtin static files,
97 | # so a file named "default.css" will overwrite the builtin "default.css".
98 | html_static_path = ['_static']
99 |
100 | # Custom sidebar templates, must be a dictionary that maps document names
101 | # to template names.
102 | #
103 | # This is required for the alabaster theme
104 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
105 | html_sidebars = {
106 | '**': [
107 | 'relations.html', # needs 'show_related': True theme option to display
108 | 'searchbox.html',
109 | ]
110 | }
111 |
112 |
113 | # -- Options for HTMLHelp output ------------------------------------------
114 |
115 | # Output file base name for HTML help builder.
116 | htmlhelp_basename = 'googlesearchdoc'
117 |
118 |
119 | # -- Options for LaTeX output ---------------------------------------------
120 |
121 | latex_elements = {
122 | # The paper size ('letterpaper' or 'a4paper').
123 | #
124 | # 'papersize': 'letterpaper',
125 |
126 | # The font size ('10pt', '11pt' or '12pt').
127 | #
128 | # 'pointsize': '10pt',
129 |
130 | # Additional stuff for the LaTeX preamble.
131 | #
132 | # 'preamble': '',
133 |
134 | # Latex figure (float) alignment
135 | #
136 | # 'figure_align': 'htbp',
137 | }
138 |
139 | # Grouping the document tree into LaTeX files. List of tuples
140 | # (source start file, target name, title,
141 | # author, documentclass [howto, manual, or own class]).
142 | latex_documents = [
143 | (master_doc, 'googlesearch.tex', u'googlesearch Documentation',
144 | u'Mario Vilas', 'manual'),
145 | ]
146 |
147 |
148 | # -- Options for manual page output ---------------------------------------
149 |
150 | # One entry per manual page. List of tuples
151 | # (source start file, name, description, authors, manual section).
152 | man_pages = [
153 | (master_doc, 'googlesearch', u'googlesearch Documentation',
154 | [author], 1)
155 | ]
156 |
157 |
158 | # -- Options for Texinfo output -------------------------------------------
159 |
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | # dir menu entry, description, category)
163 | texinfo_documents = [
164 | (master_doc, 'googlesearch', u'googlesearch Documentation',
165 | author, 'googlesearch', 'Python bindings to the Google search engine.',
166 | 'Miscellaneous'),
167 | ]
168 |
169 |
170 | # -- Options for Epub output ----------------------------------------------
171 |
172 | # Bibliographic Dublin Core info.
173 | epub_title = project
174 | epub_author = author
175 | epub_publisher = author
176 | epub_copyright = copyright
177 |
178 | # The unique identifier of the text. This can be a ISBN number
179 | # or the project homepage.
180 | #
181 | # epub_identifier = ''
182 |
183 | # A unique identification for the text.
184 | #
185 | # epub_uid = ''
186 |
187 | # A list of files that should not be packed into the epub file.
188 | epub_exclude_files = ['search.html']
189 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/baidu.py:
--------------------------------------------------------------------------------
1 |
2 | import random
3 |
4 | import requests
5 | import threading
6 | import re
7 | from urllib.parse import urlparse
8 | from Common.LogOutput import LogOutput
9 | logger_object = LogOutput()
10 | logger = logger_object.SetModuleName("BaiduSpider")
11 |
12 | class MyThread(threading.Thread):
13 | def __init__(self,func,args=()):
14 | super(MyThread,self).__init__()
15 | self.func = func
16 | self.args = args
17 | def run(self):
18 | self.result = self.func(*self.args)
19 | def get_result(self):
20 | try:
21 | return self.result
22 | except Exception:
23 | return None
24 | class BaiduSpider():
25 |     def __init__(self, proxies=None):
26 | self.header = {
27 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
28 | # "Cookie": "MUID=0D37497B3A146A9009A459B93B2C6B63; _EDGE_V=1; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=BBE8EF5D85544AFFABC19E151D05B49F&dmnchg=1; _SS=SID=14FD12E54B646094309102274AC8612F; MUIDB=0D37497B3A146A9009A459B93B2C6B63; _EDGE_S=SID=14FD12E54B646094309102274AC8612F&mkt=zh-cn&ui=zh-cn; SRCHUSR=DOB=20210930&T=1632976149000&TPC=1632961354000; ipv6=hit=1632979751088&t=4; SNRHOP=I=&TS=; SRCHHPGUSR=SRCHLANG=zh-Hans&BZA=0&BRW=NOTP&BRH=M&CW=724&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&WTS=63768572949&HV=1632976747",
29 | }
30 | self.timeout = 5
31 |         self.PAGES = 10000 # number of result pages to crawl for subdomains
32 |         self.KEY_PAGES = 2 # number of result pages to crawl for each keyword
33 | self.subdomains = []
34 | self.key_links = []
35 | self.links = []
36 |         self.keywords = ['inurl:admin', 'inurl:login', 'inurl:system', 'inurl:register', 'inurl:upload', 'intitle:后台', 'intitle:系统', 'intitle:登录']  # the Chinese dorks mean admin panel(后台) / system(系统) / login(登录)
37 |         self.errorurls = {} # URLs whose first request raised an exception, with the reason; retried 3 seconds later
38 | self.proxies = proxies
39 |
40 |     def info_processing(self, text):
41 |         # Reconstructed pattern (the original was stripped from this dump): capture
42 |         # (title, redirect link) pairs from Baidu result anchors so item[1] is the link, as the callers expect.
43 |         return [(t, l) for l, t in re.findall(r'<h3[^>]*>\s*<a[^>]*href="([^"]+)"[^>]*>(.*?)</a>', text, re.S)]
44 |
45 | def real_url(self, link):
46 | try:
47 | real_link = requests.get(link, allow_redirects=False, timeout=self.timeout).headers.get('Location')
48 | return real_link
49 | except:
50 | return link
51 |
52 | def get_proxy(self):
53 | return random.choice(self.proxies)
54 |
55 |
56 | def get_info(self, domain, page=0):
57 | url = r"https://www.baidu.com/s?wd=site:{}&pn={}0".format(domain, page)
58 |         print('[+]page: {}  keyword: [site:{}]  Requesting: [{}]'.format(page+1, domain, url))
59 | # proxies = {
60 | # "http": "socks5://{}".format(proxy),
61 | # "https": "socks5://{}".format(proxy)
62 | # }
63 | try:
64 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
65 | if self.check_page(res.text, page+1) == 'Stop':
66 | return 'Stop'
67 | tmp_subdomains = self.info_processing(res.text)
68 | for tmp_subdomain in tmp_subdomains:
69 | tmp_real_link = self.real_url(tmp_subdomain[1])
70 | self.subdomains += [urlparse(tmp_real_link).netloc]
71 | self.links.append(tmp_real_link)
72 | logger.info(urlparse(tmp_real_link).netloc)
73 | except Exception as e:
74 | self.subdomains += []
75 | self.errorurls[url] = e
76 |
77 |
78 |
79 | def get_key_info(self, domain, keyword='', page=0):
80 | url = r"https://www.baidu.com/s?wd=site:{}{}&pn={}0".format(domain, '+'+keyword, page)
81 |         print('[+]page: {}  keyword: [site:{} {}]  Requesting: [{}]'.format(page+1, domain, keyword, url))
82 | # proxies = {
83 | # "http": "socks5://{}".format(proxy),
84 | # "https": "socks5://{}".format(proxy)
85 | # }
86 | try:
87 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
88 | tmp_key_links = self.info_processing(res.text)
89 | for tmp_key_link in tmp_key_links:
90 | tmp_real_link = self.real_url(tmp_key_link[1])
91 | self.key_links += [(tmp_key_link[0], tmp_real_link)]
92 |             self.subdomains.append(urlparse(tmp_real_link).netloc)
93 | logger.info(urlparse(tmp_real_link).netloc)
94 | except Exception as e:
95 | self.key_links += []
96 | self.errorurls[url] = e
97 |
98 |
99 |     def check_page(self, text, page):
100 |         num = re.findall(r'<strong><span class="pc">(\d+)</span></strong>', text)  # reconstructed: current-page marker in Baidu's pager (the original pattern was stripped from this dump)
101 |         if (num != ['{}'.format(page)]) and (page != 1):
102 |             return 'Stop'
103 |         else:
104 |             print("Page {} looks fine, num = {}".format(page, num))
105 |             return 'Continue'
106 |
107 |
108 | '''
109 |     Return the list of subdomains, the list of (title, link) keyword hits, and the list of all links collected
110 | '''
111 | def run(self, domain):
112 | threads = []
113 | num = 1
114 | flag = 1
115 | try:
116 | while flag != 0:
117 | # proxy = self.get_proxy()
118 | for page in range(0+(num-1)*5, 5+(num-1)*5):
119 | # t = MyThread(self.get_info, args=(domain, proxy, page))
120 | t = MyThread(self.get_info, args=(domain, page))
121 | t.start()
122 | threads.append(t)
123 | for t in threads:
124 | t.join()
125 | if(t.get_result() == 'Stop'):
126 | flag = 0
127 | num += 1
128 |
129 |
130 | for keyword in self.keywords:
131 | # proxy = self.get_proxy()
132 | for page in range(0, self.KEY_PAGES):
133 | t = threading.Thread(target=self.get_key_info, args=(domain, keyword, page))
134 | # t = threading.Thread(target=self.get_key_info, args=(domain, proxy, keyword, page))
135 | t.start()
136 | threads.append(t)
137 | for t in threads:
138 | t.join()
139 |
140 | # if len(self.errorurls)>=3:
141 | # cprint("[+]There are too many exceptions requested, you need to check.", "red")
142 | # cprint("[+]The exception information is", "red")
143 | # for key in self.errorurls.keys():
144 | # cprint("[+]url:{}\n>>Err:{}".format(key, self.errorurls[key]), "red")
145 | except:
146 | return list(set(self.subdomains)), list(set(self.key_links)), list(set(self.links))
147 | # # print(set(self.subdomains)), list(set(self.key_links))
148 | return list(set(self.subdomains)), list(set(self.key_links)), list(set(self.links))
--------------------------------------------------------------------------------
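For reference, a minimal usage sketch of the spider above (import path as in this repository; the three return values follow the final return statement in run):

from Plugins.InfoSearch.Subdomain.Spider.Baidu.baidu import BaiduSpider

spider = BaiduSpider(proxies=[])                 # proxies are only used by the commented-out code paths
subdomains, key_links, links = spider.run("example.com")
print(len(subdomains), "unique subdomains")
print(key_links[:3])                             # (title, resolved link) pairs from the keyword queries
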
/Plugins/InfoSearch/Subdomain/subdomainapi.py:
--------------------------------------------------------------------------------
1 | '''
2 | API for collecting subdomains
3 | '''
4 | import sys
5 | import requests
6 |
7 | from Common.LogOutput import LogOutput
8 |
9 | logger_object = LogOutput()
10 | logger = logger_object.SetModuleName("subdomains")
11 |
12 | class SubdomainApi():
13 |
14 | def __init__(self):
15 | self.global_subdomains = []
16 | self.global_subdomains_ips = []
17 | self.global_key_links = []
18 | self.links = []
19 |         self.global_CDNSubdomainsDict = {}  # filled by CheckCDN with a dict keyed by subdomain
20 | # self.proxies = proxies
21 |
22 | def WebSpiderSubdomains(self, domain):
23 | # proxies = self.proxies
24 | def BaiduSpider(domain):
25 | logger.info('Start Baidu Spider')
26 |
27 | from Plugins.InfoSearch.Subdomain.Spider.Baidu.baidu import BaiduSpider
28 |             baidu_spider = BaiduSpider()
29 |             tmp_subdomains, tmp_key_links, tmp_links = baidu_spider.run(domain)
30 | self.global_subdomains += tmp_subdomains
31 | self.global_key_links += tmp_key_links
32 | self.links += tmp_links
33 | logger.info('Baidu Spider Is Over')
34 |
35 |         # Bing requests have been unreliable, so this spider is disabled below
36 | def BingSpider(domain):
37 | logger.info('Start Bing Spider')
38 |
39 | from Plugins.InfoSearch.Subdomain.Spider.Bing.bing import BingSpider
40 |             bing_spider = BingSpider()
41 |             tmp_subdomains, tmp_key_links, tmp_links = bing_spider.run(domain)
42 |
43 |             self.global_subdomains += tmp_subdomains
44 |             self.global_key_links += tmp_key_links
45 |             self.links += tmp_links
46 | logger.info('Bing Spider Is Over')
47 |
48 |
49 | logger.info("Start Spider Module")
50 | BaiduSpider(domain)
51 | # BingSpider(domain, proxies)
52 | logger.info("WebSpider is over")
53 |
54 | def ThirdPartyPlatform(self, domain):
55 | # proxies = self.proxies
56 | # proxy = random.choice(proxies)
57 | def Certificate(domain):
58 |
59 | from Plugins.InfoSearch.Subdomain.ThirdPartyPlatform.certificate import Certificate
60 |             certificate = Certificate()
61 |             tmp_subdomains = certificate.run(domain)
62 |
63 | self.global_subdomains += tmp_subdomains
64 |
65 | def Netcraft(domain):
66 | from Plugins.InfoSearch.Subdomain.ThirdPartyPlatform.netcraft import Netcraft
67 |             netcraft = Netcraft()
68 |             tmp_subdomains = netcraft.Run(domain)
69 |
70 | self.global_subdomains += tmp_subdomains
71 | logger.info("ThirdPartyPlatform start")
72 | Certificate(domain)
73 | Netcraft(domain)
74 | logger.info("ThirdPartyPlatform end")
75 |
76 |
77 |     '''DNS resolution'''
78 | def Dns_resolver(self):
79 | import dns.resolver
80 | dns_servers = [
81 |             # The DNS server matters a lot for accuracy; some servers return results inconsistent with others, or none at all
82 | # '223.5.5.5', # AliDNS
83 | # '114.114.114.114', # 114DNS
84 | # '1.1.1.1', # Cloudflare
85 | '119.29.29.29', # DNSPod https://www.dnspod.cn/products/public.dns
86 | # '180.76.76.76', # BaiduDNS
87 | # '1.2.4.8', # sDNS
88 | # '11.1.1.1' # test DNS, not available
89 |             # '8.8.8.8', # Google DNS, latency is too high
90 | ]
91 |
92 | my_resolver = dns.resolver.Resolver()
93 | my_resolver.nameservers = dns_servers
94 |
95 | def DNS_Query(domain_name, domain_type):
96 | try:
97 | ips = ''
98 | A = my_resolver.resolve(domain_name, domain_type)
99 | for ip in A.rrset.items.keys():
100 | ips = ips + str(ip) + ','
101 | return ips.strip(",")
102 | except Exception as e:
103 | return 'null'
104 |
105 |
106 | logger.info("Dns_resolver start")
107 | for single_subdomain in self.global_subdomains:
108 | ips = DNS_Query(single_subdomain, "A")
109 | self.global_subdomains_ips += [(single_subdomain, ips)]
110 |
111 | logger.info("Dns_resolver end")
112 |
113 |
114 | def ESD_Run(self, domain):
115 | logger.info("ESD start")
116 | from Plugins.InfoSearch.Subdomain.ESD.ESD import EnumSubDomain
117 | self.global_subdomains_ips += EnumSubDomain(domain).run()
118 | logger.info("ESD end")
119 |
120 | def Check_network_connectivity(self):
121 | try:
122 | logger.info("Checking the network")
123 | if requests.get("https://www.baidu.com").status_code == 200:
124 | logger.info("Network status is good")
125 | except:
126 |             logger.error("Network problem: please check your network settings")
127 | sys.exit()
128 | def JsFinderRun(self):
129 | from Plugins.InfoSearch.Subdomain.JsFinder import jsfinder
130 | for link in self.links:
131 | self.global_subdomains += jsfinder.RunJsFinder(link)
132 |
133 |
134 | # def Save_Subdomains(self, domain):
135 | # f = open('../../../Reports/{}-{}-{}-{}'.format(domain, datetime.datetime.now().year, datetime.datetime.now().month,datetime.datetime.now().day), 'w')
136 | # print(list(set(self.global_subdomains_ips)))
137 | #
138 | # print("The subdomain is stored in Reports/{}".format(f.name))
139 | #
140 | # for i in self.global_subdomains_ips:
141 | # f.write(i[0] + ' ' + i[1] + '\n')
142 | # f.close()
143 | # logger.error("There is something wrong in network")
144 |
145 |
146 | def Data_Filtering(self, domain):
147 | logger.info("Data filtering start")
148 |         if '' in self.global_subdomains:
149 |             self.global_subdomains.remove('')
150 |         if domain in self.global_subdomains:
151 |             self.global_subdomains.remove(domain)
152 | self.global_subdomains = list(set(self.global_subdomains))
153 | logger.info("Data filtering end")
154 |
155 |
156 | def CheckCDN(self):
157 | from Plugins.InfoSearch.Subdomain.IsCND import CheckCDN
158 |         self.subdomain_ips, self.global_CDNSubdomainsDict = CheckCDN.run_checkCDN(self.global_subdomains)
159 |
160 | def Run(self, domain):
161 | logger.info("Subdomains start")
162 |
163 |
164 |
165 | # self.Check_network_connectivity()
166 |
167 | self.WebSpiderSubdomains(domain)
168 | self.JsFinderRun()
169 | self.ThirdPartyPlatform(domain)
170 |
171 |
172 | self.Data_Filtering(domain)
173 | self.CheckCDN()
174 |
175 | # self.Dns_resolver()
176 |
177 | # self.ESD_Run(domain)
178 |
179 | logger.info("Subdomains end")
180 | return list(set(self.global_subdomains_ips)), self.global_key_links
181 |
182 |
--------------------------------------------------------------------------------
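A short, hedged usage sketch for the API above (import path as in this repository; Run returns the de-duplicated (subdomain, ip) records plus the keyword links collected by the spiders):

from Plugins.InfoSearch.Subdomain.subdomainapi import SubdomainApi

api = SubdomainApi()
subdomain_ips, key_links = api.Run("example.com")
for record in subdomain_ips:
    print(record)               # (subdomain, ip) records gathered by the enabled modules
for title, link in key_links:
    print(title, link)          # keyword hits such as login/admin pages
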
/Plugins/InfoSearch/Subdomain/JsFinder/jsfinder.py:
--------------------------------------------------------------------------------
1 |
2 | import requests, argparse, sys, re
3 | from requests.packages import urllib3
4 | from urllib.parse import urlparse
5 | from bs4 import BeautifulSoup
6 |
7 |
8 | def extract_URL(JS):
9 | pattern_raw = r"""
10 | (?:"|') # Start newline delimiter
11 | (
12 | ((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or //
13 | [^"'/]{1,}\. # Match a domainname (any character + dot)
14 | [a-zA-Z]{2,}[^"']{0,}) # The domainextension and/or path
15 | |
16 | ((?:/|\.\./|\./) # Start with /,../,./
17 | [^"'><,;| *()(%%$^/\\\[\]] # Next character can't be...
18 | [^"'><,;|()]{1,}) # Rest of the characters can't be
19 | |
20 | ([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with /
21 | [a-zA-Z0-9_\-/]{1,} # Resource name
22 | \.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action)
23 | (?:[\?|/][^"|']{0,}|)) # ? mark with parameters
24 | |
25 | ([a-zA-Z0-9_\-]{1,} # filename
26 | \.(?:php|asp|aspx|jsp|json|
27 | action|html|js|txt|xml) # . + extension
28 | (?:\?[^"|']{0,}|)) # ? mark with parameters
29 | )
30 | (?:"|') # End newline delimiter
31 | """
32 | pattern = re.compile(pattern_raw, re.VERBOSE)
33 | result = re.finditer(pattern, str(JS))
34 | if result == None:
35 | return None
36 | js_url = []
37 | return [match.group().strip('"').strip("'") for match in result
38 | if match.group() not in js_url]
39 |
40 |
41 | # Get the page source
42 | def Extract_html(URL):
43 | header = {
44 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36",
45 | }
46 | try:
47 | raw = requests.get(URL, headers=header, timeout=3, verify=False)
48 | raw = raw.content.decode("utf-8", "ignore")
49 | return raw
50 | except:
51 | return None
52 |
53 |
54 | # Handling relative URLs
55 | def process_url(URL, re_URL):
56 |     black_url = ["javascript:"] # keywords used to filter out unwanted URLs
57 | URL_raw = urlparse(URL)
58 | ab_URL = URL_raw.netloc
59 | host_URL = URL_raw.scheme
60 | if re_URL[0:2] == "//":
61 | result = host_URL + ":" + re_URL
62 | elif re_URL[0:4] == "http":
63 | result = re_URL
64 | elif re_URL[0:2] != "//" and re_URL not in black_url:
65 | if re_URL[0:1] == "/":
66 | result = host_URL + "://" + ab_URL + re_URL
67 | else:
68 | if re_URL[0:1] == ".":
69 | if re_URL[0:2] == "..":
70 | result = host_URL + "://" + ab_URL + re_URL[2:]
71 | else:
72 | result = host_URL + "://" + ab_URL + re_URL[1:]
73 | else:
74 | result = host_URL + "://" + ab_URL + "/" + re_URL
75 | else:
76 | result = URL
77 | return result
78 |
79 |
80 | def find_last(string, str):
81 | positions = []
82 | last_position = -1
83 | while True:
84 | position = string.find(str, last_position + 1)
85 | if position == -1: break
86 | last_position = position
87 | positions.append(position)
88 | return positions
89 |
90 |
91 | def find_by_url(url, js=False):
92 | if js == False:
93 | try:
94 | print("url:" + url)
95 | except:
96 | print("Please specify a URL like https://www.baidu.com")
97 | html_raw = Extract_html(url)
98 | if html_raw == None:
99 | print("Fail to access " + url)
100 | return None
101 | # print(html_raw)
102 | html = BeautifulSoup(html_raw, "html.parser")
103 | html_scripts = html.findAll("script")
104 | script_array = {}
105 | script_temp = ""
106 | for html_script in html_scripts:
107 | script_src = html_script.get("src")
108 | if script_src == None:
109 | script_temp += html_script.get_text() + "\n"
110 | else:
111 | purl = process_url(url, script_src)
112 | script_array[purl] = Extract_html(purl)
113 | script_array[url] = script_temp
114 | allurls = []
115 | for script in script_array:
116 | # print(script)
117 | temp_urls = extract_URL(script_array[script])
118 | if len(temp_urls) == 0: continue
119 | for temp_url in temp_urls:
120 | allurls.append(process_url(script, temp_url))
121 | result = []
122 | for singerurl in allurls:
123 | url_raw = urlparse(url)
124 | domain = url_raw.netloc
125 | positions = find_last(domain, ".")
126 | miandomain = domain
127 | if len(positions) > 1: miandomain = domain[positions[-2] + 1:]
128 | # print(miandomain)
129 | suburl = urlparse(singerurl)
130 | subdomain = suburl.netloc
131 | # print(singerurl)
132 | if miandomain in subdomain or subdomain.strip() == "":
133 | if singerurl.strip() not in result:
134 | result.append(singerurl)
135 | return result
136 | return sorted(set(extract_URL(Extract_html(url)))) or None
137 |
138 |
139 | def find_subdomain(urls, mainurl):
140 | url_raw = urlparse(mainurl)
141 | domain = url_raw.netloc
142 | miandomain = domain
143 | positions = find_last(domain, ".")
144 | if len(positions) > 1: miandomain = domain[positions[-2] + 1:]
145 | subdomains = []
146 | for url in urls:
147 | suburl = urlparse(url)
148 | subdomain = suburl.netloc
149 | # print(subdomain)
150 | if subdomain.strip() == "": continue
151 | if miandomain in subdomain:
152 | if subdomain not in subdomains:
153 | subdomains.append(subdomain)
154 | return subdomains
155 |
156 |
157 | def find_by_url_deep(url):
158 | html_raw = Extract_html(url)
159 | if html_raw == None:
160 | print("Fail to access " + url)
161 | return None
162 | html = BeautifulSoup(html_raw, "html.parser")
163 | html_as = html.findAll("a")
164 | links = []
165 | for html_a in html_as:
166 | src = html_a.get("href")
167 | if src == "" or src == None: continue
168 | link = process_url(url, src)
169 | if link not in links:
170 | links.append(link)
171 | if links == []: return None
172 | print("ALL Find " + str(len(links)) + " links")
173 | urls = []
174 | i = len(links)
175 | for link in links:
176 | temp_urls = find_by_url(link)
177 | if temp_urls == None: continue
178 | print("Remaining " + str(i) + " | Find " + str(len(temp_urls)) + " URL in " + link)
179 | for temp_url in temp_urls:
180 | if temp_url not in urls:
181 | urls.append(temp_url)
182 | i -= 1
183 | return urls
184 |
185 |
186 | # def find_by_file(file_path, js=False):
187 | # with open(file_path, "r") as fobject:
188 | # links = fobject.read().split("\n")
189 | # if links == []: return None
190 | # print("ALL Find " + str(len(links)) + " links")
191 | # urls = []
192 | # i = len(links)
193 | # for link in links:
194 | # if js == False:
195 | # temp_urls = find_by_url(link)
196 | # else:
197 | # temp_urls = find_by_url(link, js=True)
198 | # if temp_urls == None: continue
199 | # print(str(i) + " Find " + str(len(temp_urls)) + " URL in " + link)
200 | # for temp_url in temp_urls:
201 | # if temp_url not in urls:
202 | # urls.append(temp_url)
203 | # i -= 1
204 | # return urls
205 |
206 | #
207 | # def giveresult(urls, domian):
208 | # if urls == None:
209 | # return None
210 | # print("Find " + str(len(urls)) + " URL:")
211 | # content_url = ""
212 | # content_subdomain = ""
213 | # for url in urls:
214 | # content_url += url + "\n"
215 | # print(url)
216 | # subdomains = find_subdomain(urls, domian)
217 | # print("\nFind " + str(len(subdomains)) + " Subdomain:")
218 | # for subdomain in subdomains:
219 | # content_subdomain += subdomain + "\n"
220 | # print(subdomain)
221 | # if args.outputurl != None:
222 | # with open(args.outputurl, "a", encoding='utf-8') as fobject:
223 | # fobject.write(content_url)
224 | # print("\nOutput " + str(len(urls)) + " urls")
225 | # print("Path:" + args.outputurl)
226 | # if args.outputsubdomain != None:
227 | # with open(args.outputsubdomain, "a", encoding='utf-8') as fobject:
228 | # fobject.write(content_subdomain)
229 | # print("\nOutput " + str(len(subdomains)) + " subdomains")
230 | # print("Path:" + args.outputsubdomain)
231 |
232 | # return content_url, content_subdomain
233 |
234 | def RunJsFinder(url):
235 | urllib3.disable_warnings()
236 |     urls = find_by_url(url) or []  # find_by_url may return None when the page cannot be fetched
237 | subdomains = find_subdomain(urls, url)
238 | # return urls, subdomains
239 | return subdomains
240 | if __name__ == "__main__":
241 | print(RunJsFinder("https://www.tjut.edu.cn"))
--------------------------------------------------------------------------------
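To illustrate the two helpers above with made-up inputs (the expected outputs follow from the URL-extraction regex and from the relative-URL handling in process_url):

from Plugins.InfoSearch.Subdomain.JsFinder.jsfinder import extract_URL, process_url

js = 'var api = "/api/v1/user"; var cdn = "https://static.example.com/app.js";'
print(extract_URL(js))
# should yield ['/api/v1/user', 'https://static.example.com/app.js']

print(process_url("https://www.example.com/index.html", "/api/v1/user"))
# should yield 'https://www.example.com/api/v1/user'
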
/Plugins/InfoSearch/Subdomain/IsCND/CheckCDN.py:
--------------------------------------------------------------------------------
1 | import ipaddress
2 | import re
3 | import dns.resolver
4 | import geoip2.database
5 |
6 |
7 | from queue import Queue
8 | from threading import Thread
9 | from Common.LogOutput import LogOutput
10 | logger_object = LogOutput()
11 | logger = logger_object.SetModuleName("CheckCDN")
12 |
13 |
14 |
15 |
16 | # Query PDNS and exclude the common domestic and foreign CDN ranges; an IP that falls outside them is very likely the real IP
17 | cdns = [
18 | '223.99.255.0/24', '71.152.0.0/17', '219.153.73.0/24', '125.39.46.0/24', '190.93.240.0/20', '14.0.113.0/24',
19 | '14.0.47.0/24', '113.20.148.0/22', '103.75.201.0/24', '1.32.239.0/24', '101.79.239.0/24', '52.46.0.0/18',
20 | '125.88.189.0/24', '150.138.248.0/24', '180.153.235.0/24', '205.251.252.0/23', '103.1.65.0/24', '115.127.227.0/24',
21 | '14.0.42.0/24', '109.199.58.0/24', '116.211.155.0/24', '112.253.3.0/24', '14.0.58.0/24', '223.112.227.0/24',
22 | '113.20.150.0/23', '61.182.141.0/24', '34.216.51.0/25', '124.95.188.0/24', '42.51.25.0/24', '183.136.133.0/24',
23 | '52.220.191.0/26', '119.84.93.0/24', '182.118.38.0/24', '13.59.250.0/26', '54.178.75.0/24', '119.84.92.0/24',
24 | '183.131.62.0/24', '111.32.136.0/24', '13.124.199.0/24', '111.47.227.0/24', '104.37.177.0/24', '14.0.50.0/24',
25 | '183.230.70.0/24', '114.111.59.0/24', '220.181.135.0/24', '112.140.32.0/19', '101.79.230.0/24', '14.0.115.0/24',
26 | '103.28.248.0/22', '117.34.72.0/24', '109.199.57.0/24', '101.79.149.0/24', '116.128.128.0/24', '115.231.186.0/24',
27 | '103.22.200.0/22', '61.155.165.0/24', '113.20.148.0/23', '185.254.242.0/24', '59.36.120.0/24', '70.132.0.0/18',
28 | '116.31.126.0/24', '119.147.134.0/24', '115.127.246.0/24', '52.47.139.0/24', '118.107.175.0/24', '52.78.247.128/26',
29 | '110.93.176.0/20', '54.240.128.0/18', '46.51.216.0/21', '119.31.251.0/24', '125.39.18.0/24', '108.175.33.0/24',
30 | '1.31.128.0/24', '61.151.163.0/24', '103.95.132.0/24', '58.215.118.0/24', '54.233.255.128/26', '120.52.113.0/24',
31 | '118.107.174.0/24', '1.32.242.0/24', '221.195.34.0/24', '101.79.228.0/24', '205.251.249.0/24', '113.200.91.0/24',
32 | '101.79.146.0/24', '221.238.22.0/24', '134.19.183.0/24', '110.93.160.0/20', '180.97.158.0/24', '115.127.251.0/24',
33 | '119.167.147.0/24', '115.127.238.0/24', '115.127.240.0/22', '14.0.48.0/24', '115.127.240.0/24', '113.7.183.0/24',
34 | '112.140.128.0/20', '115.127.255.0/24', '114.31.36.0/22', '101.79.232.0/24', '218.98.44.0/24', '106.119.182.0/24',
35 | '101.79.167.0/24', '125.39.5.0/24', '58.49.105.0/24', '124.202.164.0/24', '111.177.6.0/24', '61.133.127.0/24',
36 | '185.11.124.0/22', '150.138.150.0/24', '115.127.248.0/24', '103.74.80.0/22', '101.79.166.0/24', '101.71.55.0/24',
37 | '198.41.128.0/17', '117.21.219.0/24', '103.231.170.0/24', '221.204.202.0/24', '101.79.224.0/24', '112.25.16.0/24',
38 | '111.177.3.0/24', '204.246.168.0/22', '103.40.7.0/24', '134.226.0.0/16', '52.15.127.128/26', '122.190.2.0/24',
39 | '101.203.192.0/18', '1.32.238.0/24', '101.79.144.0/24', '176.34.28.0/24', '119.84.15.0/24', '18.216.170.128/25',
40 | '222.88.94.0/24', '101.79.150.0/24', '114.111.48.0/21', '124.95.168.0/24', '114.111.48.0/20', '110.93.176.0/21',
41 | '223.111.127.0/24', '117.23.61.0/24', '140.207.120.0/24', '157.255.26.0/24', '221.204.14.0/24', '183.222.96.0/24',
42 | '104.37.180.0/24', '42.236.93.0/24', '111.63.51.0/24', '114.31.32.0/20', '118.180.50.0/24', '222.240.184.0/24',
43 | '205.251.192.0/19', '101.79.225.0/24', '115.127.228.0/24', '113.20.148.0/24', '61.213.176.0/24', '112.65.75.0/24',
44 | '111.13.147.0/24', '113.20.145.0/24', '103.253.132.0/24', '52.222.128.0/17', '183.203.7.0/24', '27.221.27.0/24',
45 | '103.79.134.0/24', '123.150.187.0/24', '103.15.194.0/24', '162.158.0.0/15', '61.163.30.0/24', '182.140.227.0/24',
46 | '112.25.60.0/24', '117.148.161.0/24', '61.182.136.0/24', '114.31.56.0/22', '64.252.128.0/18', '183.61.185.0/24',
47 | '115.127.250.0/24', '150.138.138.0/24', '13.210.67.128/26', '211.162.64.0/24', '61.174.9.0/24', '14.0.112.0/24',
48 | '52.52.191.128/26', '27.221.124.0/24', '103.4.203.0/24', '103.14.10.0/24', '34.232.163.208/29', '114.31.48.0/20',
49 | '59.51.81.0/24', '183.60.235.0/24', '101.227.206.0/24', '125.39.174.0/24', '119.167.246.0/24', '118.107.160.0/21',
50 | '223.166.151.0/24', '110.93.160.0/19', '204.246.172.0/23', '119.31.253.0/24', '143.204.0.0/16', '14.0.60.0/24',
51 | '123.151.76.0/24', '116.193.80.0/24', '120.241.102.0/24', '180.96.20.0/24', '216.137.32.0/19', '223.94.95.0/24',
52 | '103.4.201.0/24', '14.0.56.0/24', '115.127.234.0/24', '113.20.144.0/23', '103.248.104.0/24', '122.143.15.0/24',
53 | '101.79.229.0/24', '101.79.163.0/24', '104.37.112.0/22', '115.127.253.0/24', '141.101.64.0/18', '113.20.144.0/22',
54 | '101.79.155.0/24', '117.148.160.0/24', '124.193.166.0/24', '109.94.168.0/24', '203.90.247.0/24', '101.79.208.0/21',
55 | '182.118.12.0/24', '114.31.58.0/23', '202.162.109.0/24', '101.79.164.0/24', '58.216.2.0/24', '222.216.190.0/24',
56 | '101.79.165.0/24', '111.6.191.0/24', '1.255.100.0/24', '52.84.0.0/15', '112.65.74.0/24', '183.250.179.0/24',
57 | '101.79.236.0/24', '119.31.252.0/24', '113.20.150.0/24', '60.12.166.0/24', '101.79.234.0/24', '113.17.174.0/24',
58 | '101.79.237.0/24', '61.54.46.0/24', '118.212.233.0/24', '183.110.242.0/24', '150.138.149.0/24', '117.34.13.0/24',
59 | '115.127.245.0/24', '14.0.102.0/24', '14.0.109.0/24', '61.130.28.0/24', '113.20.151.0/24', '219.159.84.0/24',
60 | '114.111.62.0/24', '172.64.0.0/13', '61.155.222.0/24', '120.52.29.0/24', '115.127.231.0/24', '14.0.49.0/24',
61 | '113.202.0.0/16', '103.248.104.0/22', '205.251.250.0/23', '103.216.136.0/22', '118.107.160.0/20', '109.87.0.0/21',
62 | '54.239.128.0/18', '115.127.224.0/19', '111.202.98.0/24', '109.94.169.0/24', '59.38.112.0/24', '204.246.176.0/20',
63 | '123.133.84.0/24', '103.4.200.0/24', '111.161.109.0/24', '112.84.34.0/24', '103.82.129.0/24', '183.3.254.0/24',
64 | '112.137.184.0/21', '122.227.237.0/24', '36.42.75.0/24', '13.35.0.0/16', '101.226.4.0/24', '116.140.35.0/24',
65 | '58.250.143.0/24', '13.54.63.128/26', '205.251.254.0/24', '173.245.48.0/20', '183.61.177.0/24', '113.20.144.0/24',
66 | '104.37.183.0/24', '35.158.136.0/24', '116.211.121.0/24', '42.236.94.0/24', '117.34.91.0/24', '123.6.13.0/24',
67 | '13.224.0.0/14', '113.20.146.0/24', '58.58.81.0/24', '52.124.128.0/17', '122.228.198.0/24', '197.234.240.0/22',
68 | '99.86.0.0/16', '144.220.0.0/16', '119.188.97.0/24', '36.27.212.0/24', '104.37.178.0/24', '114.31.52.0/22',
69 | '218.65.212.0/24', '1.255.41.0/24', '14.0.45.0/24', '1.32.243.0/24', '220.170.185.0/24', '122.190.3.0/24',
70 | '103.79.133.0/24', '220.181.55.0/24', '125.39.191.0/24', '115.127.226.0/24', '125.39.32.0/24', '61.120.154.0/24',
71 | '103.4.202.0/24', '103.79.134.0/23', '115.127.224.0/24', '113.20.147.0/24', '61.156.149.0/24', '210.209.122.0/24',
72 | '115.127.249.0/24', '104.37.179.0/24', '120.52.18.0/24', '54.192.0.0/16', '14.0.55.0/24', '61.160.224.0/24',
73 | '113.207.101.0/24', '101.79.157.0/24', '110.93.128.0/20', '58.251.121.0/24', '61.240.149.0/24', '130.176.0.0/16',
74 | '113.107.238.0/24', '112.65.73.0/24', '103.75.200.0/23', '199.83.128.0/21', '123.129.220.0/24', '54.230.0.0/16',
75 | '114.111.60.0/24', '199.27.128.0/21', '14.0.118.0/24', '101.79.158.0/24', '119.31.248.0/21', '54.182.0.0/16',
76 | '113.31.27.0/24', '14.17.69.0/24', '101.79.145.0/24', '113.20.144.0/21', '180.163.22.0/24', '104.37.176.0/21',
77 | '117.25.156.0/24', '115.127.252.0/24', '115.127.244.0/23', '14.0.46.0/24', '113.207.102.0/24', '52.199.127.192/26',
78 | '13.113.203.0/24', '64.252.64.0/18', '1.32.240.0/24', '123.129.232.0/24', '1.32.241.0/24', '180.163.189.0/24',
79 | '157.255.25.0/24', '1.32.244.0/24', '103.248.106.0/24', '121.48.95.0/24', '54.239.192.0/19', '113.20.146.0/23',
80 | '61.136.173.0/24', '35.162.63.192/26', '117.34.14.0/24', '183.232.29.0/24', '42.81.93.0/24', '122.228.238.0/24',
81 | '183.61.190.0/24', '125.39.239.0/24', '115.127.230.0/24', '103.140.200.0/23', '202.102.85.0/24', '14.0.32.0/21',
82 | '14.0.57.0/24', '112.25.90.0/24', '58.211.137.0/24', '210.22.63.0/24', '34.226.14.0/24', '13.32.0.0/15',
83 | '101.79.156.0/24', '103.89.176.0/24', '14.0.116.0/24', '106.42.25.0/24', '101.79.233.0/24', '101.79.231.0/24',
84 | '103.75.200.0/24', '119.188.9.0/24', '183.232.51.0/24', '149.126.72.0/21', '103.21.244.0/22', '115.127.233.0/24',
85 | '27.221.20.0/24', '198.143.32.0/19', '103.248.107.0/24', '101.79.227.0/24', '115.127.242.0/24', '119.31.250.0/24',
86 | '103.82.130.0/24', '99.84.0.0/16', '222.73.144.0/24', '103.79.132.0/22', '101.79.208.0/20', '104.37.182.0/24',
87 | '101.79.152.0/24', '36.99.18.0/24', '101.71.56.0/24', '36.250.5.0/24', '61.158.240.0/24', '119.188.14.0/24',
88 | '13.249.0.0/16', '183.214.156.0/24', '60.221.236.0/24', '58.30.212.0/24', '115.127.254.0/24', '188.114.96.0/20',
89 | '115.127.241.0/24', '103.4.200.0/22', '115.127.239.0/24', '115.127.243.0/24', '111.32.135.0/24', '120.221.29.0/24',
90 | '115.127.232.0/24', '14.0.43.0/24', '14.0.59.0/24', '183.61.236.0/24', '34.223.12.224/27', '103.24.120.0/24',
91 | '52.57.254.0/24', '113.207.100.0/24', '222.186.19.0/24', '113.20.149.0/24', '150.138.151.0/24', '115.231.110.0/24',
92 | '52.56.127.0/25', '104.37.176.0/24', '163.177.8.0/24', '163.53.89.0/24', '52.82.128.0/19', '114.111.63.0/24',
93 | '108.162.192.0/18', '14.136.130.0/24', '115.127.229.0/24', '14.17.71.0/24', '52.212.248.0/26', '180.163.188.0/24',
94 | '61.182.137.0/24', '119.161.224.0/21', '14.0.41.0/24', '202.162.108.0/24', '106.122.248.0/24', '52.66.194.128/26',
95 | '115.127.237.0/24', '220.170.186.0/24', '14.0.32.0/19', '14.0.114.0/24', '112.90.216.0/24', '115.127.236.0/24',
96 | '116.193.84.0/24', '113.207.76.0/24', '101.79.235.0/24', '101.79.224.0/20', '61.155.149.0/24', '101.79.148.0/24',
97 | '180.163.224.0/24', '204.246.174.0/23', '183.60.136.0/24', '101.227.207.0/24', '103.248.105.0/24',
98 | '119.188.35.0/24', '42.236.7.0/24', '116.193.88.0/21', '116.193.83.0/24', '120.199.69.0/24', '122.226.182.0/24',
99 | '58.20.204.0/24', '110.93.128.0/21', '115.231.187.0/24', '69.28.58.0/24', '114.31.32.0/19', '112.25.91.0/24',
100 | '59.52.28.0/24', '117.27.149.0/24', '61.147.92.0/24', '14.0.117.0/24', '14.0.40.0/24', '119.97.151.0/24',
101 | '103.199.228.0/22', '122.70.134.0/24', '115.127.244.0/24', '223.112.198.0/24', '115.127.225.0/24', '104.16.0.0/12',
102 | '121.12.98.0/24', '103.31.4.0/22', '204.246.164.0/22', '223.94.66.0/24', '35.167.191.128/26', '116.31.127.0/24',
103 | '101.79.226.0/24', '34.195.252.0/24', '115.127.247.0/24', '61.240.144.0/24', '108.175.32.0/20', '120.197.85.0/24',
104 | '183.232.53.0/24', '111.161.66.0/24', '117.34.28.0/24', '45.64.64.0/22', '14.0.44.0/24', '109.86.0.0/15',
105 | '182.23.211.0/24', '58.211.2.0/24', '119.36.164.0/24', '116.55.250.0/24', '101.227.163.0/24', '13.228.69.0/24',
106 | '120.221.136.0/24', '119.188.132.0/24', '115.127.235.0/24', '42.236.6.0/24', '125.88.190.0/24', '61.54.47.0/24',
107 | '103.27.12.0/22', '116.193.80.0/21', '101.79.159.0/24', '123.155.158.0/24', '111.47.226.0/24', '131.0.72.0/22',
108 | '192.230.64.0/18', '218.92.0.0/24'
109 | ]
110 |
111 | ASNS = [
112 | '10576', '10762', '11748', '131099', '132601', '133496', '134409', '135295', '136764', '137187', '13777', '13890',
113 | '14103', '14520', '17132', '199251', '200013', '200325', '200856', '201263', '202294', '203075', '203139', '204248',
114 | '204286', '204545', '206227', '206734', '206848', '206986', '207158', '208559', '209403', '21030', '21257', '23327',
115 | '23393', '23637', '23794', '24997', '26492', '268843', '28709', '29264', '30282', '30637', '328126', '36408',
116 | '38107', '397192', '40366', '43303', '44907', '46071', '46177', '47542', '49287', '49689', '51286', '55082',
117 | '55254', '56636', '57363', '58127', '59730', '59776', '60068', '60626', '60922', '61107', '61159', '62026', '62229',
118 | '63062', '64232', '8868', '9053', '55770', '49846', '49249', '48163', '45700', '43639', '39836', '393560', '393234',
119 | '36183', '35994', '35993', '35204', '34850', '34164', '33905', '32787', '31377', '31110', '31109', '31108', '31107',
120 | '30675', '24319', '23903', '23455', '23454', '22207', '21399', '21357', '21342', '20940', '20189', '18717', '18680',
121 | '17334', '16702', '16625', '12222', '209101', '201585', '135429', '395747', '394536', '209242', '203898', '202623',
122 | '14789', '133877', '13335', '132892', '21859', '6185', '47823'
123 | ]
124 |
125 | cdnDict = {}
126 | with open(r'./cdn-domain.conf', 'rt', encoding='utf-8') as f:
127 | # with open(r'cdn-domain.conf', 'rt') as f:
128 | for eachline in f.readlines():
129 | eachline = eachline.strip()
130 | if '#' in eachline:
131 | cdnName = eachline.replace('#', '')
132 | cdnDict[cdnName] = []
133 | elif eachline:
134 | cdnDict[cdnName].append(eachline)
135 |
136 | # Resolve a domain to its IP addresses
137 | def query_A(subdomain):
138 |     # Resolve the domain to its IPs, e.g. www.xxx.com -> x.x.x.x
139 | ips = []
140 | try:
141 | dns_A_ips = [j for i in dns.resolver.resolve(subdomain, 'A').response.answer for j in i.items]
142 | ips = []
143 | for each_ip in dns_A_ips:
144 | each_ip = str(each_ip)
145 |             if re.compile(r'^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$').match(each_ip): # regex check that this is an IPv4 address
146 | ips.append(str(each_ip))
147 | except Exception as e:
148 | pass
149 |
150 | return ips
151 |
152 | # Decide from the IP whether the host is behind a CDN
153 | def ipASNSCheckCDN(subdomain):
154 | ips = query_A(subdomain)
155 |
156 | with geoip2.database.Reader('./GeoLite2-ASN.mmdb') as reader:
157 | # with geoip2.database.Reader('./GeoLite2-ASN.mmdb') as reader:
158 | for ip in ips:
159 |             # Check against known CDN IP ranges
160 |             for cdn in cdns:
161 |                 if ipaddress.ip_address(ip) in ipaddress.ip_network(cdn):
162 |                     return ['CDN IP range', cdn], ips
163 |
164 |             # Check against known CDN ASNs
165 |             try:
166 |                 response = reader.asn(ip)
167 |                 asnsNum = response.autonomous_system_number
168 |                 if str(asnsNum) in ASNS:
169 |                     return ['CDN ASN range', asnsNum], ips
170 | except Exception as e:
171 | pass
172 |
173 | return [], ips
174 |
175 | # Query the CNAME record
176 | def queryCname(subdomain):
177 | try:
178 | cname = dns.resolver.resolve(subdomain, 'CNAME')
179 | for i in cname.response.answer:
180 | for j in i.items:
181 | subdomain_cname = j.to_text()
182 | return subdomain_cname
183 | except Exception as e:
184 | return ''
185 |
186 | # Decide from the CNAME whether the host is behind a CDN
187 | def cnameCheckCDN(subdomain):
188 |     subdomain_cname = queryCname(subdomain)
189 | # print(subdomain_cname)
190 | for cdnName in cdnDict:
191 | cdnDomains = cdnDict[cdnName]
192 | for cdnDomain in cdnDomains:
193 | if cdnDomain in subdomain_cname:
194 |                 # print(subdomain, subdomain_cname, cdnName)
195 | return [cdnName, subdomain_cname]
196 | if 'cdn' in subdomain_cname:
197 | return ['CDN', subdomain_cname]
198 | return False
199 |
200 |
201 | def checkCDN(subdomains_queue, notCDNSubdomains, CDNSubdomainsDict):
202 | while not subdomains_queue.empty():
203 | subdomain = subdomains_queue.get()
204 | cnameRet = cnameCheckCDN(subdomain)
205 | if not cnameRet:
206 | ipASNSRet, ips = ipASNSCheckCDN(subdomain)
207 | if not ipASNSRet:
208 | notCDNSubdomains.append((subdomain,ips))
209 | CDNSubdomainsDict[subdomain] = 'NOT'
210 | else:
211 | logger.info('{}: {}'.format(subdomain, ipASNSRet))
212 | # notCDNSubdomains.append(subdomain)
213 | CDNSubdomainsDict[subdomain] = ipASNSRet
214 | else:
215 | logger.info('{}: {}'.format(subdomain, cnameRet))
216 | # notCDNSubdomains.append(subdomain)
217 | CDNSubdomainsDict[subdomain] = cnameRet
218 |
219 |
220 | def run_checkCDN(subdomains):
221 |     query_A_threads = [] # worker threads
222 | subdomains_queue = Queue(-1)
223 |
224 | for subdomain in subdomains:
225 | subdomains_queue.put(subdomain)
226 |
227 |     # subdomains that are not behind a CDN
228 |     notCDNSubdomains = []
229 |     # CDN detection result for each subdomain
230 | CDNSubdomainsDict = {}
231 |
232 |     for t_id in range(50): # 50 worker threads to resolve A records and run the CDN checks
233 | t = Thread(target=checkCDN, args=(subdomains_queue, notCDNSubdomains, CDNSubdomainsDict))
234 | query_A_threads.append(t)
235 | t.start()
236 | for t in query_A_threads:
237 | t.join()
238 |
239 | # print()
240 | return notCDNSubdomains, CDNSubdomainsDict
241 |
242 |
243 | if __name__ == "__main__":
244 | subdomains = ['www.tjut.edu.cn']
245 | notCDNSubdomains, CDNSubdomainsDict = run_checkCDN(subdomains)
246 | print(CDNSubdomainsDict)
247 |
248 |
--------------------------------------------------------------------------------
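A minimal illustration of the CIDR membership test used in ipASNSCheckCDN above, with a made-up IP and two ranges taken from the cdns list:

import ipaddress

cdn_ranges = ['104.16.0.0/12', '13.32.0.0/15']   # two entries from the cdns list above
ip = '104.16.132.229'                            # example IP, for illustration only

for cidr in cdn_ranges:
    if ipaddress.ip_address(ip) in ipaddress.ip_network(cidr):
        print('CDN IP range hit:', cidr)         # this IP falls inside 104.16.0.0/12
        break
else:
    print('not inside any known CDN range')
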
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Python bindings to the Google search engine
4 | # Copyright (c) 2009-2018, Mario Vilas
5 | # All rights reserved.
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # * Redistributions of source code must retain the above copyright notice,
11 | # this list of conditions and the following disclaimer.
12 | # * Redistributions in binary form must reproduce the above copyright
13 | # notice,this list of conditions and the following disclaimer in the
14 | # documentation and/or other materials provided with the distribution.
15 | # * Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | # POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import os
32 | import random
33 | import sys
34 | import time
35 | import math
36 | from urllib.error import HTTPError
37 |
38 | if sys.version_info[0] > 2:
39 | from http.cookiejar import LWPCookieJar
40 | from urllib.request import Request, urlopen
41 | from urllib.parse import quote_plus, urlparse, parse_qs
42 | else:
43 | from cookielib import LWPCookieJar
44 | from urllib import quote_plus
45 | from urllib2 import Request, urlopen
46 | from urlparse import urlparse, parse_qs
47 |
48 | try:
49 | from bs4 import BeautifulSoup
50 | is_bs4 = True
51 | except ImportError:
52 | from BeautifulSoup import BeautifulSoup
53 | is_bs4 = False
54 |
55 | __all__ = [
56 |
57 | # Main search function.
58 | 'search',
59 |
60 | # Specialized search functions.
61 | 'search_images', 'search_news',
62 | 'search_videos', 'search_shop',
63 | 'search_books', 'search_apps',
64 |
65 | # Shortcut for "get lucky" search.
66 | 'lucky',
67 |
68 | # Computations based on the number of Google hits.
69 | 'hits', 'ngd',
70 |
71 | # Miscellaneous utility functions.
72 | 'get_random_user_agent',
73 | ]
74 |
75 | # URL templates to make Google searches.
76 | url_home = "https://www.google.%(tld)s/"
77 | url_search = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
78 | "btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s"
79 | url_next_page = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
80 | "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s"
81 | url_search_num = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
82 | "num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&" \
83 | "tbm=%(tpe)s"
84 | url_next_page_num = "https://www.google.%(tld)s/search?hl=%(lang)s&" \
85 | "q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&" \
86 | "safe=%(safe)s&tbm=%(tpe)s"
87 |
88 | # Cookie jar. Stored at the user's home folder.
89 | home_folder = os.getenv('HOME')
90 | if not home_folder:
91 | home_folder = os.getenv('USERHOME')
92 | if not home_folder:
93 | home_folder = '.' # Use the current folder on error.
94 | cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
95 | try:
96 | cookie_jar.load()
97 | except Exception:
98 | pass
99 |
100 | # Default user agent, unless instructed by the user to change it.
101 | USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)'
102 |
103 | # Load the list of valid user agents from the install folder.
104 | try:
105 | install_folder = os.path.abspath(os.path.split(__file__)[0])
106 | try:
107 | user_agents_file = os.path.join(install_folder, 'user_agents.txt.gz')
108 | import gzip
109 | fp = gzip.open(user_agents_file, 'rb')
110 | try:
111 | user_agents_list = [_.strip() for _ in fp.readlines()]
112 | finally:
113 | fp.close()
114 | del fp
115 | except Exception:
116 | user_agents_file = os.path.join(install_folder, 'user_agents.txt')
117 | with open(user_agents_file) as fp:
118 | user_agents_list = [_.strip() for _ in fp.readlines()]
119 | except Exception:
120 | user_agents_list = [USER_AGENT]
121 |
122 |
123 | # Get a random user agent.
124 | def get_random_user_agent():
125 | """
126 | Get a random user agent string.
127 |
128 | :rtype: str
129 | :return: Random user agent string.
130 | """
131 | return random.choice(user_agents_list)
132 |
133 |
134 | # Request the given URL and return the response page, using the cookie jar.
135 | def get_page(url, user_agent=None):
136 | """
137 | Request the given URL and return the response page, using the cookie jar.
138 |
139 | :param str url: URL to retrieve.
140 | :param str user_agent: User agent for the HTTP requests.
141 | Use None for the default.
142 |
143 | :rtype: str
144 | :return: Web page retrieved for the given URL.
145 |
146 | :raises IOError: An exception is raised on error.
147 | :raises urllib2.URLError: An exception is raised on error.
148 | :raises urllib2.HTTPError: An exception is raised on error.
149 | """
150 | if user_agent is None:
151 | user_agent = USER_AGENT
152 | request = Request(url)
153 |     request.add_header('User-Agent', user_agent)
154 | cookie_jar.add_cookie_header(request)
155 | response = urlopen(request)
156 | cookie_jar.extract_cookies(response, request)
157 | html = response.read()
158 | response.close()
159 | try:
160 | cookie_jar.save()
161 | except Exception:
162 | pass
163 | return html
164 |
165 |
166 | # Filter links found in the Google result pages HTML code.
167 | # Returns None if the link doesn't yield a valid result.
168 | def filter_result(link):
169 | try:
170 |
171 | # Valid results are absolute URLs not pointing to a Google domain
172 | # like images.google.com or googleusercontent.com
173 | o = urlparse(link, 'http')
174 | if o.netloc and 'google' not in o.netloc:
175 | return link
176 |
177 | # Decode hidden URLs.
178 | if link.startswith('/url?'):
179 | link = parse_qs(o.query)['q'][0]
180 |
181 | # Valid results are absolute URLs not pointing to a Google domain
182 | # like images.google.com or googleusercontent.com
183 | o = urlparse(link, 'http')
184 | if o.netloc and 'google' not in o.netloc:
185 | return link
186 |
187 | # Otherwise, or on error, return None.
188 | except Exception:
189 | pass
190 | return None
191 |
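# A hedged illustration of the filtering above (made-up inputs):
#   filter_result('/url?q=https://example.com/&sa=U')   ->  'https://example.com/'  (hidden URL decoded)
#   filter_result('https://images.google.com/imgres?x') ->  None                    (Google domain, dropped)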
192 |
193 | # Returns a generator that yields URLs.
194 | def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
195 | stop=None, domains=None, pause=2.0, only_standard=False,
196 | extra_params={}, tpe='', user_agent=None):
197 | """
198 | Search the given query string using Google.
199 |
200 | :param str query: Query string. Must NOT be url-encoded.
201 | :param str tld: Top level domain.
202 | :param str lang: Language.
203 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
204 | "qdr:d" => last 24 hours, "qdr:m" => last month).
205 | :param str safe: Safe search.
206 | :param int num: Number of results per page.
207 | :param int start: First result to retrieve.
208 | :param int or None stop: Last result to retrieve.
209 | Use None to keep searching forever.
210 | :param list of str or None domains: A list of web domains to constrain
211 | the search.
212 | :param float pause: Lapse to wait between HTTP requests.
213 | A lapse too long will make the search slow, but a lapse too short may
214 | cause Google to block your IP. Your mileage may vary!
215 | :param bool only_standard: If True, only returns the standard results from
216 | each page. If False, it returns every possible link from each page,
217 | except for those that point back to Google itself. Defaults to False
218 | for backwards compatibility with older versions of this module.
219 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
220 | parameters, which must be URL encoded. For example if you don't want
221 | Google to filter similar results you can set the extra_params to
222 | {'filter': '0'} which will append '&filter=0' to every query.
223 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
224 | Use the following values {videos: 'vid', images: 'isch',
225 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
226 | :param str or None user_agent: User agent for the HTTP requests.
227 | Use None for the default.
228 |
229 | :rtype: generator of str
230 | :return: Generator (iterator) that yields found URLs.
231 | If the stop parameter is None the iterator will loop forever.
232 | """
233 | # Set of hashes for the results found.
234 | # This is used to avoid repeated results.
235 | hashes = set()
236 |
237 | # Count the number of links yielded
238 | count = 0
239 |
240 | # Prepare domain list if it exists.
241 | if domains:
242 | query = query + ' ' + ' OR '.join(
243 | 'site:' + domain for domain in domains)
244 |
245 | # Prepare the search string.
246 | query = quote_plus(query)
247 |
248 | # Check extra_params for overlapping
249 | for builtin_param in ('hl', 'q', 'btnG', 'tbs', 'safe', 'tbm'):
250 | if builtin_param in extra_params.keys():
251 |             raise ValueError(
252 |                 'GET parameter "%s" is overlapping with '
253 |                 'the built-in GET parameter'
254 |                 % builtin_param
255 |             )
256 |
257 | # Grab the cookie from the home page.
258 | get_page(url_home % vars())
259 |
260 | # Prepare the URL of the first request.
261 | if start:
262 | if num == 10:
263 | url = url_next_page % vars()
264 | else:
265 | url = url_next_page_num % vars()
266 | else:
267 | if num == 10:
268 | url = url_search % vars()
269 | else:
270 | url = url_search_num % vars()
271 | print('\tgoogle search : {}'.format(url))
272 | # Loop until we reach the maximum result, if any (otherwise, loop forever).
273 | while not stop or start < stop:
274 |
275 | try: # Is it python<3?
276 | iter_extra_params = extra_params.iteritems()
277 | except AttributeError: # Or python>3?
278 | iter_extra_params = extra_params.items()
279 | # Append extra GET_parameters to URL
280 | for k, v in iter_extra_params:
281 |                 url += '&%s=%s' % (k, v)
282 |
283 | # Sleep between requests.
284 | time.sleep(pause)
285 |
286 | # Request the Google Search results page.
287 | # html = get_page(url)
288 | try:
289 | html = get_page(url)
290 | except HTTPError:
291 | print('\t[!] Error: Google probably now is blocking our requests.\n [-] Stop Google Search!')
292 | return False
293 |
294 | # Parse the response and process every anchored URL.
295 | if is_bs4:
296 | soup = BeautifulSoup(html, 'html.parser')
297 | else:
298 | soup = BeautifulSoup(html)
299 | anchors = soup.find(id='search').findAll('a')
300 | for a in anchors:
301 |
302 | # Leave only the "standard" results if requested.
303 | # Otherwise grab all possible links.
304 | if only_standard and (
305 | not a.parent or a.parent.name.lower() != "h3"):
306 | continue
307 |
308 | # Get the URL from the anchor tag.
309 | try:
310 | link = a['href']
311 | except KeyError:
312 | continue
313 |
314 | # Filter invalid links and links pointing to Google itself.
315 | link = filter_result(link)
316 | if not link:
317 | continue
318 |
319 | # Discard repeated results.
320 | h = hash(link)
321 | if h in hashes:
322 | continue
323 | hashes.add(h)
324 |
325 | # Yield the result.
326 | yield link
327 |
328 | count += 1
329 | if stop and count >= stop:
330 | return
331 |
332 | # End if there are no more results.
333 | if not soup.find(id='nav'):
334 | break
335 |
336 | # Prepare the URL for the next request.
337 | start += num
338 | if num == 10:
339 | url = url_next_page % vars()
340 | else:
341 | url = url_next_page_num % vars()
342 |
343 |
344 | # Shortcut to search images.
345 | # Beware, this does not return the image link.
346 | def search_images(query, tld='com', lang='en', tbs='0', safe='off', num=10,
347 | start=0, stop=None, pause=2.0, domains=None,
348 | only_standard=False, extra_params={}):
349 | """
350 | Shortcut to search images.
351 |
352 | :note: Beware, this does not return the image link.
353 |
354 | :param str query: Query string. Must NOT be url-encoded.
355 | :param str tld: Top level domain.
356 | :param str lang: Language.
357 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
358 | "qdr:d" => last 24 hours, "qdr:m" => last month).
359 | :param str safe: Safe search.
360 | :param int num: Number of results per page.
361 | :param int start: First result to retrieve.
362 | :param int or None stop: Last result to retrieve.
363 | Use None to keep searching forever.
364 | :param list of str or None domains: A list of web domains to constrain
365 | the search.
366 | :param float pause: Lapse to wait between HTTP requests.
367 | A lapse too long will make the search slow, but a lapse too short may
368 | cause Google to block your IP. Your mileage may vary!
369 | :param bool only_standard: If True, only returns the standard results from
370 | each page. If False, it returns every possible link from each page,
371 | except for those that point back to Google itself. Defaults to False
372 | for backwards compatibility with older versions of this module.
373 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
374 | parameters, which must be URL encoded. For example if you don't want
375 | Google to filter similar results you can set the extra_params to
376 | {'filter': '0'} which will append '&filter=0' to every query.
377 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
378 | Use the following values {videos: 'vid', images: 'isch',
379 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
380 | :param str or None user_agent: User agent for the HTTP requests.
381 | Use None for the default.
382 |
383 | :rtype: generator of str
384 | :return: Generator (iterator) that yields found URLs.
385 | If the stop parameter is None the iterator will loop forever.
386 | """
387 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
388 | pause, only_standard, extra_params, tpe='isch')
389 |
390 |
391 | # Shortcut to search news.
392 | def search_news(query, tld='com', lang='en', tbs='0', safe='off', num=10,
393 | start=0, stop=None, domains=None, pause=2.0,
394 | only_standard=False, extra_params={}):
395 | """
396 | Shortcut to search news.
397 |
398 | :param str query: Query string. Must NOT be url-encoded.
399 | :param str tld: Top level domain.
400 | :param str lang: Language.
401 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
402 | "qdr:d" => last 24 hours, "qdr:m" => last month).
403 | :param str safe: Safe search.
404 | :param int num: Number of results per page.
405 | :param int start: First result to retrieve.
406 | :param int or None stop: Last result to retrieve.
407 | Use None to keep searching forever.
408 | :param list of str or None domains: A list of web domains to constrain
409 | the search.
410 | :param float pause: Lapse to wait between HTTP requests.
411 | A lapse too long will make the search slow, but a lapse too short may
412 | cause Google to block your IP. Your mileage may vary!
413 | :param bool only_standard: If True, only returns the standard results from
414 | each page. If False, it returns every possible link from each page,
415 | except for those that point back to Google itself. Defaults to False
416 | for backwards compatibility with older versions of this module.
417 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
418 | parameters, which must be URL encoded. For example if you don't want
419 | Google to filter similar results you can set the extra_params to
420 | {'filter': '0'} which will append '&filter=0' to every query.
421 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
422 | Use the following values {videos: 'vid', images: 'isch',
423 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
424 | :param str or None user_agent: User agent for the HTTP requests.
425 | Use None for the default.
426 |
427 | :rtype: generator of str
428 | :return: Generator (iterator) that yields found URLs.
429 | If the stop parameter is None the iterator will loop forever.
430 | """
431 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
432 | pause, only_standard, extra_params, tpe='nws')
433 |
434 |
435 | # Shortcut to search videos.
436 | def search_videos(query, tld='com', lang='en', tbs='0', safe='off', num=10,
437 | start=0, stop=None, domains=None, pause=2.0,
438 | only_standard=False, extra_params={}):
439 | """
440 | Shortcut to search videos.
441 |
442 | :param str query: Query string. Must NOT be url-encoded.
443 | :param str tld: Top level domain.
444 | :param str lang: Language.
445 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
446 | "qdr:d" => last 24 hours, "qdr:m" => last month).
447 | :param str safe: Safe search.
448 | :param int num: Number of results per page.
449 | :param int start: First result to retrieve.
450 | :param int or None stop: Last result to retrieve.
451 | Use None to keep searching forever.
452 | :param list of str or None domains: A list of web domains to constrain
453 | the search.
454 | :param float pause: Lapse to wait between HTTP requests.
455 | A lapse too long will make the search slow, but a lapse too short may
456 | cause Google to block your IP. Your mileage may vary!
457 | :param bool only_standard: If True, only returns the standard results from
458 | each page. If False, it returns every possible link from each page,
459 | except for those that point back to Google itself. Defaults to False
460 | for backwards compatibility with older versions of this module.
461 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
462 | parameters, which must be URL encoded. For example if you don't want
463 | Google to filter similar results you can set the extra_params to
464 | {'filter': '0'} which will append '&filter=0' to every query.
465 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
466 | Use the following values {videos: 'vid', images: 'isch',
467 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
468 | :param str or None user_agent: User agent for the HTTP requests.
469 | Use None for the default.
470 |
471 | :rtype: generator of str
472 | :return: Generator (iterator) that yields found URLs.
473 | If the stop parameter is None the iterator will loop forever.
474 | """
475 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
476 | pause, only_standard, extra_params, tpe='vid')
477 |
478 |
479 | # Shortcut to search shop.
480 | def search_shop(query, tld='com', lang='en', tbs='0', safe='off', num=10,
481 | start=0, stop=None, domains=None, pause=2.0,
482 | only_standard=False, extra_params={}):
483 | """
484 | Shortcut to search shop.
485 |
486 | :param str query: Query string. Must NOT be url-encoded.
487 | :param str tld: Top level domain.
488 | :param str lang: Language.
489 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
490 | "qdr:d" => last 24 hours, "qdr:m" => last month).
491 | :param str safe: Safe search.
492 | :param int num: Number of results per page.
493 | :param int start: First result to retrieve.
494 | :param int or None stop: Last result to retrieve.
495 | Use None to keep searching forever.
496 | :param list of str or None domains: A list of web domains to constrain
497 | the search.
498 | :param float pause: Lapse to wait between HTTP requests.
499 | A lapse too long will make the search slow, but a lapse too short may
500 | cause Google to block your IP. Your mileage may vary!
501 | :param bool only_standard: If True, only returns the standard results from
502 | each page. If False, it returns every possible link from each page,
503 | except for those that point back to Google itself. Defaults to False
504 | for backwards compatibility with older versions of this module.
505 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
506 | parameters, which must be URL encoded. For example if you don't want
507 | Google to filter similar results you can set the extra_params to
508 | {'filter': '0'} which will append '&filter=0' to every query.
509 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
510 | Use the following values {videos: 'vid', images: 'isch',
511 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
512 | :param str or None user_agent: User agent for the HTTP requests.
513 | Use None for the default.
514 |
515 | :rtype: generator of str
516 | :return: Generator (iterator) that yields found URLs.
517 | If the stop parameter is None the iterator will loop forever.
518 | """
519 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
520 | pause, only_standard, extra_params, tpe='shop')
521 |
522 |
523 | # Shortcut to search books.
524 | def search_books(query, tld='com', lang='en', tbs='0', safe='off', num=10,
525 | start=0, stop=None, domains=None, pause=2.0,
526 | only_standard=False, extra_params={}):
527 | """
528 | Shortcut to search books.
529 |
530 | :param str query: Query string. Must NOT be url-encoded.
531 | :param str tld: Top level domain.
532 | :param str lang: Language.
533 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
534 | "qdr:d" => last 24 hours, "qdr:m" => last month).
535 | :param str safe: Safe search.
536 | :param int num: Number of results per page.
537 | :param int start: First result to retrieve.
538 | :param int or None stop: Last result to retrieve.
539 | Use None to keep searching forever.
540 | :param list of str or None domains: A list of web domains to constrain
541 | the search.
542 | :param float pause: Lapse to wait between HTTP requests.
543 | A lapse too long will make the search slow, but a lapse too short may
544 | cause Google to block your IP. Your mileage may vary!
545 | :param bool only_standard: If True, only returns the standard results from
546 | each page. If False, it returns every possible link from each page,
547 | except for those that point back to Google itself. Defaults to False
548 | for backwards compatibility with older versions of this module.
549 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
550 | parameters, which must be URL encoded. For example if you don't want
551 | Google to filter similar results you can set the extra_params to
552 | {'filter': '0'} which will append '&filter=0' to every query.
553 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
554 | Use the following values {videos: 'vid', images: 'isch',
555 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
556 | :param str or None user_agent: User agent for the HTTP requests.
557 | Use None for the default.
558 |
559 | :rtype: generator of str
560 | :return: Generator (iterator) that yields found URLs.
561 | If the stop parameter is None the iterator will loop forever.
562 | """
563 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
564 | pause, only_standard, extra_params, tpe='bks')
565 |
566 |
567 | # Shortcut to search apps.
568 | def search_apps(query, tld='com', lang='en', tbs='0', safe='off', num=10,
569 | start=0, stop=None, domains=None, pause=2.0,
570 | only_standard=False, extra_params={}):
571 | """
572 | Shortcut to search apps.
573 |
574 | :param str query: Query string. Must NOT be url-encoded.
575 | :param str tld: Top level domain.
576 | :param str lang: Language.
577 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
578 | "qdr:d" => last 24 hours, "qdr:m" => last month).
579 | :param str safe: Safe search.
580 | :param int num: Number of results per page.
581 | :param int start: First result to retrieve.
582 | :param int or None stop: Last result to retrieve.
583 | Use None to keep searching forever.
584 | :param list of str or None domains: A list of web domains to constrain
585 | the search.
586 | :param float pause: Lapse to wait between HTTP requests.
587 | A lapse too long will make the search slow, but a lapse too short may
588 | cause Google to block your IP. Your mileage may vary!
589 | :param bool only_standard: If True, only returns the standard results from
590 | each page. If False, it returns every possible link from each page,
591 | except for those that point back to Google itself. Defaults to False
592 | for backwards compatibility with older versions of this module.
593 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
594 | parameters, which must be URL encoded. For example if you don't want
595 | Google to filter similar results you can set the extra_params to
596 | {'filter': '0'} which will append '&filter=0' to every query.
597 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
598 | Use the following values {videos: 'vid', images: 'isch',
599 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
600 | :param str or None user_agent: User agent for the HTTP requests.
601 | Use None for the default.
602 |
603 | :rtype: generator of str
604 | :return: Generator (iterator) that yields found URLs.
605 | If the stop parameter is None the iterator will loop forever.
606 | """
607 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
608 | pause, only_standard, extra_params, tpe='app')
609 |
610 |
611 | # Shortcut to single-item search.
612 | # Evaluates the iterator to return the single URL as a string.
613 | def lucky(query, tld='com', lang='en', tbs='0', safe='off',
614 | only_standard=False, extra_params={}, tpe=''):
615 | """
616 | Shortcut to single-item search.
617 |
618 | :param str query: Query string. Must NOT be url-encoded.
619 | :param str tld: Top level domain.
620 | :param str lang: Language.
621 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
622 | "qdr:d" => last 24 hours, "qdr:m" => last month).
623 | :param str safe: Safe search.
633 | :param bool only_standard: If True, only returns the standard results from
634 | each page. If False, it returns every possible link from each page,
635 | except for those that point back to Google itself. Defaults to False
636 | for backwards compatibility with older versions of this module.
637 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
638 | parameters, which must be URL encoded. For example if you don't want
639 | Google to filter similar results you can set the extra_params to
640 | {'filter': '0'} which will append '&filter=0' to every query.
641 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
642 | Use the following values {videos: 'vid', images: 'isch',
643 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
646 |
647 | :rtype: str
648 | :return: URL found by Google.
649 | """
650 |     gen = search(query, tld, lang, tbs, safe, 1, 0, 1, None, 0.,
651 |                  only_standard, extra_params, tpe)
652 | return next(gen)
653 |
654 |
655 | # Returns only the number of Google hits for the given search query.
656 | # This is the number reported by Google itself, NOT by scraping.
657 | def hits(query, tld='com', lang='en', tbs='0', safe='off',
658 | domains=None, extra_params={}, tpe='', user_agent=None):
659 | """
660 | Search the given query string using Google and return the number of hits.
661 |
662 | :note: This is the number reported by Google itself, NOT by scraping.
663 |
664 | :param str query: Query string. Must NOT be url-encoded.
665 | :param str tld: Top level domain.
666 | :param str lang: Language.
667 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
668 | "qdr:d" => last 24 hours, "qdr:m" => last month).
669 | :param str safe: Safe search.
674 | :param list of str or None domains: A list of web domains to constrain
675 | the search.
683 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
684 | parameters, which must be URL encoded. For example if you don't want
685 | Google to filter similar results you can set the extra_params to
686 | {'filter': '0'} which will append '&filter=0' to every query.
687 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
688 | Use the following values {videos: 'vid', images: 'isch',
689 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
690 | :param str or None user_agent: User agent for the HTTP requests.
691 | Use None for the default.
692 |
693 | :rtype: int
694 | :return: Number of Google hits for the given search query.
695 | """
696 |
697 | # Prepare domain list if it exists.
698 | if domains:
699 | domain_query = '+OR+'.join('site:' + domain for domain in domains)
700 | domain_query = '+' + domain_query
701 | else:
702 | domain_query = ''
703 |
704 | # Prepare the search string.
705 | query = quote_plus(query + domain_query)
706 |
707 | # Check extra_params for overlapping
708 | for builtin_param in ('hl', 'q', 'btnG', 'tbs', 'safe', 'tbm'):
709 | if builtin_param in extra_params.keys():
710 |             raise ValueError(
711 |                 'GET parameter "%s" is overlapping with '
712 |                 'the built-in GET parameter'
713 |                 % builtin_param
714 |             )
715 |
716 | # Grab the cookie from the home page.
717 | get_page(url_home % vars())
718 |
719 |     # Prepare the URL of the first (and in this case ONLY) request.
720 | url = url_search % vars()
721 |
722 |     try:  # Python 2?
723 |         iter_extra_params = extra_params.iteritems()
724 |     except AttributeError:  # Python 3?
725 |         iter_extra_params = extra_params.items()
726 |     # Append the extra GET parameters to the URL.
727 |     for k, v in iter_extra_params:
728 |         url += '&%s=%s' % (k, v)
729 |
730 | # Request the Google Search results page.
731 | html = get_page(url)
732 |
733 | # Parse the response.
734 | if is_bs4:
735 | soup = BeautifulSoup(html, 'html.parser')
736 | else:
737 | soup = BeautifulSoup(html)
738 |
739 | # Get the number of hits.
740 | tag = soup.find_all(attrs={"class": "sd", "id": "resultStats"})[0]
741 | hits_text_parts = tag.text.split()
742 | if len(hits_text_parts) < 3:
743 | return 0
744 | return int(hits_text_parts[1].replace(',', '').replace('.', ''))
745 |
746 |
747 | def ngd(term1, term2):
748 | """
749 | Return the Normalized Google distance between words.
750 |
751 | For more info, refer to:
752 | https://en.wikipedia.org/wiki/Normalized_Google_distance
753 |
754 | :param str term1: First term to compare.
755 | :param str term2: Second term to compare.
756 |
757 | :rtype: float
758 | :return: Normalized Google distance between words.
759 | """
760 |
761 | lhits1 = math.log10(hits(term1))
762 | lhits2 = math.log10(hits(term2))
763 | lhits_mix = math.log10(hits('"' + term1 + '" "' + term2 + '"'))
764 | npages = hits('the')
765 | fix = 1000
766 |
767 | lN = math.log10(npages * fix)
768 | numerator = max([lhits1, lhits2]) - lhits_mix
769 | denomin = lN - min([lhits1, lhits2])
770 |
771 | return numerator / denomin
772 |
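773 |
774 | # Minimal usage sketch of the helpers above. Illustrative only: the query
775 | # strings are arbitrary placeholders, and it assumes the keyword defaults
776 | # (stop, pause) documented in the docstrings above are available on search().
777 | if __name__ == '__main__':
778 |     # Print the first ten result URLs for an arbitrary query.
779 |     for result_url in search('site:example.com', stop=10, pause=2.0):
780 |         print(result_url)
781 |
782 |     # Reported hit count and the Normalized Google distance, i.e.
783 |     #   NGD(x, y) = (max(log f(x), log f(y)) - log f(x, y))
784 |     #               / (log N - min(log f(x), log f(y)))
785 |     print(hits('python'))
786 |     print(ngd('python', 'snake'))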
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 |     along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | ESD
6 | ~~~
7 |
8 | Implements sub-domain enumeration
9 |
10 | :author: Feei
11 | :homepage: https://github.com/FeeiCN/ESD
12 | :license: GPL, see LICENSE for more details.
13 | :copyright: Copyright (c) 2018 Feei. All rights reserved
14 | """
15 | import os
16 | import re
17 | import time
18 | import ssl
19 | import math
20 | import string
21 | import random
22 | import traceback
23 | import itertools
24 | import datetime
25 | import colorlog
26 | import asyncio
27 | import aiodns
28 | import aiohttp
29 | import logging
30 | import requests
31 | import backoff
32 | import socket
33 | import async_timeout
34 | import dns.query
35 | import dns.zone
36 | import dns.resolver
37 | from tqdm import tqdm
38 | from colorama import Fore
39 | from optparse import OptionParser
40 | from aiohttp.resolver import AsyncResolver
41 | from itertools import islice
42 | from difflib import SequenceMatcher
43 |
44 | __version__ = '0.0.29'
45 |
46 | handler = colorlog.StreamHandler()
47 | formatter = colorlog.ColoredFormatter(
48 | '%(log_color)s%(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s',
49 | datefmt=None,
50 | reset=True,
51 | log_colors={
52 | 'DEBUG': 'cyan',
53 | 'INFO': 'green',
54 | 'WARNING': 'yellow',
55 | 'ERROR': 'red',
56 | 'CRITICAL': 'red,bg_white',
57 | },
58 | secondary_log_colors={},
59 | style='%'
60 | )
61 | handler.setFormatter(formatter)
62 |
63 | logger = colorlog.getLogger('ESD')
64 | logger.addHandler(handler)
65 | logger.setLevel(logging.INFO)
66 |
67 | ssl.match_hostname = lambda cert, hostname: True
68 |
69 |
70 | # Only recursion is used, which is very slow; not recommended to enable until it has been optimized
71 | # TODO: optimize the DNS queries, the recursion is too slow
72 | class DNSQuery(object):
73 | def __init__(self, root_domain, subs, suffix):
74 | # root domain
75 | self.suffix = suffix
76 | self.sub_domains = []
77 | if root_domain:
78 | self.sub_domains.append(root_domain)
79 |
80 | for sub in subs:
81 | sub = ''.join(sub.rsplit(suffix, 1)).rstrip('.')
82 | self.sub_domains.append('{sub}.{domain}'.format(sub=sub, domain=suffix))
83 |
84 | def dns_query(self):
85 | """
86 | soa,txt,mx,aaaa
87 | :param sub:
88 | :return:
89 | """
90 | final_list = []
91 | for subdomain in self.sub_domains:
92 | try:
93 | soa = []
94 | q_soa = dns.resolver.resolve(subdomain, 'SOA')
95 | for a in q_soa:
96 | soa.append(str(a.rname).strip('.'))
97 | soa.append(str(a.mname).strip('.'))
98 | except Exception as e:
99 | logger.warning('Query failed. {e}'.format(e=str(e)))
100 | try:
101 | aaaa = []
102 | q_aaaa = dns.resolver.resolve(subdomain, 'AAAA')
103 | aaaa = [str(a.address).strip('.') for a in q_aaaa]
104 | except Exception as e:
105 | logger.warning('Query failed. {e}'.format(e=str(e)))
106 | try:
107 | txt = []
108 | q_txt = dns.resolver.resolve(subdomain, 'TXT')
109 | txt = [t.strings[0].decode('utf-8').strip('.') for t in q_txt]
110 | except Exception as e:
111 | logger.warning('Query failed. {e}'.format(e=str(e)))
112 | try:
113 | mx = []
114 | q_mx = dns.resolver.resolve(subdomain, 'MX')
115 | mx = [str(m.exchange).strip('.') for m in q_mx]
116 | except Exception as e:
117 | logger.warning('Query failed. {e}'.format(e=str(e)))
118 | domain_set = soa + aaaa + txt + mx
119 | domain_list = [i for i in domain_set]
120 | for p in domain_set:
121 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\.?)$', p)
122 | if len(re_domain) > 0 and subdomain in re_domain[0][0]:
123 | continue
124 | else:
125 | domain_list.remove(p)
126 | final_list = domain_list + final_list
127 | # Recursive call: look for new subdomains in the DNS records of the discovered subdomains
128 | recursive = []
129 | # print("before: {0}".format(final_list))
130 | # print("self.sub_domain: {0}".format(self.sub_domains))
131 | final_list = list(set(final_list).difference(set(self.sub_domains)))
132 | # print("after: {0}".format(final_list))
133 | if final_list:
134 | d = DNSQuery('', final_list, self.suffix)
135 | recursive = d.dns_query()
136 | return final_list + recursive
137 |
138 |
139 | class DNSTransfer(object):
140 | def __init__(self, domain):
141 | self.domain = domain
142 |
143 | def transfer_info(self):
144 | ret_zones = list()
145 | try:
146 | nss = dns.resolver.resolve(self.domain, 'NS')
147 | nameservers = [str(ns) for ns in nss]
148 | ns_addr = dns.resolver.resolve(nameservers[0], 'A')
149 | # dnspython bug: the lifetime parameter needs to be set; pass the nameserver address to xfr() as a string
150 | zones = dns.zone.from_xfr(dns.query.xfr(str(ns_addr[0]), self.domain, relativize=False, timeout=2, lifetime=2),
151 | check_origin=False)
152 | names = zones.nodes.keys()
153 | for n in names:
154 | subdomain = ''
155 | for t in range(0, len(n) - 1):
156 | if subdomain != '':
157 | subdomain += '.'
158 | subdomain += str(n[t].decode())
159 | if subdomain != self.domain:
160 | ret_zones.append(subdomain)
161 | return ret_zones
162 | except BaseException:
163 | return []
164 |
165 |
166 | class CAInfo(object):
167 | def __init__(self, domain):
168 | self.domain = domain
169 |
170 | def dns_resolve(self):
171 | padding_domain = 'www.' + self.domain
172 | # loop = asyncio.get_event_loop()
173 | loop = asyncio.new_event_loop()
174 | asyncio.set_event_loop(loop)
175 | resolver = aiodns.DNSResolver(loop=loop)
176 | f = resolver.query(padding_domain, 'A')
177 | result = loop.run_until_complete(f)
178 | return result[0].host
179 |
180 | def get_cert_info_by_ip(self, ip):
181 | s = socket.socket()
182 | s.settimeout(2)
183 | base_dir = os.path.dirname(os.path.abspath(__file__))
184 | cert_path = base_dir + '/cacert.pem'
185 | connect = ssl.wrap_socket(s, cert_reqs=ssl.CERT_REQUIRED, ca_certs=cert_path)
186 | connect.settimeout(2)
187 | connect.connect((ip, 443))
188 | cert_data = connect.getpeercert().get('subjectAltName')
189 | return cert_data
190 |
191 | def get_ca_domain_info(self):
192 | domain_list = list()
193 | try:
194 | ip = self.dns_resolve()
195 | cert_data = self.get_cert_info_by_ip(ip)
196 | except Exception as e:
197 | return domain_list
198 |
199 | for domain_info in cert_data:
200 | hostname = domain_info[1]
201 | if not hostname.startswith('*') and hostname.endswith(self.domain):
202 | domain_list.append(hostname)
203 |
204 | return domain_list
205 |
206 | def get_subdomains(self):
207 | subs = list()
208 | subdomain_list = self.get_ca_domain_info()
209 | for sub in subdomain_list:
210 | subs.append(sub[:len(sub) - len(self.domain) - 1])
211 | return subs
212 |
213 |
214 | class EnumSubDomain(object):
215 | def __init__(self, domain, response_filter=None, dns_servers=None, skip_rsc=False, debug=False,
216 | split=None, proxy={}, multiresolve=False):
217 | self.project_directory = os.path.abspath(os.path.dirname(__file__))
218 | logger.info('Version: {v}'.format(v=__version__))
219 | logger.info('----------')
220 | logger.info('Start domain: {d}'.format(d=domain))
221 | self.proxy = proxy
222 | self.data = {}
223 | self.domain = domain
224 | self.skip_rsc = skip_rsc
225 | self.split = split
226 | self.multiresolve = multiresolve
227 | self.stable_dns_servers = ['119.29.29.29']
228 | if dns_servers is None:
229 | # Apart from DNSPod, none of the others are suitable as a stable DNS
230 | # Either concurrency drops significantly, or they are completely unusable
231 | dns_servers = [
232 | # The DNS server has a large impact on result accuracy; some servers return results inconsistent with others, or no results at all
233 | # '223.5.5.5', # AliDNS
234 | # '114.114.114.114', # 114DNS
235 | # '1.1.1.1', # Cloudflare
236 | '119.29.29.29', # DNSPod https://www.dnspod.cn/products/public.dns
237 | # '180.76.76.76', # BaiduDNS
238 | # '1.2.4.8', # sDNS
239 | # '11.1.1.1' # test DNS, not available
240 | # '8.8.8.8', # Google DNS, latency is too high
241 | ]
242 |
243 | random.shuffle(dns_servers)
244 | self.dns_servers = dns_servers
245 | self.resolver = None
246 | self.loop = asyncio.get_event_loop()
247 | self.general_dicts = []
248 | # Mark whether the current domain is a wildcard (pan-resolution) domain
249 | self.is_wildcard_domain = False
250 | # Resolve a deliberately nonexistent subdomain to determine, from the DNS result,
251 | # whether wildcard (pan) resolution is enabled
252 | self.wildcard_sub = 'feei-esd-{random}'.format(random=random.randint(0, 9999))
253 | self.wildcard_sub3 = 'feei-esd-{random}.{random}'.format(random=random.randint(0, 9999))
254 | # IPs returned when resolving the nonexistent test subdomain
255 | self.wildcard_ips = []
256 | # HTML response of the nonexistent test subdomain
257 | self.wildcard_html = None
258 | self.wildcard_html_len = 0
259 | self.wildcard_html3 = None
260 | self.wildcard_html3_len = 0
261 | # Subdomains whose IPs match the wildcard (nonexistent-domain) IPs
262 | self.wildcard_subs = []
263 | # Wildcard domains use RSC
264 | self.wildcard_domains = {}
265 | # Coroutine count
266 | self.coroutine_count = None
267 | # Too much concurrency makes DNS server errors increase dramatically
268 | self.coroutine_count_dns = 1000
269 | self.coroutine_count_request = 100
270 | # aiodns resolve timeout
271 | self.resolve_timeout = 3
272 | # RSC (response similarity comparison) ratio: responses more similar than this to the wildcard page are treated as wildcard noise
273 | self.rsc_ratio = 0.8
274 | self.remainder = 0
275 | self.count = 0
276 | # Request Header
277 | self.request_headers = {
278 | 'Connection': 'keep-alive',
279 | 'Pragma': 'no-cache',
280 | 'Cache-Control': 'no-cache',
281 | 'Upgrade-Insecure-Requests': '1',
282 | 'User-Agent': 'Baiduspider',
283 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
284 | 'DNT': '1',
285 | 'Referer': 'http://www.baidu.com/',
286 | 'Accept-Encoding': 'gzip, deflate',
287 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'}
288 | # Filter the domain's response(regex)
289 | self.response_filter = response_filter
290 | # debug mode
291 | self.debug = debug
292 | if self.debug:
293 | logger.setLevel(logging.DEBUG)
294 | # collect redirecting domains and response domains
295 | self.domains_rs = []
296 | self.domains_rs_processed = []
297 | self.dns_query_errors = 0
298 |
299 | def generate_general_dicts(self, line):
300 | """
301 | Generate general subdomains dicts
302 | :param line:
303 | :return:
304 | """
305 | letter_count = line.count('{letter}')
306 | number_count = line.count('{number}')
307 | letters = itertools.product(string.ascii_lowercase, repeat=letter_count)
308 | letters = [''.join(l) for l in letters]
309 | numbers = itertools.product(string.digits, repeat=number_count)
310 | numbers = [''.join(n) for n in numbers]
311 | for l in letters:
312 | iter_line = line.replace('{letter}' * letter_count, l)
313 | self.general_dicts.append(iter_line)
314 | number_dicts = []
315 | for gd in self.general_dicts:
316 | for n in numbers:
317 | iter_line = gd.replace('{number}' * number_count, n)
318 | number_dicts.append(iter_line)
319 | if len(number_dicts) > 0:
320 | return number_dicts
321 | else:
322 | return self.general_dicts
323 |
324 | def load_sub_domain_dict(self):
325 | """
326 | Load subdomains from files and dicts
327 | :return:
328 | """
329 | dicts = []
330 | if self.debug:
331 | path = '{pd}/subs-test.esd'.format(pd=self.project_directory)
332 | else:
333 | path = '{pd}/subs.esd'.format(pd=self.project_directory)
334 | with open(path, encoding='utf-8') as f:
335 | for line in f:
336 | line = line.strip().lower()
337 | # skip comments and space
338 | if '#' in line or line == '':
339 | continue
340 | if '{letter}' in line or '{number}' in line:
341 | self.general_dicts = []
342 | dicts_general = self.generate_general_dicts(line)
343 | dicts += dicts_general
344 | else:
345 | # compatibility with other dicts
346 | line = line.strip('.')
347 | dicts.append(line)
348 | dicts = list(set(dicts))
349 |
350 | # split dict
351 | if self.split is not None:
352 | s = self.split.split('/')
353 | dicts_choose = int(s[0])
354 | dicts_count = int(s[1])
355 | dicts_every = int(math.ceil(len(dicts) / dicts_count))
356 | dicts = [dicts[i:i + dicts_every] for i in range(0, len(dicts), dicts_every)][dicts_choose - 1]
357 | logger.info(
358 | 'Sub domain dict split into {count} parts, using part {choose}'.format(count=dicts_count, choose=dicts_choose))
359 |
360 | # root domain
361 | dicts.append('@')
362 |
363 | return dicts
364 |
365 | async def query(self, sub):
366 | """
367 | Query domain
368 | :param sub:
369 | :return:
370 | """
371 | ret = None
372 | # root domain
373 | if sub == '@' or sub == '':
374 | sub_domain = self.domain
375 | else:
376 | sub = ''.join(sub.rsplit(self.domain, 1)).rstrip('.')
377 | sub_domain = '{sub}.{domain}'.format(sub=sub, domain=self.domain)
378 | # Retry when specific exceptions occur
379 | for i in range(4):
380 | try:
381 | ret = await self.resolver.query(sub_domain, 'A')
382 | except aiodns.error.DNSError as e:
383 | err_code, err_msg = e.args[0], e.args[1]
384 | # The domain genuinely does not exist
385 | # 4: Domain name not found
386 | # 1: DNS server returned answer with no data
387 | # All other cases need a retry, otherwise too many subdomains would be missed
388 | # 11: Could not contact DNS servers
389 | # 12: Timeout while contacting DNS servers
390 | if err_code not in [1, 4]:
391 | if i == 2:
392 | logger.warning(f'Try {i + 1} times, but failed. {sub_domain} {e}')
393 | self.dns_query_errors = self.dns_query_errors + 1
394 | continue
395 | except Exception as e:
396 | logger.info(sub_domain)
397 | logger.warning(traceback.format_exc())
398 | else:
399 | ret = [r.host for r in ret]
400 | domain_ips = [s for s in ret]
401 | # If this is a wildcard domain and the brute-forced subdomain's IPs
402 | # match (or are a subset of) the wildcard IPs, the subdomain is not
403 | # accepted directly; it is left to response similarity comparison (RSC),
404 | # or dropped from the results in --skip-rsc mode.
405 | if self.is_wildcard_domain and (
406 | sorted(self.wildcard_ips) == sorted(domain_ips) or set(domain_ips).issubset(
407 | self.wildcard_ips)):
408 | if self.skip_rsc:
409 | logger.debug(
410 | '{sub} may be a wildcard subdomain, but --skip-rsc mode is on, so it will be dropped from the results'.format(
411 | sub=sub_domain))
412 | else:
413 | logger.debug(
414 | '{r} may be a wildcard domain, continue with RSC: {sub}'.format(r=self.remainder, sub=sub_domain,
415 | ips=domain_ips))
416 | else:
417 | if sub != self.wildcard_sub:
418 | self.data[sub_domain] = sorted(domain_ips)
419 | print('', end='\n')
420 | self.count += 1
421 | logger.info('{r} {sub} {ips}'.format(r=self.remainder, sub=sub_domain, ips=domain_ips))
422 | break
423 | self.remainder += -1
424 | return sub_domain, ret
425 |
426 | @staticmethod
427 | def limited_concurrency_coroutines(coros, limit):
428 | futures = [
429 | asyncio.ensure_future(c)
430 | for c in islice(coros, 0, limit)
431 | ]
432 |
433 | async def first_to_finish():
434 | while True:
435 | await asyncio.sleep(0)
436 | for f in futures:
437 | if f.done():
438 | futures.remove(f)
439 | try:
440 | nf = next(coros)
441 | futures.append(asyncio.ensure_future(nf))
442 | except StopIteration:
443 | pass
444 | return f.result()
445 |
446 | while len(futures) > 0:
447 | yield first_to_finish()
448 |
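# limited_concurrency_coroutines() keeps at most `limit` futures in flight: whenever one
# finishes, the next coroutine from the generator is scheduled, so memory stays bounded
# even with tens of thousands of pending DNS queries. A rough usage sketch (illustrative
# only, mirroring how start() consumes it below):
#
#   tasks = (self.query(sub) for sub in subs)
#   for waiter in self.limited_concurrency_coroutines(tasks, self.coroutine_count):
#       await waiter  # inside an async function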
449 | async def start(self, tasks, tasks_num):
450 | """
451 | Limit the number of concurrent coroutines to reduce the memory footprint
452 | :param tasks:
453 | :return:
454 | """
455 | for res in tqdm(self.limited_concurrency_coroutines(tasks, self.coroutine_count),
456 | bar_format="%s{l_bar}%s{bar}%s{r_bar}%s" % (Fore.YELLOW, Fore.YELLOW, Fore.YELLOW, Fore.RESET),
457 | total=tasks_num):
458 | await res
459 |
460 | @staticmethod
461 | def data_clean(data):
462 | try:
463 | html = re.sub(r'\s', '', data)
464 | html = re.sub(r'<script(?!.*?src=).*?>.*?</script>', '', html)
465 | return html
466 | except BaseException:
467 | return data
468 |
469 | @staticmethod
470 | @backoff.on_exception(backoff.expo, TimeoutError, max_tries=3)
471 | async def fetch(session, url):
472 | """
473 | Fetch url response with session
474 | :param session:
475 | :param url:
476 | :return:
477 | """
478 | try:
479 | async with async_timeout.timeout(20):
480 | async with session.get(url) as response:
481 | return await response.text(), response.history
482 | except Exception as e:
483 | # TODO: in the wildcard-DNS scenario, relying only on response similarity comparison means a domain with no web service fails the comparison and gets dropped
484 | logger.warning('fetch exception: {e} {u}'.format(e=type(e).__name__, u=url))
485 | return None, None
486 |
487 | async def similarity(self, sub):
488 | """
489 | Enumerate subdomains by responding to similarities
490 | :param sub:
491 | :return:
492 | """
493 | # root domain
494 | if sub == '@' or sub == '':
495 | sub_domain = self.domain
496 | else:
497 | sub = ''.join(sub.rsplit(self.domain, 1)).rstrip('.')
498 | sub_domain = '{sub}.{domain}'.format(sub=sub, domain=self.domain)
499 |
500 | if sub_domain in self.domains_rs:
501 | self.domains_rs.remove(sub_domain)
502 | full_domain = 'http://{sub_domain}'.format(sub_domain=sub_domain)
503 | # If the redirect target is one of the following, do not add it to the next RSC round
504 | skip_domain_with_history = [
505 | # redirected to the root domain
506 | '{domain}'.format(domain=self.domain),
507 | 'www.{domain}'.format(domain=self.domain),
508 | # redirected to itself, e.g. HTTP redirecting to HTTPS
509 | '{domain}'.format(domain=sub_domain),
510 | ]
511 | try:
512 | regex_domain = r"((?!\/)(?:(?:[a-z\d-]*\.)+{d}))".format(d=self.domain)
513 | resolver = AsyncResolver(nameservers=self.dns_servers)
514 | conn = aiohttp.TCPConnector(resolver=resolver)
515 | async with aiohttp.ClientSession(connector=conn, headers=self.request_headers) as session:
516 | html, history = await self.fetch(session, full_domain)
517 | html = self.data_clean(html)
518 | if history is not None and len(history) > 0:
519 | location = str(history[-1].headers['location'])
520 | if '.' in location:
521 | location_split = location.split('/')
522 | if len(location_split) > 2:
523 | location = location_split[2]
524 | else:
525 | location = location
526 | try:
527 | location = re.match(regex_domain, location).group(0)
528 | except AttributeError:
529 | location = location
530 | status = history[-1].status
531 | if location in skip_domain_with_history and len(history) >= 2:
532 | logger.debug('domain in skip: {s} {r} {l}'.format(s=sub_domain, r=status, l=location))
533 | return
534 | else:
535 | # cnsuning.com suning.com
536 | if location[-len(self.domain) - 1:] == '.{d}'.format(d=self.domain):
537 | # collect redirecting's domains
538 | if sub_domain != location and location not in self.domains_rs and location not in self.domains_rs_processed:
539 | print('', end='\n')
540 | logger.info(
541 | '[{sd}] add redirect domain: {l}({len})'.format(sd=sub_domain, l=location,
542 | len=len(self.domains_rs)))
543 | self.domains_rs.append(location)
544 | self.domains_rs_processed.append(location)
545 | else:
546 | print('', end='\n')
547 | logger.info('not same domain: {l}'.format(l=location))
548 | else:
549 | print('', end='\n')
550 | logger.info('not domain(maybe path): {l}'.format(l=location))
551 | if html is None:
552 | print('', end='\n')
553 | logger.warning('domain\'s html is none: {s}'.format(s=sub_domain))
554 | return
555 | # collect response html's domains
556 | response_domains = re.findall(regex_domain, html)
557 | response_domains = list(set(response_domains) - set([sub_domain]))
558 | for rd in response_domains:
559 | rd = rd.strip().strip('.')
560 | if rd.count('.') >= sub_domain.count('.') and rd[-len(sub_domain):] == sub_domain:
561 | continue
562 | if rd not in self.domains_rs:
563 | if rd not in self.domains_rs_processed:
564 | print('', end='\n')
565 | logger.info('[{sd}] add response domain: {s}({l})'.format(sd=sub_domain, s=rd,
566 | l=len(self.domains_rs)))
567 | self.domains_rs.append(rd)
568 | self.domains_rs_processed.append(rd)
569 |
570 | if len(html) == self.wildcard_html_len:
571 | ratio = 1
572 | else:
573 | # SPEED 4 2 1, but here is still the bottleneck
574 | # real_quick_ratio() > quick_ratio() > ratio()
575 | # TODO bottleneck
576 | if sub.count('.') == 0: # secondary sub, ex: www
577 | ratio = SequenceMatcher(None, html, self.wildcard_html).real_quick_ratio()
578 | ratio = round(ratio, 3)
579 | else: # tertiary sub, ex: home.dev
580 | ratio = SequenceMatcher(None, html, self.wildcard_html3).real_quick_ratio()
581 | ratio = round(ratio, 3)
582 | self.remainder += -1
583 | if ratio > self.rsc_ratio:
584 | # passed
585 | logger.debug(
586 | '{r} RSC ratio: {ratio} (passed) {sub}'.format(r=self.remainder, sub=sub_domain, ratio=ratio))
587 | else:
588 | # added
589 | # for def distinct func
590 | # self.wildcard_domains[sub_domain] = html
591 | if self.response_filter is not None:
592 | for resp_filter in self.response_filter.split(','):
593 | if resp_filter in html:
594 | logger.debug('{r} RSC filter in response (passed) {sub}'.format(r=self.remainder,
595 | sub=sub_domain))
596 | return
597 | else:
598 | continue
599 | self.data[sub_domain] = self.wildcard_ips
600 | else:
601 | self.data[sub_domain] = self.wildcard_ips
602 | print('', end='\n')
603 | logger.info(
604 | '{r} RSC ratio: {ratio} (added) {sub}'.format(r=self.remainder, sub=sub_domain, ratio=ratio))
605 | except Exception as e:
606 | logger.debug(traceback.format_exc())
607 | return
608 |
609 | def distinct(self):
610 | for domain, html in self.wildcard_domains.items():
611 | for domain2, html2 in self.wildcard_domains.items():
612 | ratio = SequenceMatcher(None, html, html2).real_quick_ratio()
613 | if ratio > self.rsc_ratio:
614 | # remove this domain
615 | if domain2 in self.data:
616 | del self.data[domain2]
617 | m = 'Remove'
618 | else:
619 | m = 'Stay'
620 | logger.info('{d} : {d2} {ratio} {m}'.format(d=domain, d2=domain2, ratio=ratio, m=m))
621 |
622 | def check(self, dns):
623 | logger.info("Checking if DNS server {dns} is available".format(dns=dns))
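# Hand-crafted DNS query packet: a 12-byte header (transaction ID 0x5c6d, recursion
# desired, one question) followed by QNAME www.baidu.com, QTYPE A, QCLASS IN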
624 | msg = b'\x5c\x6d\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x03www\x05baidu\x03com\x00\x00\x01\x00\x01'
625 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
626 | sock.settimeout(3)
627 | repeat = {
628 | 1: 'first',
629 | 2: 'second',
630 | 3: 'third'
631 | }
632 | for i in range(3):
633 | logger.info("Sending message to DNS server for the {times} time".format(times=repeat[i + 1]))
634 | sock.sendto(msg, (dns, 53))
635 | try:
636 | sock.recv(4096)
637 | break
638 | except socket.timeout as e:
639 | logger.warning('DNS server check timed out!')
640 | if i == 2:
641 | return False
642 | return True
643 |
644 | def run(self):
645 | """
646 | Run
647 | :return:
648 | """
649 | start_time = time.time()
650 | subs = self.load_sub_domain_dict()
651 | logger.info('Sub domain dict count: {c}'.format(c=len(subs)))
652 | logger.info('Generate coroutines...')
653 | # Verify that all DNS server results are consistent
654 | stable_dns = []
655 | wildcard_ips = None
656 | last_dns = []
657 | only_similarity = False
658 | for dns in self.dns_servers:
659 | delay = self.check(dns)
660 | if not delay:
661 | logger.warning("@{dns} is not available, skip this DNS server".format(dns=dns))
662 | continue
663 | self.resolver = aiodns.DNSResolver(loop=self.loop, nameservers=[dns], timeout=self.resolve_timeout)
664 | job = self.query(self.wildcard_sub)
665 | sub, ret = self.loop.run_until_complete(job)
666 | logger.info('@{dns} {sub} {ips}'.format(dns=dns, sub=sub, ips=ret))
667 | if ret is None:
668 | ret = None
669 | else:
670 | ret = sorted(ret)
671 |
672 | if dns in self.stable_dns_servers:
673 | wildcard_ips = ret
674 | stable_dns.append(ret)
675 |
676 | if ret:
677 | equal = [False for r in ret if r not in last_dns]
678 | if len(last_dns) != 0 and False in equal:
679 | only_similarity = self.is_wildcard_domain = True
680 | logger.info('Random (wildcard) resolution detected for this domain.')
681 | break
682 | else:
683 | last_dns = ret
684 |
685 | is_all_stable_dns = stable_dns.count(stable_dns[0]) == len(stable_dns)
686 | if not is_all_stable_dns:
687 | logger.info('Not all DNS servers are stable, using the default DNS server')
688 | self.resolver = aiodns.DNSResolver(loop=self.loop, nameservers=self.stable_dns_servers,
689 | timeout=self.resolve_timeout)
690 | # Wildcard domain
691 | is_wildcard_domain = not (stable_dns.count(None) == len(stable_dns))
692 | if is_wildcard_domain or self.is_wildcard_domain:
693 | if not self.skip_rsc:
694 | logger.info('This is a wildcard domain, subdomains will be enumerated using DNS+RSC.')
695 | else:
696 | logger.info(
697 | 'This is a wildcard domain, but --skip-rsc mode is on, so all randomly resolved subdomains will be dropped from the results')
698 | self.is_wildcard_domain = True
699 | if wildcard_ips is not None:
700 | self.wildcard_ips = wildcard_ips
701 | else:
702 | self.wildcard_ips = stable_dns[0]
703 | logger.info('Wildcard IPS: {ips}'.format(ips=self.wildcard_ips))
704 | if not self.skip_rsc:
705 | try:
706 | self.wildcard_html = requests.get(
707 | 'http://{w_sub}.{domain}'.format(w_sub=self.wildcard_sub, domain=self.domain),
708 | headers=self.request_headers, timeout=10, verify=False).text
709 | self.wildcard_html = self.data_clean(self.wildcard_html)
710 | self.wildcard_html_len = len(self.wildcard_html)
711 | self.wildcard_html3 = requests.get(
712 | 'http://{w_sub}.{domain}'.format(w_sub=self.wildcard_sub3, domain=self.domain),
713 | headers=self.request_headers, timeout=10, verify=False).text
714 | self.wildcard_html3 = self.data_clean(self.wildcard_html3)
715 | self.wildcard_html3_len = len(self.wildcard_html3)
716 | logger.info(
717 | 'Wildcard domain response html length: {len} 3length: {len2}'.format(len=self.wildcard_html_len,
718 | len2=self.wildcard_html3_len))
719 | except requests.exceptions.SSLError:
720 | logger.warning('SSL Certificate Error!')
721 | except requests.exceptions.ConnectTimeout:
722 | logger.warning('Requesting response content failed, please check the network!')
723 | except requests.exceptions.ReadTimeout:
724 | self.wildcard_html = self.wildcard_html3 = ''
725 | self.wildcard_html_len = self.wildcard_html3_len = 0
726 | logger.warning(
727 | 'Request for response content timed out; {w_sub}.{domain} and {w_sub3}.{domain} may not be an HTTP service, content will be set to blank!'.format(
728 | w_sub=self.wildcard_sub,
729 | domain=self.domain,
730 | w_sub3=self.wildcard_sub3))
731 | except requests.exceptions.ConnectionError:
732 | logger.error('ESD can\'t get the response text, so RSC will be skipped.')
733 | self.skip_rsc = True
734 | else:
735 | logger.info('Not a wildcard domain')
736 |
737 | if not only_similarity:
738 | self.coroutine_count = self.coroutine_count_dns
739 | tasks = (self.query(sub) for sub in subs)
740 | self.loop.run_until_complete(self.start(tasks, len(subs)))
741 | logger.info("Brute Force subdomain count: {total}".format(total=self.count))
742 | dns_time = time.time()
743 | time_consume_dns = int(dns_time - start_time)
744 | logger.info(f'DNS query errors: {self.dns_query_errors}')
745 |
746 | # CA subdomain info
747 | ca_subdomains = []
748 | logger.info('Collect subdomains in CA...')
749 | ca_subdomains = CAInfo(self.domain).get_subdomains()
750 | if len(ca_subdomains):
751 | tasks = (self.query(sub) for sub in ca_subdomains)
752 | self.loop.run_until_complete(self.start(tasks, len(ca_subdomains)))
753 | logger.info('CA subdomain count: {c}'.format(c=len(ca_subdomains)))
754 |
755 | # DNS Transfer Vulnerability
756 | transfer_info = []
757 | logger.info('Check DNS Transfer Vulnerability in {domain}'.format(domain=self.domain))
758 | transfer_info = DNSTransfer(self.domain).transfer_info()
759 | if len(transfer_info):
760 | logger.warning('DNS Transfer Vulnerability found in {domain}!'.format(domain=self.domain))
761 | tasks = (self.query(sub) for sub in transfer_info)
762 | self.loop.run_until_complete(self.start(tasks, len(transfer_info)))
763 | logger.info('DNS Transfer subdomain count: {c}'.format(c=len(transfer_info)))
764 |
765 | total_subs = set(subs + transfer_info + ca_subdomains)
766 |
767 | # Use TXT, SOA, MX, AAAA records to find subdomains
768 | if self.multiresolve:
769 | logger.info('Enumerating subdomains with TXT, SOA, MX, AAAA record...')
770 | dnsquery = DNSQuery(self.domain, total_subs, self.domain)
771 | record_info = dnsquery.dns_query()
772 | tasks = (self.query(record[:record.find('.')]) for record in record_info)
773 | self.loop.run_until_complete(self.start(tasks, len(record_info)))
774 | logger.info('DNS record subdomain count: {c}'.format(c=len(record_info)))
775 |
776 | if self.is_wildcard_domain and not self.skip_rsc:
777 | # Response similarity comparison
778 | total_subs = set(subs + transfer_info + ca_subdomains)
779 | self.wildcard_subs = list(set(subs).union(total_subs))
780 | logger.info('Enumerated {len} subdomains in DNS mode in {tcd}.'.format(len=len(self.data), tcd=str(
781 | datetime.timedelta(seconds=time_consume_dns))))
782 | logger.info(
783 | 'Will continue to test the remaining distinct ({len_subs}-{len_exist}={len_remain}) domains using RSC, the speed will be affected.'.format(
784 | len_subs=len(subs), len_exist=len(self.data),
785 | len_remain=len(self.wildcard_subs)))
786 | self.coroutine_count = self.coroutine_count_request
787 | self.remainder = len(self.wildcard_subs)
788 | tasks = (self.similarity(sub) for sub in self.wildcard_subs)
789 | self.loop.run_until_complete(self.start(tasks, len(self.wildcard_subs)))
790 |
791 | # Distinct last domains use RSC
792 | # Maybe misinformation
793 | # self.distinct()
794 |
795 | time_consume_request = int(time.time() - dns_time)
796 | logger.info('Requests time consume {tcr}'.format(tcr=str(datetime.timedelta(seconds=time_consume_request))))
797 | # RS(redirect/response) domains
798 | while len(self.domains_rs) != 0:
799 | logger.info('RS(redirect/response) domains({l})...'.format(l=len(self.domains_rs)))
800 | tasks = (self.similarity(''.join(domain.rsplit(self.domain, 1)).rstrip('.')) for domain in self.domains_rs)
801 |
802 | self.loop.run_until_complete(self.start(tasks, len(self.domains_rs)))
803 |
804 | # write output
805 | # tmp_dir = '/tmp/esd'
806 | # if not os.path.isdir(tmp_dir):
807 | # os.mkdir(tmp_dir, 0o777)
808 | # output_path_with_time = '{td}/.{domain}_{time}.esd'.format(td=tmp_dir, domain=self.domain,
809 | # time=datetime.datetime.now().strftime(
810 | # "%Y-%m_%d_%H-%M"))
811 | # output_path = '{td}/.{domain}.esd'.format(td=tmp_dir, domain=self.domain)
812 | # if len(self.data):
813 | # max_domain_len = max(map(len, self.data)) + 2
814 | # else:
815 | # max_domain_len = 2
816 | # output_format = '%-{0}s%-s\n'.format(max_domain_len)
817 | # with open(output_path_with_time, 'w') as opt, open(output_path, 'w') as op:
818 | # for domain, ips in self.data.items():
819 | # # The format is consistent with other scanners to ensure that they are
820 | # # invoked at the same time without increasing the cost of
821 | # # resolution
822 | # if ips is None or len(ips) == 0:
823 | # ips_split = ''
824 | # else:
825 | # ips_split = ','.join(ips)
826 | # con = output_format % (domain, ips_split)
827 | # op.write(con)
828 | # opt.write(con)
829 |
830 | # Modified locally: collect results into a list and return them instead of writing files
831 | subdomains = []
832 | for domain, ips in self.data.items():
833 | # The format is consistent with other scanners to ensure that they are
834 | # invoked at the same time without increasing the cost of
835 | # resolution
836 | if ips is None or len(ips) == 0:
837 | ips_split = ''
838 | else:
839 | ips_split = ','.join(ips)
840 | subdomains += [(domain, ips_split)]
841 |
842 |
843 | # logger.info('Output: {op}'.format(op=output_path))
844 | # logger.info('Output with time: {op}'.format(op=output_path_with_time))
845 | logger.info('Total domain: {td}'.format(td=len(self.data)))
846 | time_consume = int(time.time() - start_time)
847 | logger.info('Time consume: {tc}'.format(tc=str(datetime.timedelta(seconds=time_consume))))
848 | return subdomains
849 |
850 |
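# A minimal usage sketch for driving this module from Python rather than the CLI
# (illustrative only; 'example.com' is a placeholder and real callers may pass
# other options such as response_filter or proxy):
#
#   from ESD import EnumSubDomain
#   results = EnumSubDomain('example.com', skip_rsc=True).run()
#   # results is a list of (subdomain, 'ip1,ip2,...') tuples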
851 | def banner():
852 | print("""\033[94m
853 | ______ _____ _____
854 | | ____| / ____| | __ \
855 | | |__ | (___ | | | |
856 | | __| \___ \ | | | |
857 | | |____ ____) | | |__| |
858 | |______| |_____/ |_____/\033[0m\033[93m
859 | Enumeration Sub Domains v%s\033[92m
860 | """ % __version__)
861 |
862 |
863 | def main():
864 | banner()
865 | parser = OptionParser(
866 | 'Usage: esd -d feei.cn -F response_filter -p user:pass@host:port')
867 | parser.add_option('-d', '--domain', dest='domains', help='The domains that you want to enumerate')
868 | parser.add_option('-f', '--file', dest='input', help='Import domains from this file')
869 | parser.add_option('-F', '--filter', dest='filter', help='Response filter')
870 | parser.add_option('-s', '--skip-rsc', dest='skiprsc', help='Skip response similarity comparison', action='store_true',
871 | default=False)
872 | parser.add_option('-S', '--split', dest='split', help='Split the dict into several parts', default='1/1')
873 | parser.add_option('-p', '--proxy', dest='proxy', help='Use socks5 proxy to access Google and Yahoo')
874 | parser.add_option('-m', '--multi-resolve', dest='multiresolve',
875 | help='Use TXT, AAAA, MX, SOA record to find subdomains', action='store_true', default=False)
876 | (options, args) = parser.parse_args()
877 |
878 | domains = []
879 | response_filter = options.filter
880 | skip_rsc = options.skiprsc
881 | split_list = options.split.split('/')
882 | split = options.split
883 | multiresolve = options.multiresolve
884 |
885 | try:
886 | if len(split_list) != 2 or int(split_list[0]) > int(split_list[1]):
887 | logger.error('Invalid split parameter, cannot split the dict')
888 | split = None
889 | except:
890 | logger.error('Split validation failed: {d}'.format(d=split_list))
891 | exit(0)
892 |
893 | if options.proxy:
894 | proxy = {
895 | 'http': 'socks5h://%s' % options.proxy,
896 | 'https': 'socks5h://%s' % options.proxy
897 | }
898 | else:
899 | proxy = {}
900 |
901 | if options.domains is not None:
902 | for p in options.domains.split(','):
903 | p = p.strip().lower()
904 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,})$', p)
905 | if len(re_domain) > 0 and re_domain[0][0] == p:
906 | domains.append(p.strip())
907 | else:
908 | logger.error('Domain validation failed: {d}'.format(d=p))
909 | elif options.input and os.path.isfile(options.input):
910 | with open(options.input) as fh:
911 | for line_domain in fh:
912 | line_domain = line_domain.strip().lower()
913 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,})$', line_domain)
914 | if len(re_domain) > 0 and re_domain[0][0] == line_domain:
915 | domains.append(line_domain)
916 | else:
917 | logger.error('Domain validation failed: {d}'.format(d=line_domain))
918 | else:
919 | logger.error('Please input a valid parameter, e.g. "esd -d feei.cn" or "esd -f /Users/root/domains.txt"')
920 |
921 | if 'esd' in os.environ:
922 | debug = os.environ['esd']
923 | else:
924 | debug = False
925 | logger.info('Debug: {d}'.format(d=debug))
926 | logger.info('--skip-rsc: {rsc}'.format(rsc=skip_rsc))
927 |
928 | logger.info('Total target domains: {ttd}'.format(ttd=len(domains)))
929 | try:
930 | for d in domains:
931 | esd = EnumSubDomain(d, response_filter, skip_rsc=skip_rsc, debug=debug, split=split,
932 | proxy=proxy,
933 | multiresolve=multiresolve)
934 | esd.run()
935 | except KeyboardInterrupt:
936 | print('', end='\n')
937 | logger.info('Bye :)')
938 | exit(0)
939 |
940 |
941 | if __name__ == '__main__':
942 | main()
943 |
--------------------------------------------------------------------------------