├── Common
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   ├── GetProxies.cpython-38.pyc
│   │   ├── LogOutput.cpython-38.pyc
│   │   └── CustomException.cpython-38.pyc
│   ├── CustomException.py
│   ├── ProxyPool
│   │   ├── DomesticProxyPool.txt
│   │   └── ForeignProxyPool.txt
│   ├── LogOutput.py
│   ├── Exception结构.md
│   └── GetProxies.py
├── Plugins
│   ├── __init__.py
│   ├── InfoSearch
│   │   ├── __init__.py
│   │   ├── Domain
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── domainapi.cpython-38.pyc
│   │   │   └── domainapi.py
│   │   ├── Subdomain
│   │   │   ├── __init__.py
│   │   │   ├── IsCND
│   │   │   │   ├── __init__.py
│   │   │   │   ├── GeoLite2-ASN.mmdb
│   │   │   │   ├── cdn-domain.conf
│   │   │   │   └── CheckCDN.py
│   │   │   ├── Spider
│   │   │   │   ├── __init__.py
│   │   │   │   ├── Baidu
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── baidu.cpython-38.pyc
│   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   └── baidu.py
│   │   │   │   ├── Bing
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── bing.cpython-38.pyc
│   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   └── bing.py
│   │   │   │   ├── Google
│   │   │   │   │   ├── docs
│   │   │   │   │   │   ├── .gitignore
│   │   │   │   │   │   ├── index.rst
│   │   │   │   │   │   ├── Makefile
│   │   │   │   │   │   ├── make.bat
│   │   │   │   │   │   └── conf.py
│   │   │   │   │   ├── googlesearch
│   │   │   │   │   │   ├── user_agents.txt.gz
│   │   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   │   └── __init__.cpython-38.pyc
│   │   │   │   │   │   └── __init__.py
│   │   │   │   │   ├── .google-cookie
│   │   │   │   │   ├── .travis.yml
│   │   │   │   │   ├── demo.py
│   │   │   │   │   └── google.py
│   │   │   │   └── __pycache__
│   │   │   │       └── __init__.cpython-38.pyc
│   │   │   ├── JsFinder
│   │   │   │   ├── __init__.py
│   │   │   │   └── jsfinder.py
│   │   │   ├── ESD
│   │   │   │   ├── ESD-0.0.29.dist-info
│   │   │   │   │   ├── REQUESTED
│   │   │   │   │   ├── INSTALLER
│   │   │   │   │   ├── top_level.txt
│   │   │   │   │   ├── entry_points.txt
│   │   │   │   │   ├── WHEEL
│   │   │   │   │   ├── RECORD
│   │   │   │   │   ├── METADATA
│   │   │   │   │   └── LICENSE
│   │   │   │   └── ESD
│   │   │   │       ├── __pycache__
│   │   │   │       │   └── __init__.cpython-38.pyc
│   │   │   │       ├── key.ini
│   │   │   │       ├── subs-test.esd
│   │   │   │       └── __init__.py
│   │   │   ├── ThirdPartyPlatform
│   │   │   │   ├── __init__.py
│   │   │   │   ├── certificate.py
│   │   │   │   └── netcraft.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   └── subdomainapi.cpython-38.pyc
│   │   │   └── subdomainapi.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   └── infosearchapi.cpython-38.pyc
│   │   └── infosearchapi.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   └── SaveToExcel.py
├── Reports
│   ├── googleSpider.txt
│   ├── tjzj.edu.cn-key-links
│   └── tjzj.edu.cn-subdomains
├── config.ini
├── README.md
├── requirements.txt
└── search_all.py
/Common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/IsCND/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/JsFinder/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/REQUESTED:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ThirdPartyPlatform/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/INSTALLER:
--------------------------------------------------------------------------------
1 | pip
2 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/top_level.txt:
--------------------------------------------------------------------------------
1 | ESD
2 |
--------------------------------------------------------------------------------
/Reports/googleSpider.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Reports/googleSpider.txt
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/entry_points.txt:
--------------------------------------------------------------------------------
1 | [console_scripts]
2 | esd = ESD:main
3 |
4 |
--------------------------------------------------------------------------------
/Common/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/__pycache__/GetProxies.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/GetProxies.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/__pycache__/LogOutput.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/LogOutput.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Common/CustomException.py:
--------------------------------------------------------------------------------
1 | '''Custom exception classes.'''
2 |
3 | class CustomException(Exception):
4 |     pass
5 |
6 | class NetworkException(ValueError):
7 |     pass
--------------------------------------------------------------------------------
/Common/__pycache__/CustomException.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Common/__pycache__/CustomException.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/IsCND/GeoLite2-ASN.mmdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/IsCND/GeoLite2-ASN.mmdb
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/WHEEL:
--------------------------------------------------------------------------------
1 | Wheel-Version: 1.0
2 | Generator: bdist_wheel (0.36.2)
3 | Root-Is-Purelib: true
4 | Tag: py3-none-any
5 |
6 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/__pycache__/infosearchapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/__pycache__/infosearchapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Domain/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/__pycache__/domainapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Domain/__pycache__/domainapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/__pycache__/subdomainapi.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/__pycache__/subdomainapi.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/ESD/ESD/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/bing.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/bing.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/baidu.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/baidu.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/user_agents.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/user_agents.txt.gz
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Baidu/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Bing/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/urdr-gungnir/SearchAll/HEAD/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/config.ini:
--------------------------------------------------------------------------------
1 | [fofa api]
2 | EMAIL = cnno.1@protonmail.com
3 | KEY = 86b1a3ae6a597782a0394041c7d1908c
4 |
5 | [shodan api]
6 | SHODAN_API_KEY =
7 |
8 | [github api]
9 | GITHUB_TOKEN =
10 |
11 | [quake api]
12 | X-QuakeToken =
13 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/key.ini:
--------------------------------------------------------------------------------
1 | [shodan]
2 | shodan_key =
3 |
4 | [fofa]
5 | fofa_key =
6 | fofa_email =
7 |
8 | [zoomeye]
9 | zoomeye_username =
10 | zoomeye_password =
11 |
12 | [censys]
13 | uid =
14 | secret =
15 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/subs-test.esd:
--------------------------------------------------------------------------------
1 | www
2 | h5
3 | wap
4 | _feei
5 | test
6 | dkim._domainkey
7 | _finger._tcp
8 | sso.cn
9 | dmarc.mail2
10 | f2.market
11 | market
12 | mail
13 | bn73
14 | passport
15 | djfowj
16 | video
17 | dkfowejf
18 | cnvhueq
19 | dhfowje
20 | clive
21 | echiu
22 | img
23 | inn
24 | et25
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | After guidance from master r, I have come to feel that reinventing the wheel with this project is pointless, so development has stopped.
2 |
3 | # SearchAll
4 | An internet-facing information gathering toolkit that collects top-level domains, subdomains, C segments, sensitive information leaks, and more.
5 |
6 |
7 | It is still under development and its features are not yet complete.
8 | When I run into design problems, I will post them to my blog: https://urdr-gungnir.github.io/post/SearchAll%E8%AE%BE%E8%AE%A1%E9%97%AE%E9%A2%98.html
9 | 
10 |
--------------------------------------------------------------------------------
/Common/ProxyPool/DomesticProxyPool.txt:
--------------------------------------------------------------------------------
1 | 121.40.185.42:1080
2 | 123.60.93.108:1080
3 | 42.122.65.112:1080
4 | 114.117.206.230:1080
5 | 171.107.184.247:1080
6 | 123.59.211.39:45554
7 | 121.42.173.167:1080
8 | 39.103.199.2:1080
9 | 124.91.134.216:1080
10 | 123.60.224.72:1080
11 | 222.64.9.47:1080
12 | 115.239.213.75:1080
13 | 111.229.21.197:1080
14 | 59.61.161.66:1080
15 | 45.43.54.177:1080
16 | 39.103.199.2:1080
17 | 101.42.94.199:1080
18 | 113.107.166.125:1080
19 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/.google-cookie:
--------------------------------------------------------------------------------
1 | #LWP-Cookies-2.0
2 | Set-Cookie3: 1P_JAR="2021-10-01-12"; path="/"; domain=".google.com"; path_spec; domain_dot; secure; expires="2021-10-31 12:54:53Z"; version=0
3 | Set-Cookie3: NID="511=JTjC9CxK8SYPY3weJg-Aej42eHSD22u3_xTVIRYcfPecmhPzNd8tmOF42QmDwh3uJFXhZjB8utDi4nhu8xjmk8_Z6UDYKiRycBK-3u4H__Fo-U1toiaFgpQwY3D6rgETpjxChNhKfUaZZiaKmOS9zeC3g4llerzYVbVE3JOQILY"; path="/"; domain=".google.com"; path_spec; domain_dot; expires="2022-04-02 12:54:52Z"; HttpOnly=None; version=0
4 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | # Supported CPython versions:
4 | # https://en.wikipedia.org/wiki/CPython#Version_history
5 | python:
6 | - pypy3
7 | - pypy
8 | - 2.7
9 | - 3.6
10 | - 3.5
11 | - 3.4
12 |
13 | # Use container-based infrastructure
14 | sudo: false
15 |
16 | install:
17 | - pip install pycodestyle pyflakes
18 |
19 | script:
20 | # Static analysis
21 | - pyflakes .
22 | - pycodestyle --statistics --count .
23 |
24 | matrix:
25 | fast_finish: true
26 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiodns==3.0.0
2 | aiohttp==3.7.4.post0
3 | async-timeout==3.0.1
4 | attrs==21.2.0
5 | backoff==1.11.1
6 | certifi==2021.5.30
7 | cffi==1.14.6
8 | chardet==4.0.0
9 | charset-normalizer==2.0.6
10 | colorama==0.4.4
11 | colorlog==6.4.1
12 | dnspython==2.1.0
13 | et-xmlfile==1.1.0
14 | idna==3.2
15 | multidict==5.1.0
16 | openpyxl==3.0.9
17 | pycares==4.0.0
18 | pycparser==2.20
19 | PySocks==1.7.1
20 | requests==2.26.0
21 | termcolor==1.1.0
22 | tqdm==4.62.3
23 | typing-extensions==3.10.0.2
24 | urllib3==1.26.7
25 | wincertstore==0.2
26 | yarl==1.6.3
27 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. googlesearch documentation master file, created by
2 | sphinx-quickstart on Tue Nov 6 12:25:12 2018.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to googlesearch's documentation!
7 | ========================================
8 |
9 | Indices and tables
10 | ==================
11 |
12 | * :ref:`genindex`
13 | * :ref:`modindex`
14 | * :ref:`search`
15 |
16 | Reference
17 | =========
18 |
19 | .. automodule:: googlesearch
20 | :members:
21 |
22 |
--------------------------------------------------------------------------------
/Common/ProxyPool/ForeignProxyPool.txt:
--------------------------------------------------------------------------------
1 | 123.59.120.247:1080
2 | 123.59.120.207:1080
3 | 123.59.120.38:45554
4 | 123.59.120.15:45554
5 | 123.59.211.123:45554
6 | 123.59.211.192:45554
7 | 123.59.120.40:45554
8 | 123.59.211.180:45554
9 | 120.244.127.254:1080
10 | 123.59.211.193:45554
11 | 183.45.77.126:1080
12 | 113.16.158.35:1080
13 | 123.59.120.61:45554
14 | 123.59.211.161:45554
15 | 123.59.120.112:45554
16 | 123.59.120.40:45554
17 | 123.59.211.123:45554
18 | 120.244.127.254:1080
19 | 123.59.211.213:45554
20 | 183.45.77.126:1080
21 | 123.59.120.38:45554
22 | 123.59.120.171:45554
23 | 123.59.211.193:45554
24 | 123.59.120.123:45554
25 |
--------------------------------------------------------------------------------
/Reports/tjzj.edu.cn-key-links:
--------------------------------------------------------------------------------
1 | openportal服务后台 http://weixin.tjzj.edu.cn/
2 | 腾讯企业邮箱-登录入口 http://mail.tjzj.edu.cn/
3 | 同济大学浙江学院大门、公寓楼门口出入系统项目招标信息公告 |... https://www.tjzj.edu.cn/info/16183.html
4 | 浙江警官职业学院2014年招录省属监狱系统人民警察学员公告 | 同济大学浙 ... https://www.tjzj.edu.cn/info/13360.html
5 | 同济大学浙江学院安全态势感知系统和EDR招标信息公告 | 同济大学... https://www.tjzj.edu.cn/index.php/info/16194.html
6 | 同济大学浙江学院结构工程综合加载试验系统项目招标信息公告 |... https://www.tjzj.edu.cn/info/16252.html
7 | 喜报|我校代表队摘获市属教育系统红诗会比赛二等奖 | 同济大学浙江学院 https://www.tjzj.edu.cn/info/19831.html
8 | 吉讯大学生职业测评与规划系统 | 同济大学浙江学院 https://www.tjzj.edu.cn/info/13090.html
9 | 同济大学浙江学院银行系统软件测试仿真实验室招标信息公告 | 同济... https://www.tjzj.edu.cn/info/18640.html
10 |
--------------------------------------------------------------------------------
/Plugins/SaveToExcel.py:
--------------------------------------------------------------------------------
1 | from termcolor import cprint  # cprint is used below but was missing from the imports
2 |
3 | class saveToExcel:
4 |     def __init__(self, excelSavePath, excel, title):
5 |         self.excelSavePath = excelSavePath  # path the excel file is saved to
6 |         self.excel = excel  # an openpyxl.Workbook() instance
7 |         self.sheet = self.excel.create_sheet(title=title)  # create the worksheet
8 |         self.Sheet_line = 1  # current row of the sheet
9 |
10 |     # def CreatExcel(self):
11 |
12 |
13 |     def SaveSpider(self, spiderName, key_links=[], subdomains=[]):
14 |
15 |         def SaveKeyLinks():
16 |             cprint("*"*20+"saving spider key-link data"+"*"*20, color="green")
17 |
18 |         def SaveSubdomains():
19 |             cprint("*"*20+"saving spider subdomain data"+"*"*20, color="green")
--------------------------------------------------------------------------------
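
A minimal usage sketch for the saveToExcel class above, assuming it is run from the project root; the file name, sheet title and sample data are illustrative, not taken from the repository:

```python
from openpyxl import Workbook

from Plugins.SaveToExcel import saveToExcel

# saveToExcel expects a ready-made openpyxl Workbook and creates its own sheet in it.
wb = Workbook()
saver = saveToExcel('./Reports/result.xlsx', wb, title='spider')

# SaveSpider currently only defines its inner helpers, so this call is a placeholder.
saver.SaveSpider('bing',
                 key_links=[('后台', 'http://www.example.com/admin')],
                 subdomains=['www.example.com'])
wb.save('./Reports/result.xlsx')
```
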
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = googlesearch
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/demo.py:
--------------------------------------------------------------------------------
1 | from googlesearch import search
2 | import sys
3 | from sys import version_info
4 |
5 | PY2, PY3 = (True, False) if version_info[0] == 2 else (False, True)
6 |
7 | if PY2:
8 | from urlparse import urlparse
9 | else:
10 | from urllib.parse import urlparse
11 |
12 | key = 'site:hbu.edu.cn 后台'# sys.argv[1]
13 |
14 | urls = []
15 |
16 | for each_result in search(key, stop=4):
17 | parseRet = urlparse(each_result)
18 | print(each_result, parseRet)
19 | url = parseRet.scheme + '://' + parseRet.netloc
20 | if key in parseRet.netloc and url not in urls:
21 | print(url, each_result)
22 | urls.append(url)
23 |
24 | print('search {} Done!'.format(key))
25 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=googlesearch
13 |
14 | if "%1" == "" goto help
15 |
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | echo.
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | echo.installed, then set the SPHINXBUILD environment variable to point
21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | echo.may add the Sphinx directory to PATH.
23 | echo.
24 | echo.If you don't have Sphinx installed, grab it from
25 | echo.http://sphinx-doc.org/
26 | exit /b 1
27 | )
28 |
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 |
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 |
35 | :end
36 | popd
37 |
--------------------------------------------------------------------------------
/Reports/tjzj.edu.cn-subdomains:
--------------------------------------------------------------------------------
1 | sk.tjzj.edu.cn 122.225.92.218
2 | cxcy.tjzj.edu.cn 122.225.92.218
3 | h5.tjzj.edu.cn 122.225.92.218
4 | tcsf.tjzj.edu.cn 122.225.92.218
5 | kjx.tjzj.edu.cn 122.225.92.218
6 | dfl.tjzj.edu.cn 122.225.92.218
7 | jtx.tjzj.edu.cn 122.225.92.218
8 | message.tjzj.edu.cn 122.225.19.18
9 | tmx.tjzj.edu.cn 122.225.92.218
10 | weixin.tjzj.edu.cn 60.190.149.38
11 | oa.tjzj.edu.cn 192.168.100.16
12 | bwc.tjzj.edu.cn 122.225.60.2
13 | tyb.tjzj.edu.cn 122.225.92.218
14 | www.tjzj.edu.cn 122.225.92.218
15 | old.tjzj.edu.cn 122.225.92.218
16 | lxb.tjzj.edu.cn 122.225.92.218
17 | job.tjzj.edu.cn 121.41.227.53
18 | ns1.tjzj.edu.cn 121.192.40.100
19 | depart.tjzj.edu.cn 122.225.92.218
20 | mail.tjzj.edu.cn 157.255.173.155,61.241.49.119
21 | dem.tjzj.edu.cn 122.225.92.218
22 | daka.tjzj.edu.cn 60.190.149.38
23 | idm.tjzj.edu.cn 122.225.19.22
24 | ei.tjzj.edu.cn 122.225.92.218
25 | cdz.tjzj.edu.cn 122.225.92.218
26 | jxx.tjzj.edu.cn 122.225.92.218
27 | jzh.tjzj.edu.cn 122.225.92.218
28 | english.tjzj.edu.cn 122.225.92.218
29 | ns.tjzj.edu.cn 121.192.40.150
30 | sso.tjzj.edu.cn 122.225.19.22
31 | uc.tjzj.edu.cn 122.225.19.19
32 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/RECORD:
--------------------------------------------------------------------------------
1 | ../../Scripts/esd.exe,sha256=MixWaXi8KM3VLhimXNWmr4Y-fN-z09IqoTNbSDozYhY,106314
2 | ESD-0.0.29.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3 | ESD-0.0.29.dist-info/LICENSE,sha256=_j7qbFmeI6AMCMX1yyMgwwrcj4aH21_OybeaZixT_2s,35120
4 | ESD-0.0.29.dist-info/METADATA,sha256=QpsUTykinvy5NmoN1Rfy0xEYUwv2csbmV-C_qiUxGDI,4297
5 | ESD-0.0.29.dist-info/RECORD,,
6 | ESD-0.0.29.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7 | ESD-0.0.29.dist-info/WHEEL,sha256=OqRkF0eY5GHssMorFjlbTIq072vpHpF60fIQA6lS9xA,92
8 | ESD-0.0.29.dist-info/entry_points.txt,sha256=jACTsjKk3gdyl9UunxTSx1fgnzxDnZV3Q0omnUqY8Js,34
9 | ESD-0.0.29.dist-info/top_level.txt,sha256=Kl4w48552EuhLMDNoXD8LUUhnEzA-VOfGQHUdAtp2ig,4
10 | ESD/__init__.py,sha256=imNtmEteBLIjgnc4mH7zvwCmP2UIY0QyMKiQm6oUxhI,40162
11 | ESD/__pycache__/__init__.cpython-38.pyc,,
12 | ESD/cacert.pem,sha256=hmlbG-kiXDz4gtKD8FyUTjqrvB32QopEJCaak-mX3GU,209309
13 | ESD/key.ini,sha256=HLCG1J_EYjuFrrhKMRDN7rViJqJt2ZOH1fdJQiZzQCM,129
14 | ESD/subs-test.esd,sha256=KnXxbaeIBrV5U3DMk_JoCC4wWR6agyhSQjkJDgdsXWE,169
15 | ESD/subs.esd,sha256=jiSYlj4rlIZTOdrWaEr-SEiQvqt5qkJ2tRT9NRR46Mw,806655
16 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/google.py:
--------------------------------------------------------------------------------
1 | from Plugins.InfoSearch.Subdomain.Spider.Google.googlesearch import search
2 | from sys import version_info
3 |
4 | PY2, PY3 = (True, False) if version_info[0] == 2 else (False, True)
5 |
6 | if PY2:
7 | from urlparse import urlparse
8 | else:
9 | from urllib.parse import urlparse
10 |
11 | # Google spider
12 | class GoogleSpider:
13 | def __init__(self, domain, save_fold_path):
14 | self.domain = domain
15 | # site:domain inurl:admin inurl:login inurl:system 后台 系统
16 | self.wds = ['inurl:admin|login|register|upload|editor', '后台|系统']
17 | # print('Please wait a few time ...')
18 | self.STOP = 50 # maximum number of Google results to crawl
19 | self.save_fold_path = save_fold_path # \result\0ca9b508e31f
20 | self.googleSubdomains = []
21 |
22 | def run(self):
23 | for wd in self.wds:
24 | with open('{}/googleSpider.txt'.format(self.save_fold_path), 'at') as f:
25 | key = 'site:*.{} {}'.format(self.domain, wd)
26 | f.writelines('[+] {} :\n'.format(key))
27 | print('\t[+] google search -> [{}]'.format(key))
28 | for each_result in search(key):
29 | f.writelines('{}\n'.format(each_result))
30 | parseRet = urlparse(each_result)
31 | subdomain = parseRet.netloc
32 | if self.domain in subdomain and subdomain not in self.googleSubdomains:
33 | self.googleSubdomains.append(subdomain)
34 |
35 | return self.googleSubdomains
36 |
--------------------------------------------------------------------------------
/Common/LogOutput.py:
--------------------------------------------------------------------------------
1 | '''
2 | log output
3 |
4 | a = LogOutput()
5 | Logger_object = a.SetModuleName("Module_name")
6 | Logger_object.warning("log_info") / Logger_object.error("log_info") / Logger_object.info("log_info")
7 | '''
8 |
9 | import colorlog
10 | import logging
11 |
12 |
13 | class LogOutput():
14 | _instance = None
15 | def __new__(cls, *args, **kwargs):
16 | if cls._instance is None:
17 | cls._instance = object.__new__(cls)  # object.__new__() must not receive extra arguments
18 | return cls._instance
19 | def __init__(self):
20 | self.mycolorlog = colorlog
21 | self.handler = self.mycolorlog.StreamHandler()
22 | formatter = self.mycolorlog.ColoredFormatter(
23 | '%(log_color)s[+] %(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s',
24 | datefmt=None,
25 | reset=True,
26 | log_colors={
27 | 'DEBUG': 'cyan',
28 | 'INFO': 'green',
29 | 'WARNING': 'yellow',
30 | 'ERROR': 'red',
31 | 'CRITICAL': 'white,bg_red',
32 | 'Module': 'purple'
33 | },
34 | secondary_log_colors={},
35 | style='%'
36 | )
37 | self.handler.setFormatter(formatter)
38 |
39 |
40 | def SetModuleName(self, module_name=''):
41 | if module_name == '':
42 | self.logger = self.mycolorlog.getLogger('Search_all')
43 | else:
44 | self.logger = self.mycolorlog.getLogger('Search_all {}'.format(module_name))
45 | self.logger.addHandler(self.handler)
46 | self.logger.setLevel(self.mycolorlog.INFO)
47 | return self.logger
--------------------------------------------------------------------------------
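
A minimal usage sketch of LogOutput, following the docstring at the top of the file; the module name and messages are illustrative:

```python
from Common.LogOutput import LogOutput

# LogOutput is a singleton, so every module shares the same colored stream handler.
logger = LogOutput().SetModuleName("Demo")
logger.info("information gathering started")
logger.warning("proxy pool is empty")
logger.error("request failed")
```
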
/Plugins/InfoSearch/infosearchapi.py:
--------------------------------------------------------------------------------
1 | '''
2 | InfoSearchApi
3 |
4 | :return
5 | type : json
6 | '''
7 | import random
8 |
9 | from Common.LogOutput import LogOutput
10 | logger_object = LogOutput()
11 | logger = logger_object.SetModuleName("InfoSearch")
12 | import sys
13 |
14 | class InfoSearchApi():
15 | def __init__(self):
16 | pass
17 | def GetSubdomain_ips(self, domain, proxies=None):
18 | if domain:
19 | from Plugins.InfoSearch.Subdomain.subdomainapi import SubdomainApi
20 | subdomainobject = SubdomainApi(proxies)
21 | return subdomainobject.Run(domain)
22 | else:
23 | logger.error("Need a target domain!")
24 | sys.exit()
25 |
26 | def GetDomains(self, domain):
27 | if domain:
28 | from Plugins.InfoSearch.Domain.domainapi import DomainApi
29 | domainobject = DomainApi()
30 | return domainobject.run_domain(domain)
31 | else:
32 | logger.error("Need a target domain!")
33 | sys.exit()
34 |
35 | def get_proxy(self):
36 | with open("./Common/ProxyPool/DomesticProxyPool.txt", "r") as f:
37 | proxies = [proxy.strip() for proxy in f.readlines()]
38 | # current_path = os.path.dirname(__file__)
39 | # f = open(current_path +"/", "r")
40 | # proxies = f.readlines()
41 | return proxies
42 |
43 | def Run(self, domain):
44 | # proxies = self.get_proxy()
45 | # proxy = random.choice(proxies)
46 | Domains, companyname = self.GetDomains(domain)
47 |
48 | for domain in Domains:
49 | subdomain_ips, key_links = self.GetSubdomain_ips(domain[1])  # domain tuples are (companyName, host, time); the host is at index 1
50 |
51 |
--------------------------------------------------------------------------------
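
A sketch of how InfoSearchApi is driven (this is essentially what search_all.py does for its -i option); it assumes the process is started from the project root so the relative config and proxy-pool paths resolve:

```python
from Plugins.InfoSearch.infosearchapi import InfoSearchApi

api = InfoSearchApi()
# Resolves the ICP-registered top-level domains first, then enumerates their subdomains.
api.Run('tjzj.edu.cn')
```
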
/Plugins/InfoSearch/Subdomain/ThirdPartyPlatform/certificate.py:
--------------------------------------------------------------------------------
1 | from Common.LogOutput import LogOutput
2 | logger_object = LogOutput()
3 | logger = logger_object.SetModuleName("certificate")
4 | import sys
5 |
6 | import requests
7 | import re
8 |
9 | # crt.sh
10 | class Certificate():
11 | def __init__(self):
12 | self.header = {
13 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
14 | }
15 | self.subdomains = []
16 |
17 | # def Get_subdomain(self):
18 |
19 | def run(self, domain):
20 | try:
21 | res = requests.get("https://crt.sh/?q={}".format(domain), headers=self.header)
22 | before_subdomains = re.findall(r"
--------------------------------------------------------------------------------
/search_all.py:
--------------------------------------------------------------------------------
3.2 to run search_all.')
11 | sys.exit()
12 |
13 |
14 |
15 | def InfoSearch(domain):
16 | from Plugins.InfoSearch.infosearchapi import InfoSearchApi
17 | infosearchapi = InfoSearchApi()
18 | infosearchapi.Run(domain)
19 |
20 | def GetPorxies():
21 | from Common import GetProxies
22 | GetProxies.run_getSocksProxy()
23 |
24 |
25 | def Banner():
26 | banner = '''
27 | ________ _______ ________ ________ ________ ___ ___ ________ ___ ___
28 | |\ ____\|\ ___ \ |\ __ \|\ __ \|\ ____\|\ \|\ \|\ __ \|\ \ |\ \
29 | \ \ \___|\ \ __/|\ \ \|\ \ \ \|\ \ \ \___|\ \ \\\\\ \ \ \|\ \ \ \ \ \ \
30 | \ \_____ \ \ \_|/_\ \ __ \ \ _ _\ \ \ \ \ __ \ \ __ \ \ \ \ \ \
31 | \|____|\ \ \ \_|\ \ \ \ \ \ \ \\\\ \\\\ \ \____\ \ \ \ \ \ \ \ \ \ \____\ \ \____
32 | ____\_\ \ \_______\ \__\ \__\ \__\\\\ _\\\\ \_______\ \__\ \__\ \__\ \__\ \_______\ \_______\\
33 | |\_________\|_______|\|__|\|__|\|__|\|__|\|_______|\|__|\|__|\|__|\|__|\|_______|\|_______|
34 | \|_________| author:Gungnir
35 | '''
36 | print('\033[35m' + banner + '\033[0m')
37 |
38 |
39 | def Init_set():
40 | Banner()
41 |
42 | global domain, WhetherRunInfoSearch, WhetherGetProxies
43 |
44 | import argparse
45 |
46 | parser = argparse.ArgumentParser(description='''
47 | (¬︿̫̿¬☆) Hmph, darn! You found me.
48 | (ˉ▽ ̄~) Well, since you found me, here you go!
49 | ''')
50 | parser.add_argument("-d", "--domain", help="Need a target domain", dest="domain")
51 | parser.add_argument("-i", "--InfoSearch", help="Conduct information collection", dest="WhetherRunInfoSearch", action="store_true")
52 | parser.add_argument("-p", "--Proxy", help="Get proxies", dest="WhetherGetProxies", action="store_true")
53 |
54 | args = parser.parse_args()
55 | options = vars(args)
56 |
57 |
58 | domain, WhetherRunInfoSearch, WhetherGetProxies = options['domain'], options['WhetherRunInfoSearch'], options['WhetherGetProxies']
59 |
60 | # GetProxies
61 | if(WhetherGetProxies):
62 | GetPorxies()
63 |
64 |
65 | # check whether the user wants to run information gathering
66 | if(WhetherRunInfoSearch):
67 | # GetSubdomains(domain)
68 | InfoSearch(domain)
69 | # else:
70 | # logger.error('At least one run command parameter is required, please use the --help or -h command for details')
71 | # sys.exit()
72 |
73 | if __name__ == '__main__':
74 |
75 |
76 |
77 | '''Initialize arguments'''
78 | Init_set()
79 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/METADATA:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.1
2 | Name: ESD
3 | Version: 0.0.29
4 | Summary: Enumeration Sub Domains(枚举子域名)
5 | Home-page: https://github.com/FeeiCN/ESD
6 | Author: Feei
7 | Author-email: feei@feei.cn
8 | License: UNKNOWN
9 | Platform: UNKNOWN
10 | Classifier: Topic :: Security
11 | Classifier: Programming Language :: Python :: 3
12 | Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
13 | Description-Content-Type: text/markdown
14 | Requires-Dist: colorlog
15 | Requires-Dist: aiodns
16 | Requires-Dist: aiohttp
17 | Requires-Dist: async-timeout
18 | Requires-Dist: requests
19 | Requires-Dist: backoff
20 | Requires-Dist: dnspython
21 | Requires-Dist: pysocks
22 | Requires-Dist: tqdm
23 | Requires-Dist: colorama
24 |
25 | # ESD(Enumeration Sub Domain)
26 |
27 | [](https://pypi.org/project/ESD/)
28 | 
29 | 
30 |
31 | [](https://asciinema.org/a/15WhUe40eEhSbwAXZdf2RQdq9)
32 |
33 | ## Advantages
34 | #### Wildcard (pan-DNS) domain support
35 | > Enumerates wildcard-resolved domains using `RSC` (Response Similarity Comparison); this is relatively slow because it depends on network quality, site bandwidth and so on.
36 |
37 | Using `aioHTTP`, a response is fetched for a subdomain that is known not to exist and compared for similarity with the responses of dictionary subdomains.
38 | If the similarity exceeds the threshold it is considered the same (wildcard) page; otherwise the subdomain is treated as usable, and the final subdomains are checked with another round of response-similarity comparison. (A minimal standalone sketch of this idea follows this file.)
39 |
40 | #### Faster speed
41 | > Enumerates domains with `AsyncIO` coroutines (scan speed fluctuates slightly with network and DNS server conditions, generally finishing within 250 seconds).
42 |
43 | `AsyncIO` + `aioDNS` is more than 50% faster than traditional multiprocess/multithread/gevent approaches.
44 | Scanning `qq.com` with `620328` dictionary entries found `3421` domains in about `15` minutes.
45 |
46 | Update, September 2021: testing showed that several DNS servers now rate-limit requests; under high concurrency, large numbers of connection timeouts and exceptions greatly increase missed results. This is currently handled by restricting the DNS servers used and the concurrency level, so it is better not to chase raw speed and instead schedule scans at more reasonable times.
47 |
48 | #### A more complete dictionary
49 | > Merges dictionaries from several sources into a deduplicated list of 620328 subdomain entries
50 |
51 | - General-purpose dictionaries
52 | - single letter, single letter + single digit, double letters, double letters + single digit, double letters + double digits, three letters, four letters
53 | - single digit, double digits, triple digits
54 | - Most-used subdomains published by DNS providers
55 | - DNSPod: dnspod-top2000-sub-domains.txt
56 | - Dictionaries from other subdomain brute-force tools
57 | - subbrute: names_small.txt
58 | - subDomainsBrute: subnames_full.txt
59 |
60 | #### More collection channels
61 | - [X] Subdomains leaked via the DNSPod API
62 | - [X] Subdomains found in page response bodies
63 | - [X] Subdomains seen during redirects
64 | - [X] Subdomains from HTTPS certificate transparency logs
65 | - [X] Subdomains from DNS zone transfers
66 |
67 | #### DNS servers
68 | - Works around inconsistent network-egress decisions across DNS providers
69 | - Works around inconsistent cache lifetimes across DNS providers
70 | - Handles randomized DNS responses, e.g. fliggy.com, plu.cn
71 | - Automatically drops unusable DNS servers based on network conditions to improve the enumeration success rate
72 |
73 | ## Usage
74 | Verified only with Python 3 on macOS and Linux
75 | ```bash
76 | # install
77 | pip install esd
78 |
79 | # upgrade
80 | pip install esd --upgrade
81 | ```
82 | **CLI usage**
83 | ```bash
84 | # scan a single domain
85 | esd -d qq.com
86 |
87 | # scan a single domain in debug mode
88 | esd=debug esd -d qq.com
89 |
90 | # scan multiple domains (comma separated)
91 | esd --domain qq.com,tencent.com
92 |
93 | # scan a single domain and filter one specific string out of subdomain responses
94 | esd --domain mogujie.com --filter 搜本店
95 |
96 | # scan a single domain and filter several specific strings out of subdomain responses
97 | esd --domain mogujie.com --filter 搜本店,收藏店铺
98 |
99 | # scan domains from a file (one domain per line)
100 | esd --file targets.txt
101 |
102 | # skip the response-similarity comparison (this option filters out all wildcard-resolved domains)
103 | esd --domain qq.com --skip-rsc
104 |
105 | # split the dictionary evenly to speed up brute forcing
106 | esd --domain qq.com --split 1/4
107 |
108 | # collect subdomains via DNS zone-transfer vulnerabilities
109 | esd --domain qq.com --dns-transfer
110 |
111 | # collect subdomains via HTTPS certificate transparency
112 | esd --domain qq.com --ca-info
113 |
114 | ```
115 |
116 | **Programmatic use**
117 | ```python
118 | from ESD import EnumSubDomain
119 | domains = EnumSubDomain('feei.cn').run()
120 | ```
121 |
122 | ## Roadmap
123 | - Improve scan speed
124 | - Support third-level subdomains and more combinations
125 |
126 | ## Documentation
127 | - https://github.com/FeeiCN/ESD/wiki
128 |
129 |
130 |
--------------------------------------------------------------------------------
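
The METADATA above describes ESD's RSC (response similarity comparison) approach to wildcard-resolved domains. Below is a minimal standalone sketch of that idea, not ESD's actual implementation: fetch the page served for a subdomain that almost certainly does not exist, then compare candidate pages against it with difflib.

```python
import difflib
import random
import string

import requests


def is_probably_wildcard_page(domain, candidate, threshold=0.8):
    """True if candidate.<domain> serves (nearly) the same page as a random, non-existent name."""
    junk = ''.join(random.choices(string.ascii_lowercase, k=12))
    baseline = requests.get('http://{}.{}'.format(junk, domain), timeout=10).text
    page = requests.get('http://{}.{}'.format(candidate, domain), timeout=10).text
    # SequenceMatcher.ratio() is 1.0 for identical pages; above the threshold we
    # assume the candidate only hit the wildcard page.
    return difflib.SequenceMatcher(None, baseline, page).ratio() >= threshold
```
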
/Plugins/InfoSearch/Subdomain/IsCND/cdn-domain.conf:
--------------------------------------------------------------------------------
1 | # ChinaCache (蓝汛)
2 | gslbsvc.net.cn
3 | chinacache.com.cn
4 | ccgslb.net
5 | gslbsvc.com.cn
6 | cdnsvc.cn
7 | ccgslb.cn
8 | cdn2cdn.net
9 | blueit.org.cn
10 | cc-1.com
11 | cdnsvc.net
12 | ccgslb.com.cn
13 | lxsvc.net
14 | lxsvc.cn
15 | chinacache.org
16 | gslbsvc.com
17 | ccgslb.com
18 | gslbsvc.cn
19 | chinacache.com
20 | igslb.net
21 | chinacache.net
22 | cdnsvc.net.cn
23 | ccgslb.net.cn
24 | hd-cdn.com
25 | cdnsvc.com.cn
26 | cdnsvc.com
27 | speedupchina.net
28 | b2r.com.cn
29 | blueit.com
30 | lxsvc.cn
31 | gslbsvc.net
32 | speedupchina.com
33 | cc-cps.com
34 | cc-cps.com.cn
35 | cc-cps.net
36 | cc-cps.mobi
37 | cc-cps.cn
38 | ccbench.com
39 | webluker.com
40 | ccindex.cn
41 | ccindex.com.cn
42 | ccmplus.com.cn
43 | ccmplus.net
44 | ccmplus.cn
45 |
46 | #Webluker
47 | xgslb.net
48 |
49 | # Wangsu / ChinaNetCenter (网宿)
50 | wscdns.com
51 | ourglb0.com
52 | wsngb.com
53 | lxdns.com
54 | lxdns.net
55 | 51cdn.com
56 | chinanetcenter.com
57 | netcenter.com.cn
58 | wangsu.com
59 | ourwebat.com
60 | ourwebcdn.com
61 |
62 | # Fastweb (快网)
63 | fastweb.com.cn
64 | fwdns.net
65 | hadns.net
66 | cachecn.net
67 | sz-dns.net
68 | cachecn.com
69 | cloudcdn.cn
70 | 1test.cn
71 | 5test.cn
72 | fsspace.com
73 | fsspace.com.cn
74 | fsspace.cn
75 | cloudcdn.net
76 | fastwebcdn.com
77 | hacdn.com
78 | fwcdn.com
79 | fwcdn.net
80 | hacdn.net
81 | cloudglb.com
82 | cloudxns.net
83 | cloudglb.net
84 | cloudxns.com
85 | cloudtcp.net
86 | myxns.cn
87 | newdefend.cn
88 | myxns.net.cn
89 | myxns.com.cn
90 | myxns.org
91 | newdefend.net
92 | newdefend.org
93 | newdefend.net.cn
94 | newdefend.com.cn
95 | newdefend.org.cn
96 | newdefend.com
97 | ffdns.net
98 | fwmob.com
99 | tlgslb.com
100 | fastcdn.com
101 |
102 | # Alibaba Cloud CDN (阿里云CDN)
103 | kunlunea.com
104 | kunlunso.com
105 | kunlunwe.com
106 | kunlunno.com
107 | kunlunaq.com
108 | kunlunpi.com
109 | kunlunra.com
110 | kunlungr.com
111 | kunlunhuf.com
112 | kunlunsl.com
113 | kunlunar.com
114 | kunlunta.com
115 | kunlungem.com
116 | kunluncan.com
117 | kunlunle.com
118 | kunlunvi.com
119 | kunlunli.com
120 | kunlunsc.com
121 | kunlunsa.com
122 | kunlunca.com
123 | alikunlun.net
124 | alikunlun.com
125 |
126 | # Tencent CDN (腾讯CDN)
127 | qcloud.com
128 | myqcloud.com
129 | tcdn.qq.com
130 | cdntip.com
131 |
132 | # Baidu Cloud CDN (百度云CDN)
133 | bdydns.net
134 | bcedns.net
135 | bcedns.com
136 | bcedns.cn
137 | bdydns.com
138 | baiduyundns.net
139 | bdydns.cn
140 | baiduyundns.com
141 | baiduyundns.cn
142 |
143 | # Baidu Yunjiasu (百度云加速)
144 | yunjiasu-cdn.net
145 |
146 | # Qiniu (七牛)
147 | qiniudn.com
148 | qbox.me
149 | clouddn.com
150 | qiniudns.com
151 |
152 | # UPYUN (又拍云)
153 | aicdn.com
154 |
155 | # 360 Website Guard (360网站卫士)
156 | dnspao.com
157 | 360wzb.cn
158 |
159 | # CDNetworks (同兴万点CDN)
160 | cdngc.net
161 | cdnetworks.net
162 | gccdn.net
163 |
164 | # CDN Union (CDN联盟)
165 | cdnudns.com
166 |
167 | # BaishanCloud (白山云)
168 | qingcdn.com
169 |
170 | #CloudFlare
171 | cdn.cloudflare.net
172 | cloudflare
173 |
174 | # Unknown (未知)
175 | hdslb.com
176 | hdslb.net
177 | tbcache.com
178 | 21okglb.cn
179 | 21vianet.com.cn
180 | 21vokglb.cn
181 | 360wzb.com
182 | acadn.com
183 | akadns.net
184 | akamai-staging.net
185 | akamai.com
186 | akamai.net
187 | akamaitech.net
188 | akamaized.net
189 | alicloudlayer.com
190 | aliyun-inc.com
191 | alicloudsec.com
192 | aliyuncs.com
193 | amazonaws.com
194 | aodianyun.com
195 | aqb.so
196 | awsdns
197 | azureedge.net
198 | bitgravity.com
199 | cachefly.net
200 | chinaidns.net
201 | cloudfront.net
202 | dnion.com
203 | edgesuite.net
204 | ewcache.com
205 | fastcache.com
206 | fastly.net
207 | footprint.net
208 | fpbns.net
209 | hichina.com
210 | hichina.net
211 | incapdns.net
212 | jiashule.com
213 | okglb.com
214 | txnetworks.cn
215 | ucloud.cn
216 | unicache.com
217 | verygslb.com
218 | vo.llnwd.net
219 | cloudfront
220 | edgekey
221 | fastly
222 | akamai
223 | edgecast
224 | cachefly
225 | fpbns
226 | footprint
227 | llnwd
228 | netdna
229 | bitgravity
230 | azureedge
231 | telefonica
232 | dnsv1
233 | ngenix
234 | incapdns
235 | clients.turbobytes.net
236 | akamaiedge.net
237 | akamaitechnologies.com
238 | gslb.tbcache.com
239 | att-dsa.net
240 | bluehatnetwork.com
241 | c3cache.net
242 | cncssr.chinacache.net
243 | cloudflare.com
244 | fastlylb.net
245 | googlesyndication.
246 | googleusercontent.com
247 | l.doubleclick.net
248 | inscname.net
249 | insnw.net
250 | llnwd.net
251 | lldns.net
252 | netdna-ssl.com
253 | netdna.com
254 | stackpathdns.com
255 | instacontent.net
256 | mirror-image.net
257 | cap-mii.net
258 | swiftserve.com
259 | gslb.taobao.com
260 | vo.msecnd.net
261 | ay1.b.yahoo.com
262 | zenedge.net
--------------------------------------------------------------------------------
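
cdn-domain.conf above is a list of CDN provider domain suffixes. CheckCDN.py itself is not part of this dump, so the following is only a hypothetical sketch of how such a suffix list can be used: resolve a hostname's CNAME with dnspython (pinned in requirements.txt) and flag the host as CDN-backed when the CNAME target matches any suffix.

```python
import dns.resolver  # dnspython


def load_cdn_suffixes(path='Plugins/InfoSearch/Subdomain/IsCND/cdn-domain.conf'):
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip() and not line.startswith('#')]


def is_cdn(hostname, suffixes):
    try:
        answers = dns.resolver.resolve(hostname, 'CNAME')
    except Exception:
        return False  # no CNAME record, or resolution failed
    for rdata in answers:
        target = rdata.target.to_text().rstrip('.')
        if any(suffix in target for suffix in suffixes):
            return True
    return False


# Example: is_cdn('www.qq.com', load_cdn_suffixes())
```
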
/Common/GetProxies.py:
--------------------------------------------------------------------------------
1 | from Common.LogOutput import LogOutput
2 | logger_object = LogOutput()
3 | logger = logger_object.SetModuleName("GetProxies")
4 |
5 | import sys
6 |
7 | import requests
8 | import base64
9 | import json
10 | import configparser
11 | from threading import Thread
12 | from queue import Queue
13 | import random
14 | import urllib3
15 | import datetime
16 |
17 | urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
18 |
19 | cf = configparser.ConfigParser()
20 | cf.read("./config.ini")
21 | secs = cf.sections()
22 | email = cf.get('fofa api', 'EMAIL')
23 | key = cf.get('fofa api', 'KEY')
24 |
25 | headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36', 'Connection': 'close'}
26 |
27 | size = 10000
28 | page = 1
29 | today = datetime.date.today()
30 | oneday = datetime.timedelta(days=1)
31 | yesterday = today - oneday
32 |
33 |
34 | # check each proxy by requesting Baidu and Google
35 |
36 | def curlWeb(socks5_proxys_queue, socksProxysDict):
37 | while not socks5_proxys_queue.empty():
38 | proxy = socks5_proxys_queue.get()
39 | requests_proxies = {"http": "socks5://{}".format(proxy), "https": "socks5://{}".format(proxy)}
40 | baidu_url = "https://www.baidu.com"
41 | google_url = "https://www.google.com"
42 |
43 | try:
44 | res2 = requests.get(url=google_url, headers=headers, timeout=10, verify=False, proxies=requests_proxies)
45 | if res2.status_code == 200:
46 | logger.info("{} reached Google successfully [{}]".format(proxy, res2.status_code))
47 | socksProxysDict["google"].append(proxy)
48 | continue
49 | except Exception as e:
50 | pass
51 |
52 | try:
53 | res = requests.get(url=baidu_url, headers=headers, timeout=10, verify=False, proxies=requests_proxies)
54 | if res.status_code == 200:
55 | logger.info("{} reached Baidu successfully [{}]".format(proxy, res.status_code))
56 | socksProxysDict["baidu"].append(proxy)
57 | except Exception as e:
58 | pass
59 |
60 |
61 | def query_socks5(yesterday):
62 | query_str = r'protocol=="socks5" && "Version:5 Method:No Authentication(0x00)" && after="{}" && country="CN"'.format(yesterday)
63 | qbase64 = str(base64.b64encode(query_str.encode(encoding='utf-8')), 'utf-8')
64 | url = r'https://fofa.so/api/v1/search/all?email={}&key={}&qbase64={}&size={}&page={}&fields=host,title,ip,domain,port,country,city,server,protocol'.format(email, key, qbase64, size, page)
65 | print(url)
66 | socks5_proxys = []
67 | try:
68 | ret = json.loads(requests.get(url=url, headers=headers, timeout=10, verify=False).text)
69 | fofa_Results = ret['results']
70 | for result in fofa_Results:
71 | host, title, ip, domain, port, country, city, server, protocol = result
72 | proxy = ip + ":" + port
73 | socks5_proxys.append(proxy)
74 | except Exception as e:
75 | logger.error('fofa inquire {} : {}'.format(query_str, e.args))
76 | return socks5_proxys
77 |
78 | def SaveToProxyPool(baidu_proxies, google_proxies):
79 | try:
80 | with open("./Common/ProxyPool/DomesticProxyPool.txt", "a") as f:
81 | for baidu_proxy in baidu_proxies:
82 | f.write(baidu_proxy+'\n')
83 | logger.info("Domestic proxies are stored in Common/ProxyPool/DomesticProxyPool.txt")
84 | with open("./Common/ProxyPool/ForeignProxyPool.txt", "a") as f:
85 | for google_proxy in google_proxies:
86 | f.write(google_proxy+'\n')
87 | logger.info("Foreign proxies are stored in Common/ProxyPool/ForeignProxyPool.txt")
88 | except:
89 | logger.error("Error opening file")
90 | sys.exit()
91 | def run_getSocksProxy():
92 | logger.info("Start Searching Proxies")
93 | socksProxysDict = {"baidu": [], "google": []}
94 | socks5_proxys = query_socks5(yesterday)
95 | socks5_proxys_queue = Queue(-1)
96 | if socks5_proxys:
97 | # randomly sample up to 50 proxy IPs to check
98 | for eachSocks5 in random.sample(socks5_proxys, min(50, len(socks5_proxys))):
99 | socks5_proxys_queue.put(eachSocks5)
100 |
101 | threads = []
102 | for num in range(100):
103 | t = Thread(target=curlWeb, args=(socks5_proxys_queue, socksProxysDict))
104 | threads.append(t)
105 | t.start()
106 | for t in threads:
107 | t.join()
108 |
109 | baidu_proxies = socksProxysDict.get('baidu')
110 | google_proxies = socksProxysDict.get('google')
111 | SaveToProxyPool(baidu_proxies, google_proxies)
112 | logger.info("Find {} DomesticProxies and {} ForeignProxies".format(len(baidu_proxies), len(google_proxies)))
113 | logger.info("End Searching Proxies")
114 | # return baidu_proxies, google_proxies
115 |
116 |
117 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Bing/bing.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import threading
3 | import re
4 | from termcolor import cprint  # cprint is used for error reporting below
5 | from urllib.parse import urlparse
6 |
7 | class BingSpider():
8 | def __init__(self, page):
9 | self.header = {
10 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
11 | # "Cookie": "MUID=0D37497B3A146A9009A459B93B2C6B63; _EDGE_V=1; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=BBE8EF5D85544AFFABC19E151D05B49F&dmnchg=1; _SS=SID=14FD12E54B646094309102274AC8612F; MUIDB=0D37497B3A146A9009A459B93B2C6B63; _EDGE_S=SID=14FD12E54B646094309102274AC8612F&mkt=zh-cn&ui=zh-cn; SRCHUSR=DOB=20210930&T=1632976149000&TPC=1632961354000; ipv6=hit=1632979751088&t=4; SNRHOP=I=&TS=; SRCHHPGUSR=SRCHLANG=zh-Hans&BZA=0&BRW=NOTP&BRH=M&CW=724&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&WTS=63768572949&HV=1632976747",
12 | }
13 | self.timeout = 5
14 | self.PAGES = page # number of result pages to crawl for subdomains
15 | self.KEY_PAGES = 1 # number of result pages to crawl for keywords
16 | self.subdomains = []
17 | self.key_links = []
18 | self.keywords = ['inurl:admin', 'inurl:login', 'inurl:system', 'inurl:register', 'inurl:upload', 'intitle:后台', 'intitle:系统', 'intitle:登录']
19 | self.errorurls = {}  # maps a failing URL to its exception
20 |
21 | def info_processing(self, text):
22 | return re.findall(r'', text)
23 |
24 |
25 | def get_info(self, domain, page ,real_page):
26 | url = r"https://cn.bing.com/search?q=site:{}&first={}".format(domain, page)
27 | print('[+] page {} keyword [site:{}] Requesting: [{}]'.format(real_page, domain, url))
28 | try:
29 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
30 | tmp_subdomains = self.info_processing(res.text)
31 | for tmp_subdomain in tmp_subdomains:
32 | self.subdomains += [urlparse(tmp_subdomain[0]).netloc]
33 | except Exception as e:
34 | self.subdomains += []
35 | self.errorurls[url] = e
36 |
37 | def get_key_info(self, domain, keyword='', page=1, real_page=1):
38 | #https://cn.bing.com/search?q=site%3Atjut.edu.cn+inurl:upload&qs=n&form=QBRE&sp=-1&pq=sitetjut.edu.cn+inurl:upload&sc=1-16&sk=&cvid=6734767D90664B77800EA8092B6BB8DD&first=1
39 | #https://cn.bing.com/search?q=site%3Abaidu.com&qs=n&form=QBRE&sp=-1&pq=site%3Atjut.edu.cn&sc=1-16&sk=&cvid=6734767D90664B77800EA8092B6BB8DD
40 | url = r"https://cn.bing.com/search?q=site:{}{}&first={}".format(domain, '+'+keyword, page)
41 | print('[+] page {} keyword [site:{} {}] Requesting: [{}]'.format(real_page, domain, keyword, url))
42 | try:
43 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
44 | tmp_key_links = self.info_processing(res.text)
45 | for tmp_key_link in tmp_key_links:
46 | self.key_links += [(tmp_key_link[1], tmp_key_link[0])]
47 | self.subdomains.append(urlparse(tmp_key_link[0]).netloc)
48 | except Exception as e:
49 | self.key_links += []
50 | self.errorurls[url] = e
51 |
52 | def run(self, domain):
53 |
54 | threads = []
55 | # tmp_page = 1
56 | # self.get_key_info(domain, self.keyword, tmp_page)
57 | # self.get_info(domain, tmp_page)
58 | for keyword in self.keywords:
59 | for page in range(1, self.KEY_PAGES+1):
60 | if page == 1:
61 | tmp_page = 1
62 | elif page == 2:
63 | tmp_page = 2
64 | else:
65 | tmp_page = (page-2)*10+2
66 | t = threading.Thread(target=self.get_key_info, args=(domain, keyword, tmp_page, page))
67 | t.start()
68 | threads.append(t)
69 | if(len(threads)>5):
70 | for t in threads:
71 | t.join()
72 | threads = []
73 |
74 | for page in range(1, self.PAGES+1):
75 | if page == 1:
76 | tmp_page = 1
77 | elif page == 2:
78 | tmp_page = 2
79 | else:
80 | tmp_page = (page - 2) * 10 + 2
81 | t = threading.Thread(target=self.get_info, args=(domain, tmp_page, page))
82 | t.start()
83 | threads.append(t)
84 | if (len(threads) > 5):
85 | for t in threads:
86 | t.join()
87 | threads = []
88 | for t in threads:
89 | t.join()
90 |
91 | if len(self.errorurls) >= 3:
92 | cprint("[+]There are too many exceptions requested, you need to check.", "red")
93 | cprint("[+]The exception information is", "red")
94 | for key in self.errorurls.keys():
95 | cprint("[+]url:{}\n>>Err:{}".format(key, self.errorurls[key]), "red")
96 |
97 | return list(set(self.subdomains)), list(set(self.key_links))
98 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Domain/domainapi.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import re
3 | from urllib.parse import quote
4 | import json
5 | import math
6 |
7 | from Common.LogOutput import LogOutput
8 | logger_object = LogOutput()
9 |
10 | logger = logger_object.SetModuleName('Domain')
11 |
12 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0'}
13 |
14 | class DomainApi():
15 | def __init__(self):
16 | # self.proxy = {"http": "http://{}".format(proxy), "https": "https://{}".format(proxy)}
17 | pass
18 | def chinazApi(self, domain):
19 | # parse the JSON data returned by chinaz
20 | def parse_json(json_ret):
21 | chinazNewDomains = []
22 | results = json_ret['data']
23 | for result in results:
24 | companyName = result['webName']
25 | newDomain = result['host']
26 | time = result['verifyTime']
27 | chinazNewDomains.append((companyName, newDomain, time))
28 | chinazNewDomains = list(set(chinazNewDomains))
29 | return chinazNewDomains
30 |
31 |
32 | chinazNewDomains = []
33 | tempDict = {}
34 | tempList = []
35 |
36 | # get the company name behind the domain
37 | url = r'http://icp.chinaz.com/{}'.format(domain)
38 | try:
39 | res = requests.get(url=url, headers=headers, allow_redirects=False, verify=False, timeout=10)
40 | except Exception as e:
41 | logger.error('{} {}'.format(url, e.args))
42 | return [], []
43 | text = res.text
44 |
45 | companyName = re.search("var kw = '([\S]*)'", text)
46 | if companyName:
47 | companyName = companyName.group(1)
48 | logger.info('Company name: {}'.format(companyName))
49 | companyNameUrlEncode = quote(str(companyName))
50 | else:
51 | logger.warning('No company name matched')
52 | return [], []
53 |
54 | # reverse lookup of domains from the ICP filing record
55 | headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
56 | url = 'http://icp.chinaz.com/Home/PageData'
57 | data = 'pageNo=1&pageSize=20&Kw=' + companyNameUrlEncode
58 | try:
59 | res = requests.post(url=url, headers=headers, data=data, allow_redirects=False, verify=False, timeout=10)
60 | except Exception as e:
61 | logger.error('{} {}'.format(url, e.args))
62 | return [], []
63 |
64 | json_ret = json.loads(res.text)
65 | if 'amount' not in json_ret.keys():
66 | return chinazNewDomains, []
67 | amount = json_ret['amount']
68 | pages = math.ceil(amount / 20)
69 | logger.info('Pages: {}'.format(pages))
70 | tempList.extend(parse_json(json_ret))
71 |
72 | # fetch the remaining pages
73 | for page in range(2, pages+1):
74 | logger.info('Requesting page {}'.format(page))
75 | data = 'pageNo={}&pageSize=20&Kw='.format(page) + companyNameUrlEncode
76 | try:
77 | res = requests.post(url=url, headers=headers, data=data, allow_redirects=False, verify=False, timeout=10)
78 | json_ret = json.loads(res.text)
79 | tempList.extend(parse_json(json_ret))
80 | except Exception as e:
81 | logger.error('{} {}'.format(url, e.args))
82 |
83 | for each in tempList:
84 | if each[1] not in tempDict:
85 | tempDict[each[1]] = each
86 | chinazNewDomains.append(each)
87 |
88 | return chinazNewDomains, companyName
89 |
90 | def run_domain(self, domain):
91 | beianNewDomains = []
92 | chinazNewDomains, companyName = self.chinazApi(domain)
93 |
94 | tempDict = {}
95 | for each in chinazNewDomains:
96 | if each[1] not in tempDict:
97 | tempDict[each[1]] = each
98 | beianNewDomains.append(each)
99 |
100 | logger.info("{} top-level domains in total after deduplication".format(len(beianNewDomains)))
101 | print("\033[33m"+"The top-level domain name is shown below"+"\033[0m")
102 |
103 | for _ in beianNewDomains:
104 | print(_)
105 |
106 | p = re.compile("[^0-9a-zA-Z.]+")
107 | judge = 'y'
108 | for _ in beianNewDomains[:]:  # iterate over a copy because items may be removed below
109 | if p.match(_[1]):
110 | logger.critical("I’m not sure if [{}] is a top-level domain name, you need to judge. (y/n)".format(_[1]))
111 | judge = input()
112 | while(judge != 'y' and judge !='n'):
113 | logger.critical("I’m not sure if [{}] is a top-level domain name, you need to judge. (y/n)".format(_[1]))
114 | judge = input()
115 | if(judge == 'y'):
116 | continue
117 | else:
118 | beianNewDomains.remove(_)
119 | else:
120 | continue
121 | logger.info("A total of {} top-level domain name after screening".format(len(beianNewDomains)))
122 | for _ in beianNewDomains:
123 | print(_[1])
124 |
125 | return beianNewDomains, companyName
126 | # [('同济大学浙江学院', 'tjzj.edu.cn', '2021-01-14')] 同济大学浙江学院
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Google/docs/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # googlesearch documentation build configuration file, created by
4 | # sphinx-quickstart on Tue Nov 6 12:25:12 2018.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | #
19 | import os
20 | import sys
21 | sys.path.insert(0, os.path.abspath('..'))
22 |
23 |
24 | # -- General configuration ------------------------------------------------
25 |
26 | # If your documentation needs a minimal Sphinx version, state it here.
27 | #
28 | # needs_sphinx = '1.0'
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = ['sphinx.ext.autodoc',
34 | 'sphinx.ext.viewcode',
35 | 'sphinx.ext.githubpages']
36 |
37 | # Add any paths that contain templates here, relative to this directory.
38 | templates_path = ['_templates']
39 |
40 | # The suffix(es) of source filenames.
41 | # You can specify multiple suffix as a list of string:
42 | #
43 | # source_suffix = ['.rst', '.md']
44 | source_suffix = '.rst'
45 |
46 | # The master toctree document.
47 | master_doc = 'index'
48 |
49 | # General information about the project.
50 | project = u'googlesearch'
51 | copyright = u'2018, Mario Vilas'
52 | author = u'Mario Vilas'
53 |
54 | # The version info for the project you're documenting, acts as replacement for
55 | # |version| and |release|, also used in various other places throughout the
56 | # built documents.
57 | #
58 | # The short X.Y version.
59 | version = u''
60 | # The full version, including alpha/beta/rc tags.
61 | release = u''
62 |
63 | # The language for content autogenerated by Sphinx. Refer to documentation
64 | # for a list of supported languages.
65 | #
66 | # This is also used if you do content translation via gettext catalogs.
67 | # Usually you set "language" from the command line for these cases.
68 | language = None
69 |
70 | # List of patterns, relative to source directory, that match files and
71 | # directories to ignore when looking for source files.
72 | # This patterns also effect to html_static_path and html_extra_path
73 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
74 |
75 | # The name of the Pygments (syntax highlighting) style to use.
76 | pygments_style = 'sphinx'
77 |
78 | # If true, `todo` and `todoList` produce output, else they produce nothing.
79 | todo_include_todos = False
80 |
81 |
82 | # -- Options for HTML output ----------------------------------------------
83 |
84 | # The theme to use for HTML and HTML Help pages. See the documentation for
85 | # a list of builtin themes.
86 | #
87 | html_theme = 'alabaster'
88 |
89 | # Theme options are theme-specific and customize the look and feel of a theme
90 | # further. For a list of options available for each theme, see the
91 | # documentation.
92 | #
93 | # html_theme_options = {}
94 |
95 | # Add any paths that contain custom static files (such as style sheets) here,
96 | # relative to this directory. They are copied after the builtin static files,
97 | # so a file named "default.css" will overwrite the builtin "default.css".
98 | html_static_path = ['_static']
99 |
100 | # Custom sidebar templates, must be a dictionary that maps document names
101 | # to template names.
102 | #
103 | # This is required for the alabaster theme
104 | # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
105 | html_sidebars = {
106 | '**': [
107 | 'relations.html', # needs 'show_related': True theme option to display
108 | 'searchbox.html',
109 | ]
110 | }
111 |
112 |
113 | # -- Options for HTMLHelp output ------------------------------------------
114 |
115 | # Output file base name for HTML help builder.
116 | htmlhelp_basename = 'googlesearchdoc'
117 |
118 |
119 | # -- Options for LaTeX output ---------------------------------------------
120 |
121 | latex_elements = {
122 | # The paper size ('letterpaper' or 'a4paper').
123 | #
124 | # 'papersize': 'letterpaper',
125 |
126 | # The font size ('10pt', '11pt' or '12pt').
127 | #
128 | # 'pointsize': '10pt',
129 |
130 | # Additional stuff for the LaTeX preamble.
131 | #
132 | # 'preamble': '',
133 |
134 | # Latex figure (float) alignment
135 | #
136 | # 'figure_align': 'htbp',
137 | }
138 |
139 | # Grouping the document tree into LaTeX files. List of tuples
140 | # (source start file, target name, title,
141 | # author, documentclass [howto, manual, or own class]).
142 | latex_documents = [
143 | (master_doc, 'googlesearch.tex', u'googlesearch Documentation',
144 | u'Mario Vilas', 'manual'),
145 | ]
146 |
147 |
148 | # -- Options for manual page output ---------------------------------------
149 |
150 | # One entry per manual page. List of tuples
151 | # (source start file, name, description, authors, manual section).
152 | man_pages = [
153 | (master_doc, 'googlesearch', u'googlesearch Documentation',
154 | [author], 1)
155 | ]
156 |
157 |
158 | # -- Options for Texinfo output -------------------------------------------
159 |
160 | # Grouping the document tree into Texinfo files. List of tuples
161 | # (source start file, target name, title, author,
162 | # dir menu entry, description, category)
163 | texinfo_documents = [
164 | (master_doc, 'googlesearch', u'googlesearch Documentation',
165 | author, 'googlesearch', 'Python bindings to the Google search engine.',
166 | 'Miscellaneous'),
167 | ]
168 |
169 |
170 | # -- Options for Epub output ----------------------------------------------
171 |
172 | # Bibliographic Dublin Core info.
173 | epub_title = project
174 | epub_author = author
175 | epub_publisher = author
176 | epub_copyright = copyright
177 |
178 | # The unique identifier of the text. This can be a ISBN number
179 | # or the project homepage.
180 | #
181 | # epub_identifier = ''
182 |
183 | # A unique identification for the text.
184 | #
185 | # epub_uid = ''
186 |
187 | # A list of files that should not be packed into the epub file.
188 | epub_exclude_files = ['search.html']
189 |
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/Spider/Baidu/baidu.py:
--------------------------------------------------------------------------------
1 |
2 | import random
3 |
4 | import requests
5 | import threading
6 | import re
7 | from urllib.parse import urlparse
8 | from Common.LogOutput import LogOutput
9 | logger_object = LogOutput()
10 | logger = logger_object.SetModuleName("BaiduSpider")
11 |
12 | class MyThread(threading.Thread):
13 | def __init__(self,func,args=()):
14 | super(MyThread,self).__init__()
15 | self.func = func
16 | self.args = args
17 | def run(self):
18 | self.result = self.func(*self.args)
19 | def get_result(self):
20 | try:
21 | return self.result
22 | except Exception:
23 | return None
24 | class BaiduSpider():
25 |     def __init__(self, proxies=None):
26 | self.header = {
27 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36",
28 | # "Cookie": "MUID=0D37497B3A146A9009A459B93B2C6B63; _EDGE_V=1; SRCHD=AF=NOFORM; SRCHUID=V=2&GUID=BBE8EF5D85544AFFABC19E151D05B49F&dmnchg=1; _SS=SID=14FD12E54B646094309102274AC8612F; MUIDB=0D37497B3A146A9009A459B93B2C6B63; _EDGE_S=SID=14FD12E54B646094309102274AC8612F&mkt=zh-cn&ui=zh-cn; SRCHUSR=DOB=20210930&T=1632976149000&TPC=1632961354000; ipv6=hit=1632979751088&t=4; SNRHOP=I=&TS=; SRCHHPGUSR=SRCHLANG=zh-Hans&BZA=0&BRW=NOTP&BRH=M&CW=724&CH=722&SW=1536&SH=864&DPR=1.25&UTC=480&DM=1&WTS=63768572949&HV=1632976747",
29 | }
30 | self.timeout = 5
31 |         self.PAGES = 10000 # number of result pages to crawl for subdomains
32 |         self.KEY_PAGES = 2 # number of result pages to crawl for each keyword
33 | self.subdomains = []
34 | self.key_links = []
35 | self.links = []
36 |         self.keywords = ['inurl:admin', 'inurl:login', 'inurl:system', 'inurl:register', 'inurl:upload', 'intitle:后台', 'intitle:系统', 'intitle:登录']  # the Chinese dorks mean admin panel(后台) / system(系统) / login(登录)
37 |         self.errorurls = {} # URLs whose first request raised an exception, with the reason; retried 3 seconds later
38 | self.proxies = proxies
39 |
40 |     def info_processing(self, text):
41 |         # Reconstructed pattern (the original was stripped from this dump): capture
42 |         # (title, redirect link) pairs from Baidu result anchors so item[1] is the link, as the callers expect.
43 |         return [(t, l) for l, t in re.findall(r'<h3[^>]*>\s*<a[^>]*href="([^"]+)"[^>]*>(.*?)</a>', text, re.S)]
44 |
45 | def real_url(self, link):
46 | try:
47 | real_link = requests.get(link, allow_redirects=False, timeout=self.timeout).headers.get('Location')
48 | return real_link
49 | except:
50 | return link
51 |
52 | def get_proxy(self):
53 | return random.choice(self.proxies)
54 |
55 |
56 | def get_info(self, domain, page=0):
57 | url = r"https://www.baidu.com/s?wd=site:{}&pn={}0".format(domain, page)
58 |         print('[+]page: {}  keyword: [site:{}]  Requesting: [{}]'.format(page+1, domain, url))
59 | # proxies = {
60 | # "http": "socks5://{}".format(proxy),
61 | # "https": "socks5://{}".format(proxy)
62 | # }
63 | try:
64 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
65 | if self.check_page(res.text, page+1) == 'Stop':
66 | return 'Stop'
67 | tmp_subdomains = self.info_processing(res.text)
68 | for tmp_subdomain in tmp_subdomains:
69 | tmp_real_link = self.real_url(tmp_subdomain[1])
70 | self.subdomains += [urlparse(tmp_real_link).netloc]
71 | self.links.append(tmp_real_link)
72 | logger.info(urlparse(tmp_real_link).netloc)
73 | except Exception as e:
74 | self.subdomains += []
75 | self.errorurls[url] = e
76 |
77 |
78 |
79 | def get_key_info(self, domain, keyword='', page=0):
80 | url = r"https://www.baidu.com/s?wd=site:{}{}&pn={}0".format(domain, '+'+keyword, page)
81 |         print('[+]page: {}  keyword: [site:{} {}]  Requesting: [{}]'.format(page+1, domain, keyword, url))
82 | # proxies = {
83 | # "http": "socks5://{}".format(proxy),
84 | # "https": "socks5://{}".format(proxy)
85 | # }
86 | try:
87 | res = requests.get(url=url, headers=self.header, timeout=self.timeout)
88 | tmp_key_links = self.info_processing(res.text)
89 | for tmp_key_link in tmp_key_links:
90 | tmp_real_link = self.real_url(tmp_key_link[1])
91 | self.key_links += [(tmp_key_link[0], tmp_real_link)]
92 |             self.subdomains.append(urlparse(tmp_real_link).netloc)
93 | logger.info(urlparse(tmp_real_link).netloc)
94 | except Exception as e:
95 | self.key_links += []
96 | self.errorurls[url] = e
97 |
98 |
99 |     def check_page(self, text, page):
100 |         num = re.findall(r'<strong><span class="pc">(\d+)</span></strong>', text)  # reconstructed: current-page marker in Baidu's pager (the original pattern was stripped from this dump)
101 |         if (num != ['{}'.format(page)]) and (page != 1):
102 |             return 'Stop'
103 |         else:
104 |             print("Page {} looks fine, num = {}".format(page, num))
105 |             return 'Continue'
106 |
107 |
108 | '''
109 |     Return the list of subdomains, the list of (title, link) keyword hits, and the list of all links collected
110 | '''
111 | def run(self, domain):
112 | threads = []
113 | num = 1
114 | flag = 1
115 | try:
116 | while flag != 0:
117 | # proxy = self.get_proxy()
118 | for page in range(0+(num-1)*5, 5+(num-1)*5):
119 | # t = MyThread(self.get_info, args=(domain, proxy, page))
120 | t = MyThread(self.get_info, args=(domain, page))
121 | t.start()
122 | threads.append(t)
123 | for t in threads:
124 | t.join()
125 | if(t.get_result() == 'Stop'):
126 | flag = 0
127 | num += 1
128 |
129 |
130 | for keyword in self.keywords:
131 | # proxy = self.get_proxy()
132 | for page in range(0, self.KEY_PAGES):
133 | t = threading.Thread(target=self.get_key_info, args=(domain, keyword, page))
134 | # t = threading.Thread(target=self.get_key_info, args=(domain, proxy, keyword, page))
135 | t.start()
136 | threads.append(t)
137 | for t in threads:
138 | t.join()
139 |
140 | # if len(self.errorurls)>=3:
141 | # cprint("[+]There are too many exceptions requested, you need to check.", "red")
142 | # cprint("[+]The exception information is", "red")
143 | # for key in self.errorurls.keys():
144 | # cprint("[+]url:{}\n>>Err:{}".format(key, self.errorurls[key]), "red")
145 | except:
146 | return list(set(self.subdomains)), list(set(self.key_links)), list(set(self.links))
147 | # # print(set(self.subdomains)), list(set(self.key_links))
148 | return list(set(self.subdomains)), list(set(self.key_links)), list(set(self.links))
--------------------------------------------------------------------------------
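For reference, a minimal usage sketch of the spider above (import path as in this repository; the three return values follow the final return statement in run):

from Plugins.InfoSearch.Subdomain.Spider.Baidu.baidu import BaiduSpider

spider = BaiduSpider(proxies=[])                 # proxies are only used by the commented-out code paths
subdomains, key_links, links = spider.run("example.com")
print(len(subdomains), "unique subdomains")
print(key_links[:3])                             # (title, resolved link) pairs from the keyword queries
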
/Plugins/InfoSearch/Subdomain/subdomainapi.py:
--------------------------------------------------------------------------------
1 | '''
2 | API for collecting subdomains
3 | '''
4 | import sys
5 | import requests
6 |
7 | from Common.LogOutput import LogOutput
8 |
9 | logger_object = LogOutput()
10 | logger = logger_object.SetModuleName("subdomains")
11 |
12 | class SubdomainApi():
13 |
14 | def __init__(self):
15 | self.global_subdomains = []
16 | self.global_subdomains_ips = []
17 | self.global_key_links = []
18 | self.links = []
19 |         self.global_CDNSubdomainsDict = {}  # filled by CheckCDN with a dict keyed by subdomain
20 | # self.proxies = proxies
21 |
22 | def WebSpiderSubdomains(self, domain):
23 | # proxies = self.proxies
24 | def BaiduSpider(domain):
25 | logger.info('Start Baidu Spider')
26 |
27 | from Plugins.InfoSearch.Subdomain.Spider.Baidu.baidu import BaiduSpider
28 |             baidu_spider = BaiduSpider()
29 |             tmp_subdomains, tmp_key_links, tmp_links = baidu_spider.run(domain)
30 | self.global_subdomains += tmp_subdomains
31 | self.global_key_links += tmp_key_links
32 | self.links += tmp_links
33 | logger.info('Baidu Spider Is Over')
34 |
35 |         # Bing requests have been unreliable, so this spider is disabled below
36 | def BingSpider(domain):
37 | logger.info('Start Bing Spider')
38 |
39 | from Plugins.InfoSearch.Subdomain.Spider.Bing.bing import BingSpider
40 |             bing_spider = BingSpider()
41 |             tmp_subdomains, tmp_key_links, tmp_links = bing_spider.run(domain)
42 |
43 |             self.global_subdomains += tmp_subdomains
44 |             self.global_key_links += tmp_key_links
45 |             self.links += tmp_links
46 | logger.info('Bing Spider Is Over')
47 |
48 |
49 | logger.info("Start Spider Module")
50 | BaiduSpider(domain)
51 | # BingSpider(domain, proxies)
52 | logger.info("WebSpider is over")
53 |
54 | def ThirdPartyPlatform(self, domain):
55 | # proxies = self.proxies
56 | # proxy = random.choice(proxies)
57 | def Certificate(domain):
58 |
59 | from Plugins.InfoSearch.Subdomain.ThirdPartyPlatform.certificate import Certificate
60 |             certificate = Certificate()
61 |             tmp_subdomains = certificate.run(domain)
62 |
63 | self.global_subdomains += tmp_subdomains
64 |
65 | def Netcraft(domain):
66 | from Plugins.InfoSearch.Subdomain.ThirdPartyPlatform.netcraft import Netcraft
67 |             netcraft = Netcraft()
68 |             tmp_subdomains = netcraft.Run(domain)
69 |
70 | self.global_subdomains += tmp_subdomains
71 | logger.info("ThirdPartyPlatform start")
72 | Certificate(domain)
73 | Netcraft(domain)
74 | logger.info("ThirdPartyPlatform end")
75 |
76 |
77 |     '''DNS resolution'''
78 | def Dns_resolver(self):
79 | import dns.resolver
80 | dns_servers = [
81 |             # The DNS server matters a lot for accuracy; some servers return results inconsistent with others, or none at all
82 | # '223.5.5.5', # AliDNS
83 | # '114.114.114.114', # 114DNS
84 | # '1.1.1.1', # Cloudflare
85 | '119.29.29.29', # DNSPod https://www.dnspod.cn/products/public.dns
86 | # '180.76.76.76', # BaiduDNS
87 | # '1.2.4.8', # sDNS
88 | # '11.1.1.1' # test DNS, not available
89 |             # '8.8.8.8', # Google DNS, latency is too high
90 | ]
91 |
92 | my_resolver = dns.resolver.Resolver()
93 | my_resolver.nameservers = dns_servers
94 |
95 | def DNS_Query(domain_name, domain_type):
96 | try:
97 | ips = ''
98 | A = my_resolver.resolve(domain_name, domain_type)
99 | for ip in A.rrset.items.keys():
100 | ips = ips + str(ip) + ','
101 | return ips.strip(",")
102 | except Exception as e:
103 | return 'null'
104 |
105 |
106 | logger.info("Dns_resolver start")
107 | for single_subdomain in self.global_subdomains:
108 | ips = DNS_Query(single_subdomain, "A")
109 | self.global_subdomains_ips += [(single_subdomain, ips)]
110 |
111 | logger.info("Dns_resolver end")
112 |
113 |
114 | def ESD_Run(self, domain):
115 | logger.info("ESD start")
116 | from Plugins.InfoSearch.Subdomain.ESD.ESD import EnumSubDomain
117 | self.global_subdomains_ips += EnumSubDomain(domain).run()
118 | logger.info("ESD end")
119 |
120 | def Check_network_connectivity(self):
121 | try:
122 | logger.info("Checking the network")
123 | if requests.get("https://www.baidu.com").status_code == 200:
124 | logger.info("Network status is good")
125 | except:
126 |             logger.error("Network problem: please check your network settings")
127 | sys.exit()
128 | def JsFinderRun(self):
129 | from Plugins.InfoSearch.Subdomain.JsFinder import jsfinder
130 | for link in self.links:
131 | self.global_subdomains += jsfinder.RunJsFinder(link)
132 |
133 |
134 | # def Save_Subdomains(self, domain):
135 | # f = open('../../../Reports/{}-{}-{}-{}'.format(domain, datetime.datetime.now().year, datetime.datetime.now().month,datetime.datetime.now().day), 'w')
136 | # print(list(set(self.global_subdomains_ips)))
137 | #
138 | # print("The subdomain is stored in Reports/{}".format(f.name))
139 | #
140 | # for i in self.global_subdomains_ips:
141 | # f.write(i[0] + ' ' + i[1] + '\n')
142 | # f.close()
143 | # logger.error("There is something wrong in network")
144 |
145 |
146 | def Data_Filtering(self, domain):
147 | logger.info("Data filtering start")
148 |         if '' in self.global_subdomains:
149 |             self.global_subdomains.remove('')
150 |         if domain in self.global_subdomains:
151 |             self.global_subdomains.remove(domain)
152 | self.global_subdomains = list(set(self.global_subdomains))
153 | logger.info("Data filtering end")
154 |
155 |
156 | def CheckCDN(self):
157 | from Plugins.InfoSearch.Subdomain.IsCND import CheckCDN
158 |         self.subdomain_ips, self.global_CDNSubdomainsDict = CheckCDN.run_checkCDN(self.global_subdomains)
159 |
160 | def Run(self, domain):
161 | logger.info("Subdomains start")
162 |
163 |
164 |
165 | # self.Check_network_connectivity()
166 |
167 | self.WebSpiderSubdomains(domain)
168 | self.JsFinderRun()
169 | self.ThirdPartyPlatform(domain)
170 |
171 |
172 | self.Data_Filtering(domain)
173 | self.CheckCDN()
174 |
175 | # self.Dns_resolver()
176 |
177 | # self.ESD_Run(domain)
178 |
179 | logger.info("Subdomains end")
180 | return list(set(self.global_subdomains_ips)), self.global_key_links
181 |
182 |
--------------------------------------------------------------------------------
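A short, hedged usage sketch for the API above (import path as in this repository; Run returns the de-duplicated (subdomain, ip) records plus the keyword links collected by the spiders):

from Plugins.InfoSearch.Subdomain.subdomainapi import SubdomainApi

api = SubdomainApi()
subdomain_ips, key_links = api.Run("example.com")
for record in subdomain_ips:
    print(record)               # (subdomain, ip) records gathered by the enabled modules
for title, link in key_links:
    print(title, link)          # keyword hits such as login/admin pages
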
/Plugins/InfoSearch/Subdomain/JsFinder/jsfinder.py:
--------------------------------------------------------------------------------
1 |
2 | import requests, argparse, sys, re
3 | from requests.packages import urllib3
4 | from urllib.parse import urlparse
5 | from bs4 import BeautifulSoup
6 |
7 |
8 | def extract_URL(JS):
9 | pattern_raw = r"""
10 | (?:"|') # Start newline delimiter
11 | (
12 | ((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or //
13 | [^"'/]{1,}\. # Match a domainname (any character + dot)
14 | [a-zA-Z]{2,}[^"']{0,}) # The domainextension and/or path
15 | |
16 | ((?:/|\.\./|\./) # Start with /,../,./
17 | [^"'><,;| *()(%%$^/\\\[\]] # Next character can't be...
18 | [^"'><,;|()]{1,}) # Rest of the characters can't be
19 | |
20 | ([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with /
21 | [a-zA-Z0-9_\-/]{1,} # Resource name
22 | \.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action)
23 | (?:[\?|/][^"|']{0,}|)) # ? mark with parameters
24 | |
25 | ([a-zA-Z0-9_\-]{1,} # filename
26 | \.(?:php|asp|aspx|jsp|json|
27 | action|html|js|txt|xml) # . + extension
28 | (?:\?[^"|']{0,}|)) # ? mark with parameters
29 | )
30 | (?:"|') # End newline delimiter
31 | """
32 | pattern = re.compile(pattern_raw, re.VERBOSE)
33 | result = re.finditer(pattern, str(JS))
34 | if result == None:
35 | return None
36 | js_url = []
37 | return [match.group().strip('"').strip("'") for match in result
38 | if match.group() not in js_url]
39 |
40 |
41 | # Get the page source
42 | def Extract_html(URL):
43 | header = {
44 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.108 Safari/537.36",
45 | }
46 | try:
47 | raw = requests.get(URL, headers=header, timeout=3, verify=False)
48 | raw = raw.content.decode("utf-8", "ignore")
49 | return raw
50 | except:
51 | return None
52 |
53 |
54 | # Handling relative URLs
55 | def process_url(URL, re_URL):
56 |     black_url = ["javascript:"] # keywords used to filter out unwanted URLs
57 | URL_raw = urlparse(URL)
58 | ab_URL = URL_raw.netloc
59 | host_URL = URL_raw.scheme
60 | if re_URL[0:2] == "//":
61 | result = host_URL + ":" + re_URL
62 | elif re_URL[0:4] == "http":
63 | result = re_URL
64 | elif re_URL[0:2] != "//" and re_URL not in black_url:
65 | if re_URL[0:1] == "/":
66 | result = host_URL + "://" + ab_URL + re_URL
67 | else:
68 | if re_URL[0:1] == ".":
69 | if re_URL[0:2] == "..":
70 | result = host_URL + "://" + ab_URL + re_URL[2:]
71 | else:
72 | result = host_URL + "://" + ab_URL + re_URL[1:]
73 | else:
74 | result = host_URL + "://" + ab_URL + "/" + re_URL
75 | else:
76 | result = URL
77 | return result
78 |
79 |
80 | def find_last(string, str):
81 | positions = []
82 | last_position = -1
83 | while True:
84 | position = string.find(str, last_position + 1)
85 | if position == -1: break
86 | last_position = position
87 | positions.append(position)
88 | return positions
89 |
90 |
91 | def find_by_url(url, js=False):
92 | if js == False:
93 | try:
94 | print("url:" + url)
95 | except:
96 | print("Please specify a URL like https://www.baidu.com")
97 | html_raw = Extract_html(url)
98 | if html_raw == None:
99 | print("Fail to access " + url)
100 | return None
101 | # print(html_raw)
102 | html = BeautifulSoup(html_raw, "html.parser")
103 | html_scripts = html.findAll("script")
104 | script_array = {}
105 | script_temp = ""
106 | for html_script in html_scripts:
107 | script_src = html_script.get("src")
108 | if script_src == None:
109 | script_temp += html_script.get_text() + "\n"
110 | else:
111 | purl = process_url(url, script_src)
112 | script_array[purl] = Extract_html(purl)
113 | script_array[url] = script_temp
114 | allurls = []
115 | for script in script_array:
116 | # print(script)
117 | temp_urls = extract_URL(script_array[script])
118 | if len(temp_urls) == 0: continue
119 | for temp_url in temp_urls:
120 | allurls.append(process_url(script, temp_url))
121 | result = []
122 | for singerurl in allurls:
123 | url_raw = urlparse(url)
124 | domain = url_raw.netloc
125 | positions = find_last(domain, ".")
126 | miandomain = domain
127 | if len(positions) > 1: miandomain = domain[positions[-2] + 1:]
128 | # print(miandomain)
129 | suburl = urlparse(singerurl)
130 | subdomain = suburl.netloc
131 | # print(singerurl)
132 | if miandomain in subdomain or subdomain.strip() == "":
133 | if singerurl.strip() not in result:
134 | result.append(singerurl)
135 | return result
136 | return sorted(set(extract_URL(Extract_html(url)))) or None
137 |
138 |
139 | def find_subdomain(urls, mainurl):
140 | url_raw = urlparse(mainurl)
141 | domain = url_raw.netloc
142 | miandomain = domain
143 | positions = find_last(domain, ".")
144 | if len(positions) > 1: miandomain = domain[positions[-2] + 1:]
145 | subdomains = []
146 | for url in urls:
147 | suburl = urlparse(url)
148 | subdomain = suburl.netloc
149 | # print(subdomain)
150 | if subdomain.strip() == "": continue
151 | if miandomain in subdomain:
152 | if subdomain not in subdomains:
153 | subdomains.append(subdomain)
154 | return subdomains
155 |
156 |
157 | def find_by_url_deep(url):
158 | html_raw = Extract_html(url)
159 | if html_raw == None:
160 | print("Fail to access " + url)
161 | return None
162 | html = BeautifulSoup(html_raw, "html.parser")
163 | html_as = html.findAll("a")
164 | links = []
165 | for html_a in html_as:
166 | src = html_a.get("href")
167 | if src == "" or src == None: continue
168 | link = process_url(url, src)
169 | if link not in links:
170 | links.append(link)
171 | if links == []: return None
172 | print("ALL Find " + str(len(links)) + " links")
173 | urls = []
174 | i = len(links)
175 | for link in links:
176 | temp_urls = find_by_url(link)
177 | if temp_urls == None: continue
178 | print("Remaining " + str(i) + " | Find " + str(len(temp_urls)) + " URL in " + link)
179 | for temp_url in temp_urls:
180 | if temp_url not in urls:
181 | urls.append(temp_url)
182 | i -= 1
183 | return urls
184 |
185 |
186 | # def find_by_file(file_path, js=False):
187 | # with open(file_path, "r") as fobject:
188 | # links = fobject.read().split("\n")
189 | # if links == []: return None
190 | # print("ALL Find " + str(len(links)) + " links")
191 | # urls = []
192 | # i = len(links)
193 | # for link in links:
194 | # if js == False:
195 | # temp_urls = find_by_url(link)
196 | # else:
197 | # temp_urls = find_by_url(link, js=True)
198 | # if temp_urls == None: continue
199 | # print(str(i) + " Find " + str(len(temp_urls)) + " URL in " + link)
200 | # for temp_url in temp_urls:
201 | # if temp_url not in urls:
202 | # urls.append(temp_url)
203 | # i -= 1
204 | # return urls
205 |
206 | #
207 | # def giveresult(urls, domian):
208 | # if urls == None:
209 | # return None
210 | # print("Find " + str(len(urls)) + " URL:")
211 | # content_url = ""
212 | # content_subdomain = ""
213 | # for url in urls:
214 | # content_url += url + "\n"
215 | # print(url)
216 | # subdomains = find_subdomain(urls, domian)
217 | # print("\nFind " + str(len(subdomains)) + " Subdomain:")
218 | # for subdomain in subdomains:
219 | # content_subdomain += subdomain + "\n"
220 | # print(subdomain)
221 | # if args.outputurl != None:
222 | # with open(args.outputurl, "a", encoding='utf-8') as fobject:
223 | # fobject.write(content_url)
224 | # print("\nOutput " + str(len(urls)) + " urls")
225 | # print("Path:" + args.outputurl)
226 | # if args.outputsubdomain != None:
227 | # with open(args.outputsubdomain, "a", encoding='utf-8') as fobject:
228 | # fobject.write(content_subdomain)
229 | # print("\nOutput " + str(len(subdomains)) + " subdomains")
230 | # print("Path:" + args.outputsubdomain)
231 |
232 | # return content_url, content_subdomain
233 |
234 | def RunJsFinder(url):
235 | urllib3.disable_warnings()
236 |     urls = find_by_url(url) or []  # find_by_url may return None when the page cannot be fetched
237 | subdomains = find_subdomain(urls, url)
238 | # return urls, subdomains
239 | return subdomains
240 | if __name__ == "__main__":
241 | print(RunJsFinder("https://www.tjut.edu.cn"))
--------------------------------------------------------------------------------
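To illustrate the two helpers above with made-up inputs (the expected outputs follow from the URL-extraction regex and from the relative-URL handling in process_url):

from Plugins.InfoSearch.Subdomain.JsFinder.jsfinder import extract_URL, process_url

js = 'var api = "/api/v1/user"; var cdn = "https://static.example.com/app.js";'
print(extract_URL(js))
# should yield ['/api/v1/user', 'https://static.example.com/app.js']

print(process_url("https://www.example.com/index.html", "/api/v1/user"))
# should yield 'https://www.example.com/api/v1/user'
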
/Plugins/InfoSearch/Subdomain/IsCND/CheckCDN.py:
--------------------------------------------------------------------------------
1 | import ipaddress
2 | import re
3 | import dns.resolver
4 | import geoip2.database
5 |
6 |
7 | from queue import Queue
8 | from threading import Thread
9 | from Common.LogOutput import LogOutput
10 | logger_object = LogOutput()
11 | logger = logger_object.SetModuleName("CheckCDN")
12 |
13 |
14 |
15 |
16 | # Query PDNS and exclude the common domestic and foreign CDN ranges; an IP that falls outside them is very likely the real IP
17 | cdns = [
18 | '223.99.255.0/24', '71.152.0.0/17', '219.153.73.0/24', '125.39.46.0/24', '190.93.240.0/20', '14.0.113.0/24',
19 | '14.0.47.0/24', '113.20.148.0/22', '103.75.201.0/24', '1.32.239.0/24', '101.79.239.0/24', '52.46.0.0/18',
20 | '125.88.189.0/24', '150.138.248.0/24', '180.153.235.0/24', '205.251.252.0/23', '103.1.65.0/24', '115.127.227.0/24',
21 | '14.0.42.0/24', '109.199.58.0/24', '116.211.155.0/24', '112.253.3.0/24', '14.0.58.0/24', '223.112.227.0/24',
22 | '113.20.150.0/23', '61.182.141.0/24', '34.216.51.0/25', '124.95.188.0/24', '42.51.25.0/24', '183.136.133.0/24',
23 | '52.220.191.0/26', '119.84.93.0/24', '182.118.38.0/24', '13.59.250.0/26', '54.178.75.0/24', '119.84.92.0/24',
24 | '183.131.62.0/24', '111.32.136.0/24', '13.124.199.0/24', '111.47.227.0/24', '104.37.177.0/24', '14.0.50.0/24',
25 | '183.230.70.0/24', '114.111.59.0/24', '220.181.135.0/24', '112.140.32.0/19', '101.79.230.0/24', '14.0.115.0/24',
26 | '103.28.248.0/22', '117.34.72.0/24', '109.199.57.0/24', '101.79.149.0/24', '116.128.128.0/24', '115.231.186.0/24',
27 | '103.22.200.0/22', '61.155.165.0/24', '113.20.148.0/23', '185.254.242.0/24', '59.36.120.0/24', '70.132.0.0/18',
28 | '116.31.126.0/24', '119.147.134.0/24', '115.127.246.0/24', '52.47.139.0/24', '118.107.175.0/24', '52.78.247.128/26',
29 | '110.93.176.0/20', '54.240.128.0/18', '46.51.216.0/21', '119.31.251.0/24', '125.39.18.0/24', '108.175.33.0/24',
30 | '1.31.128.0/24', '61.151.163.0/24', '103.95.132.0/24', '58.215.118.0/24', '54.233.255.128/26', '120.52.113.0/24',
31 | '118.107.174.0/24', '1.32.242.0/24', '221.195.34.0/24', '101.79.228.0/24', '205.251.249.0/24', '113.200.91.0/24',
32 | '101.79.146.0/24', '221.238.22.0/24', '134.19.183.0/24', '110.93.160.0/20', '180.97.158.0/24', '115.127.251.0/24',
33 | '119.167.147.0/24', '115.127.238.0/24', '115.127.240.0/22', '14.0.48.0/24', '115.127.240.0/24', '113.7.183.0/24',
34 | '112.140.128.0/20', '115.127.255.0/24', '114.31.36.0/22', '101.79.232.0/24', '218.98.44.0/24', '106.119.182.0/24',
35 | '101.79.167.0/24', '125.39.5.0/24', '58.49.105.0/24', '124.202.164.0/24', '111.177.6.0/24', '61.133.127.0/24',
36 | '185.11.124.0/22', '150.138.150.0/24', '115.127.248.0/24', '103.74.80.0/22', '101.79.166.0/24', '101.71.55.0/24',
37 | '198.41.128.0/17', '117.21.219.0/24', '103.231.170.0/24', '221.204.202.0/24', '101.79.224.0/24', '112.25.16.0/24',
38 | '111.177.3.0/24', '204.246.168.0/22', '103.40.7.0/24', '134.226.0.0/16', '52.15.127.128/26', '122.190.2.0/24',
39 | '101.203.192.0/18', '1.32.238.0/24', '101.79.144.0/24', '176.34.28.0/24', '119.84.15.0/24', '18.216.170.128/25',
40 | '222.88.94.0/24', '101.79.150.0/24', '114.111.48.0/21', '124.95.168.0/24', '114.111.48.0/20', '110.93.176.0/21',
41 | '223.111.127.0/24', '117.23.61.0/24', '140.207.120.0/24', '157.255.26.0/24', '221.204.14.0/24', '183.222.96.0/24',
42 | '104.37.180.0/24', '42.236.93.0/24', '111.63.51.0/24', '114.31.32.0/20', '118.180.50.0/24', '222.240.184.0/24',
43 | '205.251.192.0/19', '101.79.225.0/24', '115.127.228.0/24', '113.20.148.0/24', '61.213.176.0/24', '112.65.75.0/24',
44 | '111.13.147.0/24', '113.20.145.0/24', '103.253.132.0/24', '52.222.128.0/17', '183.203.7.0/24', '27.221.27.0/24',
45 | '103.79.134.0/24', '123.150.187.0/24', '103.15.194.0/24', '162.158.0.0/15', '61.163.30.0/24', '182.140.227.0/24',
46 | '112.25.60.0/24', '117.148.161.0/24', '61.182.136.0/24', '114.31.56.0/22', '64.252.128.0/18', '183.61.185.0/24',
47 | '115.127.250.0/24', '150.138.138.0/24', '13.210.67.128/26', '211.162.64.0/24', '61.174.9.0/24', '14.0.112.0/24',
48 | '52.52.191.128/26', '27.221.124.0/24', '103.4.203.0/24', '103.14.10.0/24', '34.232.163.208/29', '114.31.48.0/20',
49 | '59.51.81.0/24', '183.60.235.0/24', '101.227.206.0/24', '125.39.174.0/24', '119.167.246.0/24', '118.107.160.0/21',
50 | '223.166.151.0/24', '110.93.160.0/19', '204.246.172.0/23', '119.31.253.0/24', '143.204.0.0/16', '14.0.60.0/24',
51 | '123.151.76.0/24', '116.193.80.0/24', '120.241.102.0/24', '180.96.20.0/24', '216.137.32.0/19', '223.94.95.0/24',
52 | '103.4.201.0/24', '14.0.56.0/24', '115.127.234.0/24', '113.20.144.0/23', '103.248.104.0/24', '122.143.15.0/24',
53 | '101.79.229.0/24', '101.79.163.0/24', '104.37.112.0/22', '115.127.253.0/24', '141.101.64.0/18', '113.20.144.0/22',
54 | '101.79.155.0/24', '117.148.160.0/24', '124.193.166.0/24', '109.94.168.0/24', '203.90.247.0/24', '101.79.208.0/21',
55 | '182.118.12.0/24', '114.31.58.0/23', '202.162.109.0/24', '101.79.164.0/24', '58.216.2.0/24', '222.216.190.0/24',
56 | '101.79.165.0/24', '111.6.191.0/24', '1.255.100.0/24', '52.84.0.0/15', '112.65.74.0/24', '183.250.179.0/24',
57 | '101.79.236.0/24', '119.31.252.0/24', '113.20.150.0/24', '60.12.166.0/24', '101.79.234.0/24', '113.17.174.0/24',
58 | '101.79.237.0/24', '61.54.46.0/24', '118.212.233.0/24', '183.110.242.0/24', '150.138.149.0/24', '117.34.13.0/24',
59 | '115.127.245.0/24', '14.0.102.0/24', '14.0.109.0/24', '61.130.28.0/24', '113.20.151.0/24', '219.159.84.0/24',
60 | '114.111.62.0/24', '172.64.0.0/13', '61.155.222.0/24', '120.52.29.0/24', '115.127.231.0/24', '14.0.49.0/24',
61 | '113.202.0.0/16', '103.248.104.0/22', '205.251.250.0/23', '103.216.136.0/22', '118.107.160.0/20', '109.87.0.0/21',
62 | '54.239.128.0/18', '115.127.224.0/19', '111.202.98.0/24', '109.94.169.0/24', '59.38.112.0/24', '204.246.176.0/20',
63 | '123.133.84.0/24', '103.4.200.0/24', '111.161.109.0/24', '112.84.34.0/24', '103.82.129.0/24', '183.3.254.0/24',
64 | '112.137.184.0/21', '122.227.237.0/24', '36.42.75.0/24', '13.35.0.0/16', '101.226.4.0/24', '116.140.35.0/24',
65 | '58.250.143.0/24', '13.54.63.128/26', '205.251.254.0/24', '173.245.48.0/20', '183.61.177.0/24', '113.20.144.0/24',
66 | '104.37.183.0/24', '35.158.136.0/24', '116.211.121.0/24', '42.236.94.0/24', '117.34.91.0/24', '123.6.13.0/24',
67 | '13.224.0.0/14', '113.20.146.0/24', '58.58.81.0/24', '52.124.128.0/17', '122.228.198.0/24', '197.234.240.0/22',
68 | '99.86.0.0/16', '144.220.0.0/16', '119.188.97.0/24', '36.27.212.0/24', '104.37.178.0/24', '114.31.52.0/22',
69 | '218.65.212.0/24', '1.255.41.0/24', '14.0.45.0/24', '1.32.243.0/24', '220.170.185.0/24', '122.190.3.0/24',
70 | '103.79.133.0/24', '220.181.55.0/24', '125.39.191.0/24', '115.127.226.0/24', '125.39.32.0/24', '61.120.154.0/24',
71 | '103.4.202.0/24', '103.79.134.0/23', '115.127.224.0/24', '113.20.147.0/24', '61.156.149.0/24', '210.209.122.0/24',
72 | '115.127.249.0/24', '104.37.179.0/24', '120.52.18.0/24', '54.192.0.0/16', '14.0.55.0/24', '61.160.224.0/24',
73 | '113.207.101.0/24', '101.79.157.0/24', '110.93.128.0/20', '58.251.121.0/24', '61.240.149.0/24', '130.176.0.0/16',
74 | '113.107.238.0/24', '112.65.73.0/24', '103.75.200.0/23', '199.83.128.0/21', '123.129.220.0/24', '54.230.0.0/16',
75 | '114.111.60.0/24', '199.27.128.0/21', '14.0.118.0/24', '101.79.158.0/24', '119.31.248.0/21', '54.182.0.0/16',
76 | '113.31.27.0/24', '14.17.69.0/24', '101.79.145.0/24', '113.20.144.0/21', '180.163.22.0/24', '104.37.176.0/21',
77 | '117.25.156.0/24', '115.127.252.0/24', '115.127.244.0/23', '14.0.46.0/24', '113.207.102.0/24', '52.199.127.192/26',
78 | '13.113.203.0/24', '64.252.64.0/18', '1.32.240.0/24', '123.129.232.0/24', '1.32.241.0/24', '180.163.189.0/24',
79 | '157.255.25.0/24', '1.32.244.0/24', '103.248.106.0/24', '121.48.95.0/24', '54.239.192.0/19', '113.20.146.0/23',
80 | '61.136.173.0/24', '35.162.63.192/26', '117.34.14.0/24', '183.232.29.0/24', '42.81.93.0/24', '122.228.238.0/24',
81 | '183.61.190.0/24', '125.39.239.0/24', '115.127.230.0/24', '103.140.200.0/23', '202.102.85.0/24', '14.0.32.0/21',
82 | '14.0.57.0/24', '112.25.90.0/24', '58.211.137.0/24', '210.22.63.0/24', '34.226.14.0/24', '13.32.0.0/15',
83 | '101.79.156.0/24', '103.89.176.0/24', '14.0.116.0/24', '106.42.25.0/24', '101.79.233.0/24', '101.79.231.0/24',
84 | '103.75.200.0/24', '119.188.9.0/24', '183.232.51.0/24', '149.126.72.0/21', '103.21.244.0/22', '115.127.233.0/24',
85 | '27.221.20.0/24', '198.143.32.0/19', '103.248.107.0/24', '101.79.227.0/24', '115.127.242.0/24', '119.31.250.0/24',
86 | '103.82.130.0/24', '99.84.0.0/16', '222.73.144.0/24', '103.79.132.0/22', '101.79.208.0/20', '104.37.182.0/24',
87 | '101.79.152.0/24', '36.99.18.0/24', '101.71.56.0/24', '36.250.5.0/24', '61.158.240.0/24', '119.188.14.0/24',
88 | '13.249.0.0/16', '183.214.156.0/24', '60.221.236.0/24', '58.30.212.0/24', '115.127.254.0/24', '188.114.96.0/20',
89 | '115.127.241.0/24', '103.4.200.0/22', '115.127.239.0/24', '115.127.243.0/24', '111.32.135.0/24', '120.221.29.0/24',
90 | '115.127.232.0/24', '14.0.43.0/24', '14.0.59.0/24', '183.61.236.0/24', '34.223.12.224/27', '103.24.120.0/24',
91 | '52.57.254.0/24', '113.207.100.0/24', '222.186.19.0/24', '113.20.149.0/24', '150.138.151.0/24', '115.231.110.0/24',
92 | '52.56.127.0/25', '104.37.176.0/24', '163.177.8.0/24', '163.53.89.0/24', '52.82.128.0/19', '114.111.63.0/24',
93 | '108.162.192.0/18', '14.136.130.0/24', '115.127.229.0/24', '14.17.71.0/24', '52.212.248.0/26', '180.163.188.0/24',
94 | '61.182.137.0/24', '119.161.224.0/21', '14.0.41.0/24', '202.162.108.0/24', '106.122.248.0/24', '52.66.194.128/26',
95 | '115.127.237.0/24', '220.170.186.0/24', '14.0.32.0/19', '14.0.114.0/24', '112.90.216.0/24', '115.127.236.0/24',
96 | '116.193.84.0/24', '113.207.76.0/24', '101.79.235.0/24', '101.79.224.0/20', '61.155.149.0/24', '101.79.148.0/24',
97 | '180.163.224.0/24', '204.246.174.0/23', '183.60.136.0/24', '101.227.207.0/24', '103.248.105.0/24',
98 | '119.188.35.0/24', '42.236.7.0/24', '116.193.88.0/21', '116.193.83.0/24', '120.199.69.0/24', '122.226.182.0/24',
99 | '58.20.204.0/24', '110.93.128.0/21', '115.231.187.0/24', '69.28.58.0/24', '114.31.32.0/19', '112.25.91.0/24',
100 | '59.52.28.0/24', '117.27.149.0/24', '61.147.92.0/24', '14.0.117.0/24', '14.0.40.0/24', '119.97.151.0/24',
101 | '103.199.228.0/22', '122.70.134.0/24', '115.127.244.0/24', '223.112.198.0/24', '115.127.225.0/24', '104.16.0.0/12',
102 | '121.12.98.0/24', '103.31.4.0/22', '204.246.164.0/22', '223.94.66.0/24', '35.167.191.128/26', '116.31.127.0/24',
103 | '101.79.226.0/24', '34.195.252.0/24', '115.127.247.0/24', '61.240.144.0/24', '108.175.32.0/20', '120.197.85.0/24',
104 | '183.232.53.0/24', '111.161.66.0/24', '117.34.28.0/24', '45.64.64.0/22', '14.0.44.0/24', '109.86.0.0/15',
105 | '182.23.211.0/24', '58.211.2.0/24', '119.36.164.0/24', '116.55.250.0/24', '101.227.163.0/24', '13.228.69.0/24',
106 | '120.221.136.0/24', '119.188.132.0/24', '115.127.235.0/24', '42.236.6.0/24', '125.88.190.0/24', '61.54.47.0/24',
107 | '103.27.12.0/22', '116.193.80.0/21', '101.79.159.0/24', '123.155.158.0/24', '111.47.226.0/24', '131.0.72.0/22',
108 | '192.230.64.0/18', '218.92.0.0/24'
109 | ]
110 |
111 | ASNS = [
112 | '10576', '10762', '11748', '131099', '132601', '133496', '134409', '135295', '136764', '137187', '13777', '13890',
113 | '14103', '14520', '17132', '199251', '200013', '200325', '200856', '201263', '202294', '203075', '203139', '204248',
114 | '204286', '204545', '206227', '206734', '206848', '206986', '207158', '208559', '209403', '21030', '21257', '23327',
115 | '23393', '23637', '23794', '24997', '26492', '268843', '28709', '29264', '30282', '30637', '328126', '36408',
116 | '38107', '397192', '40366', '43303', '44907', '46071', '46177', '47542', '49287', '49689', '51286', '55082',
117 | '55254', '56636', '57363', '58127', '59730', '59776', '60068', '60626', '60922', '61107', '61159', '62026', '62229',
118 | '63062', '64232', '8868', '9053', '55770', '49846', '49249', '48163', '45700', '43639', '39836', '393560', '393234',
119 | '36183', '35994', '35993', '35204', '34850', '34164', '33905', '32787', '31377', '31110', '31109', '31108', '31107',
120 | '30675', '24319', '23903', '23455', '23454', '22207', '21399', '21357', '21342', '20940', '20189', '18717', '18680',
121 | '17334', '16702', '16625', '12222', '209101', '201585', '135429', '395747', '394536', '209242', '203898', '202623',
122 | '14789', '133877', '13335', '132892', '21859', '6185', '47823'
123 | ]
124 |
125 | cdnDict = {}
126 | with open(r'./cdn-domain.conf', 'rt', encoding='utf-8') as f:
127 | # with open(r'cdn-domain.conf', 'rt') as f:
128 | for eachline in f.readlines():
129 | eachline = eachline.strip()
130 | if '#' in eachline:
131 | cdnName = eachline.replace('#', '')
132 | cdnDict[cdnName] = []
133 | elif eachline:
134 | cdnDict[cdnName].append(eachline)
135 |
136 | # Resolve a domain to its IP addresses
137 | def query_A(subdomain):
138 |     # Resolve the domain to its IPs, e.g. www.xxx.com -> x.x.x.x
139 | ips = []
140 | try:
141 | dns_A_ips = [j for i in dns.resolver.resolve(subdomain, 'A').response.answer for j in i.items]
142 | ips = []
143 | for each_ip in dns_A_ips:
144 | each_ip = str(each_ip)
145 |             if re.compile(r'^((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)$').match(each_ip): # regex check that this is an IPv4 address
146 | ips.append(str(each_ip))
147 | except Exception as e:
148 | pass
149 |
150 | return ips
151 |
152 | # Decide from the IP whether the host is behind a CDN
153 | def ipASNSCheckCDN(subdomain):
154 | ips = query_A(subdomain)
155 |
156 | with geoip2.database.Reader('./GeoLite2-ASN.mmdb') as reader:
157 | # with geoip2.database.Reader('./GeoLite2-ASN.mmdb') as reader:
158 | for ip in ips:
159 |             # Check against known CDN IP ranges
160 |             for cdn in cdns:
161 |                 if ipaddress.ip_address(ip) in ipaddress.ip_network(cdn):
162 |                     return ['CDN IP range', cdn], ips
163 |
164 |             # Check against known CDN ASNs
165 |             try:
166 |                 response = reader.asn(ip)
167 |                 asnsNum = response.autonomous_system_number
168 |                 if str(asnsNum) in ASNS:
169 |                     return ['CDN ASN range', asnsNum], ips
170 | except Exception as e:
171 | pass
172 |
173 | return [], ips
174 |
175 | # Query the CNAME record
176 | def queryCname(subdomain):
177 | try:
178 | cname = dns.resolver.resolve(subdomain, 'CNAME')
179 | for i in cname.response.answer:
180 | for j in i.items:
181 | subdomain_cname = j.to_text()
182 | return subdomain_cname
183 | except Exception as e:
184 | return ''
185 |
186 | # Decide from the CNAME whether the host is behind a CDN
187 | def cnameCheckCDN(subdomain):
188 |     subdomain_cname = queryCname(subdomain)
189 | # print(subdomain_cname)
190 | for cdnName in cdnDict:
191 | cdnDomains = cdnDict[cdnName]
192 | for cdnDomain in cdnDomains:
193 | if cdnDomain in subdomain_cname:
194 |                 # print(subdomain, subdomain_cname, cdnName)
195 | return [cdnName, subdomain_cname]
196 | if 'cdn' in subdomain_cname:
197 | return ['CDN', subdomain_cname]
198 | return False
199 |
200 |
201 | def checkCDN(subdomains_queue, notCDNSubdomains, CDNSubdomainsDict):
202 | while not subdomains_queue.empty():
203 | subdomain = subdomains_queue.get()
204 | cnameRet = cnameCheckCDN(subdomain)
205 | if not cnameRet:
206 | ipASNSRet, ips = ipASNSCheckCDN(subdomain)
207 | if not ipASNSRet:
208 | notCDNSubdomains.append((subdomain,ips))
209 | CDNSubdomainsDict[subdomain] = 'NOT'
210 | else:
211 | logger.info('{}: {}'.format(subdomain, ipASNSRet))
212 | # notCDNSubdomains.append(subdomain)
213 | CDNSubdomainsDict[subdomain] = ipASNSRet
214 | else:
215 | logger.info('{}: {}'.format(subdomain, cnameRet))
216 | # notCDNSubdomains.append(subdomain)
217 | CDNSubdomainsDict[subdomain] = cnameRet
218 |
219 |
220 | def run_checkCDN(subdomains):
221 |     query_A_threads = [] # worker threads
222 | subdomains_queue = Queue(-1)
223 |
224 | for subdomain in subdomains:
225 | subdomains_queue.put(subdomain)
226 |
227 |     # subdomains that are not behind a CDN
228 |     notCDNSubdomains = []
229 |     # CDN detection result for each subdomain
230 | CDNSubdomainsDict = {}
231 |
232 |     for t_id in range(50): # 50 worker threads to resolve A records and run the CDN checks
233 | t = Thread(target=checkCDN, args=(subdomains_queue, notCDNSubdomains, CDNSubdomainsDict))
234 | query_A_threads.append(t)
235 | t.start()
236 | for t in query_A_threads:
237 | t.join()
238 |
239 | # print()
240 | return notCDNSubdomains, CDNSubdomainsDict
241 |
242 |
243 | if __name__ == "__main__":
244 | subdomains = ['www.tjut.edu.cn']
245 | notCDNSubdomains, CDNSubdomainsDict = run_checkCDN(subdomains)
246 | print(CDNSubdomainsDict)
247 |
248 |
--------------------------------------------------------------------------------
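A minimal illustration of the CIDR membership test used in ipASNSCheckCDN above, with a made-up IP and two ranges taken from the cdns list:

import ipaddress

cdn_ranges = ['104.16.0.0/12', '13.32.0.0/15']   # two entries from the cdns list above
ip = '104.16.132.229'                            # example IP, for illustration only

for cidr in cdn_ranges:
    if ipaddress.ip_address(ip) in ipaddress.ip_network(cidr):
        print('CDN IP range hit:', cidr)         # this IP falls inside 104.16.0.0/12
        break
else:
    print('not inside any known CDN range')
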
/Plugins/InfoSearch/Subdomain/Spider/Google/googlesearch/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Python bindings to the Google search engine
4 | # Copyright (c) 2009-2018, Mario Vilas
5 | # All rights reserved.
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # * Redistributions of source code must retain the above copyright notice,
11 | # this list of conditions and the following disclaimer.
12 | # * Redistributions in binary form must reproduce the above copyright
13 | # notice,this list of conditions and the following disclaimer in the
14 | # documentation and/or other materials provided with the distribution.
15 | # * Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 | # POSSIBILITY OF SUCH DAMAGE.
30 |
31 | import os
32 | import random
33 | import sys
34 | import time
35 | import math
36 | from urllib.error import HTTPError
37 |
38 | if sys.version_info[0] > 2:
39 | from http.cookiejar import LWPCookieJar
40 | from urllib.request import Request, urlopen
41 | from urllib.parse import quote_plus, urlparse, parse_qs
42 | else:
43 | from cookielib import LWPCookieJar
44 | from urllib import quote_plus
45 | from urllib2 import Request, urlopen
46 | from urlparse import urlparse, parse_qs
47 |
48 | try:
49 | from bs4 import BeautifulSoup
50 | is_bs4 = True
51 | except ImportError:
52 | from BeautifulSoup import BeautifulSoup
53 | is_bs4 = False
54 |
55 | __all__ = [
56 |
57 | # Main search function.
58 | 'search',
59 |
60 | # Specialized search functions.
61 | 'search_images', 'search_news',
62 | 'search_videos', 'search_shop',
63 | 'search_books', 'search_apps',
64 |
65 | # Shortcut for "get lucky" search.
66 | 'lucky',
67 |
68 | # Computations based on the number of Google hits.
69 | 'hits', 'ngd',
70 |
71 | # Miscellaneous utility functions.
72 | 'get_random_user_agent',
73 | ]
74 |
75 | # URL templates to make Google searches.
76 | url_home = "https://www.google.%(tld)s/"
77 | url_search = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
78 | "btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s"
79 | url_next_page = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
80 | "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s"
81 | url_search_num = "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&" \
82 | "num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&" \
83 | "tbm=%(tpe)s"
84 | url_next_page_num = "https://www.google.%(tld)s/search?hl=%(lang)s&" \
85 | "q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&" \
86 | "safe=%(safe)s&tbm=%(tpe)s"
87 |
88 | # Cookie jar. Stored at the user's home folder.
89 | home_folder = os.getenv('HOME')
90 | if not home_folder:
91 | home_folder = os.getenv('USERHOME')
92 | if not home_folder:
93 | home_folder = '.' # Use the current folder on error.
94 | cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
95 | try:
96 | cookie_jar.load()
97 | except Exception:
98 | pass
99 |
100 | # Default user agent, unless instructed by the user to change it.
101 | USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)'
102 |
103 | # Load the list of valid user agents from the install folder.
104 | try:
105 | install_folder = os.path.abspath(os.path.split(__file__)[0])
106 | try:
107 | user_agents_file = os.path.join(install_folder, 'user_agents.txt.gz')
108 | import gzip
109 | fp = gzip.open(user_agents_file, 'rb')
110 | try:
111 | user_agents_list = [_.strip() for _ in fp.readlines()]
112 | finally:
113 | fp.close()
114 | del fp
115 | except Exception:
116 | user_agents_file = os.path.join(install_folder, 'user_agents.txt')
117 | with open(user_agents_file) as fp:
118 | user_agents_list = [_.strip() for _ in fp.readlines()]
119 | except Exception:
120 | user_agents_list = [USER_AGENT]
121 |
122 |
123 | # Get a random user agent.
124 | def get_random_user_agent():
125 | """
126 | Get a random user agent string.
127 |
128 | :rtype: str
129 | :return: Random user agent string.
130 | """
131 | return random.choice(user_agents_list)
132 |
133 |
134 | # Request the given URL and return the response page, using the cookie jar.
135 | def get_page(url, user_agent=None):
136 | """
137 | Request the given URL and return the response page, using the cookie jar.
138 |
139 | :param str url: URL to retrieve.
140 | :param str user_agent: User agent for the HTTP requests.
141 | Use None for the default.
142 |
143 | :rtype: str
144 | :return: Web page retrieved for the given URL.
145 |
146 | :raises IOError: An exception is raised on error.
147 | :raises urllib2.URLError: An exception is raised on error.
148 | :raises urllib2.HTTPError: An exception is raised on error.
149 | """
150 | if user_agent is None:
151 | user_agent = USER_AGENT
152 | request = Request(url)
153 |     request.add_header('User-Agent', user_agent)
154 | cookie_jar.add_cookie_header(request)
155 | response = urlopen(request)
156 | cookie_jar.extract_cookies(response, request)
157 | html = response.read()
158 | response.close()
159 | try:
160 | cookie_jar.save()
161 | except Exception:
162 | pass
163 | return html
164 |
165 |
166 | # Filter links found in the Google result pages HTML code.
167 | # Returns None if the link doesn't yield a valid result.
168 | def filter_result(link):
169 | try:
170 |
171 | # Valid results are absolute URLs not pointing to a Google domain
172 | # like images.google.com or googleusercontent.com
173 | o = urlparse(link, 'http')
174 | if o.netloc and 'google' not in o.netloc:
175 | return link
176 |
177 | # Decode hidden URLs.
178 | if link.startswith('/url?'):
179 | link = parse_qs(o.query)['q'][0]
180 |
181 | # Valid results are absolute URLs not pointing to a Google domain
182 | # like images.google.com or googleusercontent.com
183 | o = urlparse(link, 'http')
184 | if o.netloc and 'google' not in o.netloc:
185 | return link
186 |
187 | # Otherwise, or on error, return None.
188 | except Exception:
189 | pass
190 | return None
191 |
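# A hedged illustration of the filtering above (made-up inputs):
#   filter_result('/url?q=https://example.com/&sa=U')   ->  'https://example.com/'  (hidden URL decoded)
#   filter_result('https://images.google.com/imgres?x') ->  None                    (Google domain, dropped)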
192 |
193 | # Returns a generator that yields URLs.
194 | def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
195 | stop=None, domains=None, pause=2.0, only_standard=False,
196 | extra_params={}, tpe='', user_agent=None):
197 | """
198 | Search the given query string using Google.
199 |
200 | :param str query: Query string. Must NOT be url-encoded.
201 | :param str tld: Top level domain.
202 | :param str lang: Language.
203 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
204 | "qdr:d" => last 24 hours, "qdr:m" => last month).
205 | :param str safe: Safe search.
206 | :param int num: Number of results per page.
207 | :param int start: First result to retrieve.
208 | :param int or None stop: Last result to retrieve.
209 | Use None to keep searching forever.
210 | :param list of str or None domains: A list of web domains to constrain
211 | the search.
212 | :param float pause: Lapse to wait between HTTP requests.
213 | A lapse too long will make the search slow, but a lapse too short may
214 | cause Google to block your IP. Your mileage may vary!
215 | :param bool only_standard: If True, only returns the standard results from
216 | each page. If False, it returns every possible link from each page,
217 | except for those that point back to Google itself. Defaults to False
218 | for backwards compatibility with older versions of this module.
219 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
220 | parameters, which must be URL encoded. For example if you don't want
221 | Google to filter similar results you can set the extra_params to
222 | {'filter': '0'} which will append '&filter=0' to every query.
223 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
224 | Use the following values {videos: 'vid', images: 'isch',
225 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
226 | :param str or None user_agent: User agent for the HTTP requests.
227 | Use None for the default.
228 |
229 | :rtype: generator of str
230 | :return: Generator (iterator) that yields found URLs.
231 | If the stop parameter is None the iterator will loop forever.
232 | """
233 | # Set of hashes for the results found.
234 | # This is used to avoid repeated results.
235 | hashes = set()
236 |
237 | # Count the number of links yielded
238 | count = 0
239 |
240 | # Prepare domain list if it exists.
241 | if domains:
242 | query = query + ' ' + ' OR '.join(
243 | 'site:' + domain for domain in domains)
244 |
245 | # Prepare the search string.
246 | query = quote_plus(query)
247 |
248 | # Check extra_params for overlapping
249 | for builtin_param in ('hl', 'q', 'btnG', 'tbs', 'safe', 'tbm'):
250 | if builtin_param in extra_params.keys():
251 |             raise ValueError(
252 |                 'GET parameter "%s" is overlapping with '
253 |                 'the built-in GET parameter'
254 |                 % builtin_param
255 |             )
256 |
257 | # Grab the cookie from the home page.
258 | get_page(url_home % vars())
259 |
260 | # Prepare the URL of the first request.
261 | if start:
262 | if num == 10:
263 | url = url_next_page % vars()
264 | else:
265 | url = url_next_page_num % vars()
266 | else:
267 | if num == 10:
268 | url = url_search % vars()
269 | else:
270 | url = url_search_num % vars()
271 | print('\tgoogle search : {}'.format(url))
272 | # Loop until we reach the maximum result, if any (otherwise, loop forever).
273 | while not stop or start < stop:
274 |
275 | try: # Is it python<3?
276 | iter_extra_params = extra_params.iteritems()
277 | except AttributeError: # Or python>3?
278 | iter_extra_params = extra_params.items()
279 | # Append extra GET_parameters to URL
280 | for k, v in iter_extra_params:
281 |                 url += '&%s=%s' % (k, v)
282 |
283 | # Sleep between requests.
284 | time.sleep(pause)
285 |
286 | # Request the Google Search results page.
287 | # html = get_page(url)
288 | try:
289 | html = get_page(url)
290 | except HTTPError:
291 | print('\t[!] Error: Google probably now is blocking our requests.\n [-] Stop Google Search!')
292 | return False
293 |
294 | # Parse the response and process every anchored URL.
295 | if is_bs4:
296 | soup = BeautifulSoup(html, 'html.parser')
297 | else:
298 | soup = BeautifulSoup(html)
299 | anchors = soup.find(id='search').findAll('a')
300 | for a in anchors:
301 |
302 | # Leave only the "standard" results if requested.
303 | # Otherwise grab all possible links.
304 | if only_standard and (
305 | not a.parent or a.parent.name.lower() != "h3"):
306 | continue
307 |
308 | # Get the URL from the anchor tag.
309 | try:
310 | link = a['href']
311 | except KeyError:
312 | continue
313 |
314 | # Filter invalid links and links pointing to Google itself.
315 | link = filter_result(link)
316 | if not link:
317 | continue
318 |
319 | # Discard repeated results.
320 | h = hash(link)
321 | if h in hashes:
322 | continue
323 | hashes.add(h)
324 |
325 | # Yield the result.
326 | yield link
327 |
328 | count += 1
329 | if stop and count >= stop:
330 | return
331 |
332 | # End if there are no more results.
333 | if not soup.find(id='nav'):
334 | break
335 |
336 | # Prepare the URL for the next request.
337 | start += num
338 | if num == 10:
339 | url = url_next_page % vars()
340 | else:
341 | url = url_next_page_num % vars()
342 |
343 |
344 | # Shortcut to search images.
345 | # Beware, this does not return the image link.
346 | def search_images(query, tld='com', lang='en', tbs='0', safe='off', num=10,
347 | start=0, stop=None, pause=2.0, domains=None,
348 | only_standard=False, extra_params={}):
349 | """
350 | Shortcut to search images.
351 |
352 | :note: Beware, this does not return the image link.
353 |
354 | :param str query: Query string. Must NOT be url-encoded.
355 | :param str tld: Top level domain.
356 | :param str lang: Language.
357 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
358 | "qdr:d" => last 24 hours, "qdr:m" => last month).
359 | :param str safe: Safe search.
360 | :param int num: Number of results per page.
361 | :param int start: First result to retrieve.
362 | :param int or None stop: Last result to retrieve.
363 | Use None to keep searching forever.
364 | :param list of str or None domains: A list of web domains to constrain
365 | the search.
366 | :param float pause: Lapse to wait between HTTP requests.
367 | A lapse too long will make the search slow, but a lapse too short may
368 | cause Google to block your IP. Your mileage may vary!
369 | :param bool only_standard: If True, only returns the standard results from
370 | each page. If False, it returns every possible link from each page,
371 | except for those that point back to Google itself. Defaults to False
372 | for backwards compatibility with older versions of this module.
373 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
374 | parameters, which must be URL encoded. For example if you don't want
375 | Google to filter similar results you can set the extra_params to
376 | {'filter': '0'} which will append '&filter=0' to every query.
377 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
378 | Use the following values {videos: 'vid', images: 'isch',
379 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
380 | :param str or None user_agent: User agent for the HTTP requests.
381 | Use None for the default.
382 |
383 | :rtype: generator of str
384 | :return: Generator (iterator) that yields found URLs.
385 | If the stop parameter is None the iterator will loop forever.
386 | """
387 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
388 | pause, only_standard, extra_params, tpe='isch')
389 |
390 |
391 | # Shortcut to search news.
392 | def search_news(query, tld='com', lang='en', tbs='0', safe='off', num=10,
393 | start=0, stop=None, domains=None, pause=2.0,
394 | only_standard=False, extra_params={}):
395 | """
396 | Shortcut to search news.
397 |
398 | :param str query: Query string. Must NOT be url-encoded.
399 | :param str tld: Top level domain.
400 | :param str lang: Language.
401 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
402 | "qdr:d" => last 24 hours, "qdr:m" => last month).
403 | :param str safe: Safe search.
404 | :param int num: Number of results per page.
405 | :param int start: First result to retrieve.
406 | :param int or None stop: Last result to retrieve.
407 | Use None to keep searching forever.
408 | :param list of str or None domains: A list of web domains to constrain
409 | the search.
410 | :param float pause: Lapse to wait between HTTP requests.
411 | A lapse too long will make the search slow, but a lapse too short may
412 | cause Google to block your IP. Your mileage may vary!
413 | :param bool only_standard: If True, only returns the standard results from
414 | each page. If False, it returns every possible link from each page,
415 | except for those that point back to Google itself. Defaults to False
416 | for backwards compatibility with older versions of this module.
417 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
418 | parameters, which must be URL encoded. For example if you don't want
419 | Google to filter similar results you can set the extra_params to
420 | {'filter': '0'} which will append '&filter=0' to every query.
421 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
422 | Use the following values {videos: 'vid', images: 'isch',
423 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
424 | :param str or None user_agent: User agent for the HTTP requests.
425 | Use None for the default.
426 |
427 | :rtype: generator of str
428 | :return: Generator (iterator) that yields found URLs.
429 | If the stop parameter is None the iterator will loop forever.
430 | """
431 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
432 | pause, only_standard, extra_params, tpe='nws')
433 |
434 |
435 | # Shortcut to search videos.
436 | def search_videos(query, tld='com', lang='en', tbs='0', safe='off', num=10,
437 | start=0, stop=None, domains=None, pause=2.0,
438 | only_standard=False, extra_params={}):
439 | """
440 | Shortcut to search videos.
441 |
442 | :param str query: Query string. Must NOT be url-encoded.
443 | :param str tld: Top level domain.
444 | :param str lang: Language.
445 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
446 | "qdr:d" => last 24 hours, "qdr:m" => last month).
447 | :param str safe: Safe search.
448 | :param int num: Number of results per page.
449 | :param int start: First result to retrieve.
450 | :param int or None stop: Last result to retrieve.
451 | Use None to keep searching forever.
452 | :param list of str or None domains: A list of web domains to constrain
453 | the search.
454 | :param float pause: Lapse to wait between HTTP requests.
455 | A lapse too long will make the search slow, but a lapse too short may
456 | cause Google to block your IP. Your mileage may vary!
457 | :param bool only_standard: If True, only returns the standard results from
458 | each page. If False, it returns every possible link from each page,
459 | except for those that point back to Google itself. Defaults to False
460 | for backwards compatibility with older versions of this module.
461 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
462 | parameters, which must be URL encoded. For example if you don't want
463 | Google to filter similar results you can set the extra_params to
464 | {'filter': '0'} which will append '&filter=0' to every query.
465 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
466 | Use the following values {videos: 'vid', images: 'isch',
467 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
468 | :param str or None user_agent: User agent for the HTTP requests.
469 | Use None for the default.
470 |
471 | :rtype: generator of str
472 | :return: Generator (iterator) that yields found URLs.
473 | If the stop parameter is None the iterator will loop forever.
474 | """
475 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
476 | pause, only_standard, extra_params, tpe='vid')
477 |
478 |
479 | # Shortcut to search shop.
480 | def search_shop(query, tld='com', lang='en', tbs='0', safe='off', num=10,
481 | start=0, stop=None, domains=None, pause=2.0,
482 | only_standard=False, extra_params={}):
483 | """
484 | Shortcut to search shop.
485 |
486 | :param str query: Query string. Must NOT be url-encoded.
487 | :param str tld: Top level domain.
488 | :param str lang: Language.
489 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
490 | "qdr:d" => last 24 hours, "qdr:m" => last month).
491 | :param str safe: Safe search.
492 | :param int num: Number of results per page.
493 | :param int start: First result to retrieve.
494 | :param int or None stop: Last result to retrieve.
495 | Use None to keep searching forever.
496 | :param list of str or None domains: A list of web domains to constrain
497 | the search.
498 | :param float pause: Lapse to wait between HTTP requests.
499 | A lapse too long will make the search slow, but a lapse too short may
500 | cause Google to block your IP. Your mileage may vary!
501 | :param bool only_standard: If True, only returns the standard results from
502 | each page. If False, it returns every possible link from each page,
503 | except for those that point back to Google itself. Defaults to False
504 | for backwards compatibility with older versions of this module.
505 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
506 | parameters, which must be URL encoded. For example if you don't want
507 | Google to filter similar results you can set the extra_params to
508 | {'filter': '0'} which will append '&filter=0' to every query.
509 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
510 | Use the following values {videos: 'vid', images: 'isch',
511 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
512 | :param str or None user_agent: User agent for the HTTP requests.
513 | Use None for the default.
514 |
515 | :rtype: generator of str
516 | :return: Generator (iterator) that yields found URLs.
517 | If the stop parameter is None the iterator will loop forever.
518 | """
519 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
520 | pause, only_standard, extra_params, tpe='shop')
521 |
522 |
523 | # Shortcut to search books.
524 | def search_books(query, tld='com', lang='en', tbs='0', safe='off', num=10,
525 | start=0, stop=None, domains=None, pause=2.0,
526 | only_standard=False, extra_params={}):
527 | """
528 | Shortcut to search books.
529 |
530 | :param str query: Query string. Must NOT be url-encoded.
531 | :param str tld: Top level domain.
532 | :param str lang: Language.
533 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
534 | "qdr:d" => last 24 hours, "qdr:m" => last month).
535 | :param str safe: Safe search.
536 | :param int num: Number of results per page.
537 | :param int start: First result to retrieve.
538 | :param int or None stop: Last result to retrieve.
539 | Use None to keep searching forever.
540 | :param list of str or None domains: A list of web domains to constrain
541 | the search.
542 | :param float pause: Lapse to wait between HTTP requests.
543 | A lapse too long will make the search slow, but a lapse too short may
544 | cause Google to block your IP. Your mileage may vary!
545 | :param bool only_standard: If True, only returns the standard results from
546 | each page. If False, it returns every possible link from each page,
547 | except for those that point back to Google itself. Defaults to False
548 | for backwards compatibility with older versions of this module.
549 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
550 | parameters, which must be URL encoded. For example if you don't want
551 | Google to filter similar results you can set the extra_params to
552 | {'filter': '0'} which will append '&filter=0' to every query.
553 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
554 | Use the following values {videos: 'vid', images: 'isch',
555 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
556 | :param str or None user_agent: User agent for the HTTP requests.
557 | Use None for the default.
558 |
559 | :rtype: generator of str
560 | :return: Generator (iterator) that yields found URLs.
561 | If the stop parameter is None the iterator will loop forever.
562 | """
563 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
564 | pause, only_standard, extra_params, tpe='bks')
565 |
566 |
567 | # Shortcut to search apps.
568 | def search_apps(query, tld='com', lang='en', tbs='0', safe='off', num=10,
569 | start=0, stop=None, domains=None, pause=2.0,
570 | only_standard=False, extra_params={}):
571 | """
572 | Shortcut to search apps.
573 |
574 | :param str query: Query string. Must NOT be url-encoded.
575 | :param str tld: Top level domain.
576 | :param str lang: Language.
577 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
578 | "qdr:d" => last 24 hours, "qdr:m" => last month).
579 | :param str safe: Safe search.
580 | :param int num: Number of results per page.
581 | :param int start: First result to retrieve.
582 | :param int or None stop: Last result to retrieve.
583 | Use None to keep searching forever.
584 | :param list of str or None domains: A list of web domains to constrain
585 | the search.
586 | :param float pause: Lapse to wait between HTTP requests.
587 | A lapse too long will make the search slow, but a lapse too short may
588 | cause Google to block your IP. Your mileage may vary!
589 | :param bool only_standard: If True, only returns the standard results from
590 | each page. If False, it returns every possible link from each page,
591 | except for those that point back to Google itself. Defaults to False
592 | for backwards compatibility with older versions of this module.
593 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
594 | parameters, which must be URL encoded. For example if you don't want
595 | Google to filter similar results you can set the extra_params to
596 | {'filter': '0'} which will append '&filter=0' to every query.
597 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
598 | Use the following values {videos: 'vid', images: 'isch',
599 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
600 | :param str or None user_agent: User agent for the HTTP requests.
601 | Use None for the default.
602 |
603 | :rtype: generator of str
604 | :return: Generator (iterator) that yields found URLs.
605 | If the stop parameter is None the iterator will loop forever.
606 | """
607 | return search(query, tld, lang, tbs, safe, num, start, stop, domains,
608 | pause, only_standard, extra_params, tpe='app')
609 |
610 |
611 | # Shortcut to single-item search.
612 | # Evaluates the iterator to return the single URL as a string.
613 | def lucky(query, tld='com', lang='en', tbs='0', safe='off',
614 | only_standard=False, extra_params={}, tpe=''):
615 | """
616 | Shortcut to single-item search.
617 |
618 | :param str query: Query string. Must NOT be url-encoded.
619 | :param str tld: Top level domain.
620 | :param str lang: Language.
621 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
622 | "qdr:d" => last 24 hours, "qdr:m" => last month).
623 | :param str safe: Safe search.
633 | :param bool only_standard: If True, only returns the standard results from
634 | each page. If False, it returns every possible link from each page,
635 | except for those that point back to Google itself. Defaults to False
636 | for backwards compatibility with older versions of this module.
637 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
638 | parameters, which must be URL encoded. For example if you don't want
639 | Google to filter similar results you can set the extra_params to
640 | {'filter': '0'} which will append '&filter=0' to every query.
641 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
642 | Use the following values {videos: 'vid', images: 'isch',
643 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
646 |
647 | :rtype: str
648 | :return: URL found by Google.
649 | """
650 |     gen = search(query, tld, lang, tbs, safe, 1, 0, 1, None, 0.,
651 |                  only_standard, extra_params, tpe)
652 | return next(gen)
653 |
654 |
655 | # Returns only the number of Google hits for the given search query.
656 | # This is the number reported by Google itself, NOT by scraping.
657 | def hits(query, tld='com', lang='en', tbs='0', safe='off',
658 | domains=None, extra_params={}, tpe='', user_agent=None):
659 | """
660 | Search the given query string using Google and return the number of hits.
661 |
662 | :note: This is the number reported by Google itself, NOT by scraping.
663 |
664 | :param str query: Query string. Must NOT be url-encoded.
665 | :param str tld: Top level domain.
666 | :param str lang: Language.
667 | :param str tbs: Time limits (i.e "qdr:h" => last hour,
668 | "qdr:d" => last 24 hours, "qdr:m" => last month).
669 | :param str safe: Safe search.
674 | :param list of str or None domains: A list of web domains to constrain
675 | the search.
683 | :param dict of str to str extra_params: A dictionary of extra HTTP GET
684 | parameters, which must be URL encoded. For example if you don't want
685 | Google to filter similar results you can set the extra_params to
686 | {'filter': '0'} which will append '&filter=0' to every query.
687 | :param str tpe: Search type (images, videos, news, shopping, books, apps)
688 | Use the following values {videos: 'vid', images: 'isch',
689 | news: 'nws', shopping: 'shop', books: 'bks', applications: 'app'}
690 | :param str or None user_agent: User agent for the HTTP requests.
691 | Use None for the default.
692 |
693 | :rtype: int
694 | :return: Number of Google hits for the given search query.
695 | """
696 |
697 | # Prepare domain list if it exists.
698 | if domains:
699 | domain_query = '+OR+'.join('site:' + domain for domain in domains)
700 | domain_query = '+' + domain_query
701 | else:
702 | domain_query = ''
703 |
704 | # Prepare the search string.
705 | query = quote_plus(query + domain_query)
706 |
707 | # Check extra_params for overlapping
708 | for builtin_param in ('hl', 'q', 'btnG', 'tbs', 'safe', 'tbm'):
709 | if builtin_param in extra_params.keys():
710 |             raise ValueError(
711 |                 'GET parameter "%s" is overlapping with '
712 |                 'the built-in GET parameter'
713 |                 % builtin_param
714 |             )
715 |
716 | # Grab the cookie from the home page.
717 | get_page(url_home % vars())
718 |
719 |     # Prepare the URL of the first (and in this case ONLY) request.
720 | url = url_search % vars()
721 |
722 |     try:  # Python 2?
723 |         iter_extra_params = extra_params.iteritems()
724 |     except AttributeError:  # Python 3?
725 |         iter_extra_params = extra_params.items()
726 |     # Append the extra GET parameters to the URL.
727 |     for k, v in iter_extra_params:
728 |         url += '&%s=%s' % (k, v)
729 |
730 | # Request the Google Search results page.
731 | html = get_page(url)
732 |
733 | # Parse the response.
734 | if is_bs4:
735 | soup = BeautifulSoup(html, 'html.parser')
736 | else:
737 | soup = BeautifulSoup(html)
738 |
739 | # Get the number of hits.
740 | tag = soup.find_all(attrs={"class": "sd", "id": "resultStats"})[0]
741 | hits_text_parts = tag.text.split()
742 | if len(hits_text_parts) < 3:
743 | return 0
744 | return int(hits_text_parts[1].replace(',', '').replace('.', ''))
745 |
746 |
747 | def ngd(term1, term2):
748 | """
749 | Return the Normalized Google distance between words.
750 |
751 | For more info, refer to:
752 | https://en.wikipedia.org/wiki/Normalized_Google_distance
753 |
754 | :param str term1: First term to compare.
755 | :param str term2: Second term to compare.
756 |
757 | :rtype: float
758 | :return: Normalized Google distance between words.
759 | """
760 |
761 | lhits1 = math.log10(hits(term1))
762 | lhits2 = math.log10(hits(term2))
763 | lhits_mix = math.log10(hits('"' + term1 + '" "' + term2 + '"'))
764 | npages = hits('the')
765 | fix = 1000
766 |
767 | lN = math.log10(npages * fix)
768 | numerator = max([lhits1, lhits2]) - lhits_mix
769 | denomin = lN - min([lhits1, lhits2])
770 |
771 | return numerator / denomin
772 |
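773 |
774 | # Minimal usage sketch of the helpers above. Illustrative only: the query
775 | # strings are arbitrary placeholders, and it assumes the keyword defaults
776 | # (stop, pause) documented in the docstrings above are available on search().
777 | if __name__ == '__main__':
778 |     # Print the first ten result URLs for an arbitrary query.
779 |     for result_url in search('site:example.com', stop=10, pause=2.0):
780 |         print(result_url)
781 |
782 |     # Reported hit count and the Normalized Google distance, i.e.
783 |     #   NGD(x, y) = (max(log f(x), log f(y)) - log f(x, y))
784 |     #               / (log N - min(log f(x), log f(y)))
785 |     print(hits('python'))
786 |     print(ngd('python', 'snake'))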
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD-0.0.29.dist-info/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 |     along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
--------------------------------------------------------------------------------
/Plugins/InfoSearch/Subdomain/ESD/ESD/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | ESD
6 | ~~~
7 |
8 | Implements sub-domain enumeration
9 |
10 | :author: Feei
11 | :homepage: https://github.com/FeeiCN/ESD
12 | :license: GPL, see LICENSE for more details.
13 | :copyright: Copyright (c) 2018 Feei. All rights reserved
14 | """
15 | import os
16 | import re
17 | import time
18 | import ssl
19 | import math
20 | import string
21 | import random
22 | import traceback
23 | import itertools
24 | import datetime
25 | import colorlog
26 | import asyncio
27 | import aiodns
28 | import aiohttp
29 | import logging
30 | import requests
31 | import backoff
32 | import socket
33 | import async_timeout
34 | import dns.query
35 | import dns.zone
36 | import dns.resolver
37 | from tqdm import tqdm
38 | from colorama import Fore
39 | from optparse import OptionParser
40 | from aiohttp.resolver import AsyncResolver
41 | from itertools import islice
42 | from difflib import SequenceMatcher
43 |
44 | __version__ = '0.0.29'
45 |
46 | handler = colorlog.StreamHandler()
47 | formatter = colorlog.ColoredFormatter(
48 | '%(log_color)s%(asctime)s [%(name)s] [%(levelname)s] %(message)s%(reset)s',
49 | datefmt=None,
50 | reset=True,
51 | log_colors={
52 | 'DEBUG': 'cyan',
53 | 'INFO': 'green',
54 | 'WARNING': 'yellow',
55 | 'ERROR': 'red',
56 | 'CRITICAL': 'red,bg_white',
57 | },
58 | secondary_log_colors={},
59 | style='%'
60 | )
61 | handler.setFormatter(formatter)
62 |
63 | logger = colorlog.getLogger('ESD')
64 | logger.addHandler(handler)
65 | logger.setLevel(logging.INFO)
66 |
67 | ssl.match_hostname = lambda cert, hostname: True
68 |
69 |
70 | # Only recursion is used, which is very slow; not recommended to enable until it has been optimized
71 | # TODO: optimize the DNS queries, the recursion is too slow
72 | class DNSQuery(object):
73 | def __init__(self, root_domain, subs, suffix):
74 | # root domain
75 | self.suffix = suffix
76 | self.sub_domains = []
77 | if root_domain:
78 | self.sub_domains.append(root_domain)
79 |
80 | for sub in subs:
81 | sub = ''.join(sub.rsplit(suffix, 1)).rstrip('.')
82 | self.sub_domains.append('{sub}.{domain}'.format(sub=sub, domain=suffix))
83 |
84 | def dns_query(self):
85 | """
86 | soa,txt,mx,aaaa
87 | :param sub:
88 | :return:
89 | """
90 | final_list = []
91 | for subdomain in self.sub_domains:
92 | try:
93 | soa = []
94 | q_soa = dns.resolver.resolve(subdomain, 'SOA')
95 | for a in q_soa:
96 | soa.append(str(a.rname).strip('.'))
97 | soa.append(str(a.mname).strip('.'))
98 | except Exception as e:
99 | logger.warning('Query failed. {e}'.format(e=str(e)))
100 | try:
101 | aaaa = []
102 | q_aaaa = dns.resolver.resolve(subdomain, 'AAAA')
103 | aaaa = [str(a.address).strip('.') for a in q_aaaa]
104 | except Exception as e:
105 | logger.warning('Query failed. {e}'.format(e=str(e)))
106 | try:
107 | txt = []
108 | q_txt = dns.resolver.resolve(subdomain, 'TXT')
109 | txt = [t.strings[0].decode('utf-8').strip('.') for t in q_txt]
110 | except Exception as e:
111 | logger.warning('Query failed. {e}'.format(e=str(e)))
112 | try:
113 | mx = []
114 | q_mx = dns.resolver.resolve(subdomain, 'MX')
115 | mx = [str(m.exchange).strip('.') for m in q_mx]
116 | except Exception as e:
117 | logger.warning('Query failed. {e}'.format(e=str(e)))
118 | domain_set = soa + aaaa + txt + mx
119 | domain_list = [i for i in domain_set]
120 | for p in domain_set:
121 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\.?)$', p)
122 | if len(re_domain) > 0 and subdomain in re_domain[0][0]:
123 | continue
124 | else:
125 | domain_list.remove(p)
126 | final_list = domain_list + final_list
127 | # Recursive call: look for new subdomains in the DNS records of the discovered subdomains
128 | recursive = []
129 | # print("before: {0}".format(final_list))
130 | # print("self.sub_domain: {0}".format(self.sub_domains))
131 | final_list = list(set(final_list).difference(set(self.sub_domains)))
132 | # print("after: {0}".format(final_list))
133 | if final_list:
134 | d = DNSQuery('', final_list, self.suffix)
135 | recursive = d.dns_query()
136 | return final_list + recursive
137 |
138 |
139 | class DNSTransfer(object):
140 | def __init__(self, domain):
141 | self.domain = domain
142 |
143 | def transfer_info(self):
144 | ret_zones = list()
145 | try:
146 | nss = dns.resolver.resolve(self.domain, 'NS')
147 | nameservers = [str(ns) for ns in nss]
148 | ns_addr = dns.resolver.resolve(nameservers[0], 'A')
149 | # dnspython bug: the lifetime parameter needs to be set; pass the nameserver address to xfr() as a string
150 | zones = dns.zone.from_xfr(dns.query.xfr(str(ns_addr[0]), self.domain, relativize=False, timeout=2, lifetime=2),
151 | check_origin=False)
152 | names = zones.nodes.keys()
153 | for n in names:
154 | subdomain = ''
155 | for t in range(0, len(n) - 1):
156 | if subdomain != '':
157 | subdomain += '.'
158 | subdomain += str(n[t].decode())
159 | if subdomain != self.domain:
160 | ret_zones.append(subdomain)
161 | return ret_zones
162 | except BaseException:
163 | return []
164 |
165 |
166 | class CAInfo(object):
167 | def __init__(self, domain):
168 | self.domain = domain
169 |
170 | def dns_resolve(self):
171 | padding_domain = 'www.' + self.domain
172 | # loop = asyncio.get_event_loop()
173 | loop = asyncio.new_event_loop()
174 | asyncio.set_event_loop(loop)
175 | resolver = aiodns.DNSResolver(loop=loop)
176 | f = resolver.query(padding_domain, 'A')
177 | result = loop.run_until_complete(f)
178 | return result[0].host
179 |
180 | def get_cert_info_by_ip(self, ip):
181 | s = socket.socket()
182 | s.settimeout(2)
183 | base_dir = os.path.dirname(os.path.abspath(__file__))
184 | cert_path = base_dir + '/cacert.pem'
185 | connect = ssl.wrap_socket(s, cert_reqs=ssl.CERT_REQUIRED, ca_certs=cert_path)
186 | connect.settimeout(2)
187 | connect.connect((ip, 443))
188 | cert_data = connect.getpeercert().get('subjectAltName')
189 | return cert_data
190 |
191 | def get_ca_domain_info(self):
192 | domain_list = list()
193 | try:
194 | ip = self.dns_resolve()
195 | cert_data = self.get_cert_info_by_ip(ip)
196 | except Exception as e:
197 | return domain_list
198 |
199 | for domain_info in cert_data:
200 | hostname = domain_info[1]
201 | if not hostname.startswith('*') and hostname.endswith(self.domain):
202 | domain_list.append(hostname)
203 |
204 | return domain_list
205 |
206 | def get_subdomains(self):
207 | subs = list()
208 | subdomain_list = self.get_ca_domain_info()
209 | for sub in subdomain_list:
210 | subs.append(sub[:len(sub) - len(self.domain) - 1])
211 | return subs
212 |
213 |
214 | class EnumSubDomain(object):
215 | def __init__(self, domain, response_filter=None, dns_servers=None, skip_rsc=False, debug=False,
216 | split=None, proxy={}, multiresolve=False):
217 | self.project_directory = os.path.abspath(os.path.dirname(__file__))
218 | logger.info('Version: {v}'.format(v=__version__))
219 | logger.info('----------')
220 | logger.info('Start domain: {d}'.format(d=domain))
221 | self.proxy = proxy
222 | self.data = {}
223 | self.domain = domain
224 | self.skip_rsc = skip_rsc
225 | self.split = split
226 | self.multiresolve = multiresolve
227 | self.stable_dns_servers = ['119.29.29.29']
228 | if dns_servers is None:
229 | # Apart from DNSPod, none of the others are suitable as a stable DNS
230 | # Either concurrency drops significantly, or they are completely unusable
231 | dns_servers = [
232 | # The DNS server has a large impact on result accuracy; some servers return results inconsistent with others, or no results at all
233 | # '223.5.5.5', # AliDNS
234 | # '114.114.114.114', # 114DNS
235 | # '1.1.1.1', # Cloudflare
236 | '119.29.29.29', # DNSPod https://www.dnspod.cn/products/public.dns
237 | # '180.76.76.76', # BaiduDNS
238 | # '1.2.4.8', # sDNS
239 | # '11.1.1.1' # test DNS, not available
240 | # '8.8.8.8', # Google DNS, latency is too high
241 | ]
242 |
243 | random.shuffle(dns_servers)
244 | self.dns_servers = dns_servers
245 | self.resolver = None
246 | self.loop = asyncio.get_event_loop()
247 | self.general_dicts = []
248 | # Mark whether the current domain is a wildcard (pan-resolution) domain
249 | self.is_wildcard_domain = False
250 | # Resolve a deliberately nonexistent subdomain to determine, from the DNS result,
251 | # whether wildcard (pan) resolution is enabled
252 | self.wildcard_sub = 'feei-esd-{random}'.format(random=random.randint(0, 9999))
253 | self.wildcard_sub3 = 'feei-esd-{random}.{random}'.format(random=random.randint(0, 9999))
254 | # IPs returned when resolving the nonexistent test subdomain
255 | self.wildcard_ips = []
256 | # HTML response of the nonexistent test subdomain
257 | self.wildcard_html = None
258 | self.wildcard_html_len = 0
259 | self.wildcard_html3 = None
260 | self.wildcard_html3_len = 0
261 | # Subdomains whose IPs match the wildcard (nonexistent-domain) IPs
262 | self.wildcard_subs = []
263 | # Wildcard domains use RSC
264 | self.wildcard_domains = {}
265 | # Coroutine count
266 | self.coroutine_count = None
267 | # Too much concurrency makes DNS server errors increase dramatically
268 | self.coroutine_count_dns = 1000
269 | self.coroutine_count_request = 100
270 | # aiodns resolve timeout
271 | self.resolve_timeout = 3
272 | # RSC (response similarity comparison) ratio: responses more similar than this to the wildcard page are treated as wildcard noise
273 | self.rsc_ratio = 0.8
274 | self.remainder = 0
275 | self.count = 0
276 | # Request Header
277 | self.request_headers = {
278 | 'Connection': 'keep-alive',
279 | 'Pragma': 'no-cache',
280 | 'Cache-Control': 'no-cache',
281 | 'Upgrade-Insecure-Requests': '1',
282 | 'User-Agent': 'Baiduspider',
283 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
284 | 'DNT': '1',
285 | 'Referer': 'http://www.baidu.com/',
286 | 'Accept-Encoding': 'gzip, deflate',
287 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'}
288 | # Filter the domain's response(regex)
289 | self.response_filter = response_filter
290 | # debug mode
291 | self.debug = debug
292 | if self.debug:
293 | logger.setLevel(logging.DEBUG)
294 | # collect redirecting domains and response domains
295 | self.domains_rs = []
296 | self.domains_rs_processed = []
297 | self.dns_query_errors = 0
298 |
299 | def generate_general_dicts(self, line):
300 | """
301 | Generate general subdomains dicts
302 | :param line:
303 | :return:
304 | """
305 | letter_count = line.count('{letter}')
306 | number_count = line.count('{number}')
307 | letters = itertools.product(string.ascii_lowercase, repeat=letter_count)
308 | letters = [''.join(l) for l in letters]
309 | numbers = itertools.product(string.digits, repeat=number_count)
310 | numbers = [''.join(n) for n in numbers]
311 | for l in letters:
312 | iter_line = line.replace('{letter}' * letter_count, l)
313 | self.general_dicts.append(iter_line)
314 | number_dicts = []
315 | for gd in self.general_dicts:
316 | for n in numbers:
317 | iter_line = gd.replace('{number}' * number_count, n)
318 | number_dicts.append(iter_line)
319 | if len(number_dicts) > 0:
320 | return number_dicts
321 | else:
322 | return self.general_dicts
323 |
324 | def load_sub_domain_dict(self):
325 | """
326 | Load subdomains from files and dicts
327 | :return:
328 | """
329 | dicts = []
330 | if self.debug:
331 | path = '{pd}/subs-test.esd'.format(pd=self.project_directory)
332 | else:
333 | path = '{pd}/subs.esd'.format(pd=self.project_directory)
334 | with open(path, encoding='utf-8') as f:
335 | for line in f:
336 | line = line.strip().lower()
337 | # skip comments and space
338 | if '#' in line or line == '':
339 | continue
340 | if '{letter}' in line or '{number}' in line:
341 | self.general_dicts = []
342 | dicts_general = self.generate_general_dicts(line)
343 | dicts += dicts_general
344 | else:
345 | # compatibility with other dicts
346 | line = line.strip('.')
347 | dicts.append(line)
348 | dicts = list(set(dicts))
349 |
350 | # split dict
351 | if self.split is not None:
352 | s = self.split.split('/')
353 | dicts_choose = int(s[0])
354 | dicts_count = int(s[1])
355 | dicts_every = int(math.ceil(len(dicts) / dicts_count))
356 | dicts = [dicts[i:i + dicts_every] for i in range(0, len(dicts), dicts_every)][dicts_choose - 1]
357 | logger.info(
358 | 'Sub domain dict split into {count} parts, using part {choose}'.format(count=dicts_count, choose=dicts_choose))
359 |
360 | # root domain
361 | dicts.append('@')
362 |
363 | return dicts
364 |
365 | async def query(self, sub):
366 | """
367 | Query domain
368 | :param sub:
369 | :return:
370 | """
371 | ret = None
372 | # root domain
373 | if sub == '@' or sub == '':
374 | sub_domain = self.domain
375 | else:
376 | sub = ''.join(sub.rsplit(self.domain, 1)).rstrip('.')
377 | sub_domain = '{sub}.{domain}'.format(sub=sub, domain=self.domain)
378 | # Retry when specific exceptions occur
379 | for i in range(4):
380 | try:
381 | ret = await self.resolver.query(sub_domain, 'A')
382 | except aiodns.error.DNSError as e:
383 | err_code, err_msg = e.args[0], e.args[1]
384 | # The domain genuinely does not exist
385 | # 4: Domain name not found
386 | # 1: DNS server returned answer with no data
387 | # All other cases need a retry, otherwise too many subdomains would be missed
388 | # 11: Could not contact DNS servers
389 | # 12: Timeout while contacting DNS servers
390 | if err_code not in [1, 4]:
391 | if i == 2:
392 | logger.warning(f'Try {i + 1} times, but failed. {sub_domain} {e}')
393 | self.dns_query_errors = self.dns_query_errors + 1
394 | continue
395 | except Exception as e:
396 | logger.info(sub_domain)
397 | logger.warning(traceback.format_exc())
398 | else:
399 | ret = [r.host for r in ret]
400 | domain_ips = [s for s in ret]
401 | # If this is a wildcard domain and the brute-forced subdomain's IPs
402 | # match (or are a subset of) the wildcard IPs, the subdomain is not
403 | # accepted directly; it is left to response similarity comparison (RSC),
404 | # or dropped from the results in --skip-rsc mode.
405 | if self.is_wildcard_domain and (
406 | sorted(self.wildcard_ips) == sorted(domain_ips) or set(domain_ips).issubset(
407 | self.wildcard_ips)):
408 | if self.skip_rsc:
409 | logger.debug(
410 | '{sub} may be a wildcard subdomain, but --skip-rsc mode is on, so it will be dropped from the results'.format(
411 | sub=sub_domain))
412 | else:
413 | logger.debug(
414 | '{r} may be a wildcard domain, continue with RSC: {sub}'.format(r=self.remainder, sub=sub_domain,
415 | ips=domain_ips))
416 | else:
417 | if sub != self.wildcard_sub:
418 | self.data[sub_domain] = sorted(domain_ips)
419 | print('', end='\n')
420 | self.count += 1
421 | logger.info('{r} {sub} {ips}'.format(r=self.remainder, sub=sub_domain, ips=domain_ips))
422 | break
423 | self.remainder += -1
424 | return sub_domain, ret
425 |
426 | @staticmethod
427 | def limited_concurrency_coroutines(coros, limit):
428 | futures = [
429 | asyncio.ensure_future(c)
430 | for c in islice(coros, 0, limit)
431 | ]
432 |
433 | async def first_to_finish():
434 | while True:
435 | await asyncio.sleep(0)
436 | for f in futures:
437 | if f.done():
438 | futures.remove(f)
439 | try:
440 | nf = next(coros)
441 | futures.append(asyncio.ensure_future(nf))
442 | except StopIteration:
443 | pass
444 | return f.result()
445 |
446 | while len(futures) > 0:
447 | yield first_to_finish()
448 |
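# limited_concurrency_coroutines() keeps at most `limit` futures in flight: whenever one
# finishes, the next coroutine from the generator is scheduled, so memory stays bounded
# even with tens of thousands of pending DNS queries. A rough usage sketch (illustrative
# only, mirroring how start() consumes it below):
#
#   tasks = (self.query(sub) for sub in subs)
#   for waiter in self.limited_concurrency_coroutines(tasks, self.coroutine_count):
#       await waiter  # inside an async function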
449 | async def start(self, tasks, tasks_num):
450 | """
451 | Limit the number of concurrent coroutines to reduce the memory footprint
452 | :param tasks:
453 | :return:
454 | """
455 | for res in tqdm(self.limited_concurrency_coroutines(tasks, self.coroutine_count),
456 | bar_format="%s{l_bar}%s{bar}%s{r_bar}%s" % (Fore.YELLOW, Fore.YELLOW, Fore.YELLOW, Fore.RESET),
457 | total=tasks_num):
458 | await res
459 |
460 | @staticmethod
461 | def data_clean(data):
462 | try:
463 | html = re.sub(r'\s', '', data)
464 | html = re.sub(r'<script(?!.*?src=).*?>.*?</script>', '', html)
465 | return html
466 | except BaseException:
467 | return data
468 |
469 | @staticmethod
470 | @backoff.on_exception(backoff.expo, TimeoutError, max_tries=3)
471 | async def fetch(session, url):
472 | """
473 | Fetch url response with session
474 | :param session:
475 | :param url:
476 | :return:
477 | """
478 | try:
479 | async with async_timeout.timeout(20):
480 | async with session.get(url) as response:
481 | return await response.text(), response.history
482 | except Exception as e:
483 | # TODO: in the wildcard-DNS scenario, relying only on response similarity comparison means a domain with no web service fails the comparison and gets dropped
484 | logger.warning('fetch exception: {e} {u}'.format(e=type(e).__name__, u=url))
485 | return None, None
486 |
487 | async def similarity(self, sub):
488 | """
489 | Enumerate subdomains by responding to similarities
490 | :param sub:
491 | :return:
492 | """
493 | # root domain
494 | if sub == '@' or sub == '':
495 | sub_domain = self.domain
496 | else:
497 | sub = ''.join(sub.rsplit(self.domain, 1)).rstrip('.')
498 | sub_domain = '{sub}.{domain}'.format(sub=sub, domain=self.domain)
499 |
500 | if sub_domain in self.domains_rs:
501 | self.domains_rs.remove(sub_domain)
502 | full_domain = 'http://{sub_domain}'.format(sub_domain=sub_domain)
503 | # If the redirect target is one of the following, do not add it to the next RSC round
504 | skip_domain_with_history = [
505 | # redirected to the root domain
506 | '{domain}'.format(domain=self.domain),
507 | 'www.{domain}'.format(domain=self.domain),
508 | # redirected to itself, e.g. HTTP redirecting to HTTPS
509 | '{domain}'.format(domain=sub_domain),
510 | ]
511 | try:
512 | regex_domain = r"((?!\/)(?:(?:[a-z\d-]*\.)+{d}))".format(d=self.domain)
513 | resolver = AsyncResolver(nameservers=self.dns_servers)
514 | conn = aiohttp.TCPConnector(resolver=resolver)
515 | async with aiohttp.ClientSession(connector=conn, headers=self.request_headers) as session:
516 | html, history = await self.fetch(session, full_domain)
517 | html = self.data_clean(html)
518 | if history is not None and len(history) > 0:
519 | location = str(history[-1].headers['location'])
520 | if '.' in location:
521 | location_split = location.split('/')
522 | if len(location_split) > 2:
523 | location = location_split[2]
524 | else:
525 | location = location
526 | try:
527 | location = re.match(regex_domain, location).group(0)
528 | except AttributeError:
529 | location = location
530 | status = history[-1].status
531 | if location in skip_domain_with_history and len(history) >= 2:
532 | logger.debug('domain in skip: {s} {r} {l}'.format(s=sub_domain, r=status, l=location))
533 | return
534 | else:
535 | # cnsuning.com suning.com
536 | if location[-len(self.domain) - 1:] == '.{d}'.format(d=self.domain):
537 | # collect redirecting's domains
538 | if sub_domain != location and location not in self.domains_rs and location not in self.domains_rs_processed:
539 | print('', end='\n')
540 | logger.info(
541 | '[{sd}] add redirect domain: {l}({len})'.format(sd=sub_domain, l=location,
542 | len=len(self.domains_rs)))
543 | self.domains_rs.append(location)
544 | self.domains_rs_processed.append(location)
545 | else:
546 | print('', end='\n')
547 | logger.info('not same domain: {l}'.format(l=location))
548 | else:
549 | print('', end='\n')
550 | logger.info('not domain(maybe path): {l}'.format(l=location))
551 | if html is None:
552 | print('', end='\n')
553 | logger.warning('domain\'s html is none: {s}'.format(s=sub_domain))
554 | return
555 | # collect response html's domains
556 | response_domains = re.findall(regex_domain, html)
557 | response_domains = list(set(response_domains) - set([sub_domain]))
558 | for rd in response_domains:
559 | rd = rd.strip().strip('.')
560 | if rd.count('.') >= sub_domain.count('.') and rd[-len(sub_domain):] == sub_domain:
561 | continue
562 | if rd not in self.domains_rs:
563 | if rd not in self.domains_rs_processed:
564 | print('', end='\n')
565 | logger.info('[{sd}] add response domain: {s}({l})'.format(sd=sub_domain, s=rd,
566 | l=len(self.domains_rs)))
567 | self.domains_rs.append(rd)
568 | self.domains_rs_processed.append(rd)
569 |
570 | if len(html) == self.wildcard_html_len:
571 | ratio = 1
572 | else:
573 | # SPEED 4 2 1, but here is still the bottleneck
574 | # real_quick_ratio() > quick_ratio() > ratio()
575 | # TODO bottleneck
576 | if sub.count('.') == 0: # secondary sub, ex: www
577 | ratio = SequenceMatcher(None, html, self.wildcard_html).real_quick_ratio()
578 | ratio = round(ratio, 3)
579 | else: # tertiary sub, ex: home.dev
580 | ratio = SequenceMatcher(None, html, self.wildcard_html3).real_quick_ratio()
581 | ratio = round(ratio, 3)
582 | self.remainder += -1
583 | if ratio > self.rsc_ratio:
584 | # passed
585 | logger.debug(
586 | '{r} RSC ratio: {ratio} (passed) {sub}'.format(r=self.remainder, sub=sub_domain, ratio=ratio))
587 | else:
588 | # added
589 | # for def distinct func
590 | # self.wildcard_domains[sub_domain] = html
591 | if self.response_filter is not None:
592 | for resp_filter in self.response_filter.split(','):
593 | if resp_filter in html:
594 | logger.debug('{r} RSC filter in response (passed) {sub}'.format(r=self.remainder,
595 | sub=sub_domain))
596 | return
597 | else:
598 | continue
599 | self.data[sub_domain] = self.wildcard_ips
600 | else:
601 | self.data[sub_domain] = self.wildcard_ips
602 | print('', end='\n')
603 | logger.info(
604 | '{r} RSC ratio: {ratio} (added) {sub}'.format(r=self.remainder, sub=sub_domain, ratio=ratio))
605 | except Exception as e:
606 | logger.debug(traceback.format_exc())
607 | return
608 |
609 | def distinct(self):
610 | for domain, html in self.wildcard_domains.items():
611 | for domain2, html2 in self.wildcard_domains.items():
612 | ratio = SequenceMatcher(None, html, html2).real_quick_ratio()
613 | if ratio > self.rsc_ratio:
614 | # remove this domain
615 | if domain2 in self.data:
616 | del self.data[domain2]
617 | m = 'Remove'
618 | else:
619 | m = 'Stay'
620 | logger.info('{d} : {d2} {ratio} {m}'.format(d=domain, d2=domain2, ratio=ratio, m=m))
621 |
622 | def check(self, dns):
623 | logger.info("Checking if DNS server {dns} is available".format(dns=dns))
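# Hand-crafted DNS query packet: a 12-byte header (transaction ID 0x5c6d, recursion
# desired, one question) followed by QNAME www.baidu.com, QTYPE A, QCLASS IN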
624 | msg = b'\x5c\x6d\x01\x00\x00\x01\x00\x00\x00\x00\x00\x00\x03www\x05baidu\x03com\x00\x00\x01\x00\x01'
625 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
626 | sock.settimeout(3)
627 | repeat = {
628 | 1: 'first',
629 | 2: 'second',
630 | 3: 'third'
631 | }
632 | for i in range(3):
633 | logger.info("Sending message to DNS server for the {times} time".format(times=repeat[i + 1]))
634 | sock.sendto(msg, (dns, 53))
635 | try:
636 | sock.recv(4096)
637 | break
638 | except socket.timeout as e:
639 | logger.warning('DNS server check timed out!')
640 | if i == 2:
641 | return False
642 | return True
643 |
644 | def run(self):
645 | """
646 | Run
647 | :return:
648 | """
649 | start_time = time.time()
650 | subs = self.load_sub_domain_dict()
651 | logger.info('Sub domain dict count: {c}'.format(c=len(subs)))
652 | logger.info('Generate coroutines...')
653 | # Verify that all DNS server results are consistent
654 | stable_dns = []
655 | wildcard_ips = None
656 | last_dns = []
657 | only_similarity = False
658 | for dns in self.dns_servers:
659 | delay = self.check(dns)
660 | if not delay:
661 | logger.warning("@{dns} is not available, skip this DNS server".format(dns=dns))
662 | continue
663 | self.resolver = aiodns.DNSResolver(loop=self.loop, nameservers=[dns], timeout=self.resolve_timeout)
664 | job = self.query(self.wildcard_sub)
665 | sub, ret = self.loop.run_until_complete(job)
666 | logger.info('@{dns} {sub} {ips}'.format(dns=dns, sub=sub, ips=ret))
667 | if ret is None:
668 | ret = None
669 | else:
670 | ret = sorted(ret)
671 |
672 | if dns in self.stable_dns_servers:
673 | wildcard_ips = ret
674 | stable_dns.append(ret)
675 |
676 | if ret:
677 | equal = [False for r in ret if r not in last_dns]
678 | if len(last_dns) != 0 and False in equal:
679 | only_similarity = self.is_wildcard_domain = True
680 | logger.info('Random (wildcard) resolution detected for this domain.')
681 | break
682 | else:
683 | last_dns = ret
684 |
685 | is_all_stable_dns = stable_dns.count(stable_dns[0]) == len(stable_dns)
686 | if not is_all_stable_dns:
687 | logger.info('Not all DNS servers are stable, using the default DNS server')
688 | self.resolver = aiodns.DNSResolver(loop=self.loop, nameservers=self.stable_dns_servers,
689 | timeout=self.resolve_timeout)
690 | # Wildcard domain
691 | is_wildcard_domain = not (stable_dns.count(None) == len(stable_dns))
692 | if is_wildcard_domain or self.is_wildcard_domain:
693 | if not self.skip_rsc:
694 | logger.info('This is a wildcard domain, subdomains will be enumerated using DNS+RSC.')
695 | else:
696 | logger.info(
697 | 'This is a wildcard domain, but --skip-rsc mode is on, so all randomly resolved subdomains will be dropped from the results')
698 | self.is_wildcard_domain = True
699 | if wildcard_ips is not None:
700 | self.wildcard_ips = wildcard_ips
701 | else:
702 | self.wildcard_ips = stable_dns[0]
703 | logger.info('Wildcard IPS: {ips}'.format(ips=self.wildcard_ips))
704 | if not self.skip_rsc:
705 | try:
706 | self.wildcard_html = requests.get(
707 | 'http://{w_sub}.{domain}'.format(w_sub=self.wildcard_sub, domain=self.domain),
708 | headers=self.request_headers, timeout=10, verify=False).text
709 | self.wildcard_html = self.data_clean(self.wildcard_html)
710 | self.wildcard_html_len = len(self.wildcard_html)
711 | self.wildcard_html3 = requests.get(
712 | 'http://{w_sub}.{domain}'.format(w_sub=self.wildcard_sub3, domain=self.domain),
713 | headers=self.request_headers, timeout=10, verify=False).text
714 | self.wildcard_html3 = self.data_clean(self.wildcard_html3)
715 | self.wildcard_html3_len = len(self.wildcard_html3)
716 | logger.info(
717 | 'Wildcard domain response html length: {len} 3length: {len2}'.format(len=self.wildcard_html_len,
718 | len2=self.wildcard_html3_len))
719 | except requests.exceptions.SSLError:
720 | logger.warning('SSL Certificate Error!')
721 | except requests.exceptions.ConnectTimeout:
722 | logger.warning('Requesting response content failed, please check the network!')
723 | except requests.exceptions.ReadTimeout:
724 | self.wildcard_html = self.wildcard_html3 = ''
725 | self.wildcard_html_len = self.wildcard_html3_len = 0
726 | logger.warning(
727 | 'Request for response content timed out; {w_sub}.{domain} and {w_sub3}.{domain} may not be an HTTP service, content will be set to blank!'.format(
728 | w_sub=self.wildcard_sub,
729 | domain=self.domain,
730 | w_sub3=self.wildcard_sub3))
731 | except requests.exceptions.ConnectionError:
732 | logger.error('ESD can\'t get the response text, so RSC will be skipped.')
733 | self.skip_rsc = True
734 | else:
735 | logger.info('Not a wildcard domain')
736 |
737 | if not only_similarity:
738 | self.coroutine_count = self.coroutine_count_dns
739 | tasks = (self.query(sub) for sub in subs)
740 | self.loop.run_until_complete(self.start(tasks, len(subs)))
741 | logger.info("Brute Force subdomain count: {total}".format(total=self.count))
742 | dns_time = time.time()
743 | time_consume_dns = int(dns_time - start_time)
744 | logger.info(f'DNS query errors: {self.dns_query_errors}')
745 |
746 | # CA subdomain info
747 | ca_subdomains = []
748 | logger.info('Collect subdomains in CA...')
749 | ca_subdomains = CAInfo(self.domain).get_subdomains()
750 | if len(ca_subdomains):
751 | tasks = (self.query(sub) for sub in ca_subdomains)
752 | self.loop.run_until_complete(self.start(tasks, len(ca_subdomains)))
753 | logger.info('CA subdomain count: {c}'.format(c=len(ca_subdomains)))
754 |
755 | # DNS Transfer Vulnerability
756 | transfer_info = []
757 | logger.info('Check DNS Transfer Vulnerability in {domain}'.format(domain=self.domain))
758 | transfer_info = DNSTransfer(self.domain).transfer_info()
759 | if len(transfer_info):
760 | logger.warning('DNS Transfer Vulnerability found in {domain}!'.format(domain=self.domain))
761 | tasks = (self.query(sub) for sub in transfer_info)
762 | self.loop.run_until_complete(self.start(tasks, len(transfer_info)))
763 | logger.info('DNS Transfer subdomain count: {c}'.format(c=len(transfer_info)))
764 |
765 | total_subs = set(subs + transfer_info + ca_subdomains)
766 |
767 | # Use TXT, SOA, MX, AAAA records to find subdomains
768 | if self.multiresolve:
769 | logger.info('Enumerating subdomains with TXT, SOA, MX, AAAA record...')
770 | dnsquery = DNSQuery(self.domain, total_subs, self.domain)
771 | record_info = dnsquery.dns_query()
772 | tasks = (self.query(record[:record.find('.')]) for record in record_info)
773 | self.loop.run_until_complete(self.start(tasks, len(record_info)))
774 | logger.info('DNS record subdomain count: {c}'.format(c=len(record_info)))
775 |
776 | if self.is_wildcard_domain and not self.skip_rsc:
777 | # Response similarity comparison
778 | total_subs = set(subs + transfer_info + ca_subdomains)
779 | self.wildcard_subs = list(set(subs).union(total_subs))
780 | logger.info('Enumerated {len} subdomains in DNS mode in {tcd}.'.format(len=len(self.data), tcd=str(
781 | datetime.timedelta(seconds=time_consume_dns))))
782 | logger.info(
783 | 'Will continue to test the remaining distinct ({len_subs}-{len_exist}={len_remain}) domains using RSC, the speed will be affected.'.format(
784 | len_subs=len(subs), len_exist=len(self.data),
785 | len_remain=len(self.wildcard_subs)))
786 | self.coroutine_count = self.coroutine_count_request
787 | self.remainder = len(self.wildcard_subs)
788 | tasks = (self.similarity(sub) for sub in self.wildcard_subs)
789 | self.loop.run_until_complete(self.start(tasks, len(self.wildcard_subs)))
790 |
791 | # Distinct last domains use RSC
792 | # Maybe misinformation
793 | # self.distinct()
794 |
795 | time_consume_request = int(time.time() - dns_time)
796 | logger.info('Requests time consume {tcr}'.format(tcr=str(datetime.timedelta(seconds=time_consume_request))))
797 | # RS(redirect/response) domains
798 | while len(self.domains_rs) != 0:
799 | logger.info('RS(redirect/response) domains({l})...'.format(l=len(self.domains_rs)))
800 | tasks = (self.similarity(''.join(domain.rsplit(self.domain, 1)).rstrip('.')) for domain in self.domains_rs)
801 |
802 | self.loop.run_until_complete(self.start(tasks, len(self.domains_rs)))
803 |
804 | # write output
805 | # tmp_dir = '/tmp/esd'
806 | # if not os.path.isdir(tmp_dir):
807 | # os.mkdir(tmp_dir, 0o777)
808 | # output_path_with_time = '{td}/.{domain}_{time}.esd'.format(td=tmp_dir, domain=self.domain,
809 | # time=datetime.datetime.now().strftime(
810 | # "%Y-%m_%d_%H-%M"))
811 | # output_path = '{td}/.{domain}.esd'.format(td=tmp_dir, domain=self.domain)
812 | # if len(self.data):
813 | # max_domain_len = max(map(len, self.data)) + 2
814 | # else:
815 | # max_domain_len = 2
816 | # output_format = '%-{0}s%-s\n'.format(max_domain_len)
817 | # with open(output_path_with_time, 'w') as opt, open(output_path, 'w') as op:
818 | # for domain, ips in self.data.items():
819 | # # The format is consistent with other scanners to ensure that they are
820 | # # invoked at the same time without increasing the cost of
821 | # # resolution
822 | # if ips is None or len(ips) == 0:
823 | # ips_split = ''
824 | # else:
825 | # ips_split = ','.join(ips)
826 | # con = output_format % (domain, ips_split)
827 | # op.write(con)
828 | # opt.write(con)
829 |
830 | # Modified locally: collect results into a list and return them instead of writing files
831 | subdomains = []
832 | for domain, ips in self.data.items():
833 | # The format is consistent with other scanners to ensure that they are
834 | # invoked at the same time without increasing the cost of
835 | # resolution
836 | if ips is None or len(ips) == 0:
837 | ips_split = ''
838 | else:
839 | ips_split = ','.join(ips)
840 | subdomains += [(domain, ips_split)]
841 |
842 |
843 | # logger.info('Output: {op}'.format(op=output_path))
844 | # logger.info('Output with time: {op}'.format(op=output_path_with_time))
845 | logger.info('Total domain: {td}'.format(td=len(self.data)))
846 | time_consume = int(time.time() - start_time)
847 | logger.info('Time consume: {tc}'.format(tc=str(datetime.timedelta(seconds=time_consume))))
848 | return subdomains
849 |
850 |
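# A minimal usage sketch for driving this module from Python rather than the CLI
# (illustrative only; 'example.com' is a placeholder and real callers may pass
# other options such as response_filter or proxy):
#
#   from ESD import EnumSubDomain
#   results = EnumSubDomain('example.com', skip_rsc=True).run()
#   # results is a list of (subdomain, 'ip1,ip2,...') tuples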
851 | def banner():
852 | print("""\033[94m
853 | ______ _____ _____
854 | | ____| / ____| | __ \
855 | | |__ | (___ | | | |
856 | | __| \___ \ | | | |
857 | | |____ ____) | | |__| |
858 | |______| |_____/ |_____/\033[0m\033[93m
859 | Enumeration Sub Domains v%s\033[92m
860 | """ % __version__)
861 |
862 |
863 | def main():
864 | banner()
865 | parser = OptionParser(
866 | 'Usage: esd -d feei.cn -F response_filter -p user:pass@host:port')
867 | parser.add_option('-d', '--domain', dest='domains', help='The domains that you want to enumerate')
868 | parser.add_option('-f', '--file', dest='input', help='Import domains from this file')
869 | parser.add_option('-F', '--filter', dest='filter', help='Response filter')
870 | parser.add_option('-s', '--skip-rsc', dest='skiprsc', help='Skip response similarity comparison', action='store_true',
871 | default=False)
872 | parser.add_option('-S', '--split', dest='split', help='Split the dict into several parts', default='1/1')
873 | parser.add_option('-p', '--proxy', dest='proxy', help='Use socks5 proxy to access Google and Yahoo')
874 | parser.add_option('-m', '--multi-resolve', dest='multiresolve',
875 | help='Use TXT, AAAA, MX, SOA record to find subdomains', action='store_true', default=False)
876 | (options, args) = parser.parse_args()
877 |
878 | domains = []
879 | response_filter = options.filter
880 | skip_rsc = options.skiprsc
881 | split_list = options.split.split('/')
882 | split = options.split
883 | multiresolve = options.multiresolve
884 |
885 | try:
886 | if len(split_list) != 2 or int(split_list[0]) > int(split_list[1]):
887 | logger.error('Invalid split parameter, cannot split the dict')
888 | split = None
889 | except:
890 | logger.error('Split validation failed: {d}'.format(d=split_list))
891 | exit(0)
892 |
893 | if options.proxy:
894 | proxy = {
895 | 'http': 'socks5h://%s' % options.proxy,
896 | 'https': 'socks5h://%s' % options.proxy
897 | }
898 | else:
899 | proxy = {}
900 |
901 | if options.domains is not None:
902 | for p in options.domains.split(','):
903 | p = p.strip().lower()
904 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,})$', p)
905 | if len(re_domain) > 0 and re_domain[0][0] == p:
906 | domains.append(p.strip())
907 | else:
908 | logger.error('Domain validation failed: {d}'.format(d=p))
909 | elif options.input and os.path.isfile(options.input):
910 | with open(options.input) as fh:
911 | for line_domain in fh:
912 | line_domain = line_domain.strip().lower()
913 | re_domain = re.findall(r'^(([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,})$', line_domain)
914 | if len(re_domain) > 0 and re_domain[0][0] == line_domain:
915 | domains.append(line_domain)
916 | else:
917 | logger.error('Domain validation failed: {d}'.format(d=line_domain))
918 | else:
919 | logger.error('Please input a valid parameter, e.g. "esd -d feei.cn" or "esd -f /Users/root/domains.txt"')
920 |
921 | if 'esd' in os.environ:
922 | debug = os.environ['esd']
923 | else:
924 | debug = False
925 | logger.info('Debug: {d}'.format(d=debug))
926 | logger.info('--skip-rsc: {rsc}'.format(rsc=skip_rsc))
927 |
928 | logger.info('Total target domains: {ttd}'.format(ttd=len(domains)))
929 | try:
930 | for d in domains:
931 | esd = EnumSubDomain(d, response_filter, skip_rsc=skip_rsc, debug=debug, split=split,
932 | proxy=proxy,
933 | multiresolve=multiresolve)
934 | esd.run()
935 | except KeyboardInterrupt:
936 | print('', end='\n')
937 | logger.info('Bye :)')
938 | exit(0)
939 |
940 |
941 | if __name__ == '__main__':
942 | main()
943 |
--------------------------------------------------------------------------------