├── tests ├── __init__.py ├── discovery │ ├── __init__.py │ ├── test_otx.py │ ├── test_certspotter.py │ ├── test_linkedin_links.py │ ├── test_linkedin_links.txt │ └── test_githubcode.py ├── test_myparser.py └── test_theHarvester.py ├── theHarvester ├── lib │ ├── port_scanner.py │ ├── __init__.py │ ├── hostchecker.py │ ├── reportgraph.py │ ├── statichtmlgenerator.py │ └── stash.py ├── parsers │ ├── __init__.py │ ├── intelxparser.py │ ├── securitytrailsparser.py │ └── myparser.py ├── __init__.py ├── discovery │ ├── __init__.py │ ├── crtsh.py │ ├── certspottersearch.py │ ├── virustotal.py │ ├── threatcrowd.py │ ├── spyse.py │ ├── port_scanner.py │ ├── baidusearch.py │ ├── huntersearch.py │ ├── dogpilesearch.py │ ├── otxsearch.py │ ├── dnssearch.py │ ├── dnsdumpster.py │ ├── twittersearch.py │ ├── yahoosearch.py │ ├── shodansearch.py │ ├── suip.py │ ├── linkedinsearch.py │ ├── securitytrailssearch.py │ ├── intelxsearch.py │ ├── netcraft.py │ ├── trello.py │ ├── takeover.py │ ├── exaleadsearch.py │ ├── duckduckgosearch.py │ ├── bingsearch.py │ ├── constants.py │ ├── githubcode.py │ └── googlesearch.py └── __main__.py ├── setup.cfg ├── theHarvester-logo.png ├── wordlists ├── general │ └── common.txt ├── dorks.txt └── dns-names.txt ├── mypy.ini ├── .lgtm.yml ├── .gitignore ├── Dockerfile ├── requirements.txt ├── .github ├── workflows │ ├── dockerci.yml │ └── theHarvester.yml ├── FUNDING.yml └── ISSUE_TEMPLATE │ └── issue-template.md ├── api-keys.yaml ├── theHarvester.py ├── Pipfile ├── LICENSES ├── .gitattributes ├── CONTRIBUTING.md ├── setup.py ├── .travis.yml ├── README.md ├── COPYING └── Pipfile.lock /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/discovery/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /theHarvester/lib/port_scanner.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /theHarvester/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501, F405, F403, E402 -------------------------------------------------------------------------------- /theHarvester/lib/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['hostchecker'] 2 | -------------------------------------------------------------------------------- /theHarvester-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hakluke/theHarvester/HEAD/theHarvester-logo.png -------------------------------------------------------------------------------- /wordlists/general/common.txt: -------------------------------------------------------------------------------- 1 | admin 2 | test 3 | hello 4 | uk 5 | login 6 | book 7 | robots.txt 8 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True 3 | show_traceback = 
True 4 | show_error_codes = True -------------------------------------------------------------------------------- /theHarvester/__init__.py: -------------------------------------------------------------------------------- 1 | from gevent import monkey as curious_george 2 | curious_george.patch_all(thread=False, select=False) 3 | -------------------------------------------------------------------------------- /.lgtm.yml: -------------------------------------------------------------------------------- 1 | queries: 2 | - exclude: py/import-and-import-from 3 | - exclude: py/polluting-import 4 | 5 | extraction: 6 | python: 7 | python_setup: 8 | version: 3 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *.pyc 3 | *.sqlite 4 | *.html 5 | *.vscode 6 | *.xml 7 | debug_results.txt 8 | venv 9 | .mypy_cache 10 | .pytest_cache 11 | build/ 12 | dist/ 13 | theHarvester.egg-info/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM kalilinux/kali-linux-docker 2 | RUN mkdir /app 3 | WORKDIR /app 4 | COPY . /app 5 | RUN apt-get -qq update 6 | RUN apt-get install -yqq python3-pip 7 | RUN pip3 install -r requirements.txt 8 | RUN chmod +x *.py 9 | ENTRYPOINT ["/app/theHarvester.py"] 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiodns==2.0.0 2 | beautifulsoup4==4.8.0 3 | dnspython==1.16.0 4 | flake8==3.7.8 5 | gevent==1.4.0 6 | grequests==0.4.0 7 | mypy==0.740 8 | netaddr==0.7.19 9 | plotly==4.2.1 10 | pytest==5.2.0 11 | PyYaml==5.1.2 12 | requests==2.22.0 13 | shodan==1.19.0 14 | texttable==1.6.2 15 | retrying==1.3.3 16 | -------------------------------------------------------------------------------- /.github/workflows/dockerci.yml: -------------------------------------------------------------------------------- 1 | name: TheHarvester Docker Image CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v1 10 | - name: Build the Docker image 11 | run: docker build . --file Dockerfile --tag theharvester:$(date +%s) -------------------------------------------------------------------------------- /api-keys.yaml: -------------------------------------------------------------------------------- 1 | apikeys: 2 | bing: 3 | key: 4 | 5 | github: 6 | key: 7 | 8 | hunter: 9 | key: 10 | 11 | intelx: 12 | key: 9df61df0-84f7-4dc7-b34c-8ccfb8646ace 13 | 14 | securityTrails: 15 | key: 16 | 17 | shodan: 18 | key: oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt 19 | 20 | spyse: 21 | key: -------------------------------------------------------------------------------- /theHarvester.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Note: This script runs theHarvester 4 | from platform import python_version 5 | import sys 6 | if python_version()[0:3] < '3.7': 7 | print('\033[93m[!] 
Make sure you have Python 3.7+ installed, quitting.\n\n \033[0m') 8 | sys.exit(1) 9 | from theHarvester import __main__ 10 | __main__.entry_point() 11 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.python.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | aiodns = "==2.0.0" 8 | beautifulsoup4 = "==4.8.1" 9 | dnspython = "==1.16.0" 10 | grequests = "==0.4.0" 11 | netaddr = "==0.7.19" 12 | plotly = "==4.2.1" 13 | pyyaml = "==5.1.2" 14 | requests = "==2.22.0" 15 | retrying = "==1.3.3" 16 | shodan = "==1.19.0" 17 | texttable = "==1.6.2" 18 | 19 | [dev-packages] 20 | flake8 = "==3.7.8" 21 | mypy = "==0.740" 22 | mypy-extensions = "==0.4.3" 23 | pytest = "==5.2.1" 24 | -------------------------------------------------------------------------------- /tests/test_myparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | 4 | from theHarvester.parsers import myparser 5 | import pytest 6 | 7 | 8 | class TestMyParser(object): 9 | 10 | def test_emails(self): 11 | word = 'domain.com' 12 | results = '@domain.com***a@domain***banotherdomain.com***c@domain.com***d@sub.domain.com***' 13 | parse = myparser.Parser(results, word) 14 | emails = sorted(parse.emails()) 15 | assert emails == ['c@domain.com', 'd@sub.domain.com'] 16 | 17 | 18 | if __name__ == '__main__': 19 | pytest.main() 20 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | #github: [L1ghtn1ng] # Matt, chris and Lee if you have signed up to the beta put your names in here 4 | open_collective: # Replace with a single Open Collective username 5 | ko_fi: theharvester 6 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 7 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 8 | liberapay: # Replace with a single Liberapay username 9 | issuehunt: # Replace with a single IssueHunt username 10 | otechie: # Replace with a single Otechie username 11 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 12 | -------------------------------------------------------------------------------- /LICENSES: -------------------------------------------------------------------------------- 1 | Released under the GPL v 2.0. 2 | 3 | If you did not receive a copy of the GPL, try http://www.gnu.org/. 4 | 5 | Copyright 2011 Christian Martorella 6 | 7 | theHarvester is free software; you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation version 2 of the License. 10 | 11 | theHarvester is distributed in the hope that it will be useful, 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, which is to have git automatically determine 2 | # whether a file is a text or binary, unless otherwise specified. 3 | 4 | * text=auto 5 | 6 | # Basic .gitattributes for a python repo. 7 | 8 | # Source files 9 | # ============ 10 | *.pxd text diff=python 11 | *.py text diff=python 12 | *.py3 text diff=python 13 | *.pyw text diff=python 14 | *.pyx text diff=python 15 | 16 | # Binary files 17 | # ============ 18 | *.db binary 19 | *.p binary 20 | *.pkl binary 21 | *.pyc binary 22 | *.pyd binary 23 | *.pyo binary 24 | 25 | # Note: .db, .p, and .pkl files are associated with the python modules 26 | # ``pickle``, ``dbm.*``, # ``shelve``, ``marshal``, ``anydbm``, & ``bsddb`` 27 | # (among others). 28 | -------------------------------------------------------------------------------- /theHarvester/discovery/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['baidusearch', 2 | 'bingsearch', 3 | 'crtsh', 4 | 'certspottersearch', 5 | 'dnssearch', 6 | 'dogpilesearch', 7 | 'duckduckgosearch', 8 | 'exaleadsearch', 9 | 'githubcode', 10 | 'googlesearch', 11 | 'huntersearch', 12 | 'intelxsearch', 13 | 'linkedinsearch', 14 | 'netcraft', 15 | 'otxsearch', 16 | 'port_scanner', 17 | 'securitytrailssearch', 18 | 'shodansearch', 19 | 'spyse', 20 | 'takeover', 21 | 'threatcrowd', 22 | 'trello', 23 | 'twittersearch', 24 | 'virustotal', 25 | 'yahoosearch', 26 | ] 27 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to theHarvester Project 2 | Welcome to theHarvester project; thank you for wanting to contribute. 3 | The requirements below must be met for a contribution to be accepted. 4 | 5 | # CI 6 | Make sure all CI checks pass and that you do not introduce any new alerts from lgtm. 7 | 8 | # Unit Tests 9 | Every new module requires a unit test for that module; we use pytest. 10 | 11 | # Coding Standards 12 | * No single-letter variables; variable names must describe the action being performed 13 | * Add static type hints to functions and methods 14 | * Make sure no errors are reported by mypy 15 | * No issues reported by flake8 16 | 17 | # Submitting Bugs 18 | If you find a bug in a module that you want to submit an issue for and you know how to write Python code, 19 | please create a unit test for that bug (if possible) and submit a fix for it, as it would be a big help to the project. -------------------------------------------------------------------------------- /theHarvester/discovery/crtsh.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | import requests 3 | 4 | 5 | class SearchCrtsh: 6 | 7 | def __init__(self, word): 8 | self.word = word 9 | self.data = set() 10 | 11 | def do_search(self) -> Set: 12 | data = set() # type: Set 13 | url = f'https://crt.sh/?q=%25.{self.word}&output=json' 14 | headers = {'User-Agent': Core.get_user_agent()} 15 | request = requests.get(url, headers=headers, timeout=15) 16 | if request.ok: 17 | content = request.json() 18 | data = set([dct['name_value'][2:] if '*.' 
== dct['name_value'][:2] else dct['name_value'] for dct in content]) 19 | return data 20 | 21 | def process(self) -> None: 22 | print('\tSearching results.') 23 | data = self.do_search() 24 | self.data = data 25 | 26 | def get_data(self) -> Set: 27 | return self.data 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue-template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Issue Template 3 | about: A template for new issues. 4 | title: "[Bug|Feature Request|Other] Short Description of Issue" 5 | labels: '' 6 | 7 | --- 8 | 9 | **Feature Request or Bug or Other** 10 | Feature Request | Bug | Other 11 | 12 | **Describe the feature request or bug or other** 13 | A clear and concise description of what the bug, feature request, 14 | or other request is. 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behaviour: 18 | 1. Run tool like this: '...' 19 | 2. See error 20 | 21 | **Expected behaviour** 22 | A clear and concise description of what you expected to happen. 23 | 24 | **Screenshots** 25 | If possible please add screenshots to help explain your problem. 26 | 27 | **System Information (System that tool is running on):** 28 | - OS: [e.g. Windows10] 29 | - Version [e.g. 2.7] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. -------------------------------------------------------------------------------- /theHarvester/discovery/certspottersearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | import requests 3 | 4 | 5 | class SearchCertspoter: 6 | 7 | def __init__(self, word): 8 | self.word = word 9 | self.totalhosts = set() 10 | 11 | def do_search(self) -> None: 12 | base_url = f'https://api.certspotter.com/v1/issuances?domain={self.word}&expand=dns_names' 13 | headers = {'User-Agent': Core.get_user_agent()} 14 | try: 15 | request = requests.get(base_url, headers=headers) 16 | response = request.json() 17 | for dct in response: 18 | for key, value in dct.items(): 19 | if key == 'dns_names': 20 | self.totalhosts.update({name for name in value if name}) 21 | except Exception as e: 22 | print(e) 23 | 24 | def get_hostnames(self) -> set: 25 | return self.totalhosts 26 | 27 | def process(self): 28 | self.do_search() 29 | print('\tSearching results.') 30 | -------------------------------------------------------------------------------- /theHarvester/discovery/virustotal.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import Core 2 | from theHarvester.parsers import myparser 3 | import requests 4 | 5 | 6 | class SearchVirustotal: 7 | 8 | def __init__(self, word): 9 | self.word = word 10 | self.results = "" 11 | self.totalresults = "" 12 | self.quantity = '100' 13 | self.counter = 0 14 | 15 | def do_search(self): 16 | base_url = f'https://www.virustotal.com/ui/domains/{self.word}/subdomains?relationships=resolutions&cursor=STMwCi4%3D&limit=40' 17 | headers = {'User-Agent': Core.get_user_agent()} 18 | res = requests.get(base_url, headers=headers) 19 | self.results = res.content.decode('UTF-8') 20 | self.totalresults += self.results 21 | 22 | def get_hostnames(self): 23 | rawres = myparser.Parser(self.results, self.word) 24 | return rawres.hostnames() 25 | 26 | def process(self): 27 | print('\tSearching results.') 28 | self.do_search() 29 | self.get_hostnames() 30 | 
-------------------------------------------------------------------------------- /tests/discovery/test_otx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | from theHarvester.lib.core import * 4 | from theHarvester.discovery import otxsearch 5 | import requests 6 | import pytest 7 | 8 | 9 | class TestOtx(object): 10 | @staticmethod 11 | def domain() -> str: 12 | return 'metasploit.com' 13 | 14 | def test_api(self): 15 | base_url = f'https://otx.alienvault.com/api/v1/indicators/domain/{TestOtx.domain()}/passive_dns' 16 | headers = {'User-Agent': Core.get_user_agent()} 17 | request = requests.get(base_url, headers=headers) 18 | assert request.status_code == 200 19 | 20 | def test_search(self): 21 | search = otxsearch.SearchOtx(TestOtx.domain()) 22 | search.process() 23 | assert isinstance(search.get_hostnames(), set) 24 | 25 | def test_search_no_results(self): 26 | search = otxsearch.SearchOtx('radiant.eu') 27 | search.process() 28 | assert len(search.get_hostnames()) == 0 29 | 30 | 31 | if __name__ == '__main__': 32 | pytest.main() 33 | -------------------------------------------------------------------------------- /theHarvester/discovery/threatcrowd.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import grequests 4 | 5 | 6 | class SearchThreatcrowd: 7 | 8 | def __init__(self, word): 9 | self.word = word.replace(' ', '%20') 10 | self.results = "" 11 | self.totalresults = "" 12 | 13 | def do_search(self): 14 | base_url = f'https://www.threatcrowd.org/searchApi/v2/domain/report/?domain={self.word}' 15 | headers = {'User-Agent': Core.get_user_agent()} 16 | try: 17 | request = grequests.get(base_url, headers=headers) 18 | data = grequests.map([request]) 19 | self.results = data[0].content.decode('UTF-8') 20 | except Exception as e: 21 | print(e) 22 | self.totalresults += self.results 23 | 24 | def get_hostnames(self): 25 | return myparser.Parser(self.results, self.word).hostnames() 26 | 27 | def process(self): 28 | self.do_search() 29 | self.get_hostnames() 30 | print('\tSearching results.') 31 | -------------------------------------------------------------------------------- /theHarvester/discovery/spyse.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | import requests 4 | 5 | 6 | class SearchSpyse: 7 | 8 | def __init__(self, word): 9 | self.word = word 10 | self.key = Core.spyse_key() 11 | if self.key is None: 12 | raise MissingKey(True) 13 | self.results = '' 14 | self.totalresults = '' 15 | 16 | def do_search(self): 17 | try: 18 | base_url = f'https://api.spyse.com/v1/subdomains?domain={self.word}&api_token={self.key}&page=2' 19 | headers = {'User-Agent': Core.get_user_agent()} 20 | request = requests.get(base_url, headers=headers) 21 | self.results = request.json() 22 | # self.totalresults += self.results 23 | 24 | except Exception as e: 25 | print(f'An exception has occurred: {e}') 26 | 27 | def get_hostnames(self): 28 | return self.totalresults 29 | 30 | def process(self): 31 | self.do_search() 32 | print('\tSearching results.') 33 | -------------------------------------------------------------------------------- /wordlists/dorks.txt: -------------------------------------------------------------------------------- 1 | login.html 2 | 
administrator/login.%XT% 3 | admin_area/login.%XT% 4 | intext:@ 5 | inurl: 6 | intitle: 7 | intext: 8 | sysadm/ 9 | administratoraccounts/ 10 | usr/ 11 | root/ 12 | secret/ 13 | admin/login.%XT% 14 | moderator/login.%XT% 15 | login%XT% 16 | logout%XT% 17 | super_index%XT% 18 | super_login%XT% 19 | supermanager%XT% 20 | superuser%XT% 21 | inurl:/publications.asp?type= 22 | intitle:"Index of" .bash_history 23 | intitle:"index of" members OR accounts 24 | inurl:section.php?id= 25 | =inurl:/filedown.php?file= 26 | inurl:/shared/help.php?page= 27 | inurl:index.php?load= 28 | inurl:home.php?pagina= 29 | index.php?mode= 30 | intitle:"index of" +myd size 31 | inurl:public 32 | intitle:index.of inbox 33 | intext:"Storage Management Server for" intitle:"Server Administration" 34 | inurl:"gs/adminlogin.aspx" 35 | "http://*:*@www" 36 | =enable password | secret "current configuration" -intext:the 37 | wwwboard WebAdmin inurl:passwd.txt wwwboard|webadmin 38 | robots.txt 39 | php-addressbook "This is the addressbook for *" -warning 40 | intitle:"index of" members OR accounts 41 | -------------------------------------------------------------------------------- /tests/discovery/test_certspotter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | from theHarvester.lib.core import * 4 | from theHarvester.discovery import certspottersearch 5 | import requests 6 | import pytest 7 | 8 | 9 | class TestCertspotter(object): 10 | @staticmethod 11 | def domain() -> str: 12 | return 'metasploit.com' 13 | 14 | def test_api(self): 15 | base_url = f'https://api.certspotter.com/v1/issuances?domain={TestCertspotter.domain()}&expand=dns_names' 16 | headers = {'User-Agent': Core.get_user_agent()} 17 | request = requests.get(base_url, headers=headers) 18 | assert request.status_code == 200 19 | 20 | def test_search(self): 21 | search = certspottersearch.SearchCertspoter(TestCertspotter.domain()) 22 | search.process() 23 | assert isinstance(search.get_hostnames(), set) 24 | 25 | def test_search_no_results(self): 26 | search = certspottersearch.SearchCertspoter('radiant.eu') 27 | search.process() 28 | assert len(search.get_hostnames()) == 0 29 | 30 | 31 | if __name__ == '__main__': 32 | pytest.main() 33 | -------------------------------------------------------------------------------- /theHarvester/parsers/intelxparser.py: -------------------------------------------------------------------------------- 1 | class Parser: 2 | 3 | def __init__(self): 4 | self.emails = set() 5 | self.hosts = set() 6 | 7 | def parse_dictionaries(self, results: dict) -> tuple: 8 | """ 9 | Parse method to parse json results 10 | :param results: Dictionary containing a list of dictionaries known as selectors 11 | :return: tuple of emails and hosts 12 | """ 13 | if results is not None: 14 | for dictionary in results["selectors"]: 15 | field = dictionary['selectorvalue'] 16 | if '@' in field: 17 | self.emails.add(field) 18 | else: 19 | field = str(field) 20 | if 'http' in field or 'https' in field: 21 | if field[:5] == 'https': 22 | field = field[8:] 23 | else: 24 | field = field[7:] 25 | self.hosts.add(field.replace(')', '').replace(',', '')) 26 | return self.emails, self.hosts 27 | return None, None 28 | -------------------------------------------------------------------------------- /theHarvester/discovery/port_scanner.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import threading 3 | 4 | 5 | class PortScan: 
6 | 7 | def __init__(self, host, ports): 8 | self.threads = 25 9 | self.host = host 10 | self.ports = ports 11 | self.lock = threading.BoundedSemaphore(value=self.threads) 12 | 13 | def port_scanner(self, host, ports): 14 | openports = [] 15 | self.lock.acquire() 16 | for port in ports: 17 | try: 18 | connect = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 19 | connect.settimeout(2) 20 | result = connect.connect_ex((host, int(port))) 21 | if result == 0: 22 | openports.append(port) 23 | connect.close() 24 | except Exception as e: 25 | print(e) 26 | self.lock.release() 27 | 28 | if(len(self.ports)) == 0: 29 | print("No ports found on host: {0}".format(host)) 30 | 31 | return openports 32 | 33 | def process(self): 34 | ports = self.port_scanner(self.host, self.ports) 35 | return ports 36 | -------------------------------------------------------------------------------- /theHarvester/discovery/baidusearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import grequests 4 | 5 | 6 | class SearchBaidu: 7 | 8 | def __init__(self, word, limit): 9 | self.word = word 10 | self.total_results = "" 11 | self.server = 'www.baidu.com' 12 | self.hostname = 'www.baidu.com' 13 | self.limit = limit 14 | 15 | def do_search(self): 16 | headers = { 17 | 'Host': self.hostname, 18 | 'User-agent': Core.get_user_agent() 19 | } 20 | base_url = f'https://{self.server}/s?wd=%40{self.word}&pnxx&oq={self.word}' 21 | urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit] 22 | req = (grequests.get(url, headers=headers, timeout=5) for url in urls) 23 | responses = grequests.imap(req, size=5) 24 | for response in responses: 25 | self.total_results += response.content.decode('UTF-8') 26 | 27 | def process(self): 28 | self.do_search() 29 | 30 | def get_emails(self): 31 | rawres = myparser.Parser(self.total_results, self.word) 32 | return rawres.emails() 33 | 34 | def get_hostnames(self): 35 | rawres = myparser.Parser(self.total_results, self.word) 36 | return rawres.hostnames() 37 | -------------------------------------------------------------------------------- /theHarvester/discovery/huntersearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | from theHarvester.parsers import myparser 4 | import grequests 5 | 6 | 7 | class SearchHunter: 8 | 9 | def __init__(self, word, limit, start): 10 | self.word = word 11 | self.limit = limit 12 | self.start = start 13 | self.key = Core.hunter_key() 14 | if self.key is None: 15 | raise MissingKey(True) 16 | self.total_results = "" 17 | self.counter = start 18 | self.database = f'https://api.hunter.io/v2/domain-search?domain={word}&api_key={self.key}&limit={self.limit}' 19 | 20 | def do_search(self): 21 | request = grequests.get(self.database) 22 | response = grequests.map([request]) 23 | self.total_results = response[0].content.decode('UTF-8') 24 | 25 | def process(self): 26 | self.do_search() # Only need to do it once. 
27 | 28 | def get_emails(self): 29 | rawres = myparser.Parser(self.total_results, self.word) 30 | return rawres.emails() 31 | 32 | def get_hostnames(self): 33 | rawres = myparser.Parser(self.total_results, self.word) 34 | return rawres.hostnames() 35 | 36 | def get_profiles(self): 37 | rawres = myparser.Parser(self.total_results, self.word) 38 | return rawres.profiles() 39 | -------------------------------------------------------------------------------- /theHarvester/discovery/dogpilesearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import grequests 4 | 5 | 6 | class SearchDogpile: 7 | 8 | def __init__(self, word, limit): 9 | self.word = word 10 | self.total_results = "" 11 | self.server = 'www.dogpile.com' 12 | self.hostname = 'www.dogpile.com' 13 | self.limit = limit 14 | 15 | def do_search(self): 16 | # Dogpile is hardcoded to return 10 results. 17 | try: 18 | headers = {'User-agent': Core.get_user_agent()} 19 | base_url = f'https://{self.server}/search/web?qsi=xx&q=%40{self.word}' 20 | urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit] 21 | req = (grequests.get(url, headers=headers, verify=False, timeout=5) for url in urls) 22 | responses = grequests.imap(req, size=5) 23 | for response in responses: 24 | self.total_results += response.content.decode('UTF-8') 25 | except Exception as e: 26 | print(f'Error Occurred: {e}') 27 | 28 | def process(self): 29 | self.do_search() 30 | 31 | def get_emails(self): 32 | rawres = myparser.Parser(self.total_results, self.word) 33 | return rawres.emails() 34 | 35 | def get_hostnames(self): 36 | rawres = myparser.Parser(self.total_results, self.word) 37 | return rawres.hostnames() 38 | -------------------------------------------------------------------------------- /theHarvester/discovery/otxsearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | import json 3 | import grequests 4 | import re 5 | 6 | 7 | class SearchOtx: 8 | 9 | def __init__(self, word): 10 | self.word = word 11 | self.results = '' 12 | self.totalresults = '' 13 | self.totalhosts = set() 14 | self.totalips = set() 15 | 16 | def do_search(self): 17 | base_url = f'https://otx.alienvault.com/api/v1/indicators/domain/{self.word}/passive_dns' 18 | headers = {'User-Agent': Core.get_user_agent()} 19 | try: 20 | request = grequests.get(base_url, headers=headers) 21 | data = grequests.map([request]) 22 | self.results = data[0].content.decode('UTF-8') 23 | except Exception as e: 24 | print(e) 25 | 26 | self.totalresults += self.results 27 | dct = json.loads(self.totalresults) 28 | self.totalhosts: set = {host['hostname'] for host in dct['passive_dns']} 29 | # filter out ips that are just called NXDOMAIN 30 | self.totalips: set = {ip['address'] for ip in dct['passive_dns'] 31 | if re.match(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$", ip['address'])} 32 | 33 | def get_hostnames(self) -> set: 34 | return self.totalhosts 35 | 36 | def get_ips(self) -> set: 37 | return self.totalips 38 | 39 | def process(self): 40 | self.do_search() 41 | print('\tSearching results.') 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from theHarvester.lib.core import Core 3 | 4 | with open('README.md', 'r') as fh: 5 
| long_description = fh.read() 6 | 7 | setuptools.setup( 8 | name='theHarvester', 9 | version=Core.version(), 10 | author="Christian Martorella", 11 | author_email="cmartorella@edge-security.com", 12 | description="theHarvester is a very simple, yet effective tool designed to be used in the early stages of a penetration test", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/laramies/theHarvester", 16 | packages=setuptools.find_packages(exclude=['tests']), 17 | entry_points={ 18 | 'console_scripts': [ 19 | 'theHarvester = theHarvester.__main__:entry_point' 20 | ] 21 | }, 22 | 23 | classifiers=[ 24 | "Programming Language :: Python :: 3", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", 28 | "Operating System :: OS Independent", 29 | ], 30 | data_files=[ 31 | ('/etc/theHarvester', [ 32 | 'wordlists/general/common.txt', 33 | 'wordlists/dns-big.txt', 34 | 'wordlists/dns-names.txt', 35 | 'wordlists/dorks.txt', 36 | 'wordlists/names_small.txt', 37 | 'api-keys.yaml' 38 | ] 39 | ) 40 | ], 41 | ) 42 | -------------------------------------------------------------------------------- /theHarvester/discovery/dnssearch.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import dns.resolver 3 | 4 | # TODO: need big focus on performance and results parsing, now does the basic. 5 | 6 | 7 | class DnsForce: 8 | 9 | def __init__(self, domain, dnsserver, verbose=False): 10 | self.domain = domain 11 | self.subdo = False 12 | self.verbose = verbose 13 | try: 14 | with open('wordlists/dns-names.txt', 'r') as file: 15 | self.list = file.readlines() 16 | except FileNotFoundError: 17 | with open('/etc/theHarvester/dns-names.txt', 'r') as file: 18 | self.list = file.readlines() 19 | 20 | def run(self, host): 21 | hostname = str(host.split('\n')[0]) + '.' + str(self.domain) 22 | if self.verbose: 23 | esc = chr(27) 24 | sys.stdout.write(esc + '[2K' + esc + '[G') 25 | sys.stdout.write('\r' + hostname + ' - ') 26 | sys.stdout.flush() 27 | try: 28 | answer = dns.resolver.query(hostname, 'A') 29 | print(answer.canonical_name) 30 | return answer.canonical_name # TODO: need rework all this results 31 | 32 | except Exception: 33 | pass 34 | 35 | def process(self): 36 | results = [] 37 | for entry in self.list: 38 | host = self.run(entry) 39 | if host is not None: 40 | # print(' : ' + host.split(':')[1]) 41 | results.append(host) 42 | return results 43 | -------------------------------------------------------------------------------- /theHarvester/parsers/securitytrailsparser.py: -------------------------------------------------------------------------------- 1 | class Parser: 2 | 3 | def __init__(self, word, text): 4 | self.word = word 5 | self.text = text 6 | self.hostnames = set() 7 | self.ips = set() 8 | 9 | def parse_text(self): 10 | sub_domain_flag = 0 11 | self.text = str(self.text).splitlines() 12 | # Split lines to get a list of lines. 13 | for index in range(0, len(self.text)): 14 | line = self.text[index].strip() 15 | if '"ip":' in line: 16 | # Extract IP. 
17 | ip = '' 18 | for ch in line[7:]: 19 | if ch == '"': 20 | break 21 | else: 22 | ip += ch 23 | self.ips.add(ip) 24 | elif '"subdomains":' in line: 25 | # subdomains start here so set flag to 1 26 | sub_domain_flag = 1 27 | continue 28 | elif sub_domain_flag > 0: 29 | if ']' in line: 30 | sub_domain_flag = 0 31 | else: 32 | if 'www' in self.word: 33 | self.word = str(self.word).replace('www.', '').replace('www', '') 34 | # Remove www from word if entered 35 | self.hostnames.add(str(line).replace('"', '').replace(',', '') + '.' + self.word) 36 | else: 37 | continue 38 | return list(self.ips), list(self.hostnames) 39 | -------------------------------------------------------------------------------- /theHarvester/discovery/dnsdumpster.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import requests 4 | 5 | 6 | class SearchDnsDumpster: 7 | 8 | def __init__(self, word): 9 | self.word = word.replace(' ', '%20') 10 | self.results = "" 11 | self.totalresults = "" 12 | self.server = 'dnsdumpster.com' 13 | 14 | def do_search(self): 15 | try: 16 | agent = Core.get_user_agent() 17 | headers = {'User-Agent': agent} 18 | session = requests.session() 19 | # create a session to properly verify 20 | url = f'https://{self.server}' 21 | request = session.get(url, headers=headers) 22 | cookies = str(request.cookies) 23 | # extract csrftoken from cookies 24 | csrftoken = '' 25 | for ch in cookies.split("=")[1]: 26 | if ch == ' ': 27 | break 28 | csrftoken += ch 29 | data = { 30 | 'Cookie': f'csfrtoken={csrftoken}', 'csrfmiddlewaretoken': csrftoken, 'targetip': self.word} 31 | headers['Referer'] = url 32 | post_req = session.post(url, headers=headers, data=data) 33 | self.results = post_req.text 34 | except Exception as e: 35 | print(f'An exception occured: {e}') 36 | self.totalresults += self.results 37 | 38 | def get_hostnames(self): 39 | rawres = myparser.Parser(self.totalresults, self.word) 40 | return rawres.hostnames() 41 | 42 | def process(self): 43 | self.do_search() # Only need to do it once. 
44 | -------------------------------------------------------------------------------- /theHarvester/discovery/twittersearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import grequests 4 | import re 5 | 6 | 7 | class SearchTwitter: 8 | 9 | def __init__(self, word, limit): 10 | self.word = word.replace(' ', '%20') 11 | self.results = "" 12 | self.totalresults = "" 13 | self.server = 'www.google.com' 14 | self.quantity = '100' 15 | self.limit = int(limit) 16 | self.counter = 0 17 | 18 | def do_search(self): 19 | base_url = f'https://{self.server}/search?num=100&start=xx&hl=en&meta=&q=site%3Atwitter.com%20intitle%3A%22on+Twitter%22%20{self.word}' 20 | headers = {'User-Agent': Core.get_user_agent()} 21 | try: 22 | urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit] 23 | request = (grequests.get(url, headers=headers) for url in urls) 24 | response = grequests.imap(request, size=5) 25 | for entry in response: 26 | self.totalresults += entry.content.decode('UTF-8') 27 | except Exception as error: 28 | print(error) 29 | 30 | def get_people(self): 31 | rawres = myparser.Parser(self.totalresults, self.word) 32 | to_parse = rawres.people_twitter() 33 | # fix invalid handles that look like @user other_output 34 | handles = set() 35 | for handle in to_parse: 36 | result = re.search(r'^@?(\w){1,15}', handle) 37 | if result: 38 | handles.add(result.group(0)) 39 | return handles 40 | 41 | def process(self): 42 | self.do_search() 43 | -------------------------------------------------------------------------------- /tests/discovery/test_linkedin_links.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | from theHarvester.discovery import linkedinsearch 4 | from theHarvester.discovery.constants import splitter 5 | import pytest 6 | import os 7 | import re 8 | 9 | 10 | class TestGetLinks(object): 11 | 12 | def test_splitter(self): 13 | results = [ 14 | 'https://www.linkedin.com/in/don-draper-b1045618', 15 | 'https://www.linkedin.com/in/don-draper-b59210a', 16 | 'https://www.linkedin.com/in/don-draper-b5bb50b3', 17 | 'https://www.linkedin.com/in/don-draper-b83ba26', 18 | 'https://www.linkedin.com/in/don-draper-b854a51' 19 | ] 20 | filtered_results = splitter(results) 21 | assert len(filtered_results) == 1 22 | 23 | def test_get_links(self): 24 | search = linkedinsearch.SearchLinkedin("facebook.com", '100') 25 | search.process() 26 | links = search.get_links() 27 | assert type(links) == list 28 | 29 | def test_links_linkedin(self): 30 | dir_path = os.path.dirname(os.path.realpath(__file__)) 31 | mock_response = open(dir_path + "/test_linkedin_links.txt") 32 | mock_response_content = mock_response.read() 33 | mock_response.close() 34 | reg_links = re.compile(r"url=https:\/\/www\.linkedin.com(.*?)&") 35 | temp = reg_links.findall(mock_response_content) 36 | resul = [] 37 | for regex_item in temp: 38 | stripped_url = regex_item.replace("url=", "") 39 | resul.append("https://www.linkedin.com" + stripped_url) 40 | assert set(resul) 41 | 42 | 43 | if __name__ == '__main__': 44 | pytest.main() 45 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | language: python 3 | cache: pip 4 | matrix: 5 | include: 6 | - python: 
'3.7' 7 | env: TEST_SUITE=suite_3_7 8 | - python: '3.8' 9 | env: TEST_SUITE=suite_3_8 10 | before_install: 11 | - pip install pipenv 12 | - pipenv install --dev 13 | install: 14 | - python setup.py test 15 | script: 16 | - python theHarvester.py -d metasploit.com -b baidu,bing,certspotter,crtsh,dnsdumpster,dogpile,duckduckgo,exalead,linkedin,netcraft,otx,intelx,threatcrowd,trello,twitter,virustotal,yahoo 17 | -l 200 18 | - pytest 19 | - flake8 . --count --show-source --statistics 20 | - mypy --pretty theHarvester/discovery/*.py 21 | notifications: 22 | email: false 23 | slack: 24 | secure: DQXvpVqLJ1NKa4zOVrrLuHjd5yCY8tdLm4QjSILc5g7NGN5QY3wVmC3m7KWq3RsqdepeiJbd3mgLYhfo6TA/tAaZxEYXKEdafWGF7ayJcEJS/fn0GuLqhOaS/PzRYSeBMQH5KodfvJQpVFzfHPj9AoIHOrHVH3x192RzIS3hRyR8kZgSCrTgxiDjTeWUzLvg/w7ikEVqVFMh73cQJegVA6A5mkHeUf20NmKzo+e0fGU7Sktk38YyNOdi3fbAiACR6ah1clSB7HaBg3VDiAmQCE8O2tftgcU6ihhnTi6d4i8Lf/traQznQ3mvSbFcw5Pedo8eXaLDhAuqwzMb3uWE9jr+zLlDa8s6+ADNVO/ISu+xV1zpnTdcjATKHaBfsNFntLij1hnyeuTEbhLRAB1T7wc+uAWVlJkkDZK08610a8NWalRtV17U8u8lJbcKWQ4IBnclG6DE+zpgsMHZpcswyeMF092mRZzUbgXG9+nbRcp1JqhgiLJUZdg5jXX7NoLdk7irbrZU4aTFqhbz3P3NexafFDXZEsp1Z1eY0uppRsd0vt8E8rX/HMw9OWHgkg7GDATZSqMu1kgJoSQQg1U3ApXacsl6WBAndLdYF+MyHJMLgzewdAJ4y4qvVMb/VkTJ8Q6PicjwlqyEP5PRLZk7fech4kuTVUqyuTibd5t8D5k= 25 | on_success: always 26 | on_failure: always 27 | template: 28 | - "Repo `%{repository_slug}` *%{result}* build (<%{build_url}|#%{build_number}>) for commit (<%{compare_url}|%{commit}>) on branch `%{branch}`." 29 | - "Execution time: *%{duration}*" 30 | - "Message: %{message}" 31 | - "By: %{author}" -------------------------------------------------------------------------------- /theHarvester/discovery/yahoosearch.py: -------------------------------------------------------------------------------- 1 | import grequests 2 | from theHarvester.lib.core import * 3 | from theHarvester.parsers import myparser 4 | 5 | 6 | class SearchYahoo: 7 | 8 | def __init__(self, word, limit): 9 | self.word = word 10 | self.total_results = "" 11 | self.server = 'search.yahoo.com' 12 | self.limit = limit 13 | 14 | def do_search(self): 15 | base_url = f'https://{self.server}/search?p=%40{self.word}&b=xx&pz=10' 16 | headers = { 17 | 'Host': self.server, 18 | 'User-agent': Core.get_user_agent() 19 | } 20 | urls = [base_url.replace("xx", str(num)) for num in range(0, self.limit, 10) if num <= self.limit] 21 | request = (grequests.get(url, headers=headers) for url in urls) 22 | response = grequests.imap(request, size=5) 23 | for entry in response: 24 | self.total_results += entry.content.decode('UTF-8') 25 | 26 | def process(self): 27 | self.do_search() 28 | 29 | def get_emails(self): 30 | rawres = myparser.Parser(self.total_results, self.word) 31 | toparse_emails = rawres.emails() 32 | emails = set() 33 | # strip out numbers and dashes for emails that look like xxx-xxx-xxxemail@host.tld 34 | for email in toparse_emails: 35 | email = str(email) 36 | if '-' in email and email[0].isdigit() and email.index('-') <= 9: 37 | while email[0] == '-' or email[0].isdigit(): 38 | email = email[1:] 39 | emails.add(email) 40 | return list(emails) 41 | 42 | def get_hostnames(self): 43 | rawres = myparser.Parser(self.total_results, self.word) 44 | return rawres.hostnames() 45 | -------------------------------------------------------------------------------- /theHarvester/discovery/shodansearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | 
from shodan import exception 4 | from shodan import Shodan 5 | 6 | 7 | class SearchShodan: 8 | 9 | def __init__(self): 10 | self.key = Core.shodan_key() 11 | if self.key is None: 12 | raise MissingKey(True) 13 | self.api = Shodan(self.key) 14 | self.hostdatarow = [] 15 | 16 | def search_ip(self, ip): 17 | try: 18 | ipaddress = ip 19 | results = self.api.host(ipaddress) 20 | technologies = [] 21 | servicesports = [] 22 | for result in results['data']: 23 | try: 24 | for key in result['http']['components'].keys(): 25 | technologies.append(key) 26 | except KeyError: 27 | pass 28 | port = str(result.get('port')) 29 | product = str(result.get('product')) 30 | servicesports.append(str(product) + ':' + str(port)) 31 | technologies = list(set(technologies)) 32 | self.hostdatarow = [ 33 | str(results.get('ip_str')), str(results.get('hostnames')).strip('[]\''), 34 | str(results.get('org')), str(servicesports).replace('\'', '').strip('[]'), 35 | str(technologies).replace('\'', '').strip('[]')] 36 | except exception.APIError: 37 | print(f'{ipaddress}: Not in Shodan') 38 | self.hostdatarow = [ipaddress, "Not in Shodan", "Not in Shodan", "Not in Shodan", "Not in Shodan"] 39 | 40 | except Exception as e: 41 | print(f'Error occurred in the Shodan IP search module: {e}') 42 | finally: 43 | return self.hostdatarow 44 | -------------------------------------------------------------------------------- /theHarvester/lib/hostchecker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Created by laramies on 2008-08-21. 5 | Revised to use aiodns & asyncio on 2019-09-23 6 | """ 7 | 8 | import aiodns 9 | import asyncio 10 | import socket 11 | from typing import Tuple, Any 12 | 13 | 14 | class Checker: 15 | 16 | def __init__(self, hosts: list): 17 | self.hosts = hosts 18 | self.realhosts: list = [] 19 | self.addresses: set = set() 20 | 21 | @staticmethod 22 | async def query(host, resolver) -> Tuple[str, Any]: 23 | try: 24 | result = await resolver.gethostbyname(host, socket.AF_INET) 25 | addresses = result.addresses 26 | if addresses == [] or addresses is None or result is None: 27 | return f"{host}:", tuple() 28 | else: 29 | return f"{host}:{', '.join(map(str, addresses))}", addresses 30 | except Exception: 31 | return f"{host}:", tuple() 32 | 33 | async def query_all(self, resolver) -> list: 34 | results = await asyncio.gather(*[asyncio.create_task(self.query(host, resolver)) 35 | for host in self.hosts]) 36 | return results 37 | 38 | async def check(self): 39 | loop = asyncio.get_event_loop() 40 | resolver = aiodns.DNSResolver(loop=loop, timeout=4) 41 | results = await self.query_all(resolver) 42 | for host, address in results: 43 | self.realhosts.append(host) 44 | self.addresses.update({addr for addr in address}) 45 | # address may be a list of ips 46 | # and do a set comprehension to remove duplicates 47 | self.realhosts.sort() 48 | self.addresses = list(self.addresses) 49 | return self.realhosts, self.addresses 50 | -------------------------------------------------------------------------------- /theHarvester/discovery/suip.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from bs4 import BeautifulSoup 3 | import requests 4 | 5 | 6 | class SearchSuip: 7 | 8 | def __init__(self, word: str): 9 | self.word: str = word 10 | self.results: str = '' 11 | self.totalresults: str = '' 12 | self.totalhosts: set = set() 13 | self.totalips: set = set() 
14 | 15 | def do_search(self): 16 | headers = {'User-Agent': Core.get_user_agent()} 17 | params = ( 18 | ('act', 'subfinder'), 19 | ) 20 | 21 | data = { 22 | 'url': self.word.replace('www.', ''), 23 | 'Submit1': 'Submit' 24 | } 25 | response = requests.post('https://suip.biz/', headers=headers, params=params, data=data) 26 | soup = BeautifulSoup(response.text, 'html.parser') 27 | hosts: list = str(soup.find('pre')).splitlines() 28 | self.clean_hosts(hosts) 29 | params = ( 30 | ('act', 'amass'), 31 | ) 32 | # change act to amass now 33 | response = requests.post('https://suip.biz/', headers=headers, params=params, data=data) 34 | soup = BeautifulSoup(response.text, 'html.parser') 35 | hosts: list = str(soup.find('pre')).splitlines() 36 | self.clean_hosts(hosts) 37 | 38 | def get_hostnames(self) -> set: 39 | return self.totalhosts 40 | 41 | def process(self): 42 | self.do_search() 43 | print('\tSearching results.') 44 | 45 | def clean_hosts(self, soup_hosts): 46 | for host in soup_hosts: 47 | host = str(host).strip() 48 | if len(host) > 1 and 'pre' not in host: 49 | if host[0] == '.': 50 | self.totalhosts.add(host[1:]) 51 | else: 52 | self.totalhosts.add(host) 53 | -------------------------------------------------------------------------------- /theHarvester/discovery/linkedinsearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | from theHarvester.parsers import myparser 4 | import requests 5 | import time 6 | 7 | 8 | class SearchLinkedin: 9 | 10 | def __init__(self, word, limit): 11 | self.word = word.replace(' ', '%20') 12 | self.results = "" 13 | self.totalresults = "" 14 | self.server = 'www.google.com' 15 | self.quantity = '100' 16 | self.limit = int(limit) 17 | self.counter = 0 18 | 19 | def do_search(self): 20 | urly = 'http://' + self.server + '/search?num=100&start=' + str(self.counter) + '&hl=en&meta=&q=site%3Alinkedin.com/in%20' + self.word 21 | try: 22 | headers = {'User-Agent': Core.get_user_agent()} 23 | r = requests.get(urly, headers=headers) 24 | self.results = r.text 25 | if search(self.results): 26 | try: 27 | self.results = google_workaround(urly) 28 | if isinstance(self.results, bool): 29 | print('Google is blocking your ip and the workaround, returning') 30 | return 31 | except Exception: 32 | # google blocked, no useful result 33 | return 34 | except Exception as e: 35 | print(e) 36 | time.sleep(getDelay()) 37 | self.totalresults += self.results 38 | 39 | def get_people(self): 40 | rawres = myparser.Parser(self.totalresults, self.word) 41 | return rawres.people_linkedin() 42 | 43 | def get_links(self): 44 | links = myparser.Parser(self.totalresults, self.word) 45 | return splitter(links.links_linkedin()) 46 | 47 | def process(self): 48 | while self.counter < self.limit: 49 | self.do_search() 50 | time.sleep(getDelay()) 51 | self.counter += 100 52 | print(f'\tSearching {self.counter} results.') 53 | -------------------------------------------------------------------------------- /theHarvester/discovery/securitytrailssearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | from theHarvester.parsers import securitytrailsparser 4 | import requests 5 | import time 6 | 7 | 8 | class SearchSecuritytrail: 9 | 10 | def __init__(self, word): 11 | self.word = word 12 | self.key = Core.security_trails_key() 13 | if self.key 
is None: 14 | raise MissingKey(True) 15 | self.results = "" 16 | self.totalresults = "" 17 | self.api = 'https://api.securitytrails.com/v1/' 18 | self.info = () 19 | 20 | def authenticate(self): 21 | # Method to authenticate API key before sending requests. 22 | headers = {'APIKEY': self.key} 23 | url = self.api + 'ping' 24 | r = requests.get(url, headers=headers).text 25 | if 'False' in r or 'Invalid authentication' in r: 26 | print('\tKey could not be authenticated exiting program.') 27 | time.sleep(2) 28 | 29 | def do_search(self): 30 | url = '' 31 | headers = {} 32 | try: 33 | # https://api.securitytrails.com/v1/domain/domain.com 34 | url = self.api + 'domain/' + self.word 35 | headers = {'APIKEY': self.key} 36 | r = requests.get(url, headers=headers) 37 | time.sleep(2) # Not random delay because 2 seconds is required due to rate limit. 38 | except Exception as e: 39 | print(e) 40 | self.results = r.text 41 | self.totalresults += self.results 42 | url += '/subdomains' # Get subdomains now. 43 | r = requests.get(url, headers=headers) 44 | time.sleep(2) 45 | self.results = r.text 46 | self.totalresults += self.results 47 | 48 | def process(self): 49 | self.authenticate() 50 | self.do_search() 51 | parser = securitytrailsparser.Parser(word=self.word, text=self.totalresults) 52 | self.info = parser.parse_text() 53 | # Create parser and set self.info to tuple returned from parsing text. 54 | print('\tDone Searching Results') 55 | 56 | def get_ips(self): 57 | return self.info[0] 58 | 59 | def get_hostnames(self): 60 | return self.info[1] 61 | -------------------------------------------------------------------------------- /theHarvester/discovery/intelxsearch.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.lib.core import * 3 | from theHarvester.parsers import intelxparser 4 | import requests 5 | import time 6 | 7 | 8 | class SearchIntelx: 9 | 10 | def __init__(self, word, limit): 11 | self.word = word 12 | # default key is public key 13 | self.key = Core.intelx_key() 14 | if self.key is None: 15 | raise MissingKey(True) 16 | self.database = 'https://public.intelx.io/' 17 | self.results = None 18 | self.info = () 19 | self.limit = limit 20 | 21 | def do_search(self): 22 | try: 23 | user_agent = Core.get_user_agent() 24 | headers = {'User-Agent': user_agent, 'x-key': self.key} 25 | # data is json that corresponds to what we are searching for, sort:2 means sort by most relevant 26 | data = f'{{"term": "{self.word}", "maxresults": {self.limit}, "media": 0, "sort": 2 , "terminate": []}}' 27 | r = requests.post(f'{self.database}phonebook/search', data=data, headers=headers) 28 | 29 | if r.status_code == 400: 30 | raise Exception('Invalid json was passed in.') 31 | time.sleep(1) 32 | 33 | # grab uuid to send get request to fetch data 34 | uuid = r.json()['id'] 35 | url = f'{self.database}phonebook/search/result?id={uuid}&offset=0&limit={self.limit}' 36 | r = requests.get(url, headers=headers) 37 | time.sleep(1) 38 | 39 | # TODO: add in future grab status from r.text and check if more results can be gathered 40 | if r.status_code != 200: 41 | raise Exception('Error occurred while searching intelx.') 42 | self.results = r.json() 43 | except Exception as e: 44 | print(f'An exception has occurred: {e}') 45 | 46 | def process(self): 47 | self.do_search() 48 | intelx_parser = intelxparser.Parser() 49 | self.info = intelx_parser.parse_dictionaries(self.results) 50 | # Create parser and set 
self.info to tuple returned from parsing text. 51 | 52 | def get_emails(self): 53 | return self.info[0] 54 | 55 | def get_hostnames(self): 56 | return self.info[1] 57 | -------------------------------------------------------------------------------- /theHarvester/discovery/netcraft.py: -------------------------------------------------------------------------------- 1 | from theHarvester.lib.core import * 2 | from theHarvester.parsers import myparser 3 | import requests 4 | import hashlib 5 | import urllib.parse as urllib 6 | import re 7 | 8 | 9 | class SearchNetcraft: 10 | # this module was inspired by sublist3r's netcraft module 11 | 12 | def __init__(self, word): 13 | self.word = word.replace(' ', '%20') 14 | self.totalresults = "" 15 | self.server = 'netcraft.com' 16 | self.base_url = f'https://searchdns.netcraft.com/?restriction=site+ends+with&host={word}' 17 | self.session = requests.session() 18 | self.headers = { 19 | 'User-Agent': Core.get_user_agent() 20 | } 21 | self.timeout = 25 22 | self.domain = f"https://searchdns.netcraft.com/?restriction=site+ends+with&host={self.word}" 23 | 24 | def request(self, url, cookies=None): 25 | cookies = cookies or {} 26 | try: 27 | resp = self.session.get(url, headers=self.headers, timeout=self.timeout, cookies=cookies) 28 | except Exception as e: 29 | print(e) 30 | resp = None 31 | return resp 32 | 33 | def get_next(self, resp): 34 | link_regx = re.compile('Next page') 35 | link = link_regx.findall(resp) 36 | link = re.sub(f'host=.*?{self.word}', f'host={self.domain}', link[0]) 37 | url = f'https://searchdns.netcraft.com{link.replace(" ", "%20")}' 38 | return url 39 | 40 | def create_cookies(self, cookie): 41 | cookies = dict() 42 | cookies_list = cookie[0:cookie.find(';')].split("=") 43 | cookies[cookies_list[0]] = cookies_list[1] 44 | # get js verification response 45 | cookies['netcraft_js_verification_response'] = hashlib.sha1( 46 | urllib.unquote(cookies_list[1]).encode('utf-8')).hexdigest() 47 | return cookies 48 | 49 | def get_cookies(self, headers): 50 | if 'set-cookie' in headers: 51 | cookies = self.create_cookies(headers['set-cookie']) 52 | else: 53 | cookies = {} 54 | return cookies 55 | 56 | def do_search(self): 57 | start_url = self.base_url 58 | resp = self.request(start_url) 59 | cookies = self.get_cookies(resp.headers) 60 | while True: 61 | resp = self.request(self.base_url, cookies).text 62 | self.totalresults += resp 63 | if 'Next page' not in resp or resp is None: 64 | break 65 | self.base_url = self.get_next(resp) 66 | 67 | def get_hostnames(self): 68 | rawres = myparser.Parser(self.totalresults, self.word) 69 | return rawres.hostnames() 70 | 71 | def process(self): 72 | self.do_search() 73 | -------------------------------------------------------------------------------- /theHarvester/discovery/trello.py: -------------------------------------------------------------------------------- 1 | from theHarvester.discovery.constants import * 2 | from theHarvester.parsers import myparser 3 | import grequests 4 | import requests 5 | import random 6 | import time 7 | 8 | 9 | class SearchTrello: 10 | 11 | def __init__(self, word): 12 | self.word = word.replace(' ', '%20') 13 | self.results = "" 14 | self.totalresults = "" 15 | self.server = 'www.google.com' 16 | self.quantity = '100' 17 | self.limit = 300 18 | self.trello_urls = [] 19 | self.hostnames = [] 20 | self.counter = 0 21 | 22 | def do_search(self): 23 | base_url = f'https://{self.server}/search?num=300&start=xx&hl=en&q=site%3Atrello.com%20{self.word}' 24 | urls = 
[base_url.replace("xx", str(num)) for num in range(0, self.limit, 20) if num <= self.limit] 25 | # The step is 20, as that is the most results Google will show per num. 26 | headers = {'User-Agent': googleUA} 27 | for url in urls: 28 | try: 29 | resp = requests.get(url, headers=headers) 30 | self.results = resp.text 31 | if search(self.results): 32 | try: 33 | self.results = google_workaround(base_url) 34 | if isinstance(self.results, bool): 35 | print('Google is blocking your ip and the workaround, returning') 36 | return 37 | except Exception as e: 38 | print(e) 39 | self.totalresults += self.results 40 | time.sleep(getDelay() - .5) 41 | except Exception as e: 42 | print(f'An exception has occurred in trello: {e}') 43 | 44 | def get_emails(self): 45 | rawres = myparser.Parser(self.totalresults, self.word) 46 | return rawres.emails() 47 | 48 | def get_urls(self): 49 | try: 50 | rawres = myparser.Parser(self.totalresults, 'trello.com') 51 | self.trello_urls = set(rawres.urls()) 52 | self.totalresults = '' 53 | # Reset totalresults; before this it held the Google results, from here on it holds the Trello results. 54 | headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])} 55 | # do not change the headers 56 | req = (grequests.get(url, headers=headers, timeout=4) for url in self.trello_urls) 57 | responses = grequests.imap(req, size=8) 58 | for response in responses: 59 | self.totalresults += response.content.decode('UTF-8') 60 | 61 | rawres = myparser.Parser(self.totalresults, self.word) 62 | self.hostnames = rawres.hostnames() 63 | except Exception as e: 64 | print(f'Error occurred: {e}') 65 | 66 | def process(self): 67 | self.do_search() 68 | self.get_urls() 69 | print(f'\tSearching {self.counter} results.') 70 | 71 | def get_results(self) -> tuple: 72 | return self.get_emails(), self.hostnames, self.trello_urls 73 | -------------------------------------------------------------------------------- /theHarvester/discovery/takeover.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | 4 | 5 | class TakeOver: 6 | 7 | def __init__(self, host): 8 | self.host = host 9 | self.results = "" 10 | self.totalresults = "" 11 | self.fingerprints = ["
If you're trying to publish one, read the full documentation to learn how to set up GitHub Pages for your repository, organization, or user account.
", 15 | "If you\'re trying to publish one, read the full documentation to learn how to set up GitHub Pages for your repository, organization, or user account.
", 16 | "Bummer. It looks like the help center that you are trying to reach no longer exists.", 17 | "12 |
| Date | 16 |Domain | 17 |Plugin | 18 |Record type | 19 |Result | 20 ||
| ' + str(i) + " | " 25 | html += '' + str(i) + " | " 26 | html += '' + str(i) + " | " 27 | html += '' + str(i) + " | " 28 | html += '' + str(i) + " | " 29 | html += '
| Date | 38 |Domain | 39 |Plugin | 40 |Record type | 41 |Result | 42 |
| ' + str(i[0]) + " | " 47 | html += '' + str(i[1]) + " | " 48 | html += '' + str(i[2]) + " | " 49 | html += '' + str(i[3]) + " | " 50 | html += '' + str(i[4]) + " | " 51 | html += '
56 |
57 |
58 |
59 | ''' 60 | return html 61 | except Exception as e: 62 | print(f'Error generating the previous scan results HTML code: {e}') 63 | 64 | def generatelatestscanresults(self, latestscanresults): 65 | try: 66 | html = ''' 67 |
69 |
| Date | 73 |Domain | 74 |Plugin | 75 |Record type | 76 |Result | 77 |
| ' + str(i[0]) + " | " 82 | html += '' + str(i[1]) + " | " 83 | html += '' + str(i[2]) + " | " 84 | html += '' + str(i[3]) + " | " 85 | html += '' + str(i[4]) + " | " 86 | html += '
91 |
92 |
93 |
94 | ''' 95 | return html 96 | except Exception as e: 97 | print(f'Error generating the latest scan results HTML code: {e}') 98 | 99 | def beginhtml(self): 100 | html = ''' 101 | 102 | 103 | 104 |
Domains |
124 | Hosts |
125 | IP Addresses |
126 | Vhosts |
127 | Emails |
128 | Shodan |
129 |
''' + str(totalnumberofdomains) + ''' |
132 | ''' + str(totalnumberofhosts) + ''' |
133 | ''' + str(totalnumberofip) + ''' |
134 | ''' + str(totalnumberofvhost) + ''' |
135 | ''' + str(totalnumberofemail) + ''' |
136 | ''' + str(totalnumberofshodan) + ''' |
137 |
141 |
142 | ''' 143 | return html 144 | except Exception as e: 145 | print(f'Error generating dashboard HTML code: {e}') 146 | 147 | def generatepluginscanstatistics(self, scanstatistics): 148 | try: 149 | html = ''' 150 |
152 |
| Domain | 156 |Date | 157 |Recordtype | 158 |Source | 159 |Total results | 160 |
| ' + str(i[0]) + " | " 165 | html += '' + str(i[1]) + " | " 166 | html += '' + str(i[2]) + " | " 167 | html += '' + str(i[3]) + " | " 168 | html += '' + str(i[4]) + " | " 169 | html += '
174 |
175 | ''' 176 | return html 177 | except Exception as e: 178 | print(f'Error generating scan statistics HTML code: {e}') 179 | -------------------------------------------------------------------------------- /theHarvester/lib/stash.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sqlite3 3 | 4 | 5 | class StashManager: 6 | 7 | def __init__(self): 8 | self.db = "stash.sqlite" 9 | self.results = "" 10 | self.totalresults = "" 11 | self.latestscandomain = {} 12 | self.domainscanhistory = [] 13 | self.scanboarddata = {} 14 | self.scanstats = [] 15 | self.latestscanresults = [] 16 | self.previousscanresults = [] 17 | 18 | def do_init(self): 19 | conn = sqlite3.connect(self.db) 20 | c = conn.cursor() 21 | c.execute('CREATE TABLE IF NOT EXISTS results (domain text, resource text, type text, find_date date, source text)') 22 | conn.commit() 23 | conn.close() 24 | return 25 | 26 | def store(self, domain, resource, res_type, source): 27 | self.domain = domain 28 | self.resource = resource 29 | self.type = res_type 30 | self.source = source 31 | self.date = datetime.date.today() 32 | try: 33 | conn = sqlite3.connect(self.db) 34 | c = conn.cursor() 35 | c.execute('INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)', 36 | (self.domain, self.resource, self.type, self.date, self.source)) 37 | conn.commit() 38 | conn.close() 39 | except Exception as e: 40 | print(e) 41 | return 42 | 43 | def store_all(self, domain, all, res_type, source): 44 | self.domain = domain 45 | self.all = all 46 | self.type = res_type 47 | self.source = source 48 | self.date = datetime.date.today() 49 | for x in self.all: 50 | try: 51 | conn = sqlite3.connect(self.db) 52 | c = conn.cursor() 53 | c.execute('INSERT INTO results (domain,resource, type, find_date, source) VALUES (?,?,?,?,?)', 54 | (self.domain, x, self.type, self.date, self.source)) 55 | conn.commit() 56 | conn.close() 57 | except Exception as e: 58 | print(e) 59 | return 60 | 61 | def generatedashboardcode(self, domain): 62 | try: 63 | self.latestscandomain["domain"] = domain 64 | conn = sqlite3.connect(self.db) 65 | c = conn.cursor() 66 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', (domain,)) 67 | data = c.fetchone() 68 | self.latestscandomain["host"] = data[0] 69 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', (domain,)) 70 | data = c.fetchone() 71 | self.latestscandomain["email"] = data[0] 72 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''', (domain,)) 73 | data = c.fetchone() 74 | self.latestscandomain["ip"] = data[0] 75 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', (domain,)) 76 | data = c.fetchone() 77 | self.latestscandomain["vhost"] = data[0] 78 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', (domain,)) 79 | data = c.fetchone() 80 | self.latestscandomain["shodan"] = data[0] 81 | c.execute('''SELECT MAX(find_date) FROM results WHERE domain=?''', (domain,)) 82 | data = c.fetchone() 83 | self.latestscandomain["latestdate"] = data[0] 84 | latestdate = data[0] 85 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', (domain, latestdate,)) 86 | scandetailshost = c.fetchall() 87 | self.latestscandomain["scandetailshost"] = scandetailshost 88 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? 
AND type="email"''', 89 | (domain, latestdate,)) 90 | scandetailsemail = c.fetchall() 91 | self.latestscandomain["scandetailsemail"] = scandetailsemail 92 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', (domain, latestdate,)) 93 | scandetailsip = c.fetchall() 94 | self.latestscandomain["scandetailsip"] = scandetailsip 95 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', 96 | (domain, latestdate,)) 97 | scandetailsvhost = c.fetchall() 98 | self.latestscandomain["scandetailsvhost"] = scandetailsvhost 99 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="shodan"''', 100 | (domain, latestdate,)) 101 | scandetailsshodan = c.fetchall() 102 | self.latestscandomain["scandetailsshodan"] = scandetailsshodan 103 | return self.latestscandomain 104 | except Exception as e: 105 | print(e) 106 | finally: 107 | conn.close() 108 | 109 | def getlatestscanresults(self, domain, previousday=False): 110 | try: 111 | conn = sqlite3.connect(self.db) 112 | if previousday: 113 | try: 114 | c = conn.cursor() 115 | c.execute(''' 116 | SELECT DISTINCT(find_date) 117 | FROM results 118 | WHERE find_date=date('now', '-1 day') and domain=?''', (domain,)) 119 | previousscandate = c.fetchone() 120 | if not previousscandate: # When theHarvester runs first time/day this query will return. 121 | self.previousscanresults = ["No results", "No results", "No results", "No results", "No results"] 122 | else: 123 | c = conn.cursor() 124 | c.execute(''' 125 | SELECT find_date, domain, source, type, resource 126 | FROM results 127 | WHERE find_date=? and domain=? 128 | ORDER BY source,type 129 | ''', (previousscandate[0], domain,)) 130 | results = c.fetchall() 131 | self.previousscanresults = results 132 | return self.previousscanresults 133 | except Exception as e: 134 | print(f'Error in getting the previous scan results from the database: {e}') 135 | else: 136 | try: 137 | c = conn.cursor() 138 | c.execute('''SELECT MAX(find_date) FROM results WHERE domain=?''', (domain,)) 139 | latestscandate = c.fetchone() 140 | c = conn.cursor() 141 | c.execute(''' 142 | SELECT find_date, domain, source, type, resource 143 | FROM results 144 | WHERE find_date=? and domain=? 
145 | ORDER BY source,type 146 | ''', (latestscandate[0], domain,)) 147 | results = c.fetchall() 148 | self.latestscanresults = results 149 | return self.latestscanresults 150 | except Exception as e: 151 | print(f'Error in getting the latest scan results from the database: {e}') 152 | except Exception as e: 153 | print(f'Error connecting to theHarvester database: {e}') 154 | finally: 155 | conn.close() 156 | 157 | def getscanboarddata(self): 158 | try: 159 | conn = sqlite3.connect(self.db) 160 | c = conn.cursor() 161 | c.execute('''SELECT COUNT(*) from results WHERE type="host"''') 162 | data = c.fetchone() 163 | self.scanboarddata["host"] = data[0] 164 | c.execute('''SELECT COUNT(*) from results WHERE type="email"''') 165 | data = c.fetchone() 166 | self.scanboarddata["email"] = data[0] 167 | c.execute('''SELECT COUNT(*) from results WHERE type="ip"''') 168 | data = c.fetchone() 169 | self.scanboarddata["ip"] = data[0] 170 | c.execute('''SELECT COUNT(*) from results WHERE type="vhost"''') 171 | data = c.fetchone() 172 | self.scanboarddata["vhost"] = data[0] 173 | c.execute('''SELECT COUNT(*) from results WHERE type="shodan"''') 174 | data = c.fetchone() 175 | self.scanboarddata["shodan"] = data[0] 176 | c.execute('''SELECT COUNT(DISTINCT(domain)) FROM results ''') 177 | data = c.fetchone() 178 | self.scanboarddata["domains"] = data[0] 179 | return self.scanboarddata 180 | except Exception as e: 181 | print(e) 182 | finally: 183 | conn.close() 184 | 185 | def getscanhistorydomain(self, domain): 186 | try: 187 | conn = sqlite3.connect(self.db) 188 | c = conn.cursor() 189 | c.execute('''SELECT DISTINCT(find_date) FROM results WHERE domain=?''', (domain,)) 190 | dates = c.fetchall() 191 | for date in dates: 192 | c = conn.cursor() 193 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="host" AND find_date=?''', 194 | (domain, date[0])) 195 | counthost = c.fetchone() 196 | c = conn.cursor() 197 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="email" AND find_date=?''', 198 | (domain, date[0])) 199 | countemail = c.fetchone() 200 | c = conn.cursor() 201 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="ip" AND find_date=?''', 202 | (domain, date[0])) 203 | countip = c.fetchone() 204 | c = conn.cursor() 205 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="vhost" AND find_date=?''', 206 | (domain, date[0])) 207 | countvhost = c.fetchone() 208 | c = conn.cursor() 209 | c.execute('''SELECT COUNT(*) from results WHERE domain=? 
AND type="shodan" AND find_date=?''', 210 | (domain, date[0])) 211 | countshodan = c.fetchone() 212 | results = { 213 | "date": str(date[0]), 214 | "hosts": str(counthost[0]), 215 | "email": str(countemail[0]), 216 | "ip": str(countip[0]), 217 | "vhost": str(countvhost[0]), 218 | "shodan": str(countshodan[0]) 219 | } 220 | self.domainscanhistory.append(results) 221 | return self.domainscanhistory 222 | except Exception as e: 223 | print(e) 224 | finally: 225 | conn.close() 226 | 227 | def getpluginscanstatistics(self): 228 | try: 229 | conn = sqlite3.connect(self.db) 230 | c = conn.cursor() 231 | c.execute(''' 232 | SELECT domain,find_date, type, source, count(*) 233 | FROM results 234 | GROUP BY domain, find_date, type, source 235 | ''') 236 | results = c.fetchall() 237 | self.scanstats = results 238 | return self.scanstats 239 | except Exception as e: 240 | print(e) 241 | finally: 242 | conn.close() 243 | 244 | def latestscanchartdata(self, domain): 245 | try: 246 | self.latestscandomain["domain"] = domain 247 | conn = sqlite3.connect(self.db) 248 | c = conn.cursor() 249 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="host"''', (domain,)) 250 | data = c.fetchone() 251 | self.latestscandomain["host"] = data[0] 252 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="email"''', (domain,)) 253 | data = c.fetchone() 254 | self.latestscandomain["email"] = data[0] 255 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="ip"''', (domain,)) 256 | data = c.fetchone() 257 | self.latestscandomain["ip"] = data[0] 258 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="vhost"''', (domain,)) 259 | data = c.fetchone() 260 | self.latestscandomain["vhost"] = data[0] 261 | c.execute('''SELECT COUNT(*) from results WHERE domain=? AND type="shodan"''', (domain,)) 262 | data = c.fetchone() 263 | self.latestscandomain["shodan"] = data[0] 264 | c.execute('''SELECT MAX(find_date) FROM results WHERE domain=?''', (domain,)) 265 | data = c.fetchone() 266 | self.latestscandomain["latestdate"] = data[0] 267 | latestdate = data[0] 268 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="host"''', (domain, latestdate,)) 269 | scandetailshost = c.fetchall() 270 | self.latestscandomain["scandetailshost"] = scandetailshost 271 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="email"''', 272 | (domain, latestdate,)) 273 | scandetailsemail = c.fetchall() 274 | self.latestscandomain["scandetailsemail"] = scandetailsemail 275 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="ip"''', (domain, latestdate,)) 276 | scandetailsip = c.fetchall() 277 | self.latestscandomain["scandetailsip"] = scandetailsip 278 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? AND type="vhost"''', 279 | (domain, latestdate,)) 280 | scandetailsvhost = c.fetchall() 281 | self.latestscandomain["scandetailsvhost"] = scandetailsvhost 282 | c.execute('''SELECT * FROM results WHERE domain=? AND find_date=? 
AND type="shodan"''', 283 | (domain, latestdate,)) 284 | scandetailsshodan = c.fetchall() 285 | self.latestscandomain["scandetailsshodan"] = scandetailsshodan 286 | return self.latestscandomain 287 | except Exception as e: 288 | print(e) 289 | finally: 290 | conn.close() 291 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 
58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "7e45ee0725c108a6f56760e8a6540c9400948fe984b9bb1ba7b29139a753b6ae" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": {}, 8 | "sources": [ 9 | { 10 | "name": "pypi", 11 | "url": "https://pypi.python.org/simple", 12 | "verify_ssl": true 13 | } 14 | ] 15 | }, 16 | "default": { 17 | "aiodns": { 18 | "hashes": [ 19 | "sha256:815fdef4607474295d68da46978a54481dd1e7be153c7d60f9e72773cd38d77d", 20 | "sha256:aaa5ac584f40fe778013df0aa6544bf157799bd3f608364b451840ed2c8688de" 21 | ], 22 | "index": "pypi", 23 | "version": "==2.0.0" 24 | }, 25 | "beautifulsoup4": { 26 | "hashes": [ 27 | "sha256:5279c36b4b2ec2cb4298d723791467e3000e5384a43ea0cdf5d45207c7e97169", 28 | "sha256:6135db2ba678168c07950f9a16c4031822c6f4aec75a65e0a97bc5ca09789931", 29 | "sha256:dcdef580e18a76d54002088602eba453eec38ebbcafafeaabd8cab12b6155d57" 30 | ], 31 | "index": "pypi", 32 | "version": "==4.8.1" 33 | }, 34 | "certifi": { 35 | "hashes": [ 36 | "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", 37 | "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" 38 | ], 39 | "version": "==2019.9.11" 40 | }, 41 | "cffi": { 42 | "hashes": [ 43 | "sha256:08f99e8b38d5134d504aa7e486af8e4fde66a2f388bbecc270cdd1e00fa09ff8", 44 | "sha256:1112d2fc92a867a6103bce6740a549e74b1d320cf28875609f6e93857eee4f2d", 45 | "sha256:1b9ab50c74e075bd2ae489853c5f7f592160b379df53b7f72befcbe145475a36", 46 | "sha256:24eff2997436b6156c2f30bed215c782b1d8fd8c6a704206053c79af95962e45", 47 | "sha256:2eff642fbc9877a6449026ad66bf37c73bf4232505fb557168ba5c502f95999b", 48 | "sha256:362e896cea1249ed5c2a81cf6477fabd9e1a5088aa7ea08358a4c6b0998294d2", 49 | "sha256:40eddb3589f382cb950f2dcf1c39c9b8d7bd5af20665ce273815b0d24635008b", 50 | "sha256:5ed40760976f6b8613d4a0db5e423673ca162d4ed6c9ed92d1f4e58a47ee01b5", 51 | "sha256:632c6112c1e914c486f06cfe3f0cc507f44aa1e00ebf732cedb5719e6aa0466a", 52 | "sha256:64d84f0145e181f4e6cc942088603c8db3ae23485c37eeda71cb3900b5e67cb4", 53 | "sha256:6cb4edcf87d0e7f5bdc7e5c1a0756fbb37081b2181293c5fdf203347df1cd2a2", 54 | "sha256:6f19c9df4785305669335b934c852133faed913c0faa63056248168966f7a7d5", 55 | "sha256:719537b4c5cd5218f0f47826dd705fb7a21d83824920088c4214794457113f3f", 56 | "sha256:7b0e337a70e58f1a36fb483fd63880c9e74f1db5c532b4082bceac83df1523fa", 57 | "sha256:853376efeeb8a4ae49a737d5d30f5db8cdf01d9319695719c4af126488df5a6a", 58 | "sha256:85bbf77ffd12985d76a69d2feb449e35ecdcb4fc54a5f087d2bd54158ae5bb0c", 59 | "sha256:8978115c6f0b0ce5880bc21c967c65058be8a15f1b81aa5fdbdcbea0e03952d1", 60 | "sha256:8f7eec920bc83692231d7306b3e311586c2e340db2dc734c43c37fbf9c981d24", 61 | "sha256:8fe230f612c18af1df6f348d02d682fe2c28ca0a6c3856c99599cdacae7cf226", 62 | "sha256:92068ebc494b5f9826b822cec6569f1f47b9a446a3fef477e1d11d7fac9ea895", 63 | "sha256:b57e1c8bcdd7340e9c9d09613b5e7fdd0c600be142f04e2cc1cc8cb7c0b43529", 64 | "sha256:ba956c9b44646bc1852db715b4a252e52a8f5a4009b57f1dac48ba3203a7bde1", 65 | "sha256:ca42034c11eb447497ea0e7b855d87ccc2aebc1e253c22e7d276b8599c112a27", 66 | "sha256:dc9b2003e9a62bbe0c84a04c61b0329e86fccd85134a78d7aca373bbbf788165", 67 | "sha256:dd308802beb4b2961af8f037becbdf01a1e85009fdfc14088614c1b3c383fae5", 68 | "sha256:e77cd105b19b8cd721d101687fcf665fd1553eb7b57556a1ef0d453b6fc42faa", 69 | 
"sha256:f56dff1bd81022f1c980754ec721fb8da56192b026f17f0f99b965da5ab4fbd2", 70 | "sha256:fa4cc13c03ea1d0d37ce8528e0ecc988d2365e8ac64d8d86cafab4038cb4ce89", 71 | "sha256:fa8cf1cb974a9f5911d2a0303f6adc40625c05578d8e7ff5d313e1e27850bd59", 72 | "sha256:fb003019f06d5fc0aa4738492ad8df1fa343b8a37cbcf634018ad78575d185df", 73 | "sha256:fd409b7778167c3bcc836484a8f49c0e0b93d3e745d975749f83aa5d18a5822f", 74 | "sha256:fe5d65a3ee38122003245a82303d11ac05ff36531a8f5ce4bc7d4bbc012797e1" 75 | ], 76 | "version": "==1.13.0" 77 | }, 78 | "chardet": { 79 | "hashes": [ 80 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", 81 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" 82 | ], 83 | "version": "==3.0.4" 84 | }, 85 | "click": { 86 | "hashes": [ 87 | "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", 88 | "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" 89 | ], 90 | "version": "==7.0" 91 | }, 92 | "click-plugins": { 93 | "hashes": [ 94 | "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b", 95 | "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8" 96 | ], 97 | "version": "==1.1.1" 98 | }, 99 | "colorama": { 100 | "hashes": [ 101 | "sha256:05eed71e2e327246ad6b38c540c4a3117230b19679b875190486ddd2d721422d", 102 | "sha256:f8ac84de7840f5b9c4e3347b3c1eaa50f7e49c2b07596221daec5edaabbd7c48" 103 | ], 104 | "version": "==0.4.1" 105 | }, 106 | "dnspython": { 107 | "hashes": [ 108 | "sha256:36c5e8e38d4369a08b6780b7f27d790a292b2b08eea01607865bf0936c558e01", 109 | "sha256:f69c21288a962f4da86e56c4905b49d11aba7938d3d740e80d9e366ee4f1632d" 110 | ], 111 | "index": "pypi", 112 | "version": "==1.16.0" 113 | }, 114 | "gevent": { 115 | "hashes": [ 116 | "sha256:0774babec518a24d9a7231d4e689931f31b332c4517a771e532002614e270a64", 117 | "sha256:0e1e5b73a445fe82d40907322e1e0eec6a6745ca3cea19291c6f9f50117bb7ea", 118 | "sha256:0ff2b70e8e338cf13bedf146b8c29d475e2a544b5d1fe14045aee827c073842c", 119 | "sha256:107f4232db2172f7e8429ed7779c10f2ed16616d75ffbe77e0e0c3fcdeb51a51", 120 | "sha256:14b4d06d19d39a440e72253f77067d27209c67e7611e352f79fe69e0f618f76e", 121 | "sha256:1b7d3a285978b27b469c0ff5fb5a72bcd69f4306dbbf22d7997d83209a8ba917", 122 | "sha256:1eb7fa3b9bd9174dfe9c3b59b7a09b768ecd496debfc4976a9530a3e15c990d1", 123 | "sha256:2711e69788ddb34c059a30186e05c55a6b611cb9e34ac343e69cf3264d42fe1c", 124 | "sha256:28a0c5417b464562ab9842dd1fb0cc1524e60494641d973206ec24d6ec5f6909", 125 | "sha256:3249011d13d0c63bea72d91cec23a9cf18c25f91d1f115121e5c9113d753fa12", 126 | "sha256:44089ed06a962a3a70e96353c981d628b2d4a2f2a75ea5d90f916a62d22af2e8", 127 | "sha256:4bfa291e3c931ff3c99a349d8857605dca029de61d74c6bb82bd46373959c942", 128 | "sha256:50024a1ee2cf04645535c5ebaeaa0a60c5ef32e262da981f4be0546b26791950", 129 | "sha256:53b72385857e04e7faca13c613c07cab411480822ac658d97fd8a4ddbaf715c8", 130 | "sha256:74b7528f901f39c39cdbb50cdf08f1a2351725d9aebaef212a29abfbb06895ee", 131 | "sha256:7d0809e2991c9784eceeadef01c27ee6a33ca09ebba6154317a257353e3af922", 132 | "sha256:896b2b80931d6b13b5d9feba3d4eebc67d5e6ec54f0cf3339d08487d55d93b0e", 133 | "sha256:8d9ec51cc06580f8c21b41fd3f2b3465197ba5b23c00eb7d422b7ae0380510b0", 134 | "sha256:9f7a1e96fec45f70ad364e46de32ccacab4d80de238bd3c2edd036867ccd48ad", 135 | "sha256:ab4dc33ef0e26dc627559786a4fba0c2227f125db85d970abbf85b77506b3f51", 136 | "sha256:d1e6d1f156e999edab069d79d890859806b555ce4e4da5b6418616322f0a3df1", 137 | 
"sha256:d752bcf1b98174780e2317ada12013d612f05116456133a6acf3e17d43b71f05", 138 | "sha256:e5bcc4270671936349249d26140c267397b7b4b1381f5ec8b13c53c5b53ab6e1" 139 | ], 140 | "version": "==1.4.0" 141 | }, 142 | "greenlet": { 143 | "hashes": [ 144 | "sha256:000546ad01e6389e98626c1367be58efa613fa82a1be98b0c6fc24b563acc6d0", 145 | "sha256:0d48200bc50cbf498716712129eef819b1729339e34c3ae71656964dac907c28", 146 | "sha256:23d12eacffa9d0f290c0fe0c4e81ba6d5f3a5b7ac3c30a5eaf0126bf4deda5c8", 147 | "sha256:37c9ba82bd82eb6a23c2e5acc03055c0e45697253b2393c9a50cef76a3985304", 148 | "sha256:51503524dd6f152ab4ad1fbd168fc6c30b5795e8c70be4410a64940b3abb55c0", 149 | "sha256:8041e2de00e745c0e05a502d6e6db310db7faa7c979b3a5877123548a4c0b214", 150 | "sha256:81fcd96a275209ef117e9ec91f75c731fa18dcfd9ffaa1c0adbdaa3616a86043", 151 | "sha256:853da4f9563d982e4121fed8c92eea1a4594a2299037b3034c3c898cb8e933d6", 152 | "sha256:8b4572c334593d449113f9dc8d19b93b7b271bdbe90ba7509eb178923327b625", 153 | "sha256:9416443e219356e3c31f1f918a91badf2e37acf297e2fa13d24d1cc2380f8fbc", 154 | "sha256:9854f612e1b59ec66804931df5add3b2d5ef0067748ea29dc60f0efdcda9a638", 155 | "sha256:99a26afdb82ea83a265137a398f570402aa1f2b5dfb4ac3300c026931817b163", 156 | "sha256:a19bf883b3384957e4a4a13e6bd1ae3d85ae87f4beb5957e35b0be287f12f4e4", 157 | "sha256:a9f145660588187ff835c55a7d2ddf6abfc570c2651c276d3d4be8a2766db490", 158 | "sha256:ac57fcdcfb0b73bb3203b58a14501abb7e5ff9ea5e2edfa06bb03035f0cff248", 159 | "sha256:bcb530089ff24f6458a81ac3fa699e8c00194208a724b644ecc68422e1111939", 160 | "sha256:beeabe25c3b704f7d56b573f7d2ff88fc99f0138e43480cecdfcaa3b87fe4f87", 161 | "sha256:d634a7ea1fc3380ff96f9e44d8d22f38418c1c381d5fac680b272d7d90883720", 162 | "sha256:d97b0661e1aead761f0ded3b769044bb00ed5d33e1ec865e891a8b128bf7c656" 163 | ], 164 | "markers": "platform_python_implementation == 'CPython'", 165 | "version": "==0.4.15" 166 | }, 167 | "grequests": { 168 | "hashes": [ 169 | "sha256:8aeccc15e60ec65c7e67ee32e9c596ab2196979815497f85cf863465a1626490", 170 | "sha256:eb574b08f69b48c54e1029415f5f3316899ee006daa5624bbc5320648cdfdd52" 171 | ], 172 | "index": "pypi", 173 | "version": "==0.4.0" 174 | }, 175 | "idna": { 176 | "hashes": [ 177 | "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", 178 | "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" 179 | ], 180 | "version": "==2.8" 181 | }, 182 | "netaddr": { 183 | "hashes": [ 184 | "sha256:38aeec7cdd035081d3a4c306394b19d677623bf76fa0913f6695127c7753aefd", 185 | "sha256:56b3558bd71f3f6999e4c52e349f38660e54a7a8a9943335f73dfc96883e08ca" 186 | ], 187 | "index": "pypi", 188 | "version": "==0.7.19" 189 | }, 190 | "plotly": { 191 | "hashes": [ 192 | "sha256:593418bbbd325ee020b7d0381a9452c603558981bde05a303b860455eb907574", 193 | "sha256:6650ddb4da3aa94dcaa32e0779e611c6b17f371b5250ffdbf5ece6d66ba4483b" 194 | ], 195 | "index": "pypi", 196 | "version": "==4.2.1" 197 | }, 198 | "pycares": { 199 | "hashes": [ 200 | "sha256:2ca080db265ea238dc45f997f94effb62b979a617569889e265c26a839ed6305", 201 | "sha256:6f79c6afb6ce603009db2042fddc2e348ad093ece9784cbe2daa809499871a23", 202 | "sha256:70918d06eb0603016d37092a5f2c0228509eb4e6c5a3faacb4184f6ab7be7650", 203 | "sha256:755187d28d24a9ea63aa2b4c0638be31d65fbf7f0ce16d41261b9f8cb55a1b99", 204 | "sha256:7baa4b1f2146eb8423ff8303ebde3a20fb444a60db761fba0430d104fe35ddbf", 205 | "sha256:90b27d4df86395f465a171386bc341098d6d47b65944df46518814ae298f6cc6", 206 | "sha256:9e090dd6b2afa65cb51c133883b2bf2240fd0f717b130b0048714b33fb0f47ce", 207 | 
"sha256:a11b7d63c3718775f6e805d6464cb10943780395ab042c7e5a0a7a9f612735dd", 208 | "sha256:b253f5dcaa0ac7076b79388a3ac80dd8f3bd979108f813baade40d3a9b8bf0bd", 209 | "sha256:c7f4f65e44ba35e35ad3febc844270665bba21cfb0fb7d749434e705b556e087", 210 | "sha256:cdb342e6a254f035bd976d95807a2184038fc088d957a5104dcaab8be602c093", 211 | "sha256:cf08e164f8bfb83b9fe633feb56f2754fae6baefcea663593794fa0518f8f98c", 212 | "sha256:df9bc694cf03673878ea8ce674082c5acd134991d64d6c306d4bd61c0c1df98f" 213 | ], 214 | "version": "==3.0.0" 215 | }, 216 | "pycparser": { 217 | "hashes": [ 218 | "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" 219 | ], 220 | "version": "==2.19" 221 | }, 222 | "pyyaml": { 223 | "hashes": [ 224 | "sha256:0113bc0ec2ad727182326b61326afa3d1d8280ae1122493553fd6f4397f33df9", 225 | "sha256:01adf0b6c6f61bd11af6e10ca52b7d4057dd0be0343eb9283c878cf3af56aee4", 226 | "sha256:5124373960b0b3f4aa7df1707e63e9f109b5263eca5976c66e08b1c552d4eaf8", 227 | "sha256:5ca4f10adbddae56d824b2c09668e91219bb178a1eee1faa56af6f99f11bf696", 228 | "sha256:7907be34ffa3c5a32b60b95f4d95ea25361c951383a894fec31be7252b2b6f34", 229 | "sha256:7ec9b2a4ed5cad025c2278a1e6a19c011c80a3caaac804fd2d329e9cc2c287c9", 230 | "sha256:87ae4c829bb25b9fe99cf71fbb2140c448f534e24c998cc60f39ae4f94396a73", 231 | "sha256:9de9919becc9cc2ff03637872a440195ac4241c80536632fffeb6a1e25a74299", 232 | "sha256:a5a85b10e450c66b49f98846937e8cfca1db3127a9d5d1e31ca45c3d0bef4c5b", 233 | "sha256:b0997827b4f6a7c286c01c5f60384d218dca4ed7d9efa945c3e1aa623d5709ae", 234 | "sha256:b631ef96d3222e62861443cc89d6563ba3eeb816eeb96b2629345ab795e53681", 235 | "sha256:bf47c0607522fdbca6c9e817a6e81b08491de50f3766a7a0e6a5be7905961b41", 236 | "sha256:f81025eddd0327c7d4cfe9b62cf33190e1e736cc6e97502b3ec425f574b3e7a8" 237 | ], 238 | "index": "pypi", 239 | "version": "==5.1.2" 240 | }, 241 | "requests": { 242 | "hashes": [ 243 | "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", 244 | "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" 245 | ], 246 | "index": "pypi", 247 | "version": "==2.22.0" 248 | }, 249 | "retrying": { 250 | "hashes": [ 251 | "sha256:08c039560a6da2fe4f2c426d0766e284d3b736e355f8dd24b37367b0bb41973b" 252 | ], 253 | "index": "pypi", 254 | "version": "==1.3.3" 255 | }, 256 | "shodan": { 257 | "hashes": [ 258 | "sha256:9d8bb822738d02a63dbe890b46f511f0df13fd33a60b754278c3bf5dd5cf9fc4" 259 | ], 260 | "index": "pypi", 261 | "version": "==1.19.0" 262 | }, 263 | "six": { 264 | "hashes": [ 265 | "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", 266 | "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" 267 | ], 268 | "version": "==1.12.0" 269 | }, 270 | "soupsieve": { 271 | "hashes": [ 272 | "sha256:605f89ad5fdbfefe30cdc293303665eff2d188865d4dbe4eb510bba1edfbfce3", 273 | "sha256:b91d676b330a0ebd5b21719cb6e9b57c57d433671f65b9c28dd3461d9a1ed0b6" 274 | ], 275 | "version": "==1.9.4" 276 | }, 277 | "texttable": { 278 | "hashes": [ 279 | "sha256:eff3703781fbc7750125f50e10f001195174f13825a92a45e9403037d539b4f4" 280 | ], 281 | "index": "pypi", 282 | "version": "==1.6.2" 283 | }, 284 | "urllib3": { 285 | "hashes": [ 286 | "sha256:3de946ffbed6e6746608990594d08faac602528ac7015ac28d33cee6a45b7398", 287 | "sha256:9a107b99a5393caf59c7aa3c1249c16e6879447533d0887f4336dde834c7be86" 288 | ], 289 | "version": "==1.25.6" 290 | }, 291 | "xlsxwriter": { 292 | "hashes": [ 293 | "sha256:00e9c337589ec67a69f1220f47409146ab1affd8eb1e8eaad23f35685bd23e47", 294 | 
"sha256:5a5e2195a4672d17db79839bbdf1006a521adb57eaceea1c335ae4b3d19f088f" 295 | ], 296 | "version": "==1.2.2" 297 | } 298 | }, 299 | "develop": { 300 | "atomicwrites": { 301 | "hashes": [ 302 | "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", 303 | "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" 304 | ], 305 | "version": "==1.3.0" 306 | }, 307 | "attrs": { 308 | "hashes": [ 309 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", 310 | "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" 311 | ], 312 | "version": "==19.3.0" 313 | }, 314 | "entrypoints": { 315 | "hashes": [ 316 | "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19", 317 | "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451" 318 | ], 319 | "version": "==0.3" 320 | }, 321 | "flake8": { 322 | "hashes": [ 323 | "sha256:19241c1cbc971b9962473e4438a2ca19749a7dd002dd1a946eaba171b4114548", 324 | "sha256:8e9dfa3cecb2400b3738a42c54c3043e821682b9c840b0448c0503f781130696" 325 | ], 326 | "index": "pypi", 327 | "version": "==3.7.8" 328 | }, 329 | "importlib-metadata": { 330 | "hashes": [ 331 | "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26", 332 | "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af" 333 | ], 334 | "markers": "python_version < '3.8'", 335 | "version": "==0.23" 336 | }, 337 | "mccabe": { 338 | "hashes": [ 339 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 340 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 341 | ], 342 | "version": "==0.6.1" 343 | }, 344 | "more-itertools": { 345 | "hashes": [ 346 | "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832", 347 | "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4" 348 | ], 349 | "version": "==7.2.0" 350 | }, 351 | "mypy": { 352 | "hashes": [ 353 | "sha256:1521c186a3d200c399bd5573c828ea2db1362af7209b2adb1bb8532cea2fb36f", 354 | "sha256:31a046ab040a84a0fc38bc93694876398e62bc9f35eca8ccbf6418b7297f4c00", 355 | "sha256:3b1a411909c84b2ae9b8283b58b48541654b918e8513c20a400bb946aa9111ae", 356 | "sha256:48c8bc99380575deb39f5d3400ebb6a8a1cb5cc669bbba4d3bb30f904e0a0e7d", 357 | "sha256:540c9caa57a22d0d5d3c69047cc9dd0094d49782603eb03069821b41f9e970e9", 358 | "sha256:672e418425d957e276c291930a3921b4a6413204f53fe7c37cad7bc57b9a3391", 359 | "sha256:6ed3b9b3fdc7193ea7aca6f3c20549b377a56f28769783a8f27191903a54170f", 360 | "sha256:9371290aa2cad5ad133e4cdc43892778efd13293406f7340b9ffe99d5ec7c1d9", 361 | "sha256:ace6ac1d0f87d4072f05b5468a084a45b4eda970e4d26704f201e06d47ab2990", 362 | "sha256:b428f883d2b3fe1d052c630642cc6afddd07d5cd7873da948644508be3b9d4a7", 363 | "sha256:d5bf0e6ec8ba346a2cf35cb55bf4adfddbc6b6576fcc9e10863daa523e418dbb", 364 | "sha256:d7574e283f83c08501607586b3167728c58e8442947e027d2d4c7dcd6d82f453", 365 | "sha256:dc889c84241a857c263a2b1cd1121507db7d5b5f5e87e77147097230f374d10b", 366 | "sha256:f4748697b349f373002656bf32fede706a0e713d67bfdcf04edf39b1f61d46eb" 367 | ], 368 | "index": "pypi", 369 | "version": "==0.740" 370 | }, 371 | "mypy-extensions": { 372 | "hashes": [ 373 | "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", 374 | "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" 375 | ], 376 | "index": "pypi", 377 | "version": "==0.4.3" 378 | }, 379 | "packaging": { 380 | "hashes": [ 381 | 
"sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47", 382 | "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108" 383 | ], 384 | "version": "==19.2" 385 | }, 386 | "pluggy": { 387 | "hashes": [ 388 | "sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6", 389 | "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34" 390 | ], 391 | "version": "==0.13.0" 392 | }, 393 | "py": { 394 | "hashes": [ 395 | "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", 396 | "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" 397 | ], 398 | "version": "==1.8.0" 399 | }, 400 | "pycodestyle": { 401 | "hashes": [ 402 | "sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56", 403 | "sha256:e40a936c9a450ad81df37f549d676d127b1b66000a6c500caa2b085bc0ca976c" 404 | ], 405 | "version": "==2.5.0" 406 | }, 407 | "pyflakes": { 408 | "hashes": [ 409 | "sha256:17dbeb2e3f4d772725c777fabc446d5634d1038f234e77343108ce445ea69ce0", 410 | "sha256:d976835886f8c5b31d47970ed689944a0262b5f3afa00a5a7b4dc81e5449f8a2" 411 | ], 412 | "version": "==2.1.1" 413 | }, 414 | "pyparsing": { 415 | "hashes": [ 416 | "sha256:6f98a7b9397e206d78cc01df10131398f1c8b8510a2f4d97d9abd82e1aacdd80", 417 | "sha256:d9338df12903bbf5d65a0e4e87c2161968b10d2e489652bb47001d82a9b028b4" 418 | ], 419 | "version": "==2.4.2" 420 | }, 421 | "pytest": { 422 | "hashes": [ 423 | "sha256:7e4800063ccfc306a53c461442526c5571e1462f61583506ce97e4da6a1d88c8", 424 | "sha256:ca563435f4941d0cb34767301c27bc65c510cb82e90b9ecf9cb52dc2c63caaa0" 425 | ], 426 | "index": "pypi", 427 | "version": "==5.2.1" 428 | }, 429 | "six": { 430 | "hashes": [ 431 | "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", 432 | "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" 433 | ], 434 | "version": "==1.12.0" 435 | }, 436 | "typed-ast": { 437 | "hashes": [ 438 | "sha256:1170afa46a3799e18b4c977777ce137bb53c7485379d9706af8a59f2ea1aa161", 439 | "sha256:18511a0b3e7922276346bcb47e2ef9f38fb90fd31cb9223eed42c85d1312344e", 440 | "sha256:262c247a82d005e43b5b7f69aff746370538e176131c32dda9cb0f324d27141e", 441 | "sha256:2b907eb046d049bcd9892e3076c7a6456c93a25bebfe554e931620c90e6a25b0", 442 | "sha256:354c16e5babd09f5cb0ee000d54cfa38401d8b8891eefa878ac772f827181a3c", 443 | "sha256:48e5b1e71f25cfdef98b013263a88d7145879fbb2d5185f2a0c79fa7ebbeae47", 444 | "sha256:4e0b70c6fc4d010f8107726af5fd37921b666f5b31d9331f0bd24ad9a088e631", 445 | "sha256:630968c5cdee51a11c05a30453f8cd65e0cc1d2ad0d9192819df9978984529f4", 446 | "sha256:66480f95b8167c9c5c5c87f32cf437d585937970f3fc24386f313a4c97b44e34", 447 | "sha256:71211d26ffd12d63a83e079ff258ac9d56a1376a25bc80b1cdcdf601b855b90b", 448 | "sha256:7954560051331d003b4e2b3eb822d9dd2e376fa4f6d98fee32f452f52dd6ebb2", 449 | "sha256:838997f4310012cf2e1ad3803bce2f3402e9ffb71ded61b5ee22617b3a7f6b6e", 450 | "sha256:95bd11af7eafc16e829af2d3df510cecfd4387f6453355188342c3e79a2ec87a", 451 | "sha256:bc6c7d3fa1325a0c6613512a093bc2a2a15aeec350451cbdf9e1d4bffe3e3233", 452 | "sha256:cc34a6f5b426748a507dd5d1de4c1978f2eb5626d51326e43280941206c209e1", 453 | "sha256:d755f03c1e4a51e9b24d899561fec4ccaf51f210d52abdf8c07ee2849b212a36", 454 | "sha256:d7c45933b1bdfaf9f36c579671fec15d25b06c8398f113dab64c18ed1adda01d", 455 | "sha256:d896919306dd0aa22d0132f62a1b78d11aaf4c9fc5b3410d3c666b818191630a", 456 | "sha256:fdc1c9bbf79510b76408840e009ed65958feba92a88833cdceecff93ae8fff66", 457 | 
"sha256:ffde2fbfad571af120fcbfbbc61c72469e72f550d676c3342492a9dfdefb8f12" 458 | ], 459 | "version": "==1.4.0" 460 | }, 461 | "typing-extensions": { 462 | "hashes": [ 463 | "sha256:2ed632b30bb54fc3941c382decfd0ee4148f5c591651c9272473fea2c6397d95", 464 | "sha256:b1edbbf0652660e32ae780ac9433f4231e7339c7f9a8057d0f042fcbcea49b87", 465 | "sha256:d8179012ec2c620d3791ca6fe2bf7979d979acdbef1fca0bc56b37411db682ed" 466 | ], 467 | "version": "==3.7.4" 468 | }, 469 | "wcwidth": { 470 | "hashes": [ 471 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", 472 | "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" 473 | ], 474 | "version": "==0.1.7" 475 | }, 476 | "zipp": { 477 | "hashes": [ 478 | "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e", 479 | "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335" 480 | ], 481 | "version": "==0.6.0" 482 | } 483 | } 484 | } 485 | -------------------------------------------------------------------------------- /theHarvester/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from theHarvester.discovery import * 4 | from theHarvester.discovery.constants import * 5 | from theHarvester.lib import hostchecker 6 | from theHarvester.lib import reportgraph 7 | from theHarvester.lib import stash 8 | from theHarvester.lib import statichtmlgenerator 9 | from theHarvester.lib.core import * 10 | import argparse 11 | import asyncio 12 | import datetime 13 | import netaddr 14 | import re 15 | import sys 16 | import time 17 | 18 | Core.banner() 19 | 20 | 21 | def start(): 22 | parser = argparse.ArgumentParser( 23 | description='theHarvester is used to gather open source intelligence (OSINT) on a\n' 24 | 'company or domain.') 25 | parser.add_argument('-d', '--domain', help='company name or domain to search', required=True) 26 | parser.add_argument('-l', '--limit', help='limit the number of search results, default=500', default=500, type=int) 27 | parser.add_argument('-S', '--start', help='start with result number X, default=0', default=0, type=int) 28 | parser.add_argument('-g', '--google-dork', help='use Google Dorks for Google search', default=False, action='store_true') 29 | parser.add_argument('-p', '--port-scan', help='scan the detected hosts and check for Takeovers (21,22,80,443,8080)', default=False, action='store_true') 30 | parser.add_argument('-s', '--shodan', help='use Shodan to query discovered hosts', default=False, action='store_true') 31 | parser.add_argument('-v', '--virtual-host', help='verify host name via DNS resolution and search for virtual hosts', action='store_const', const='basic', default=False) 32 | parser.add_argument('-e', '--dns-server', help='DNS server to use for lookup') 33 | parser.add_argument('-t', '--dns-tld', help='perform a DNS TLD expansion discovery, default False', default=False) 34 | parser.add_argument('-n', '--dns-lookup', help='enable DNS server lookup, default False', default=False, action='store_true') 35 | parser.add_argument('-c', '--dns-brute', help='perform a DNS brute force on the domain', default=False, action='store_true') 36 | parser.add_argument('-f', '--filename', help='save the results to an HTML and/or XML file', default='', type=str) 37 | parser.add_argument('-b', '--source', help='''baidu, bing, bingapi, certspotter, crtsh, dnsdumpster, 38 | dogpile, duckduckgo, github-code, google, 39 | hunter, intelx, 40 | linkedin, linkedin_links, netcraft, otx, 
securityTrails, spyse(disabled for now), threatcrowd, 41 | trello, twitter, vhost, virustotal, yahoo, all''') 42 | 43 | args = parser.parse_args() 44 | try: 45 | db = stash.StashManager() 46 | db.do_init() 47 | except Exception: 48 | pass 49 | 50 | all_emails: list = [] 51 | all_hosts: list = [] 52 | all_ip: list = [] 53 | dnsbrute = args.dns_brute 54 | dnslookup = args.dns_lookup 55 | dnsserver = args.dns_server 56 | dnstld = args.dns_tld 57 | engines = [] 58 | filename: str = args.filename 59 | full: list = [] 60 | google_dorking = args.google_dork 61 | host_ip: list = [] 62 | limit: int = args.limit 63 | ports_scanning = args.port_scan 64 | shodan = args.shodan 65 | start: int = args.start 66 | takeover_check = False 67 | all_urls: list = [] 68 | vhost: list = [] 69 | virtual = args.virtual_host 70 | word: str = args.domain 71 | 72 | def store(search_engine: Any, source: str, process_param: Any = None, store_host: bool = False, 73 | store_emails: bool = False, store_ip: bool = False, store_people: bool = False, 74 | store_data: bool = False, store_links: bool = False, store_results: bool = False) -> None: 75 | """ 76 | Persist details into the database. 77 | The details to be stored is controlled by the parameters passed to the method. 78 | 79 | :param search_engine: search engine to fetch details from 80 | :param source: source against which the details (corresponding to the search engine) need to be persisted 81 | :param process_param: any parameters to be passed to the search engine 82 | eg: Google needs google_dorking 83 | :param store_host: whether to store hosts 84 | :param store_emails: whether to store emails 85 | :param store_ip: whether to store IP address 86 | :param store_people: whether to store user details 87 | :param store_data: whether to fetch host from method get_data() and persist 88 | :param store_links: whether to store links 89 | :param store_results: whether to fetch details from get_results() and persist 90 | """ 91 | search_engine.process() if process_param is None else search_engine.process(process_param) 92 | db_stash = stash.StashManager() 93 | 94 | if store_host: 95 | host_names = filter(search_engine.get_hostnames()) 96 | all_hosts.extend(host_names) 97 | db_stash.store_all(word, all_hosts, 'host', source) 98 | if store_emails: 99 | email_list = filter(search_engine.get_emails()) 100 | all_emails.extend(email_list) 101 | db_stash.store_all(word, email_list, 'email', source) 102 | if store_ip: 103 | ips_list = search_engine.get_ips() 104 | all_ip.extend(ips_list) 105 | db_stash.store_all(word, all_ip, 'ip', source) 106 | if store_data: 107 | data = filter(search_engine.get_data()) 108 | all_hosts.extend(data) 109 | db.store_all(word, all_hosts, 'host', source) 110 | if store_results: 111 | email_list, host_names, urls = search_engine.get_results() 112 | all_emails.extend(email_list) 113 | host_names = filter(host_names) 114 | all_urls.extend(filter(urls)) 115 | all_hosts.extend(host_names) 116 | db.store_all(word, all_hosts, 'host', source) 117 | db.store_all(word, all_emails, 'email', source) 118 | if store_people: 119 | people_list = search_engine.get_people() 120 | db_stash.store_all(word, people_list, 'people', source) 121 | if len(people_list) == 0: 122 | print('\n[*] No users found.\n\n') 123 | else: 124 | print('\n[*] Users found: ' + str(len(people_list))) 125 | print('---------------------') 126 | for usr in sorted(list(set(people_list))): 127 | print(usr) 128 | if store_links: 129 | links = search_engine.get_links() 130 | db.store_all(word, links, 
'name', engineitem) 131 | if len(links) == 0: 132 | print('\n[*] No links found.\n\n') 133 | else: 134 | print(f'\n[*] Links found: {len(links)}') 135 | print('---------------------') 136 | for link in sorted(list(set(links))): 137 | print(link) 138 | 139 | if args.source is not None: 140 | if args.source.lower() != 'all': 141 | engines = sorted(set(map(str.strip, args.source.split(',')))) 142 | else: 143 | engines = Core.get_supportedengines() 144 | # Iterate through search engines in order 145 | if set(engines).issubset(Core.get_supportedengines()): 146 | print(f'\033[94m[*] Target: {word} \n \033[0m') 147 | 148 | for engineitem in engines: 149 | if engineitem == 'baidu': 150 | print('\033[94m[*] Searching Baidu. \033[0m') 151 | from theHarvester.discovery import baidusearch 152 | try: 153 | baidu_search = baidusearch.SearchBaidu(word, limit) 154 | store(baidu_search, engineitem, store_host=True, store_emails=True) 155 | except Exception: 156 | pass 157 | 158 | elif engineitem == 'bing' or engineitem == 'bingapi': 159 | print('\033[94m[*] Searching Bing. \033[0m') 160 | from theHarvester.discovery import bingsearch 161 | try: 162 | bing_search = bingsearch.SearchBing(word, limit, start) 163 | bingapi = '' 164 | if engineitem == 'bingapi': 165 | bingapi += 'yes' 166 | else: 167 | bingapi += 'no' 168 | store(bing_search, 'bing', process_param=bingapi, store_host=True, store_emails=True) 169 | except Exception as e: 170 | if isinstance(e, MissingKey): 171 | print(e) 172 | else: 173 | pass 174 | 175 | elif engineitem == 'certspotter': 176 | print('\033[94m[*] Searching CertSpotter. \033[0m') 177 | from theHarvester.discovery import certspottersearch 178 | try: 179 | certspotter_search = certspottersearch.SearchCertspoter(word) 180 | store(certspotter_search, engineitem, None, store_host=True) 181 | except Exception as e: 182 | print(e) 183 | 184 | elif engineitem == 'crtsh': 185 | try: 186 | print('\033[94m[*] Searching CRT.sh. \033[0m') 187 | from theHarvester.discovery import crtsh 188 | crtsh_search = crtsh.SearchCrtsh(word) 189 | store(crtsh_search, 'CRTsh', store_data=True) 190 | 191 | except Exception: 192 | print(f'\033[93m[!] A timeout occurred with crtsh, cannot find {args.domain}\033[0m') 193 | 194 | elif engineitem == 'dnsdumpster': 195 | try: 196 | print('\033[94m[*] Searching DNSdumpster. \033[0m') 197 | from theHarvester.discovery import dnsdumpster 198 | dns_dumpster_search = dnsdumpster.SearchDnsDumpster(word) 199 | store(dns_dumpster_search, engineitem, store_host=True) 200 | except Exception as e: 201 | print(f'\033[93m[!] An error occurred with dnsdumpster: {e} \033[0m') 202 | 203 | elif engineitem == 'dogpile': 204 | try: 205 | print('\033[94m[*] Searching Dogpile. \033[0m') 206 | from theHarvester.discovery import dogpilesearch 207 | dogpile_search = dogpilesearch.SearchDogpile(word, limit) 208 | store(dogpile_search, engineitem, store_host=True, store_emails=True) 209 | except Exception as e: 210 | print(f'\033[93m[!] An error occurred with Dogpile: {e} \033[0m') 211 | 212 | elif engineitem == 'duckduckgo': 213 | print('\033[94m[*] Searching DuckDuckGo. \033[0m') 214 | from theHarvester.discovery import duckduckgosearch 215 | duckduckgo_search = duckduckgosearch.SearchDuckDuckGo(word, limit) 216 | store(duckduckgo_search, engineitem, store_host=True, store_emails=True) 217 | 218 | elif engineitem == 'github-code': 219 | print('\033[94m[*] Searching Github (code). 
\033[0m') 220 | try: 221 | from theHarvester.discovery import githubcode 222 | github_search = githubcode.SearchGithubCode(word, limit) 223 | store(github_search, engineitem, store_host=True, store_emails=True) 224 | except MissingKey as ex: 225 | print(ex) 226 | else: 227 | pass 228 | 229 | elif engineitem == 'exalead': 230 | print('\033[94m[*] Searching Exalead \033[0m') 231 | from theHarvester.discovery import exaleadsearch 232 | exalead_search = exaleadsearch.SearchExalead(word, limit, start) 233 | store(exalead_search, engineitem, store_host=True, store_emails=True) 234 | 235 | elif engineitem == 'google': 236 | print('\033[94m[*] Searching Google. \033[0m') 237 | from theHarvester.discovery import googlesearch 238 | google_search = googlesearch.SearchGoogle(word, limit, start) 239 | store(google_search, engineitem, process_param=google_dorking, store_host=True, store_emails=True) 240 | 241 | elif engineitem == 'hunter': 242 | print('\033[94m[*] Searching Hunter. \033[0m') 243 | from theHarvester.discovery import huntersearch 244 | # Import locally or won't work. 245 | try: 246 | hunter_search = huntersearch.SearchHunter(word, limit, start) 247 | store(hunter_search, engineitem, store_host=True, store_emails=True) 248 | except Exception as e: 249 | if isinstance(e, MissingKey): 250 | print(e) 251 | else: 252 | pass 253 | 254 | elif engineitem == 'intelx': 255 | print('\033[94m[*] Searching Intelx. \033[0m') 256 | from theHarvester.discovery import intelxsearch 257 | # Import locally or won't work. 258 | try: 259 | intelx_search = intelxsearch.SearchIntelx(word, limit) 260 | store(intelx_search, engineitem, store_host=True, store_emails=True) 261 | except Exception as e: 262 | if isinstance(e, MissingKey): 263 | print(e) 264 | else: 265 | print(f'An exception has occurred in Intelx search: {e}') 266 | 267 | elif engineitem == 'linkedin': 268 | print('\033[94m[*] Searching Linkedin. \033[0m') 269 | from theHarvester.discovery import linkedinsearch 270 | linkedin_search = linkedinsearch.SearchLinkedin(word, limit) 271 | store(linkedin_search, engineitem, store_people=True) 272 | 273 | elif engineitem == 'linkedin_links': 274 | print('\033[94m[*] Searching Linkedin. \033[0m') 275 | from theHarvester.discovery import linkedinsearch 276 | linkedin_links_search = linkedinsearch.SearchLinkedin(word, limit) 277 | store(linkedin_links_search, 'linkedin', store_links=True) 278 | 279 | elif engineitem == 'netcraft': 280 | print('\033[94m[*] Searching Netcraft. \033[0m') 281 | from theHarvester.discovery import netcraft 282 | netcraft_search = netcraft.SearchNetcraft(word) 283 | store(netcraft_search, engineitem, store_host=True) 284 | 285 | elif engineitem == 'otx': 286 | print('\033[94m[*] Searching AlienVault OTX. \033[0m') 287 | from theHarvester.discovery import otxsearch 288 | try: 289 | otxsearch_search = otxsearch.SearchOtx(word) 290 | store(otxsearch_search, engineitem, store_host=True, store_ip=True) 291 | except Exception as e: 292 | print(e) 293 | 294 | elif engineitem == 'securityTrails': 295 | print('\033[94m[*] Searching SecurityTrails. \033[0m') 296 | from theHarvester.discovery import securitytrailssearch 297 | try: 298 | securitytrails_search = securitytrailssearch.SearchSecuritytrail(word) 299 | store(securitytrails_search, engineitem, store_host=True, store_ip=True) 300 | except Exception as e: 301 | if isinstance(e, MissingKey): 302 | print(e) 303 | else: 304 | pass 305 | 306 | elif engineitem == 'suip': 307 | print('\033[94m[*] Searching Suip. 
This module can take 10+ mins to run but it is worth it.\033[0m') 308 | from theHarvester.discovery import suip 309 | try: 310 | suip_search = suip.SearchSuip(word) 311 | store(suip_search, engineitem, store_host=True) 312 | except Exception as e: 313 | print(e) 314 | 315 | # elif engineitem == 'spyse': 316 | # print('\033[94m[*] Searching Spyse. \033[0m') 317 | # from theHarvester.discovery import spyse 318 | # try: 319 | # spysesearch_search = spyse.SearchSpyse(word) 320 | # spysesearch_search.process() 321 | # hosts = filter(spysesearch_search.get_hostnames()) 322 | # all_hosts.extend(list(hosts)) 323 | # # ips = filter(spysesearch_search.get_ips()) 324 | # # all_ip.extend(list(ips)) 325 | # all_hosts.extend(hosts) 326 | # db = stash.stash_manager() 327 | # db.store_all(word, all_hosts, 'host', 'spyse') 328 | # # db.store_all(word, all_ip, 'ip', 'spyse') 329 | # except Exception as e: 330 | # print(e) 331 | 332 | elif engineitem == 'threatcrowd': 333 | print('\033[94m[*] Searching Threatcrowd. \033[0m') 334 | from theHarvester.discovery import threatcrowd 335 | try: 336 | threatcrowd_search = threatcrowd.SearchThreatcrowd(word) 337 | store(threatcrowd_search, engineitem, store_host=True) 338 | except Exception as e: 339 | print(e) 340 | 341 | elif engineitem == 'trello': 342 | print('\033[94m[*] Searching Trello. \033[0m') 343 | from theHarvester.discovery import trello 344 | # Import locally or won't work. 345 | trello_search = trello.SearchTrello(word) 346 | store(trello_search, engineitem, store_results=True) 347 | 348 | elif engineitem == 'twitter': 349 | print('\033[94m[*] Searching Twitter usernames using Google. \033[0m') 350 | from theHarvester.discovery import twittersearch 351 | twitter_search = twittersearch.SearchTwitter(word, limit) 352 | store(twitter_search, engineitem, store_people=True) 353 | 354 | elif engineitem == 'virustotal': 355 | print('\033[94m[*] Searching VirusTotal. \033[0m') 356 | from theHarvester.discovery import virustotal 357 | virustotal_search = virustotal.SearchVirustotal(word) 358 | store(virustotal_search, engineitem, store_host=True) 359 | 360 | elif engineitem == 'yahoo': 361 | print('\033[94m[*] Searching Yahoo. \033[0m') 362 | from theHarvester.discovery import yahoosearch 363 | yahoo_search = yahoosearch.SearchYahoo(word, limit) 364 | store(yahoo_search, engineitem, store_host=True, store_emails=True) 365 | else: 366 | print('\033[93m[!] Invalid source.\n\n \033[0m') 367 | sys.exit(1) 368 | 369 | # Sanity check to see if all_emails and all_hosts are defined. 370 | try: 371 | all_emails 372 | except NameError: 373 | print('\n\n\033[93m[!] No emails found because all_emails is not defined.\n\n \033[0m') 374 | sys.exit(1) 375 | try: 376 | all_hosts 377 | except NameError: 378 | print('\n\n\033[93m[!] 
No hosts found because all_hosts is not defined.\n\n \033[0m') 379 | sys.exit(1) 380 | 381 | # Results 382 | if len(all_ip) == 0: 383 | print('\n[*] No IPs found.') 384 | else: 385 | print('\n[*] IPs found: ' + str(len(all_ip))) 386 | print('-------------------') 387 | # use netaddr as the list may contain ipv4 and ipv6 addresses 388 | ip_list = sorted([netaddr.IPAddress(ip.strip()) for ip in set(all_ip)]) 389 | print('\n'.join(map(str, ip_list))) 390 | 391 | if len(all_emails) == 0: 392 | print('\n[*] No emails found.') 393 | else: 394 | print('\n[*] Emails found: ' + str(len(all_emails))) 395 | print('----------------------') 396 | print(('\n'.join(sorted(list(set(all_emails)))))) 397 | 398 | if len(all_hosts) == 0: 399 | print('\n[*] No hosts found.\n\n') 400 | else: 401 | print('\n[*] Hosts found: ' + str(len(all_hosts))) 402 | print('---------------------') 403 | all_hosts = sorted(list(set(all_hosts))) 404 | full_host = hostchecker.Checker(all_hosts) 405 | full, ips = asyncio.run(full_host.check()) 406 | db = stash.StashManager() 407 | for host in full: 408 | host = str(host) 409 | print(host) 410 | host_ip = [netaddr_ip.format() for netaddr_ip in sorted([netaddr.IPAddress(ip) for ip in ips])] 411 | db.store_all(word, host_ip, 'ip', 'DNS-resolver') 412 | length_urls = len(all_urls) 413 | if length_urls == 0: 414 | if len(engines) >= 1 and 'trello' in engines: 415 | print('\n[*] No Trello URLs found.') 416 | else: 417 | total = length_urls 418 | print('\n[*] Trello URLs found: ' + str(total)) 419 | print('--------------------') 420 | for url in sorted(all_urls): 421 | print(url) 422 | 423 | # DNS brute force 424 | # dnsres = [] 425 | if dnsbrute is True: 426 | print('\n[*] Starting DNS brute force.') 427 | a = dnssearch.DnsForce(word, dnsserver, verbose=True) 428 | a.process() 429 | # print('\n[*] Hosts found after DNS brute force:') 430 | # for y in res: 431 | # print('-------------------------------------') 432 | # print(y) 433 | # dnsres.append(y.split(':')[0]) 434 | # if y not in full: 435 | # full.append(y) 436 | # db = stash.stash_manager() 437 | # db.store_all(word, dnsres, 'host', 'dns_bruteforce') 438 | 439 | # Port scanning 440 | if ports_scanning is True: 441 | print('\n\n[*] Scanning ports (active).\n') 442 | for x in full: 443 | host = x.split(':')[1] 444 | domain = x.split(':')[0] 445 | if host != 'empty': 446 | print(('[*] Scanning ' + host)) 447 | ports = [21, 22, 80, 443, 8080] 448 | try: 449 | scan = port_scanner.PortScan(host, ports) 450 | openports = scan.process() 451 | if len(openports) > 1: 452 | print(('\t[*] Detected open ports: ' + ','.join(str(e) for e in openports))) 453 | takeover_check = 'True' 454 | if takeover_check == 'True' and len(openports) > 0: 455 | search_take = takeover.TakeOver(domain) 456 | search_take.process() 457 | except Exception as e: 458 | print(e) 459 | 460 | # DNS reverse lookup 461 | dnsrev = [] 462 | if dnslookup is True: 463 | print('\n[*] Starting active queries.') 464 | analyzed_ranges = [] 465 | for entry in host_ip: 466 | print(entry) 467 | ip = entry.split(':')[0] 468 | ip_range = ip.split('.') 469 | ip_range[3] = '0/24' 470 | s = '.' 
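# The octets are re-joined below into a /24 network string (e.g. '192.0.2.57' becomes '192.0.2.0/24'), so each reverse lookup sweeps the host's whole /24 range rather than a single address; analyzed_ranges prevents the same range from being scanned twice.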
471 | ip_range = s.join(ip_range) 472 | if not analyzed_ranges.count(ip_range): 473 | print('[*] Performing reverse lookup in ' + ip_range) 474 | a = dnssearch.DnsReverse(ip_range, True) 475 | a.list() 476 | res = a.process() 477 | analyzed_ranges.append(ip_range) 478 | else: 479 | continue 480 | for entries in res: 481 | if entries.count(word): 482 | dnsrev.append(entries) 483 | if entries not in full: 484 | full.append(entries) 485 | print('[*] Hosts found after reverse lookup (in target domain):') 486 | print('--------------------------------------------------------') 487 | for xh in dnsrev: 488 | print(xh) 489 | 490 | # DNS TLD expansion 491 | dnstldres = [] 492 | if dnstld is True: 493 | print('[*] Starting DNS TLD expansion.') 494 | a = dnssearch.DnsTld(word, dnsserver, verbose=True) 495 | res = a.process() 496 | print('\n[*] Hosts found after DNS TLD expansion:') 497 | print('----------------------------------------') 498 | for y in res: 499 | print(y) 500 | dnstldres.append(y) 501 | if y not in full: 502 | full.append(y) 503 | 504 | # Virtual hosts search 505 | if virtual == 'basic': 506 | print('\n[*] Virtual hosts:') 507 | print('------------------') 508 | for l in host_ip: 509 | basic_search = bingsearch.SearchBing(l, limit, start) 510 | basic_search.process_vhost() 511 | results = basic_search.get_allhostnames() 512 | for result in results: 513 | result = re.sub(r'[[\<\/?]*[\w]*>]*', '', result) 514 | result = re.sub('<', '', result) 515 | result = re.sub('>', '', result) 516 | print((l + '\t' + result)) 517 | vhost.append(l + ':' + result) 518 | full.append(l + ':' + result) 519 | vhost = sorted(set(vhost)) 520 | else: 521 | pass 522 | 523 | # Shodan 524 | shodanres = [] 525 | if shodan is True: 526 | import texttable 527 | tab = texttable.Texttable() 528 | header = ['IP address', 'Hostname', 'Org', 'Services:Ports', 'Technologies'] 529 | tab.header(header) 530 | tab.set_cols_align(['c', 'c', 'c', 'c', 'c']) 531 | tab.set_cols_valign(['m', 'm', 'm', 'm', 'm']) 532 | tab.set_chars(['-', '|', '+', '#']) 533 | tab.set_cols_width([15, 20, 15, 15, 18]) 534 | print('\033[94m[*] Searching Shodan. \033[0m') 535 | try: 536 | for ip in host_ip: 537 | print(('\tSearching for ' + ip)) 538 | shodan = shodansearch.SearchShodan() 539 | rowdata = shodan.search_ip(ip) 540 | time.sleep(2) 541 | tab.add_row(rowdata) 542 | printedtable = tab.draw() 543 | print(printedtable) 544 | except Exception as e: 545 | print(f'\033[93m[!] An error occurred with Shodan: {e} \033[0m') 546 | else: 547 | pass 548 | 549 | # Here we need to add explosion mode. 550 | # We have to take out the TLDs to do this. 
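# When -t/--dns-tld was supplied, each entry collected in vhost above is used as a new Google search term and any emails and hostnames returned are printed.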
551 | if args.dns_tld is not False: 552 | counter = 0 553 | for word in vhost: 554 | search = googlesearch.SearchGoogle(word, limit, counter) 555 | search.process(google_dorking) 556 | emails = search.get_emails() 557 | hosts = search.get_hostnames() 558 | print(emails) 559 | print(hosts) 560 | else: 561 | pass 562 | 563 | # Reporting 564 | if filename != "": 565 | try: 566 | print('\n[*] Reporting started.') 567 | db = stash.StashManager() 568 | scanboarddata = db.getscanboarddata() 569 | latestscanresults = db.getlatestscanresults(word) 570 | previousscanresults = db.getlatestscanresults(word, previousday=True) 571 | latestscanchartdata = db.latestscanchartdata(word) 572 | scanhistorydomain = db.getscanhistorydomain(word) 573 | pluginscanstatistics = db.getpluginscanstatistics() 574 | generator = statichtmlgenerator.HtmlGenerator(word) 575 | HTMLcode = generator.beginhtml() 576 | HTMLcode += generator.generatelatestscanresults(latestscanresults) 577 | HTMLcode += generator.generatepreviousscanresults(previousscanresults) 578 | graph = reportgraph.GraphGenerator(word) 579 | HTMLcode += graph.drawlatestscangraph(word, latestscanchartdata) 580 | HTMLcode += graph.drawscattergraphscanhistory(word, scanhistorydomain) 581 | HTMLcode += generator.generatepluginscanstatistics(pluginscanstatistics) 582 | HTMLcode += generator.generatedashboardcode(scanboarddata) 583 | HTMLcode += '
<p><span style="color: #000000;">Report generated on ' + str( 584 | datetime.datetime.now()) + '</span></p>
' 585 | HTMLcode += ''' 586 | </body> 587 | </html> 588 | ''' 589 | Html_file = open(filename, 'w') 590 | Html_file.write(HTMLcode) 591 | Html_file.close() 592 | print('[*] Reporting finished.') 593 | print('[*] Saving files.') 594 | except Exception as e: 595 | print(e) 596 | print('\n\033[93m[!] An error occurred while creating the output file.\n\n \033[0m') 597 | sys.exit(1) 598 | 599 | try: 600 | filename = filename.split('.')[0] + '.xml' 601 | file = open(filename, 'w') 602 | file.write('