├── lib
│   ├── theHarvester
│   │   ├── .gitignore
│   │   ├── lib
│   │   │   ├── __init__.py
│   │   │   ├── hostchecker.py
│   │   │   ├── htmlExport.py
│   │   │   └── markup.py
│   │   ├── discovery
│   │   │   ├── shodan
│   │   │   │   ├── __init__.py
│   │   │   │   ├── wps.py
│   │   │   │   └── api.py
│   │   │   ├── DNS
│   │   │   │   ├── Lib.py
│   │   │   │   ├── Type.py
│   │   │   │   ├── Opcode.py
│   │   │   │   ├── Class.py
│   │   │   │   ├── lazy.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── Status.py
│   │   │   │   ├── win32dns.py
│   │   │   │   └── Base.py
│   │   │   ├── __init__.py
│   │   │   ├── shodansearch.py
│   │   │   ├── pgpsearch.py
│   │   │   ├── googlesets.py
│   │   │   ├── linkedinsearch.py
│   │   │   ├── baidusearch.py
│   │   │   ├── yahoosearch.py
│   │   │   ├── dogpilesearch.py
│   │   │   ├── twittersearch.py
│   │   │   ├── googleplussearch.py
│   │   │   ├── asksearch.py
│   │   │   ├── jigsaw.py
│   │   │   ├── yandexsearch.py
│   │   │   ├── googlesearch.py
│   │   │   ├── exaleadsearch.py
│   │   │   ├── bingsearch.py
│   │   │   ├── googleCSE.py
│   │   │   ├── dnssearch-threads.py
│   │   │   └── dnssearch.py
│   │   ├── tests
│   │   │   └── myparser_test.py
│   │   ├── LICENSES
│   │   ├── changelog.txt
│   │   ├── README
│   │   ├── myparser.py
│   │   ├── COPYING
│   │   └── theHarvester.py
│   ├── hostchecker.py
│   ├── htmlExport.py
│   └── markup.py
├── README.md
├── .gitattributes
└── snoop.py
/lib/theHarvester/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.idea
3 |
--------------------------------------------------------------------------------
/lib/theHarvester/lib/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["markup", "graphs", "hostchecker"]
2 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/__init__.py:
--------------------------------------------------------------------------------
1 | from api import WebAPI
2 |
3 | __version__ = "0.5.0"
4 |
5 | __all__ = ['WebAPI']
6 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Lib.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrismaddalena/DomainSnooper/master/lib/theHarvester/discovery/DNS/Lib.py
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Type.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrismaddalena/DomainSnooper/master/lib/theHarvester/discovery/DNS/Type.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DomainSnooper
2 | Tool that checks a list of domains, gathering e-mail addresses and social media handles and flagging potential issues.
3 |
4 | This tool uses Troy Hunt's HaveIBeenPwned API and TheHarvester by laramies. TheHarvester is included in this repo to make things simple. Learn more here: https://github.com/laramies/theHarvester
5 |
6 |
--------------------------------------------------------------------------------
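For reference, a minimal sketch (not a file in this repo) of the HaveIBeenPwned lookup the README describes, assuming the unauthenticated v2 endpoint that was current when this tool was written; today's v3 API requires an hibp-api-key header, and check_account is a hypothetical name:

    import requests

    def check_account(account):
        # the breachedaccount endpoint answers 404 when no breach is on record
        url = "https://haveibeenpwned.com/api/v2/breachedaccount/" + account
        r = requests.get(url, headers={'User-Agent': 'DomainSnooper-example'})
        if r.status_code == 404:
            return []
        r.raise_for_status()
        return [b['Name'] for b in r.json()]

    print check_account("test@example.com")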
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["bingsearch",
2 | "googlesearch",
3 | "googleplussearch",
4 | "pgpsearch",
5 | "linkedinsearch",
6 | "exaleadsearch",
7 | "yandexsearch",
8 | "googlesets",
9 | "dnssearch",
10 | "shodansearch",
11 | "jigsaw",
12 | "twittersearch",
13 | "dogpilesearch",
14 | "baidusearch",
15 | "yahoosearch",
16 | "googleCSE"]
17 |
--------------------------------------------------------------------------------
/lib/theHarvester/tests/myparser_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # Unit tests for myparser.py
3 | #
4 | import myparser
5 |
6 | import unittest
7 |
8 | class TestMyParser(unittest.TestCase):
9 |
10 | def test_emails(self):
11 | word = 'domain.com'
12 | results = '***a@domain***banotherdomain.com***c@domain.com***d@sub.domain.com***'
13 | p = myparser.parser(results, word)
14 | emails = sorted(p.emails())
15 |         self.assertEqual(emails, ['c@domain.com', 'd@sub.domain.com'])
16 |
17 | if __name__ == '__main__':
18 | unittest.main()
19 |
--------------------------------------------------------------------------------
/lib/hostchecker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """
4 | Created by laramies on 2008-08-21.
5 | """
6 |
7 | import sys
8 | import socket
9 |
10 |
11 | class Checker():
12 |
13 | def __init__(self, hosts):
14 | self.hosts = hosts
15 | self.realhosts = []
16 |
17 | def check(self):
18 | for x in self.hosts:
19 | try:
20 | res = socket.gethostbyname(x)
21 | self.realhosts.append(res + ":" + x)
22 | except Exception as e:
23 | pass
24 | return self.realhosts
25 |
--------------------------------------------------------------------------------
/lib/theHarvester/lib/hostchecker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """
4 | Created by laramies on 2008-08-21.
5 | """
6 |
7 | import sys
8 | import socket
9 |
10 |
11 | class Checker():
12 |
13 | def __init__(self, hosts):
14 | self.hosts = hosts
15 | self.realhosts = []
16 |
17 | def check(self):
18 | for x in self.hosts:
19 | try:
20 | res = socket.gethostbyname(x)
21 | self.realhosts.append(res + ":" + x)
22 | except Exception as e:
23 | pass
24 | return self.realhosts
25 |
--------------------------------------------------------------------------------
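A short usage sketch for the Checker class above, assuming it is run from a directory containing hostchecker.py; the host names are placeholders. Names that fail to resolve are silently dropped, and each hit comes back as an "ip:hostname" string:

    from hostchecker import Checker

    checker = Checker(["www.example.com", "no-such-host.invalid"])
    for entry in checker.check():
        print entry  # e.g. "93.184.216.34:www.example.com"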
/lib/theHarvester/discovery/shodansearch.py:
--------------------------------------------------------------------------------
1 | from shodan import WebAPI
2 | import sys
3 |
4 |
5 | class search_shodan():
6 |
7 | def __init__(self, host):
8 | self.host = host
9 | self.key = "oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt"
10 | if self.key == "":
11 | print "You need an API key in order to use SHODAN database. You can get one here: http://www.shodanhq.com/"
12 | sys.exit()
13 | self.api = WebAPI(self.key)
14 |
15 | def run(self):
16 | try:
17 | host = self.api.host(self.host)
18 | return host['data']
19 |         except Exception:
20 | print "SHODAN empty reply or error in the call"
21 | return "error"
22 |
--------------------------------------------------------------------------------
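A hedged sketch of driving search_shodan above, assuming the old shodan-0.5.0 WebAPI behaviour where host() returns a dict whose 'data' list holds one record per open port; the IP is a placeholder:

    import shodansearch

    s = shodansearch.search_shodan("203.0.113.10")
    banners = s.run()
    if banners != "error":
        for record in banners:
            print record.get('port'), record.get('banner')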
/lib/theHarvester/LICENSES:
--------------------------------------------------------------------------------
1 | Released under the GPL v 2.0.
2 |
3 | If you did not receive a copy of the GPL, try http://www.gnu.org/.
4 |
5 | Copyright 2011 Christian Martorella
6 |
7 | theHarvester is free software; you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation version 2 of the License.
10 |
11 | theHarvester is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU General Public License for more details.
15 | You should have received a copy of the GNU General Public License along
16 | with theHarvester; if not, write to the Free Software Foundation, Inc.,
17 | 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/pgpsearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 |
6 |
7 | class search_pgp:
8 |
9 | def __init__(self, word):
10 | self.word = word
11 | self.results = ""
12 | self.server = "pgp.rediris.es:11371"
13 | self.hostname = "pgp.rediris.es"
14 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 |
16 | def process(self):
17 | h = httplib.HTTP(self.server)
18 | h.putrequest('GET', "/pks/lookup?search=" + self.word + "&op=index")
19 | h.putheader('Host', self.hostname)
20 | h.putheader('User-agent', self.userAgent)
21 | h.endheaders()
22 | returncode, returnmsg, headers = h.getreply()
23 | self.results = h.getfile().read()
24 |
25 | def get_emails(self):
26 | rawres = myparser.parser(self.results, self.word)
27 | return rawres.emails()
28 |
29 | def get_hostnames(self):
30 | rawres = myparser.parser(self.results, self.word)
31 | return rawres.hostnames()
32 |
--------------------------------------------------------------------------------
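pgpsearch speaks the HKP key-server protocol: a plain GET to /pks/lookup?search=<word>&op=index on port 11371 returns an index page whose addresses myparser then scrapes. A minimal driver, assuming the discovery modules are on sys.path the way theHarvester.py arranges, with example.com as a placeholder domain:

    import pgpsearch

    s = pgpsearch.search_pgp("example.com")
    s.process()
    print s.get_emails()
    print s.get_hostnames()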
/lib/theHarvester/discovery/DNS/Opcode.py:
--------------------------------------------------------------------------------
1 | """
2 | $Id: Opcode.py,v 1.6 2002/04/23 10:51:43 anthonybaxter Exp $
3 |
4 | This file is part of the pydns project.
5 | Homepage: http://pydns.sourceforge.net
6 |
7 | This code is covered by the standard Python License.
8 |
9 | Opcode values in message header. RFC 1035, 1996, 2136.
10 | """
11 |
12 |
13 | QUERY = 0
14 | IQUERY = 1
15 | STATUS = 2
16 | NOTIFY = 4
17 | UPDATE = 5
18 |
19 | # Construct reverse mapping dictionary
20 |
21 | _names = dir()
22 | opcodemap = {}
23 | for _name in _names:
24 | if _name[0] != '_':
25 | opcodemap[eval(_name)] = _name
26 |
27 |
28 | def opcodestr(opcode):
29 | if opcode in opcodemap:
30 | return opcodemap[opcode]
31 | else:
32 | return repr(opcode)
33 |
34 | #
35 | # $Log: Opcode.py,v $
36 | # Revision 1.6 2002/04/23 10:51:43 anthonybaxter
37 | # Added UPDATE, NOTIFY.
38 | #
39 | # Revision 1.5 2002/03/19 12:41:33 anthonybaxter
40 | # tabnannied and reindented everything. 4 space indent, no tabs.
41 | # yay.
42 | #
43 | # Revision 1.4 2002/03/19 12:26:13 anthonybaxter
44 | # death to leading tabs.
45 | #
46 | # Revision 1.3 2001/08/09 09:08:55 anthonybaxter
47 | # added identifying header to top of each file
48 | #
49 | # Revision 1.2 2001/07/19 06:57:07 anthony
50 | # cvs keywords added
51 | #
52 | #
53 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googlesets.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | import time
7 |
8 |
9 | class search_google_labs:
10 |
11 | def __init__(self, list):
12 | self.results = ""
13 | self.totalresults = ""
14 | self.server = "labs.google.com"
15 | self.hostname = "labs.google.com"
16 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 | id = 0
18 | self.set = ""
19 | for x in list:
20 | id += 1
21 | if id == 1:
22 | self.set = self.set + "q" + str(id) + "=" + str(x)
23 | else:
24 | self.set = self.set + "&q" + str(id) + "=" + str(x)
25 |
26 | def do_search(self):
27 | h = httplib.HTTP(self.server)
28 | h.putrequest('GET', "/sets?hl=en&" + self.set)
29 | h.putheader('Host', self.hostname)
30 | h.putheader('User-agent', self.userAgent)
31 | h.endheaders()
32 | returncode, returnmsg, headers = h.getreply()
33 | self.results = h.getfile().read()
34 | self.totalresults += self.results
35 |
36 | def get_set(self):
37 |         rawres = myparser.parser(self.totalresults, self.set)  # was the builtin list; pass the query terms built in __init__
38 | return rawres.set()
39 |
40 | def process(self):
41 | self.do_search()
42 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/linkedinsearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import requests
3 | import sys
4 | import myparser
5 | import re
6 |
7 |
8 | class search_linkedin:
9 |
10 | def __init__(self, word, limit):
11 | self.word = word.replace(' ', '%20')
12 | self.results = ""
13 | self.totalresults = ""
14 | self.server = "www.google.com"
15 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
16 | self.quantity = "100"
17 | self.limit = int(limit)
18 | self.counter = 0
19 |
20 | def do_search(self):
21 | try:
22 | urly="http://"+ self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Alinkedin.com/in%20" + self.word
23 | except Exception, e:
24 | print e
25 | try:
26 | r=requests.get(urly)
27 | except Exception,e:
28 | print e
29 | self.results = r.content
30 | self.totalresults += self.results
31 |
32 | def get_people(self):
33 | rawres = myparser.parser(self.totalresults, self.word)
34 | return rawres.people_linkedin()
35 |
36 | def process(self):
37 | while (self.counter < self.limit):
38 | self.do_search()
39 | self.counter += 100
40 | print "\tSearching " + str(self.counter) + " results.."
41 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/baidusearch.py:
--------------------------------------------------------------------------------
1 | import httplib
2 | import myparser
3 | import time
4 | import sys
5 |
6 |
7 | class search_baidu:
8 |
9 | def __init__(self, word, limit):
10 | self.word = word
11 | self.total_results = ""
12 | self.server = "www.baidu.com"
13 | self.hostname = "www.baidu.com"
14 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 | self.limit = limit
16 | self.counter = 0
17 |
18 | def do_search(self):
19 | h = httplib.HTTP(self.server)
20 |
21 | h.putrequest('GET', "/s?wd=%40" + self.word
22 | + "&pn=" + str(self.counter))
23 | h.putheader('Host', self.hostname)
24 | h.putheader('User-agent', self.userAgent)
25 | h.endheaders()
26 | returncode, returnmsg, headers = h.getreply()
27 |
28 | self.total_results += h.getfile().read()
29 |
30 | def process(self):
31 | while self.counter <= self.limit and self.counter <= 1000:
32 | self.do_search()
33 | time.sleep(1)
34 |
35 | print "\tSearching " + str(self.counter) + " results..."
36 | self.counter += 10
37 |
38 | def get_emails(self):
39 | rawres = myparser.parser(self.total_results, self.word)
40 | return rawres.emails()
41 |
42 | def get_hostnames(self):
43 | rawres = myparser.parser(self.total_results, self.word)
44 | return rawres.hostnames()
45 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/yahoosearch.py:
--------------------------------------------------------------------------------
1 | import httplib
2 | import myparser
3 | import time
4 | import sys
5 |
6 |
7 | class search_yahoo:
8 |
9 | def __init__(self, word, limit):
10 | self.word = word
11 | self.total_results = ""
12 | self.server = "search.yahoo.com"
13 | self.hostname = "search.yahoo.com"
14 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 | self.limit = limit
16 | self.counter = 0
17 |
18 | def do_search(self):
19 | h = httplib.HTTP(self.server)
20 |
21 | h.putrequest('GET', "/search?p=\"%40" + self.word
22 | + "\"&b=" + str(self.counter) + "&pz=10")
23 | h.putheader('Host', self.hostname)
24 | h.putheader('User-agent', self.userAgent)
25 | h.endheaders()
26 | returncode, returnmsg, headers = h.getreply()
27 |
28 | self.total_results += h.getfile().read()
29 |
30 | def process(self):
31 | while self.counter <= self.limit and self.counter <= 1000:
32 | self.do_search()
33 | time.sleep(1)
34 |
35 | print "\tSearching " + str(self.counter) + " results..."
36 | self.counter += 10
37 |
38 | def get_emails(self):
39 | rawres = myparser.parser(self.total_results, self.word)
40 | return rawres.emails()
41 |
42 | def get_hostnames(self):
43 | rawres = myparser.parser(self.total_results, self.word)
44 | return rawres.hostnames()
45 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dogpilesearch.py:
--------------------------------------------------------------------------------
1 | import httplib
2 | import myparser
3 | import time
4 | import sys
5 |
6 |
7 | class search_dogpile:
8 |
9 | def __init__(self, word, limit):
10 | self.word = word
11 | self.total_results = ""
12 | self.server = "www.dogpile.com"
13 | self.hostname = "www.dogpile.com"
14 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 | self.limit = limit
16 | self.counter = 0
17 |
18 | def do_search(self):
19 | h = httplib.HTTP(self.server)
20 |
21 | # Dogpile is hardcoded to return 10 results
22 | h.putrequest('GET', "/search/web?qsi=" + str(self.counter)
23 | + "&q=\"%40" + self.word + "\"")
24 | h.putheader('Host', self.hostname)
25 | h.putheader('User-agent', self.userAgent)
26 | h.endheaders()
27 | returncode, returnmsg, headers = h.getreply()
28 |
29 | self.total_results += h.getfile().read()
30 |
31 | def process(self):
32 | while self.counter <= self.limit and self.counter <= 1000:
33 | self.do_search()
34 | time.sleep(1)
35 |
36 | print "\tSearching " + str(self.counter) + " results..."
37 | self.counter += 10
38 |
39 | def get_emails(self):
40 | rawres = myparser.parser(self.total_results, self.word)
41 | return rawres.emails()
42 |
43 | def get_hostnames(self):
44 | rawres = myparser.parser(self.total_results, self.word)
45 | return rawres.hostnames()
46 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/twittersearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import requests
3 | import sys
4 | import myparser
5 | import re
6 |
7 |
8 | class search_twitter:
9 |
10 | def __init__(self, word, limit):
11 | self.word = word.replace(' ', '%20')
12 | self.results = ""
13 | self.totalresults = ""
14 | self.server = "www.google.com"
15 | self.hostname = "www.google.com"
16 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100116 Firefox/3.7"
17 | self.quantity = "100"
18 | self.limit = int(limit)
19 | self.counter = 0
20 |
21 | def do_search(self):
22 | try:
23 | urly="https://"+ self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Atwitter.com%20intitle%3A%22on+Twitter%22%20" + self.word
24 | except Exception, e:
25 | print e
26 | headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
27 | try:
28 | r=requests.get(urly,headers=headers)
29 | except Exception,e:
30 | print e
31 | self.results = r.content
32 | self.totalresults += self.results
33 |
34 | def get_people(self):
35 | rawres = myparser.parser(self.totalresults, self.word)
36 | return rawres.people_twitter()
37 |
38 | def process(self):
39 | while (self.counter < self.limit):
40 | self.do_search()
41 | self.counter += 100
42 | print "\tSearching " + str(self.counter) + " results.."
43 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Class.py:
--------------------------------------------------------------------------------
1 | """
2 | $Id: Class.py,v 1.6 2002/04/23 12:52:19 anthonybaxter Exp $
3 |
4 | This file is part of the pydns project.
5 | Homepage: http://pydns.sourceforge.net
6 |
7 | This code is covered by the standard Python License.
8 |
9 | CLASS values (section 3.2.4)
10 | """
11 |
12 |
13 | IN = 1 # the Internet
14 | CS = 2 # the CSNET class (Obsolete - used only for examples in
15 | # some obsolete RFCs)
16 | CH = 3 # the CHAOS class. When someone shows me python running on
17 | # a Symbolics Lisp machine, I'll look at implementing this.
18 | HS = 4 # Hesiod [Dyer 87]
19 |
20 | # QCLASS values (section 3.2.5)
21 |
22 | ANY = 255 # any class
23 |
24 |
25 | # Construct reverse mapping dictionary
26 |
27 | _names = dir()
28 | classmap = {}
29 | for _name in _names:
30 | if _name[0] != '_':
31 | classmap[eval(_name)] = _name
32 |
33 |
34 | def classstr(klass):
35 | if klass in classmap:
36 | return classmap[klass]
37 | else:
38 | return repr(klass)
39 |
40 | #
41 | # $Log: Class.py,v $
42 | # Revision 1.6 2002/04/23 12:52:19 anthonybaxter
43 | # cleanup whitespace.
44 | #
45 | # Revision 1.5 2002/03/19 12:41:33 anthonybaxter
46 | # tabnannied and reindented everything. 4 space indent, no tabs.
47 | # yay.
48 | #
49 | # Revision 1.4 2002/03/19 12:26:13 anthonybaxter
50 | # death to leading tabs.
51 | #
52 | # Revision 1.3 2001/08/09 09:08:55 anthonybaxter
53 | # added identifying header to top of each file
54 | #
55 | # Revision 1.2 2001/07/19 06:57:07 anthony
56 | # cvs keywords added
57 | #
58 | #
59 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googleplussearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import requests
3 | import sys
4 | import myparser
5 | import re
6 |
7 |
8 | class search_googleplus:
9 |
10 | def __init__(self, word, limit):
11 | self.word = word.replace(' ', '%20')
12 | self.results = ""
13 | self.totalresults = ""
14 | self.server = "www.google.com"
15 | self.hostname = "www.google.com"
16 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 | self.quantity = "100"
18 | self.limit = int(limit)
19 | self.counter = 0
20 |
21 | def do_search(self):
22 | try:
23 | urly="https://" + self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Aplus.google.com%20intext%3A%22Works%20at%22%20" + self.word+ "%20-inurl%3Aphotos%20-inurl%3Aabout%20-inurl%3Aposts%20-inurl%3Aplusones"
24 | except Exception, e:
25 | print e
26 | try:
27 | headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
28 | r=requests.get(urly,headers=headers)
29 | except Exception,e:
30 | print e
31 | self.results = r.content
32 | self.totalresults += self.results
33 |
34 | def get_people(self):
35 | rawres = myparser.parser(self.totalresults, self.word)
36 | return rawres.people_googleplus()
37 |
38 | def process(self):
39 | while (self.counter < self.limit):
40 | self.do_search()
41 | self.counter += 100
42 | print "\tSearching " + str(self.counter) + " results.."
43 |
--------------------------------------------------------------------------------
/lib/theHarvester/changelog.txt:
--------------------------------------------------------------------------------
1 |
2 | Changelog in 2.6:
3 | -----------------
4 | usage() improvement, thanks to CameronNemo.
5 | Added Yahoo and Baidu search engines. Thanks to Tatanus
6 | Added check for the existence of Requests library.
7 | Fixed email regex to provide cleaner results. Thanks to Peter McAlpine
8 |
9 | Changelog in 2.5:
10 | -----------------
11 | -Replaced httplib with the Requests HTTP library (for Google-related searches)
12 | -Fixed Google searches
13 |
13 | Changelog in 2.4:
14 | ------------------
15 | -Fixed Linkedin Parser
16 | -Fixed 123people
17 | -Added Dogpile Search engine (Marcus)
18 | -PEP8 compliant (Mario)
19 | -Fixed XML export (Marcus)
20 | -Expanded TLD list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt (Marcus)
21 | -DNS Bruteforce fixed (Tomas)
22 | -Added Google Custom Search Support - Need API Key to use it.
23 |
24 |
25 |
26 | Changelog in 2.3:
27 | --------------
28 | -Fixed duplicates
29 |
30 | Changelog in 2.2:
31 | ----------------
32 | -Added Jigsaw (www.jigsaw.com)
33 | -Added 123People (www.123people.com)
34 | -Added limit to google searches as the maximum results we can obtain is 1000
35 | -Removed SET, as service was discontinued by Google
36 | -Fixed parser to remove wrong results like emails starting with @
37 |
38 |
39 | Changelog in 2.1:
40 | ----------------
41 | -DNS Bruteforcer
42 | -DNS Reverse lookups
43 | -DNS TLD Expansion
44 | -SHODAN DB integration
45 | -HTML report
46 | -DNS server selection
47 |
48 |
49 | Changelog in 2.0:
50 | ----------------
51 | -Complete rewrite, more modular and easy to maintain
52 | -New sources (Exalead, Google-Profiles, Bing-Api)
53 | -Time delay between requests, to prevent search engines from blocking our IPs
54 | -You can start the search from the results page that you want, hence you can *resume* a search
55 | -Export to xml
56 | -All search engines harvesting
57 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/lazy.py:
--------------------------------------------------------------------------------
1 | # $Id: lazy.py,v 1.5.2.1 2007/05/22 20:23:38 customdesigned Exp $
2 | #
3 | # This file is part of the pydns project.
4 | # Homepage: http://pydns.sourceforge.net
5 | #
6 | # This code is covered by the standard Python License.
7 | #
8 |
9 | # routines for lazy people.
10 | import Base
11 | import string
12 |
13 |
14 | def revlookup(name):
15 | "convenience routine for doing a reverse lookup of an address"
16 | if Base.defaults['server'] == []:
17 | Base.DiscoverNameServers()
18 | a = string.split(name, '.')
19 | a.reverse()
20 | b = string.join(a, '.') + '.in-addr.arpa'
21 | # this will only return one of any records returned.
22 | return Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
23 |
24 |
25 | def mxlookup(name):
26 | """
27 | convenience routine for doing an MX lookup of a name. returns a
28 | sorted list of (preference, mail exchanger) records
29 | """
30 | if Base.defaults['server'] == []:
31 | Base.DiscoverNameServers()
32 | a = Base.DnsRequest(name, qtype='mx').req().answers
33 | l = sorted(map(lambda x: x['data'], a))
34 | return l
35 |
36 | #
37 | # $Log: lazy.py,v $
38 | # Revision 1.5.2.1 2007/05/22 20:23:38 customdesigned
39 | # Lazy call to DiscoverNameServers
40 | #
41 | # Revision 1.5 2002/05/06 06:14:38 anthonybaxter
42 | # reformat, move import to top of file.
43 | #
44 | # Revision 1.4 2002/03/19 12:41:33 anthonybaxter
45 | # tabnannied and reindented everything. 4 space indent, no tabs.
46 | # yay.
47 | #
48 | # Revision 1.3 2001/08/09 09:08:55 anthonybaxter
49 | # added identifying header to top of each file
50 | #
51 | # Revision 1.2 2001/07/19 06:57:07 anthony
52 | # cvs keywords added
53 | #
54 | #
55 |
--------------------------------------------------------------------------------
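A sketch of what the two convenience routines above return when the package is imported as DNS (discovery/DNS/__init__.py re-exports them via "from lazy import *"); the address and domain are placeholders:

    import DNS

    print DNS.revlookup("8.8.8.8")  # PTR name for the address
    for preference, exchanger in DNS.mxlookup("example.com"):
        print preference, exchanger  # sorted (preference, mail exchanger) pairs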
/lib/theHarvester/discovery/asksearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 |
7 |
8 | class search_ask:
9 |
10 | def __init__(self, word, limit):
11 | self.word = word.replace(' ', '%20')
12 | self.results = ""
13 | self.totalresults = ""
14 | self.server = "www.ask.com"
15 | self.hostname = "www.ask.com"
16 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 | self.quantity = "100"
18 | self.limit = int(limit)
19 | self.counter = 0
20 |
21 | def do_search(self):
22 | h = httplib.HTTP(self.server)
23 |         h.putrequest(
24 |             'GET',
25 |             "/web?q=%40" +
26 |             self.word +
27 |             "&pu=100&page=" +
28 |             str(self.counter))
29 | h.putheader('User-agent', self.userAgent)
30 | h.endheaders()
31 | returncode, returnmsg, headers = h.getreply()
32 | self.results = h.getfile().read()
33 | self.totalresults += self.results
34 |
35 | def check_next(self):
36 | renext = re.compile('> Next <')
37 | nextres = renext.findall(self.results)
38 | if nextres != []:
39 | nexty = "1"
40 | else:
41 | nexty = "0"
42 | return nexty
43 |
44 | def get_people(self):
45 | rawres = myparser.parser(self.totalresults, self.word)
46 | return rawres.people_jigsaw()
47 |
48 | def process(self):
49 | while (self.counter < self.limit):
50 | self.do_search()
51 | more = self.check_next()
52 | if more == "1":
53 | self.counter += 100
54 | else:
55 | break
56 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # $Id: __init__.py,v 1.8.2.2 2007/05/22 21:06:52 customdesigned Exp $
3 | #
4 | # This file is part of the pydns project.
5 | # Homepage: http://pydns.sourceforge.net
6 | #
7 | # This code is covered by the standard Python License.
8 | #
9 |
10 | # __init__.py for DNS class.
11 |
12 | __version__ = '2.3.1'
13 |
14 | import Type
15 | import Opcode
16 | import Status
17 | import Class
18 | from Base import DnsRequest, DNSError
19 | from Lib import DnsResult
20 | from Base import *
21 | from Lib import *
22 | Error = DNSError
23 | from lazy import *
24 | Request = DnsRequest
25 | Result = DnsResult
26 |
27 | #
28 | # $Log: __init__.py,v $
29 | # Revision 1.8.2.2 2007/05/22 21:06:52 customdesigned
30 | # utf-8 in __init__.py
31 | #
32 | # Revision 1.8.2.1 2007/05/22 20:39:20 customdesigned
33 | # Release 2.3.1
34 | #
35 | # Revision 1.8 2002/05/06 06:17:49 anthonybaxter
36 | # found that the old README file called itself release 2.2. So make
37 | # this one 2.3...
38 | #
39 | # Revision 1.7 2002/05/06 06:16:15 anthonybaxter
40 | # make some sort of reasonable version string. releasewards ho!
41 | #
42 | # Revision 1.6 2002/03/19 13:05:02 anthonybaxter
43 | # converted to class based exceptions (there goes the python1.4 compatibility :)
44 | #
45 | # removed a quite gross use of 'eval()'.
46 | #
47 | # Revision 1.5 2002/03/19 12:41:33 anthonybaxter
48 | # tabnannied and reindented everything. 4 space indent, no tabs.
49 | # yay.
50 | #
51 | # Revision 1.4 2001/11/26 17:57:51 stroeder
52 | # Added __version__
53 | #
54 | # Revision 1.3 2001/08/09 09:08:55 anthonybaxter
55 | # added identifying header to top of each file
56 | #
57 | # Revision 1.2 2001/07/19 06:57:07 anthony
58 | # cvs keywords added
59 | #
60 | #
61 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/jigsaw.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | # http://www.jigsaw.com/SearchAcrossCompanies.xhtml?opCode=refresh&rpage=4&mode=0&cnCountry=&order=0&orderby=0&cmName=accuvant&cnDead=false&cnExOwned=false&count=0&screenNameType=0&screenName=&omitScreenNameType=0&omitScreenName=&companyId=0&estimatedCount=277&rowsPerPage=50
7 |
8 |
9 | class search_jigsaw:
10 |
11 | def __init__(self, word, limit):
12 | self.word = word.replace(' ', '%20')
13 | self.results = ""
14 | self.totalresults = ""
15 | self.server = "www.jigsaw.com"
16 | self.hostname = "www.jigsaw.com"
17 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
18 | self.quantity = "100"
19 | self.limit = int(limit)
20 | self.counter = 0
21 |
22 | def do_search(self):
23 | h = httplib.HTTP(self.server)
24 | h.putrequest(
25 | 'GET',
26 | "/FreeTextSearch.xhtml?opCode=search&autoSuggested=True&freeText=" +
27 | self.word)
28 | h.putheader('User-agent', self.userAgent)
29 | h.endheaders()
30 | returncode, returnmsg, headers = h.getreply()
31 | self.results = h.getfile().read()
32 | self.totalresults += self.results
33 |
34 | def check_next(self):
35 | renext = re.compile('> Next <')
36 | nextres = renext.findall(self.results)
37 | if nextres != []:
38 | nexty = "1"
39 | else:
40 | nexty = "0"
41 | return nexty
42 |
43 | def get_people(self):
44 | rawres = myparser.parser(self.totalresults, self.word)
45 | return rawres.people_jigsaw()
46 |
47 | def process(self):
48 | while (self.counter < self.limit):
49 | self.do_search()
50 | more = self.check_next()
51 | if more == "1":
52 | self.counter += 100
53 | else:
54 | break
55 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/wps.py:
--------------------------------------------------------------------------------
1 | """
2 | WiFi Positioning System
3 |
4 | Wrappers around the SkyHook and Google Locations APIs to resolve
5 | wireless routers' MAC addresses (BSSID) to physical locations.
6 | """
7 | try:
8 | from json import dumps, loads
9 | except ImportError:  # Python < 2.6: fall back to the external simplejson package
10 | from simplejson import dumps, loads
11 | from urllib2 import Request, urlopen
12 | from urllib import urlencode
13 |
14 |
15 | class Skyhook:
16 |
17 | """Not yet ready for production, use the GoogleLocation class instead."""
18 |
19 | def __init__(self, username='api', realm='shodan'):
20 | self.username = username
21 | self.realm = realm
22 | self.url = 'https://api.skyhookwireless.com/wps2/location'
23 |
24 | def locate(self, mac):
25 | # Remove the ':'
26 | mac = mac.replace(':', '')
27 | print mac
28 | data = """
29 |
30 |
31 |
32 | %s
33 | %s
34 |
35 |
36 |
37 | %s
38 | -50
39 |
40 | """ % (self.username, self.realm, mac)
41 | request = Request(
42 | url=self.url,
43 | data=data,
44 | headers={'Content-type': 'text/xml'})
45 | response = urlopen(request)
46 | result = response.read()
47 | return result
48 |
49 |
50 | class GoogleLocation:
51 |
52 | def __init__(self):
53 | self.url = 'http://www.google.com/loc/json'
54 |
55 | def locate(self, mac):
56 | data = {
57 | 'version': '1.1.0',
58 | 'request_address': True,
59 | 'wifi_towers': [{
60 | 'mac_address': mac,
61 | 'ssid': 'g',
62 | 'signal_strength': -72
63 | }]
64 | }
65 | response = urlopen(self.url, dumps(data))
66 | data = response.read()
67 | return loads(data)
68 |
--------------------------------------------------------------------------------
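A hypothetical call against the GoogleLocation class above; the google.com/loc/json endpoint it wraps has long since been retired, so this is illustrative only and the BSSID is made up:

    from wps import GoogleLocation

    wps = GoogleLocation()
    print wps.locate('00:11:22:33:44:55')  # decoded JSON with the location fields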
/lib/theHarvester/discovery/DNS/Status.py:
--------------------------------------------------------------------------------
1 | """
2 | $Id: Status.py,v 1.7 2002/04/23 12:52:19 anthonybaxter Exp $
3 |
4 | This file is part of the pydns project.
5 | Homepage: http://pydns.sourceforge.net
6 |
7 | This code is covered by the standard Python License.
8 |
9 | Status values in message header
10 | """
11 |
12 | NOERROR = 0 # No Error [RFC 1035]
13 | FORMERR = 1 # Format Error [RFC 1035]
14 | SERVFAIL = 2 # Server Failure [RFC 1035]
15 | NXDOMAIN = 3 # Non-Existent Domain [RFC 1035]
16 | NOTIMP = 4 # Not Implemented [RFC 1035]
17 | REFUSED = 5 # Query Refused [RFC 1035]
18 | YXDOMAIN = 6 # Name Exists when it should not [RFC 2136]
19 | YXRRSET = 7 # RR Set Exists when it should not [RFC 2136]
20 | NXRRSET = 8 # RR Set that should exist does not [RFC 2136]
21 | NOTAUTH = 9 # Server Not Authoritative for zone [RFC 2136]
22 | NOTZONE = 10 # Name not contained in zone [RFC 2136]
23 | BADVERS = 16 # Bad OPT Version [RFC 2671]
24 | BADSIG = 16 # TSIG Signature Failure [RFC 2845]
25 | BADKEY = 17 # Key not recognized [RFC 2845]
26 | BADTIME = 18 # Signature out of time window [RFC 2845]
27 | BADMODE = 19 # Bad TKEY Mode [RFC 2930]
28 | BADNAME = 20 # Duplicate key name [RFC 2930]
29 | BADALG = 21 # Algorithm not supported [RFC 2930]
30 |
31 | # Construct reverse mapping dictionary
32 |
33 | _names = dir()
34 | statusmap = {}
35 | for _name in _names:
36 | if _name[0] != '_':
37 | statusmap[eval(_name)] = _name
38 |
39 |
40 | def statusstr(status):
41 | if status in statusmap:
42 | return statusmap[status]
43 | else:
44 | return repr(status)
45 |
46 | #
47 | # $Log: Status.py,v $
48 | # Revision 1.7 2002/04/23 12:52:19 anthonybaxter
49 | # cleanup whitespace.
50 | #
51 | # Revision 1.6 2002/04/23 10:57:57 anthonybaxter
52 | # update to complete the list of response codes.
53 | #
54 | # Revision 1.5 2002/03/19 12:41:33 anthonybaxter
55 | # tabnannied and reindented everything. 4 space indent, no tabs.
56 | # yay.
57 | #
58 | # Revision 1.4 2002/03/19 12:26:13 anthonybaxter
59 | # death to leading tabs.
60 | #
61 | # Revision 1.3 2001/08/09 09:08:55 anthonybaxter
62 | # added identifying header to top of each file
63 | #
64 | # Revision 1.2 2001/07/19 06:57:07 anthony
65 | # cvs keywords added
66 | #
67 | #
68 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/yandexsearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | import time
7 |
8 |
9 | class search_yandex:
10 |
11 | def __init__(self, word, limit, start):
12 | self.word = word
13 | self.results = ""
14 | self.totalresults = ""
15 | self.server = "yandex.com"
16 | self.hostname = "yandex.com"
17 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
18 | self.limit = limit
19 | self.counter = start
20 |
21 | def do_search(self):
22 | h = httplib.HTTP(self.server)
23 | h.putrequest('GET', "/search?text=%40" + self.word +
24 | "&numdoc=50&lr=" + str(self.counter))
25 | h.putheader('Host', self.hostname)
26 | h.putheader('User-agent', self.userAgent)
27 | h.endheaders()
28 | returncode, returnmsg, headers = h.getreply()
29 | self.results = h.getfile().read()
30 | self.totalresults += self.results
31 | print self.results
32 |
33 |     def do_search_files(self, files):  # TODO: the query does not filter by file type yet
34 |         self.files = files  # remembered so get_files() can call fileurls() later
35 |         h = httplib.HTTP(self.server)
36 |         h.putrequest('GET', "/search?text=%40" + self.word +
37 |                      "&numdoc=50&lr=" + str(self.counter))
38 |         h.putheader('Host', self.hostname)
39 |         h.putheader('User-agent', self.userAgent)
40 |         h.endheaders()
41 |         returncode, returnmsg, headers = h.getreply()
42 |         self.results = h.getfile().read()
43 |         self.totalresults += self.results
43 |
44 | def check_next(self):
45 | renext = re.compile('topNextUrl')
46 | nextres = renext.findall(self.results)
47 | if nextres != []:
48 | nexty = "1"
49 | print str(self.counter)
50 | else:
51 | nexty = "0"
52 | return nexty
53 |
54 | def get_emails(self):
55 | rawres = myparser.parser(self.totalresults, self.word)
56 | return rawres.emails()
57 |
58 | def get_hostnames(self):
59 | rawres = myparser.parser(self.totalresults, self.word)
60 | return rawres.hostnames()
61 |
62 | def get_files(self):
63 | rawres = myparser.parser(self.totalresults, self.word)
64 | return rawres.fileurls(self.files)
65 |
66 | def process(self):
67 | while self.counter <= self.limit:
68 | self.do_search()
69 | self.counter += 50
70 | print "Searching " + str(self.counter) + " results..."
71 |
72 | def process_files(self, files):
73 | while self.counter < self.limit:
74 | self.do_search_files(files)
75 | time.sleep(0.3)
76 | self.counter += 50
77 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googlesearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import sys
3 | import myparser
4 | import re
5 | import time
6 | import requests
7 |
8 |
9 | class search_google:
10 |
11 | def __init__(self, word, limit, start):
12 |         self.word = word
13 |         self.files = "pdf"  # file type used by get_files(), as in the other engines
14 |         self.results = ""
15 |         self.totalresults = ""
16 |         self.server = "www.google.com"
17 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
18 |         self.quantity = "100"
19 |         self.limit = limit
20 |         self.counter = start
20 |
21 | def do_search(self):
22 | try:
23 | urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=%40\"" + self.word + "\""
24 | except Exception, e:
25 | print e
26 | try:
27 | r=requests.get(urly)
28 | except Exception,e:
29 | print e
30 | self.results = r.content
31 | self.totalresults += self.results
32 |
33 |
34 | def do_search_profiles(self):
35 | try:
36 | urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20" + self.word + "\""
37 | except Exception, e:
38 | print e
39 | try:
40 | r=requests.get(urly)
41 | except Exception,e:
42 | print e
43 | self.results = r.content
44 |
45 | #'&hl=en&meta=&q=site:www.google.com%20intitle:"Google%20Profile"%20"Companies%20I%27ve%20worked%20for"%20"at%20' + self.word + '"')
46 | self.totalresults += self.results
47 |
48 | def get_emails(self):
49 | rawres = myparser.parser(self.totalresults, self.word)
50 | return rawres.emails()
51 |
52 | def get_hostnames(self):
53 | rawres = myparser.parser(self.totalresults, self.word)
54 | return rawres.hostnames()
55 |
56 | def get_files(self):
57 | rawres = myparser.parser(self.totalresults, self.word)
58 | return rawres.fileurls(self.files)
59 |
60 | def get_profiles(self):
61 | rawres = myparser.parser(self.totalresults, self.word)
62 | return rawres.profiles()
63 |
64 | def process(self):
65 | while self.counter <= self.limit and self.counter <= 1000:
66 | self.do_search()
67 | #more = self.check_next()
68 | time.sleep(1)
69 | print "\tSearching " + str(self.counter) + " results..."
70 | self.counter += 100
71 |
72 |
73 | def process_profiles(self):
74 | while self.counter < self.limit:
75 | self.do_search_profiles()
76 | time.sleep(0.3)
77 | self.counter += 100
78 | print "\tSearching " + str(self.counter) + " results..."
79 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/exaleadsearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | import time
7 |
8 |
9 | class search_exalead:
10 |
11 | def __init__(self, word, limit, start):
12 | self.word = word
13 | self.files = "pdf"
14 | self.results = ""
15 | self.totalresults = ""
16 | self.server = "www.exalead.com"
17 | self.hostname = "www.exalead.com"
18 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/4.0"
19 | self.limit = limit
20 | self.counter = start
21 |
22 | def do_search(self):
23 | h = httplib.HTTP(self.server)
24 | h.putrequest('GET', "/search/web/results/?q=%40" + self.word +
25 | "&elements_per_page=50&start_index=" + str(self.counter))
26 | h.putheader('Host', self.hostname)
27 | h.putheader(
28 | 'Referer',
29 | "http://" +
30 | self.hostname +
31 | "/search/web/results/?q=%40" +
32 | self.word)
33 | h.putheader('User-agent', self.userAgent)
34 | h.endheaders()
35 | returncode, returnmsg, headers = h.getreply()
36 | self.results = h.getfile().read()
37 | self.totalresults += self.results
38 |
39 |     def do_search_files(self, files):
40 |         h = httplib.HTTP(self.server)
41 |         h.putrequest(
42 |             'GET',
43 |             "/search/web/results/?q=" +
44 |             self.word +
45 |             "%20filetype:" +
46 |             self.files +
47 |             "&elements_per_page=50&start_index=" +
48 |             str(self.counter))
49 | h.putheader('Host', self.hostname)
50 | h.putheader('User-agent', self.userAgent)
51 | h.endheaders()
52 | returncode, returnmsg, headers = h.getreply()
53 | self.results = h.getfile().read()
54 | self.totalresults += self.results
55 |
56 | def check_next(self):
57 | renext = re.compile('topNextUrl')
58 | nextres = renext.findall(self.results)
59 | if nextres != []:
60 | nexty = "1"
61 | print str(self.counter)
62 | else:
63 | nexty = "0"
64 | return nexty
65 |
66 | def get_emails(self):
67 | rawres = myparser.parser(self.totalresults, self.word)
68 | return rawres.emails()
69 |
70 | def get_hostnames(self):
71 | rawres = myparser.parser(self.totalresults, self.word)
72 | return rawres.hostnames()
73 |
74 | def get_files(self):
75 | rawres = myparser.parser(self.totalresults, self.word)
76 | return rawres.fileurls(self.files)
77 |
78 | def process(self):
79 | while self.counter <= self.limit:
80 | self.do_search()
81 | self.counter += 50
82 | print "\tSearching " + str(self.counter) + " results..."
83 |
84 | def process_files(self, files):
85 | while self.counter < self.limit:
86 | self.do_search_files(files)
87 | time.sleep(1)
88 | more = self.check_next()
89 | if more == "1":
90 | self.counter += 50
91 | else:
92 | break
93 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/bingsearch.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | import time
7 |
8 |
9 | class search_bing:
10 |
11 | def __init__(self, word, limit, start):
12 | self.word = word.replace(' ', '%20')
13 | self.results = ""
14 | self.totalresults = ""
15 | self.server = "www.bing.com"
16 | self.apiserver = "api.search.live.net"
17 | self.hostname = "www.bing.com"
18 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
19 | self.quantity = "50"
20 | self.limit = int(limit)
21 | self.bingApi = ""
22 | self.counter = start
23 |
24 | def do_search(self):
25 | h = httplib.HTTP(self.server)
26 | h.putrequest('GET', "/search?q=%40" + self.word +
27 | "&count=50&first=" + str(self.counter))
28 | h.putheader('Host', self.hostname)
29 | h.putheader('Cookie', 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50')
30 | h.putheader('Accept-Language', 'en-us,en')
31 | h.putheader('User-agent', self.userAgent)
32 | h.endheaders()
33 | returncode, returnmsg, headers = h.getreply()
34 | self.results = h.getfile().read()
35 | self.totalresults += self.results
36 |
37 | def do_search_api(self):
38 | h = httplib.HTTP(self.apiserver)
39 | h.putrequest('GET', "/xml.aspx?Appid=" + self.bingApi + "&query=%40" +
40 | self.word + "&sources=web&web.count=40&web.offset=" + str(self.counter))
41 | h.putheader('Host', "api.search.live.net")
42 | h.putheader('User-agent', self.userAgent)
43 | h.endheaders()
44 | returncode, returnmsg, headers = h.getreply()
45 | self.results = h.getfile().read()
46 | self.totalresults += self.results
47 |
48 | def do_search_vhost(self):
49 | h = httplib.HTTP(self.server)
50 | h.putrequest('GET', "/search?q=ip:" + self.word +
51 | "&go=&count=50&FORM=QBHL&qs=n&first=" + str(self.counter))
52 | h.putheader('Host', self.hostname)
53 | h.putheader(
54 | 'Cookie', 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50')
55 | h.putheader('Accept-Language', 'en-us,en')
56 | h.putheader('User-agent', self.userAgent)
57 | h.endheaders()
58 | returncode, returnmsg, headers = h.getreply()
59 | self.results = h.getfile().read()
60 | self.totalresults += self.results
61 |
62 | def get_emails(self):
63 | rawres = myparser.parser(self.totalresults, self.word)
64 | return rawres.emails()
65 |
66 | def get_hostnames(self):
67 | rawres = myparser.parser(self.totalresults, self.word)
68 | return rawres.hostnames()
69 |
70 | def get_allhostnames(self):
71 | rawres = myparser.parser(self.totalresults, self.word)
72 | return rawres.hostnames_all()
73 |
74 | def process(self, api):
75 | if api == "yes":
76 | if self.bingApi == "":
77 | print "Please insert your API key in the discovery/bingsearch.py"
78 | sys.exit()
79 | while (self.counter < self.limit):
80 | if api == "yes":
81 | self.do_search_api()
82 | time.sleep(0.3)
83 | else:
84 | self.do_search()
85 | time.sleep(1)
86 | self.counter += 50
87 | print "\tSearching " + str(self.counter) + " results..."
88 |
89 | def process_vhost(self):
90 | # Maybe it is good to use other limit for this.
91 | while (self.counter < self.limit):
92 | self.do_search_vhost()
93 | self.counter += 50
94 |
--------------------------------------------------------------------------------
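The vhost mode above queries Bing with ip:<address> to enumerate other sites served from the same host. A sketch of driving it, assuming theHarvester's path setup, with a placeholder address:

    from discovery import bingsearch

    s = bingsearch.search_bing("203.0.113.10", 50, 0)  # word, limit, start
    s.process_vhost()
    print s.get_allhostnames()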
/lib/theHarvester/README:
--------------------------------------------------------------------------------
1 | *******************************************************************
2 | * *
3 | * | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *
4 | * | __| '_ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *
5 | * | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *
6 | * \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *
7 | * *
8 | * TheHarvester Ver. 2.6 *
9 | * Coded by Christian Martorella *
10 | * Edge-Security Research *
11 | * cmartorella@edge-security.com *
12 | *******************************************************************
13 |
14 | What is this?
15 | -------------
16 |
17 | theHarvester is a tool for gathering e-mail accounts, subdomain names, virtual
18 | hosts, open ports/banners, and employee names from different public sources
19 | (search engines, pgp key servers).
20 |
21 | It is a really simple tool, but very effective for the early stages of a
22 | penetration test, or just to learn how visible your company is on the Internet.
23 |
24 | The sources are:
25 |
26 | Passive:
27 | --------
28 | -google: google search engine - www.google.com
29 |
30 | -googleCSE: google custom search engine
31 |
32 | -google-profiles: google search engine, specific search for Google profiles
33 |
34 | -bing: microsoft search engine - www.bing.com
35 |
36 | -bingapi: microsoft search engine, through the API (you need to add your Key in
37 | the discovery/bingsearch.py file)
38 |
39 | -pgp: pgp key server - pgp.rediris.es
40 |
41 | -linkedin: google search engine, specific search for Linkedin users
42 |
43 |
44 | -vhost: Bing virtual hosts search
45 |
46 | -twitter: twitter accounts related to a specific domain (uses google search)
47 |
48 | -googleplus: users that work at the target company (uses google search)
49 |
50 | -yahoo: Yahoo search engine
51 |
52 | -baidu: Baidu search engine
53 |
54 | -shodan: Shodan Computer search engine, will search for ports and banners of
55 | the discovered hosts (http://www.shodanhq.com/)
56 |
57 |
58 | Active:
59 | -------
60 | -DNS brute force: this plugin will run a dictionary brute force enumeration
61 | -DNS reverse lookup: reverse lookup of the IPs discovered, in order to find hostnames
62 | -DNS TLD expansion: TLD dictionary brute force enumeration
63 |
64 |
65 | Modules that need API keys to work:
66 | ----------------------------------
67 | -googleCSE: You need to create a Google Custom Search Engine (CSE) and add your
68 | Google API key and CSE ID in the plugin (discovery/googleCSE.py)
69 | -shodan: You need to provide your API key in discovery/shodansearch.py
70 |
71 |
72 | Dependencies:
73 | ------------
74 | -Requests library (http://docs.python-requests.org/en/latest/)
75 | `pip install requests`
76 |
77 |
78 | Changelog in 2.6:
79 | ------------------
80 | -Added Yahoo and Baidu search engines. Thanks to Tatanus
81 | -Added check for the existence of Requests library.
82 | -Fixed email regex to provide cleaner results. Thanks to Peter McAlpine
83 |
84 | Changelog in 2.5:
85 | -----------------
86 | -Replaced httplib with the Requests HTTP library (for Google-related searches)
87 | -Fixed Google searches
88 |
89 |
90 | Comments? Bugs? Requests?
91 | ------------------------
92 | cmartorella@edge-security.com
93 |
94 | Updates:
95 | --------
96 | https://github.com/laramies/theHarvester
97 |
98 | Thanks:
99 | -------
100 | John Matherly - SHODAN project
101 | Lee Baird for suggestions and bugs reporting
102 |
--------------------------------------------------------------------------------
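A sketch of how theHarvester.py drives one of the passive sources listed above, matching the constructor and methods shown in discovery/googlesearch.py; the domain and limits are placeholders, and the import assumes the script runs from the theHarvester root as theHarvester.py does:

    from discovery import googlesearch

    search = googlesearch.search_google("example.com", 100, 0)  # word, limit, start
    search.process()
    print search.get_emails()
    print search.get_hostnames()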
/lib/theHarvester/discovery/googleCSE.py:
--------------------------------------------------------------------------------
1 | import string
2 | import httplib
3 | import sys
4 | import myparser
5 | import re
6 | import time
7 |
8 |
9 | class search_googleCSE:
10 |
11 | def __init__(self, word, limit, start):
12 | self.word = word
13 | self.files = "pdf"
14 | self.results = ""
15 | self.totalresults = ""
16 | self.server = "www.googleapis.com"
17 | self.hostname = "www.googleapis.com"
18 |         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
19 | self.quantity = "10"
20 | self.limit = limit
21 | self.counter = 1
22 | self.api_key = ""
23 | self.cse_id = ""
24 | self.lowRange = start
25 | self.highRange = start+100
26 |
27 | def do_search(self):
28 | h = httplib.HTTPS(self.server)
29 | h.putrequest('GET', "/customsearch/v1?key=" + self.api_key +"&highRange=" + str(self.highRange) + "&lowRange=" + str(self.lowRange) + "&cx=" +self.cse_id +
30 | "&start=" + str(self.counter) + "&q=%40\"" + self.word + "\"")
31 | h.putheader('Host', self.server)
32 | h.putheader('User-agent', self.userAgent)
33 | h.endheaders()
34 | returncode, returnmsg, headers = h.getreply()
35 | self.results = h.getfile().read()
36 | self.totalresults += self.results
37 |
38 |     def do_search_files(self, files):
39 | h = httplib.HTTPS(self.server)
40 | h.putrequest('GET', "/customsearch/v1?key=" + self.api_key +"&highRange=" + str(self.highRange) + "&lowRange=" + str(self.lowRange) + "&cx=" +self.cse_id +
41 | "&start=" + str(self.counter) + "&q=filetype:" + files +"%20site:" + self.word)
42 | h.putheader('Host', self.server)
43 | h.putheader('User-agent', self.userAgent)
44 | h.endheaders()
45 | returncode, returnmsg, headers = h.getreply()
46 | self.results = h.getfile().read()
47 | self.totalresults += self.results
48 |
49 |
50 | def check_next(self):
51 | renext = re.compile('> Next <')
52 | nextres = renext.findall(self.results)
53 | if nextres != []:
54 | nexty = "1"
55 | else:
56 | nexty = "0"
57 | return nexty
58 |
59 | def get_emails(self):
60 | rawres = myparser.parser(self.totalresults, self.word)
61 | return rawres.emails()
62 |
63 | def get_hostnames(self):
64 | rawres = myparser.parser(self.totalresults, self.word)
65 | return rawres.hostnames()
66 |
67 | def get_files(self):
68 | rawres = myparser.parser(self.totalresults, self.word)
69 | return rawres.fileurls(self.files)
70 |
71 |
72 | def process(self):
73 | tracker=self.counter + self.lowRange
74 | while tracker <= self.limit:
75 | self.do_search()
76 | #time.sleep(1)
77 | ESC=chr(27)
78 | sys.stdout.write(ESC + '[2K' + ESC+'[G')
79 | sys.stdout.write("\r\t" + "Searching " + str(self.counter+self.lowRange) + " results ..." )
80 | sys.stdout.flush()
81 | #print "\tSearching " + str(self.counter+self.lowRange) + " results...\t\t\t\t\t\r"
82 | if self.counter == 101:
83 | self.counter = 1
84 | self.lowRange +=100
85 | self.highRange +=100
86 | else:
87 | self.counter += 10
88 | tracker=self.counter + self.lowRange
89 |
90 | def store_results(self):
91 | filename = "debug_results.txt"
92 |         with open(filename, 'w') as f:  # context manager closes the file
93 |             f.write(self.totalresults)
94 |
95 |
96 | def process_files(self, files):
97 | while self.counter <= self.limit:
98 | self.do_search_files(files)
99 | time.sleep(1)
100 | self.counter += 100
101 | print "\tSearching " + str(self.counter) + " results..."
102 |
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dnssearch-threads.py:
--------------------------------------------------------------------------------
1 | import IPy
2 | import DNS
3 | import string
4 | import socket
5 | import sys
6 |
7 |
8 | class dns_reverse():
9 |
10 | def __init__(self, range, verbose=True):
11 | self.range = range
12 | self.iplist = ''
13 | self.results = []
14 | self.verbose = verbose
15 | try:
16 | DNS.ParseResolvConf("/etc/resolv.conf")
17 | nameserver = DNS.defaults['server'][0]
18 |         except Exception:
19 | print "Error in DNS resolvers"
20 | sys.exit()
21 |
22 | def run(self, host):
23 | a = string.split(host, '.')
24 | a.reverse()
25 | b = string.join(a, '.') + '.in-addr.arpa'
26 | nameserver = DNS.defaults['server'][0]
27 | if self.verbose:
28 | ESC = chr(27)
29 | sys.stdout.write(ESC + '[2K' + ESC + '[G')
30 | sys.stdout.write("\r" + host)
31 | sys.stdout.flush()
32 | try:
33 | name = DNS.Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
34 | return host + ":" + name
35 |         except Exception:
36 | pass
37 |
38 | def get_ip_list(self, ips):
39 | """Generates the list of ips to reverse"""
40 | try:
41 | list = IPy.IP(ips)
42 |         except Exception:
43 | print "Error in IP format, check the input and try again. (Eg. 192.168.1.0/24)"
44 | sys.exit()
45 | name = []
46 | for x in list:
47 | name.append(str(x))
48 | return name
49 |
50 | def list(self):
51 | self.iplist = self.get_ip_list(self.range)
52 | return self.iplist
53 |
54 | def process(self):
55 | for x in self.iplist:
56 | host = self.run(x)
57 | if host is not None:
58 | self.results.append(host)
59 | return self.results
60 |
61 |
62 | class dns_force():
63 |
64 | def __init__(self, domain, dnsserver, verbose=False):
65 | self.domain = domain
66 | self.server = dnsserver
67 | self.file = "dns-names.txt"
68 | self.subdo = False
69 | self.verbose = verbose
70 | try:
71 | f = open(self.file, "r")
72 |         except IOError:
73 | print "Error opening dns dictionary file"
74 | sys.exit()
75 | self.list = f.readlines()
76 |
77 | def getdns(self, domain):
78 | DNS.ParseResolvConf("/etc/resolv.conf")
79 | nameserver = DNS.defaults['server'][0]
80 | dom = domain
81 | if self.subdo == True:
82 | dom = domain.split(".")
83 | dom.pop(0)
84 | rootdom = ".".join(dom)
85 | else:
86 | rootdom = dom
87 |         if self.server == False:
88 |             r = DNS.Request(rootdom, qtype='SOA').req()
89 |             primary, email, serial, refresh, retry, expire, minimum = \
90 |                 r.answers[0]['data']
91 |             test = DNS.Request(rootdom, qtype='NS', server=primary, aa=1).req()
92 |             if test.header['status'] != "NOERROR":
93 |                 print "Error"
94 |                 sys.exit()
95 |             self.nameserver = test.answers[0]['data']
96 |         else:
97 |             self.nameserver = self.server  # honour a DNS server passed by the caller
98 |         return self.nameserver
97 |
98 | def run(self, host):
99 | self.nameserver = self.getdns(self.domain)
100 | hostname = str(host.split("\n")[0]) + "." + str(self.domain)
101 | # nameserver=DNS.defaults['server'][0]
102 | if self.verbose:
103 | ESC = chr(27)
104 | sys.stdout.write(ESC + '[2K' + ESC + '[G')
105 | sys.stdout.write("\r" + hostname)
106 | sys.stdout.flush()
107 | try:
108 | test = DNS.Request(
109 | hostname,
110 | qtype='a',
111 | server=self.nameserver).req(
112 | )
113 | hostip = test.answers[0]['data']
114 | return hostip + ":" + hostname
115 | except Exception as e:
116 | pass
117 |
118 | def process(self):
119 | results = []
120 | for x in self.list:
121 | host = self.run(x)
122 | if host is not None:
123 | results.append(host)
124 | return results
125 |
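126 | # Usage sketch (illustrative values; these classes are normally driven by
127 | # theHarvester.py rather than run directly):
128 | #   rev = dns_reverse('192.168.1.0/24', verbose=False)
129 | #   rev.list()
130 | #   print rev.process()            # ['192.168.1.1:router.example', ...]
131 | #   brute = dns_force('example.com', False)  # needs dns-names.txt on disk
132 | #   print brute.process()          # ['10.0.0.5:www.example.com', ...]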
--------------------------------------------------------------------------------
/lib/htmlExport.py:
--------------------------------------------------------------------------------
1 | from lib import markup
2 | from lib import graphs
3 | import re
4 |
5 |
6 | class htmlExport():
7 |
8 | def __init__(self, users, hosts, vhosts, dnsres,
9 | dnsrev, file, domain, shodan, tldres):
10 | self.users = users
11 | self.hosts = hosts
12 | self.vhost = vhosts
13 | self.fname = file
14 | self.dnsres = dnsres
15 | self.dnsrev = dnsrev
16 | self.domain = domain
17 | self.shodan = shodan
18 | self.tldres = tldres
19 | self.style = ""
20 |
21 | def styler(self):
22 | a = """
82 | """
83 | self.style = a
84 |
85 | def writehtml(self):
86 | page = markup.page()
87 | # page.init (title="theHarvester
88 |         # Results",css=('edge.css'),footer="Edge-security 2011")
89 | page.html()
90 | self.styler()
91 | page.head(self.style)
92 | page.body()
93 | page.h1("theHarvester results")
94 |         page.h2("for: " + self.domain)
95 | page.h3("Dashboard:")
96 | graph = graphs.BarGraph('vBar')
97 | graph.values = [len(
98 | self.users),
99 | len(self.hosts),
100 | len(self.vhost),
101 | len(self.tldres),
102 | len(self.shodan)]
103 | graph.labels = ['Emails', 'hosts', 'Vhost', 'TLD', 'Shodan']
104 | graph.showValues = 1
105 | page.body(graph.create())
106 |         page.h3("E-mail addresses found:")
107 | if self.users != []:
108 | page.ul(class_="userslist")
109 | page.li(self.users, class_="useritem")
110 | page.ul.close()
111 | else:
112 | page.h2("No emails found")
113 | page.h3("Hosts found:")
114 | if self.hosts != []:
115 | page.ul(class_="softlist")
116 | page.li(self.hosts, class_="softitem")
117 | page.ul.close()
118 | else:
119 | page.h2("No hosts found")
120 | if self.tldres != []:
121 | page.h3("TLD domains found in TLD expansion:")
122 | page.ul(class_="tldlist")
123 | page.li(self.tldres, class_="tlditem")
124 | page.ul.close()
125 | if self.dnsres != []:
126 | page.h3("Hosts found in DNS brute force:")
127 | page.ul(class_="dnslist")
128 | page.li(self.dnsres, class_="dnsitem")
129 | page.ul.close()
130 | if self.dnsrev != []:
131 |             page.h3("Hosts found with reverse lookup:")
132 | page.ul(class_="dnsrevlist")
133 | page.li(self.dnsrev, class_="dnsrevitem")
134 | page.ul.close()
135 | if self.vhost != []:
136 | page.h3("Virtual hosts found:")
137 | page.ul(class_="pathslist")
138 | page.li(self.vhost, class_="pathitem")
139 | page.ul.close()
140 | if self.shodan != []:
141 | shodanalysis = []
142 | page.h3("Shodan results:")
143 | for x in self.shodan:
144 | res = x.split("SAPO")
145 | page.h3(res[0])
146 |                 page.a("Port: " + res[2])
147 | page.pre(res[1])
148 | page.pre.close()
149 | ban = res[1]
150 | reg_server = re.compile('Server:.*')
151 | temp = reg_server.findall(res[1])
152 | if temp != []:
153 | shodanalysis.append(res[0] + ":" + temp[0])
154 | if shodanalysis != []:
155 | page.h3("Server technologies:")
156 | repeated = []
157 | for x in shodanalysis:
158 | if x not in repeated:
159 | page.pre(x)
160 | page.pre.close()
161 | repeated.append(x)
162 | page.body.close()
163 | page.html.close()
164 | file = open(self.fname, 'w')
165 | for x in page.content:
166 | try:
167 | file.write(x)
168 | except:
169 |                 print "Exception " + x  # send to logs
170 |                 pass
171 |         file.close()
172 | return "ok"
173 |
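174 | # Call sketch (illustrative): every argument except file and domain is a
175 | # plain list; each shodan entry is a "host SAPO banner SAPO port" string
176 | # that writehtml() splits for display:
177 | #   htmlExport([], ['www.example.com'], [], [], [], 'out.html',
178 | #              'example.com', [], []).writehtml()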
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/win32dns.py:
--------------------------------------------------------------------------------
1 | """
2 | $Id: win32dns.py,v 1.3.2.1 2007/05/22 20:26:49 customdesigned Exp $
3 |
4 | Extract a list of TCP/IP name servers from the registry 0.1
5 | 0.1 Strobl 2001-07-19
6 | Usage:
7 | RegistryResolve() returns a list of ip numbers (dotted quads), by
8 | scouring the registry for addresses of name servers
9 |
10 | Tested on Windows NT4 Server SP6a, Windows 2000 Pro SP2 and
11 | Whistler Pro (XP) Build 2462 and Windows ME
12 | ... all having a different registry layout wrt name servers :-/
13 |
14 | Todo:
15 |
16 | Program doesn't check whether an interface is up or down
17 |
18 | (c) 2001 Copyright by Wolfgang Strobl ws@mystrobl.de,
19 | License analog to the current Python license
20 | """
21 |
22 | import string
23 | import re
24 | import _winreg
25 |
26 |
27 | def binipdisplay(s):
28 |     "convert a binary array of ip addresses to a python list"
29 | if len(s) % 4 != 0:
30 | raise EnvironmentError # well ...
31 | ol = []
32 | for i in range(len(s) / 4):
33 | s1 = s[:4]
34 | s = s[4:]
35 | ip = []
36 | for j in s1:
37 | ip.append(str(ord(j)))
38 | ol.append(string.join(ip, '.'))
39 | return ol
40 |
41 |
42 | def stringdisplay(s):
43 | '''convert "d.d.d.d,d.d.d.d" to ["d.d.d.d","d.d.d.d"].
44 | also handle u'd.d.d.d d.d.d.d', as reporting on SF
45 | '''
46 | import re
47 | return map(str, re.split("[ ,]", s))
48 |
49 |
50 | def RegistryResolve():
51 | nameservers = []
52 | x = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
53 | try:
54 | y = _winreg.OpenKey(x,
55 | r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters")
56 | except EnvironmentError: # so it isn't NT/2000/XP
57 | # windows ME, perhaps?
58 | try: # for Windows ME
59 | y = _winreg.OpenKey(x,
60 | r"SYSTEM\CurrentControlSet\Services\VxD\MSTCP")
61 | nameserver, dummytype = _winreg.QueryValueEx(y, 'NameServer')
62 | if nameserver and not (nameserver in nameservers):
63 | nameservers.extend(stringdisplay(nameserver))
64 | except EnvironmentError:
65 | pass
66 | return nameservers # no idea
67 | try:
68 | nameserver = _winreg.QueryValueEx(y, "DhcpNameServer")[0].split()
69 | except:
70 | nameserver = _winreg.QueryValueEx(y, "NameServer")[0].split()
71 | if nameserver:
72 | nameservers = nameserver
73 | nameserver = _winreg.QueryValueEx(y, "NameServer")[0]
74 | _winreg.CloseKey(y)
75 | try: # for win2000
76 | y = _winreg.OpenKey(x,
77 | r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\DNSRegisteredAdapters")
78 | for i in range(1000):
79 | try:
80 | n = _winreg.EnumKey(y, i)
81 | z = _winreg.OpenKey(y, n)
82 | dnscount, dnscounttype = _winreg.QueryValueEx(z,
83 | 'DNSServerAddressCount')
84 | dnsvalues, dnsvaluestype = _winreg.QueryValueEx(z,
85 | 'DNSServerAddresses')
86 | nameservers.extend(binipdisplay(dnsvalues))
87 | _winreg.CloseKey(z)
88 | except EnvironmentError:
89 | break
90 | _winreg.CloseKey(y)
91 | except EnvironmentError:
92 | pass
93 | #
94 | try: # for whistler
95 | y = _winreg.OpenKey(x,
96 | r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\Interfaces")
97 | for i in range(1000):
98 | try:
99 | n = _winreg.EnumKey(y, i)
100 | z = _winreg.OpenKey(y, n)
101 | try:
102 | nameserver, dummytype = _winreg.QueryValueEx(
103 | z, 'NameServer')
104 | if nameserver and not (nameserver in nameservers):
105 | nameservers.extend(stringdisplay(nameserver))
106 | except EnvironmentError:
107 | pass
108 | _winreg.CloseKey(z)
109 | except EnvironmentError:
110 | break
111 | _winreg.CloseKey(y)
112 | except EnvironmentError:
113 | # print "Key Interfaces not found, just do nothing"
114 | pass
115 | #
116 | _winreg.CloseKey(x)
117 | return nameservers
118 |
119 | if __name__ == "__main__":
120 | print "Name servers:", RegistryResolve()
121 |
122 | #
123 | # $Log: win32dns.py,v $
124 | # Revision 1.3.2.1 2007/05/22 20:26:49 customdesigned
125 | # Fix win32 nameserver discovery.
126 | #
127 | # Revision 1.3 2002/05/06 06:15:31 anthonybaxter
128 | # apparently some versions of windows return servers as unicode
129 | # string with space sep, rather than strings with comma sep.
130 | # *sigh*
131 | #
132 | # Revision 1.2 2002/03/19 12:41:33 anthonybaxter
133 | # tabnannied and reindented everything. 4 space indent, no tabs.
134 | # yay.
135 | #
136 | # Revision 1.1 2001/08/09 09:22:28 anthonybaxter
137 | # added what I hope is win32 resolver lookup support. I'll need to try
138 | # and figure out how to get the CVS checkout onto my windows machine to
139 | # make sure it works (wow, doing something other than games on the
140 | # windows machine :)
141 | #
142 | # Code from Wolfgang.Strobl@gmd.de
143 | # win32dns.py from
144 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66260
145 | #
146 | # Really, ParseResolvConf() should be renamed "FindNameServers" or
147 | # some such.
148 | #
149 | #
150 |
--------------------------------------------------------------------------------
/lib/theHarvester/lib/htmlExport.py:
--------------------------------------------------------------------------------
1 | from lib import markup
2 | from lib import graphs
3 | import re
4 |
5 |
6 | class htmlExport():
7 |
8 | def __init__(self, users, hosts, vhosts, dnsres,
9 | dnsrev, file, domain, shodan, tldres):
10 | self.users = users
11 | self.hosts = hosts
12 | self.vhost = vhosts
13 | self.fname = file
14 | self.dnsres = dnsres
15 | self.dnsrev = dnsrev
16 | self.domain = domain
17 | self.shodan = shodan
18 | self.tldres = tldres
19 | self.style = ""
20 |
21 | def styler(self):
22 | a = """
82 | """
83 | self.style = a
84 |
85 | def writehtml(self):
86 | page = markup.page()
87 | # page.init (title="theHarvester
88 |         # Results",css=('edge.css'),footer="Edge-security 2011")
89 | page.html()
90 | self.styler()
91 | page.head(self.style)
92 | page.body()
93 | page.h1("theHarvester results")
94 |         page.h2("for: " + self.domain)
95 | page.h3("Dashboard:")
96 | graph = graphs.BarGraph('vBar')
97 | graph.values = [len(
98 | self.users),
99 | len(self.hosts),
100 | len(self.vhost),
101 | len(self.tldres),
102 | len(self.shodan)]
103 | graph.labels = ['Emails', 'hosts', 'Vhost', 'TLD', 'Shodan']
104 | graph.showValues = 1
105 | page.body(graph.create())
106 |         page.h3("E-mail addresses found:")
107 | if self.users != []:
108 | page.ul(class_="userslist")
109 | page.li(self.users, class_="useritem")
110 | page.ul.close()
111 | else:
112 | page.h2("No emails found")
113 | page.h3("Hosts found:")
114 | if self.hosts != []:
115 | page.ul(class_="softlist")
116 | page.li(self.hosts, class_="softitem")
117 | page.ul.close()
118 | else:
119 | page.h2("No hosts found")
120 | if self.tldres != []:
121 | page.h3("TLD domains found in TLD expansion:")
122 | page.ul(class_="tldlist")
123 | page.li(self.tldres, class_="tlditem")
124 | page.ul.close()
125 | if self.dnsres != []:
126 | page.h3("Hosts found in DNS brute force:")
127 | page.ul(class_="dnslist")
128 | page.li(self.dnsres, class_="dnsitem")
129 | page.ul.close()
130 | if self.dnsrev != []:
131 |             page.h3("Hosts found with reverse lookup:")
132 | page.ul(class_="dnsrevlist")
133 | page.li(self.dnsrev, class_="dnsrevitem")
134 | page.ul.close()
135 | if self.vhost != []:
136 | page.h3("Virtual hosts found:")
137 | page.ul(class_="pathslist")
138 | page.li(self.vhost, class_="pathitem")
139 | page.ul.close()
140 | if self.shodan != []:
141 | shodanalysis = []
142 | page.h3("Shodan results:")
143 | for x in self.shodan:
144 | res = x.split("SAPO")
145 | page.h3(res[0])
146 |                 page.a("Port: " + res[2])
147 | page.pre(res[1])
148 | page.pre.close()
149 | ban = res[1]
150 | reg_server = re.compile('Server:.*')
151 | temp = reg_server.findall(res[1])
152 | if temp != []:
153 | shodanalysis.append(res[0] + ":" + temp[0])
154 | if shodanalysis != []:
155 | page.h3("Server technologies:")
156 | repeated = []
157 | for x in shodanalysis:
158 | if x not in repeated:
159 | page.pre(x)
160 | page.pre.close()
161 | repeated.append(x)
162 | page.body.close()
163 | page.html.close()
164 | file = open(self.fname, 'w')
165 | for x in page.content:
166 | try:
167 | file.write(x)
168 | except:
169 |                 print "Exception " + x  # send to logs
170 |                 pass
171 |         file.close()
172 | return "ok"
173 |
--------------------------------------------------------------------------------
/snoop.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python2
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Small tool to take in a list of domains and spit out emails and potential issues
6 | Work smarter, not harder
7 |
8 | Chris Maddalena
9 | """
10 |
11 | import sys
12 | import os
13 | #from lib import *
14 | import pwnedcheck
15 | import urllib2
16 |
17 | sys.path.append('lib/theHarvester/')
18 | from theHarvester import *
19 |
20 | def main():
21 | # Clear the terminal window
22 | os.system('cls' if os.name == 'nt' else 'clear')
23 | # Main menu display
24 | try:
25 | domainList = sys.argv[1]
26 | except Exception as e:
27 | print "ERROR: You must supply only an input text file!"
28 | print "ERROR: %s" % e
29 |         sys.exit()
30 | print "[+] Trying to read %s" % domainList
31 | try:
32 | with open(domainList, 'r') as domains:
33 | for domain in domains:
34 | print "[+] Checking %s" % domain.rstrip()
35 | harvest(domain)
36 | except Exception as e:
37 | print "[!] Could not open your file, %s" % domainList
38 | print "ERROR: %s" % e
39 |
40 | # Number of commands
41 | total = 2 # Tests
42 | harvesterDomains = 6 # Search engines used with theHarvester
43 | # Headers for use with urllib2
44 | user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
45 | headers = { 'User-Agent' : user_agent }
46 |
47 | def harvest(domain):
48 |
49 | domain = domain.rstrip()
50 | harvestLimit = 100
51 | harvestStart = 0
52 |     # Create a directory for the client reports
53 | if not os.path.exists("reports/%s" % domain):
54 | try:
55 | os.makedirs("reports/%s" % domain)
56 | except Exception as e:
57 | print "[!] Could not create reports directory!"
58 | print "ERROR: %s" % e
59 |
60 | file = "reports/%s/%s" % (domain, domain + ".txt")
61 |
62 | print "[+] Running The Harvester (1/%s)" % total
63 | # Search through most of Harvester's supported engines
64 | # No Baidu because it always seems to hang or take way too long
65 | print "[-] Harvesting Google (1/%s)" % harvesterDomains
66 | search = googlesearch.search_google(domain,harvestLimit,harvestStart)
67 | search.process()
68 | googleHarvest = search.get_emails()
69 | print "[-] Harvesting LinkedIn (2/%s)" % harvesterDomains
70 | search = linkedinsearch.search_linkedin(domain,harvestLimit)
71 | search.process()
72 | linkHarvest = search.get_people()
73 | print "[-] Harvesting Twitter (3/%s)" % harvesterDomains
74 | search = twittersearch.search_twitter(domain,harvestLimit)
75 | search.process()
76 | twitHarvest = search.get_people()
77 | print "[-] Harvesting Yahoo (4/%s)" % harvesterDomains
78 | search = yahoosearch.search_yahoo(domain,harvestLimit)
79 | search.process()
80 | yahooHarvest = search.get_emails()
81 | print "[-] Harvesting Bing (5/%s)" % harvesterDomains
82 |     search = bingsearch.search_bing(domain,harvestLimit,harvestStart)
83 | search.process('no')
84 | bingHarvest = search.get_emails()
85 | print "[-] Harvesting Jigsaw (6/%s)" % harvesterDomains
86 | search = jigsaw.search_jigsaw(domain,harvestLimit)
87 | search.process()
88 | jigsawHarvest = search.get_people()
89 |
90 | # Combine lists and strip out duplicate findings for unique lists
91 |     totalEmails = googleHarvest + bingHarvest + yahooHarvest
92 | temp = []
93 | for email in totalEmails:
94 | email = email.lower()
95 | temp.append(email)
96 | unique = set(temp)
97 | uniqueEmails = list(unique)
98 | # Do the same with people, but keep Twitter handles separate
99 | totalPeople = linkHarvest + jigsawHarvest
100 | unique = set(totalPeople)
101 | uniquePeople = list(unique)
102 | # Process Twitter handles to kill duplicates
103 | handles = []
104 | for twit in twitHarvest:
105 | # Split handle from account description and strip rogue periods
106 | handle = twit.split(' ')[0]
107 | handle = handle.rstrip('.')
108 | handles.append(handle.lower())
109 | unique = set(handles)
110 | uniqueTwitter = list(unique)
111 |
112 | print "[+] Harvester found a total of %s emails and %s names across all engines" % (len(uniqueEmails),len(uniquePeople) + len(uniqueTwitter))
113 | print "[+] Running emails through HaveIBeenPwned and writing report (2/%s)" % total
114 | with open(file, 'w') as report:
115 | report.write("### Email & People Report for %s ###\n" % domain)
116 | report.write("---THEHARVESTER Results---\n")
117 | report.write("Emails checked with HaveIBeenPwned for breaches and pastes\n")
118 | for email in uniqueEmails:
119 | # Make sure we drop that @domain.com result Harvester always includes
120 | if email == '@' + domain:
121 | pass
122 | else:
123 | report.write('\n' + 'Email: ' + email + '\n')
124 | report.write('Pwned: ')
125 |                     # Check haveibeenpwned data breaches
126 |                     try:
127 |                         pwned = pwnedcheck.check(email)
128 |                     except:
129 |                         print "[!] Could not parse JSON. Moving on..."
130 |                         pwned = []  # treat a failed lookup as no breaches
131 |                     if not pwned:
132 | report.write('None' + '\n')
133 | else:
134 | report.write('\n')
135 | for pwn in pwned:
136 | report.write('+ ' + pwn + '\n')
137 | # Check haveibeenpwned for pastes from Pastebin, Pastie, Slexy, Ghostbin, QuickLeak, JustPaste, and AdHocUrl
138 | url = "https://haveibeenpwned.com/api/v2/pasteaccount/" + email
139 | page = urllib2.Request(url, None, headers)
140 |                 # urlopen raises on the 404 HIBP returns when there are no pastes
141 | try:
142 | source = urllib2.urlopen(page).read()
143 | report.write("Pastes: " + source + "\n")
144 | except:
145 | report.write("Pastes: No pastes\n")
146 |
147 | report.write("\n---PEOPLE Results---\n")
148 | report.write("Names and social media accounts (Twitter and LinkedIn)\n\n")
149 | for person in uniquePeople:
150 | report.write('Name: ' + person + '\n')
151 | for twit in uniqueTwitter:
152 | # Drop the lonely @ Harvester often includes
153 | if twit == '@':
154 | pass
155 | else:
156 | report.write('Twitter: ' + twit + '\n')
157 |
158 | report.close()
159 |
160 |
161 | if __name__ == "__main__":
162 | main()
163 |
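164 | # Example run (domains.txt is a placeholder name; one domain per line):
165 | #   python2 snoop.py domains.txt
166 | # Each domain ends up with reports/<domain>/<domain>.txt holding harvested
167 | # emails (checked against HaveIBeenPwned) plus names and Twitter handles.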
--------------------------------------------------------------------------------
/lib/theHarvester/myparser.py:
--------------------------------------------------------------------------------
1 | import string
2 | import re
3 |
4 |
5 | class parser:
6 |
7 | def __init__(self, results, word):
8 | self.results = results
9 | self.word = word
10 | self.temp = []
11 |
12 | def genericClean(self):
13 |         self.results = re.sub('<em>', '', self.results)
14 |         self.results = re.sub('<b>', '', self.results)
15 |         self.results = re.sub('</b>', '', self.results)
16 |         self.results = re.sub('</em>', '', self.results)
17 | self.results = re.sub('%2f', ' ', self.results)
18 | self.results = re.sub('%3a', ' ', self.results)
19 |         self.results = re.sub('<strong>', '', self.results)
20 |         self.results = re.sub('</strong>', '', self.results)
21 |
22 | for e in ('>', ':', '=', '<', '/', '\\', ';', '&', '%3A', '%3D', '%3C'):
23 | self.results = string.replace(self.results, e, ' ')
24 |
25 | def urlClean(self):
26 |         self.results = re.sub('<em>', '', self.results)
27 |         self.results = re.sub('</em>', '', self.results)
28 | self.results = re.sub('%2f', ' ', self.results)
29 | self.results = re.sub('%3a', ' ', self.results)
30 |
31 | for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
32 | self.results = string.replace(self.results, e, ' ')
33 |
34 | def emails(self):
35 | self.genericClean()
36 | reg_emails = re.compile(
37 | '[a-zA-Z0-9.-_]*' +
38 | '@' +
39 | '(?:[a-zA-Z0-9.-]*\.)?' +
40 | self.word)
41 | self.temp = reg_emails.findall(self.results)
42 | emails = self.unique()
43 | return emails
44 |
45 | def fileurls(self, file):
46 | urls = []
47 |         reg_urls = re.compile('<a href="(.*?)"')
48 |         self.temp = reg_urls.findall(self.results)
49 |         allurls = self.unique()
50 |         for x in allurls:
51 |             if x.count('webcache') or x.count('google.com'):
52 |                 pass
53 |             elif x.lower().count(file):
54 |                 urls.append(x)
55 |         return urls
56 | 
57 |     def people_googleplus(self):
58 |         self.results = re.sub('</b>', '', self.results)
59 |         self.results = re.sub('<b>', '', self.results)
60 | reg_people = re.compile('>[a-zA-Z0-9._ ]* - Google\+')
61 | #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
62 | self.temp = reg_people.findall(self.results)
63 | resul = []
64 | for x in self.temp:
65 | y = string.replace(x, ' | LinkedIn', '')
66 | y = string.replace(y, ' profiles ', '')
67 | y = string.replace(y, 'LinkedIn', '')
68 | y = string.replace(y, '"', '')
69 | y = string.replace(y, '>', '')
70 | if y != " ":
71 | resul.append(y)
72 | return resul
73 |
74 |
75 |
76 | def people_twitter(self):
77 | reg_people = re.compile('(@[a-zA-Z0-9._ -]*)')
78 | #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
79 | self.temp = reg_people.findall(self.results)
80 | users = self.unique()
81 | resul = []
82 | for x in users:
83 | y = string.replace(x, ' | LinkedIn', '')
84 | y = string.replace(y, ' profiles ', '')
85 | y = string.replace(y, 'LinkedIn', '')
86 | y = string.replace(y, '"', '')
87 | y = string.replace(y, '>', '')
88 | if y != " ":
89 | resul.append(y)
90 | return resul
91 |
92 | def people_linkedin(self):
93 | reg_people = re.compile('">[a-zA-Z0-9._ -]* \| LinkedIn')
94 | #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
95 | self.temp = reg_people.findall(self.results)
96 | resul = []
97 | for x in self.temp:
98 | y = string.replace(x, ' | LinkedIn', '')
99 | y = string.replace(y, ' profiles ', '')
100 | y = string.replace(y, 'LinkedIn', '')
101 | y = string.replace(y, '"', '')
102 | y = string.replace(y, '>', '')
103 | if y != " ":
104 | resul.append(y)
105 | return resul
106 |
107 | def profiles(self):
108 | reg_people = re.compile('">[a-zA-Z0-9._ -]* - Google Profile')
109 | self.temp = reg_people.findall(self.results)
110 | resul = []
111 | for x in self.temp:
112 | y = string.replace(x, ' Google Profile', '')
113 | y = string.replace(y, '-', '')
114 | y = string.replace(y, '">', '')
115 | if y != " ":
116 | resul.append(y)
117 | return resul
118 |
119 | def people_jigsaw(self):
120 | res = []
121 | #reg_people = re.compile("'tblrow' title='[a-zA-Z0-9.-]*'>")
122 | reg_people = re.compile(
123 | "href=javascript:showContact\('[0-9]*'\)>[a-zA-Z0-9., ]*")
124 | self.temp = reg_people.findall(self.results)
125 | for x in self.temp:
126 |             a = x.split('>')[1].replace("</a", "")
127 |             res.append(a)
128 |         return res
129 | 
130 |     def hostnames(self):
131 |         self.genericClean()
132 |         reg_hosts = re.compile('[a-zA-Z0-9.-]*\.' + self.word)
133 |         self.temp = reg_hosts.findall(self.results)
134 |         hostnames = self.unique()
135 |         return hostnames
136 | 
137 |     def set(self):
138 |         reg_sets = re.compile('>[a-zA-Z0-9]*</a></font>')
139 | self.temp = reg_sets.findall(self.results)
140 | sets = []
141 | for x in self.temp:
142 | y = string.replace(x, '>', '')
143 |             y = string.replace(y, '</a</font', '')
144 |             sets.append(y)
145 |         return sets
146 | 
147 |     def hostnames_all(self):
148 |         reg_hosts = re.compile('<cite>(.*?)</cite>')
149 | temp = reg_hosts.findall(self.results)
150 | for x in temp:
151 | if x.count(':'):
152 | res = x.split(':')[1].split('/')[2]
153 | else:
154 | res = x.split("/")[0]
155 | self.temp.append(res)
156 | hostnames = self.unique()
157 | return hostnames
158 |
159 | def unique(self):
160 | self.new = []
161 | for x in self.temp:
162 | if x not in self.new:
163 | self.new.append(x)
164 | return self.new
165 |
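166 | if __name__ == "__main__":
167 |     # Quick self-test with inline sample data (illustrative only)
168 |     sample = 'mailto:info@example.com <a href="http://www.example.com/">'
169 |     p = parser(sample, 'example.com')
170 |     print p.emails()      # -> ['info@example.com']
171 |     print p.hostnames()   # -> ['www.example.com']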
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/api.py:
--------------------------------------------------------------------------------
1 | try:
2 | from json import dumps, loads
3 | except:
4 | from simplejson import dumps, loads
5 | from urllib2 import urlopen
6 | from urllib import urlencode
7 |
8 | __all__ = ['WebAPI']
9 |
10 |
11 | class WebAPIError(Exception):
12 |
13 | def __init__(self, value):
14 | self.value = value
15 |
16 | def __str__(self):
17 | return self.value
18 |
19 |
20 | class WebAPI:
21 |
22 | """Wrapper around the SHODAN webservices API"""
23 |
24 | class DatalossDb:
25 |
26 | def __init__(self, parent):
27 | self.parent = parent
28 |
29 | def search(self, **kwargs):
30 | """Search the Dataloss DB archive.
31 |
32 | Arguments:
33 | name -- Name of the affected company/ organisation
34 |
35 | arrest -- whether the incident resulted in an arrest
36 | breaches -- the type of breach that occurred (Hack, MissingLaptop etc.)
37 | country -- country where the incident took place
38 | ext -- whether an external, third party was affected
39 | ext_names -- the name of the third party company that was affected
40 | lawsuit -- whether the incident resulted in a lawsuit
41 | records -- the number of records that were lost/ stolen
42 | recovered -- whether the affected items were recovered
43 | sub_types -- the sub-categorization of the affected company/ organization
44 | source -- whether the incident occurred from inside or outside the organization
45 | stocks -- stock symbol of the affected company
46 | types -- the basic type of organization (government, business, educational)
47 | uid -- unique ID for the incident
48 |
49 | Returns:
50 | A dictionary with 2 main items: matches (list) and total (int).
51 |
52 | """
53 | return self.parent._request('datalossdb/search', dict(**kwargs))
54 |
55 | class Exploits:
56 |
57 | def __init__(self, parent):
58 | self.parent = parent
59 |
60 | def search(self, query, sources=[],
61 | cve=None, osvdb=None, msb=None, bid=None):
62 | """Search the entire Shodan Exploits archive using the same query syntax
63 | as the website.
64 |
65 | Arguments:
66 | query -- exploit search query; same syntax as website
67 |
68 | Optional arguments:
69 | sources -- metasploit, cve, osvdb, exploitdb, or packetstorm
70 | cve -- CVE identifier (ex. 2010-0432)
71 | osvdb -- OSVDB identifier (ex. 11666)
72 | msb -- Microsoft Security Bulletin ID (ex. MS05-030)
73 | bid -- Bugtraq identifier (ex. 13951)
74 |
75 | """
76 | if sources:
77 | query += ' source:' + ','.join(sources)
78 | if cve:
79 | query += ' cve:%s' % (str(cve).strip())
80 | if osvdb:
81 | query += ' osvdb:%s' % (str(osvdb).strip())
82 | if msb:
83 | query += ' msb:%s' % (str(msb).strip())
84 | if bid:
85 | query += ' bid:%s' % (str(bid).strip())
86 | return self.parent._request('search_exploits', {'q': query})
87 |
88 | class ExploitDb:
89 |
90 | def __init__(self, parent):
91 | self.parent = parent
92 |
93 | def download(self, id):
94 | """Download the exploit code from the ExploitDB archive.
95 |
96 | Arguments:
97 | id -- ID of the ExploitDB entry
98 |
99 | Returns:
100 | A dictionary with the following fields:
101 | filename -- Name of the file
102 | content-type -- Mimetype
103 | data -- Contents of the file
104 |
105 | """
106 | return self.parent._request('exploitdb/download', {'id': id})
107 |
108 | def search(self, query, **kwargs):
109 | """Search the ExploitDB archive.
110 |
111 | Arguments:
112 | query -- Search terms
113 |
114 | Optional arguments:
115 | author -- Name of the exploit submitter
116 | platform -- Target platform (e.g. windows, linux, hardware etc.)
117 | port -- Service port number
118 | type -- Any, dos, local, papers, remote, shellcode and webapps
119 |
120 | Returns:
121 | A dictionary with 2 main items: matches (list) and total (int).
122 | Each item in 'matches' is a dictionary with the following elements:
123 |
124 | id
125 | author
126 | date
127 | description
128 | platform
129 | port
130 | type
131 |
132 | """
133 | return (
134 | self.parent._request(
135 | 'exploitdb/search', dict(q=query, **kwargs))
136 | )
137 |
138 | class Msf:
139 |
140 | def __init__(self, parent):
141 | self.parent = parent
142 |
143 | def download(self, id):
144 | """Download a metasploit module given the fullname (id) of it.
145 |
146 | Arguments:
147 | id -- fullname of the module (ex. auxiliary/admin/backupexec/dump)
148 |
149 | Returns:
150 | A dictionary with the following fields:
151 | filename -- Name of the file
152 | content-type -- Mimetype
153 | data -- File content
154 | """
155 | return self.parent._request('msf/download', {'id': id})
156 |
157 | def search(self, query, **kwargs):
158 | """Search for a Metasploit module.
159 | """
160 | return self.parent._request('msf/search', dict(q=query, **kwargs))
161 |
162 | def __init__(self, key):
163 | """Initializes the API object.
164 |
165 | Arguments:
166 | key -- your API key
167 |
168 | """
169 | self.api_key = key
170 | self.base_url = 'http://www.shodanhq.com/api/'
171 | self.dataloss = self.DatalossDb(self)
172 | self.exploits = self.Exploits(self)
173 | self.exploitdb = self.ExploitDb(self)
174 | self.msf = self.Msf(self)
175 |
176 | def _request(self, function, params):
177 | """General-purpose function to create web requests to SHODAN.
178 |
179 | Arguments:
180 | function -- name of the function you want to execute
181 | params -- dictionary of parameters for the function
182 |
183 | Returns
184 | A JSON string containing the function's results.
185 |
186 | """
187 | # Add the API key parameter automatically
188 | params['key'] = self.api_key
189 |
190 | # Send the request
191 | data = urlopen(
192 | self.base_url +
193 | function +
194 | '?' +
195 | urlencode(
196 | params)).read(
197 | )
198 |
199 | # Parse the text into JSON
200 | data = loads(data)
201 |
202 | # Raise an exception if an error occurred
203 | if data.get('error', None):
204 | raise WebAPIError(data['error'])
205 |
206 | # Return the data
207 | return data
208 |
209 | def fingerprint(self, banner):
210 | """Determine the software based on the banner.
211 |
212 | Arguments:
213 | banner - HTTP banner
214 |
215 | Returns:
216 | A list of software that matched the given banner.
217 | """
218 | return self._request('fingerprint', {'banner': banner})
219 |
220 | def host(self, ip):
221 | """Get all available information on an IP.
222 |
223 | Arguments:
224 | ip -- IP of the computer
225 |
226 | Returns:
227 | All available information SHODAN has on the given IP,
228 | subject to API key restrictions.
229 |
230 | """
231 | return self._request('host', {'ip': ip})
232 |
233 | def search(self, query):
234 | """Search the SHODAN database.
235 |
236 | Arguments:
237 | query -- search query; identical syntax to the website
238 |
239 | Returns:
240 | A dictionary with 3 main items: matches, countries and total.
241 | Visit the website for more detailed information.
242 |
243 | """
244 | return self._request('search', {'q': query})
245 |
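246 | if __name__ == "__main__":
247 |     # Usage sketch; 'YOUR_API_KEY' is a placeholder, not a real key
248 |     api = WebAPI('YOUR_API_KEY')
249 |     results = api.search('apache')
250 |     print '%s hosts matched' % results['total']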
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dnssearch.py:
--------------------------------------------------------------------------------
1 | import IPy
2 | import DNS
3 | import string
4 | import socket
5 | import sys
6 |
7 |
8 | class dns_reverse():
9 |
10 | def __init__(self, range, verbose=True):
11 | self.range = range
12 | self.iplist = ''
13 | self.results = []
14 | self.verbose = verbose
15 | try:
16 | DNS.ParseResolvConf("/etc/resolv.conf")
17 | nameserver = DNS.defaults['server'][0]
18 | except:
19 | print "Error in DNS resolvers"
20 | sys.exit()
21 |
22 | def run(self, host):
23 | a = string.split(host, '.')
24 | a.reverse()
25 | b = string.join(a, '.') + '.in-addr.arpa'
26 | nameserver = DNS.defaults['server'][0]
27 | if self.verbose:
28 | ESC = chr(27)
29 | sys.stdout.write(ESC + '[2K' + ESC + '[G')
30 | sys.stdout.write("\r\t" + host)
31 | sys.stdout.flush()
32 | try:
33 | name = DNS.Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
34 | return host + ":" + name
35 | except:
36 | pass
37 |
38 | def get_ip_list(self, ips):
39 | """Generates the list of ips to reverse"""
40 | try:
41 | list = IPy.IP(ips)
42 | except:
43 | print "Error in IP format, check the input and try again. (Eg. 192.168.1.0/24)"
44 | sys.exit()
45 | name = []
46 | for x in list:
47 | name.append(str(x))
48 | return name
49 |
50 | def list(self):
51 | self.iplist = self.get_ip_list(self.range)
52 | return self.iplist
53 |
54 | def process(self):
55 | for x in self.iplist:
56 | host = self.run(x)
57 | if host is not None:
58 | self.results.append(host)
59 | return self.results
60 |
61 |
62 | class dns_force():
63 |
64 | def __init__(self, domain, dnsserver, verbose=False):
65 | self.domain = domain
66 | self.nameserver = dnsserver
67 | self.file = "dns-names.txt"
68 | self.subdo = False
69 | self.verbose = verbose
70 | try:
71 | f = open(self.file, "r")
72 | except:
73 | print "Error opening dns dictionary file"
74 | sys.exit()
75 | self.list = f.readlines()
76 |
77 | def getdns(self, domain):
78 | DNS.ParseResolvConf("/etc/resolv.conf")
79 |         nameserver = DNS.defaults['server'][0]
80 | dom = domain
81 | if self.subdo == True:
82 | dom = domain.split(".")
83 | dom.pop(0)
84 | rootdom = ".".join(dom)
85 | else:
86 | rootdom = dom
87 | if self.nameserver == "":
88 | try:
89 | r = DNS.Request(rootdom, qtype='SOA').req()
90 | primary, email, serial, refresh, retry, expire, minimum = r.answers[
91 | 0]['data']
92 | test = DNS.Request(
93 | rootdom,
94 | qtype='NS',
95 | server=primary,
96 | aa=1).req()
97 | except Exception as e:
98 | print e
99 |             sys.exit()
100 | if test.header['status'] != "NOERROR":
101 | print "Error"
102 | sys.exit()
103 | self.nameserver = test.answers[0]['data']
104 | elif self.nameserver == "local":
105 | self.nameserver = nameserver
106 | return self.nameserver
107 |
108 | def run(self, host):
109 | if self.nameserver == "":
110 | self.nameserver = self.getdns(self.domain)
111 | print "Using DNS server: " + self.nameserver
112 |
113 | hostname = str(host.split("\n")[0]) + "." + str(self.domain)
114 | if self.verbose:
115 | ESC = chr(27)
116 | sys.stdout.write(ESC + '[2K' + ESC + '[G')
117 | sys.stdout.write("\r" + hostname)
118 | sys.stdout.flush()
119 | try:
120 | test = DNS.Request(
121 | hostname,
122 | qtype='a',
123 | server=self.nameserver).req(
124 | )
125 | hostip = test.answers[0]['data']
126 | return hostip + ":" + hostname
127 | except Exception as e:
128 | pass
129 |
130 | def process(self):
131 | results = []
132 | for x in self.list:
133 | host = self.run(x)
134 | if host is not None:
135 | results.append(host)
136 | return results
137 |
138 |
139 | class dns_tld():
140 |
141 | def __init__(self, domain, dnsserver, verbose=False):
142 | self.domain = domain
143 | self.nameserver = dnsserver
144 | self.subdo = False
145 | self.verbose = verbose
146 | # Updated from http://data.iana.org/TLD/tlds-alpha-by-domain.txt
147 | self.tlds = [
148 | "ac", "academy", "ad", "ae", "aero", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "arpa", "as",
149 | "asia", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bike", "biz", "bj",
150 | "bm", "bn", "bo", "br", "bs", "bt", "builders", "buzz", "bv", "bw", "by", "bz", "ca", "cab", "camera",
151 | "camp", "careers", "cat", "cc", "cd", "center", "ceo", "cf", "cg", "ch", "ci", "ck", "cl", "clothing",
152 | "cm", "cn", "co", "codes", "coffee", "com", "company", "computer", "construction", "contractors", "coop",
153 | "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "diamonds", "directory", "dj", "dk", "dm", "do",
154 | "domains", "dz", "ec", "edu", "education", "ee", "eg", "email", "enterprises", "equipment", "er", "es",
155 | "estate", "et", "eu", "farm", "fi", "fj", "fk", "florist", "fm", "fo", "fr", "ga", "gallery", "gb", "gd",
156 | "ge", "gf", "gg", "gh", "gi", "gl", "glass", "gm", "gn", "gov", "gp", "gq", "gr", "graphics", "gs", "gt",
157 | "gu", "guru", "gw", "gy", "hk", "hm", "hn", "holdings", "holiday", "house", "hr", "ht", "hu", "id", "ie",
158 | "il", "im", "immobilien", "in", "info", "institute", "int", "international", "io", "iq", "ir", "is", "it",
159 | "je", "jm", "jo", "jobs", "jp", "kaufen", "ke", "kg", "kh", "ki", "kitchen", "kiwi", "km", "kn", "kp",
160 | "kr", "kw", "ky", "kz", "la", "land", "lb", "lc", "li", "lighting", "limo", "lk", "lr", "ls", "lt", "lu",
161 | "lv", "ly", "ma", "management", "mc", "md", "me", "menu", "mg", "mh", "mil", "mk", "ml", "mm", "mn", "mo",
162 | "mobi", "mp", "mq", "mr", "ms", "mt", "mu", "museum", "mv", "mw", "mx", "my", "mz", "na", "name", "nc",
163 | "ne", "net", "nf", "ng", "ni", "ninja", "nl", "no", "np", "nr", "nu", "nz", "om", "onl", "org", "pa", "pe",
164 | "pf", "pg", "ph", "photography", "photos", "pk", "pl", "plumbing", "pm", "pn", "post", "pr", "pro", "ps",
165 | "pt", "pw", "py", "qa", "re", "recipes", "repair", "ro", "rs", "ru", "ruhr", "rw", "sa", "sb", "sc", "sd",
166 | "se", "sexy", "sg", "sh", "shoes", "si", "singles", "sj", "sk", "sl", "sm", "sn", "so", "solar",
167 | "solutions", "sr", "st", "su", "support", "sv", "sx", "sy", "systems", "sz", "tattoo", "tc", "td",
168 | "technology", "tel", "tf", "tg", "th", "tips", "tj", "tk", "tl", "tm", "tn", "to", "today", "tp", "tr",
169 | "training", "travel", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "uno", "us", "uy", "uz", "va", "vc",
170 | "ve", "ventures", "vg", "vi", "viajes", "vn", "voyage", "vu", "wang", "wf", "wien", "ws", "xxx", "ye",
171 | "yt", "za", "zm", "zw"]
172 |
173 | def getdns(self, domain):
174 |         DNS.ParseResolvConf("/etc/resolv.conf")
175 |         nameserver = DNS.defaults['server'][0]
176 | dom = domain
177 | if self.subdo == True:
178 | dom = domain.split(".")
179 | dom.pop(0)
180 | rootdom = ".".join(dom)
181 | else:
182 | rootdom = dom
183 |         if not self.nameserver:
184 | r = DNS.Request(rootdom, qtype='SOA').req()
185 | primary, email, serial, refresh, retry, expire, minimum = r.answers[
186 | 0]['data']
187 | test = DNS.Request(rootdom, qtype='NS', server=primary, aa=1).req()
188 | if test.header['status'] != "NOERROR":
189 | print "Error"
190 | sys.exit()
191 | self.nameserver = test.answers[0]['data']
192 | elif self.nameserver == "local":
193 | self.nameserver = nameserver
194 | return self.nameserver
195 |
196 | def run(self, tld):
197 | self.nameserver = self.getdns(self.domain)
198 | hostname = self.domain.split(".")[0] + "." + tld
199 | if self.verbose:
200 | ESC = chr(27)
201 | sys.stdout.write(ESC + '[2K' + ESC + '[G')
202 | sys.stdout.write("\r\tSearching for: " + hostname)
203 | sys.stdout.flush()
204 | try:
205 | test = DNS.Request(
206 | hostname,
207 | qtype='a',
208 | server=self.nameserver).req(
209 | )
210 | hostip = test.answers[0]['data']
211 | return hostip + ":" + hostname
212 | except Exception as e:
213 | pass
214 |
215 | def process(self):
216 | results = []
217 | for x in self.tlds:
218 | host = self.run(x)
219 | if host is not None:
220 | results.append(host)
221 | return results
222 |
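223 | if __name__ == "__main__":
224 |     # Standalone sketch (normally driven by theHarvester.py); dns_force
225 |     # expects a dns-names.txt wordlist in the working directory
226 |     brute = dns_force('example.com', '', verbose=False)
227 |     for found in brute.process():
228 |         print found   # "ip:hostname" pairs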
--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Base.py:
--------------------------------------------------------------------------------
1 | """
2 | $Id: Base.py,v 1.12.2.4 2007/05/22 20:28:31 customdesigned Exp $
3 |
4 | This file is part of the pydns project.
5 | Homepage: http://pydns.sourceforge.net
6 |
7 | This code is covered by the standard Python License.
8 |
9 | Base functionality. Request and Response classes, that sort of thing.
10 | """
11 |
12 | import socket
13 | import string
14 | import types
15 | import time
16 | import Type
17 | import Class
18 | import Opcode
19 | import asyncore
20 |
21 |
22 | class DNSError(Exception):
23 | pass
24 |
25 | defaults = {'protocol': 'udp', 'port': 53, 'opcode': Opcode.QUERY,
26 | 'qtype': Type.A, 'rd': 1, 'timing': 1, 'timeout': 30}
27 |
28 | defaults['server'] = []
29 |
30 |
31 | def ParseResolvConf(resolv_path="/etc/resolv.conf"):
32 |     try:
33 |         lines = open(resolv_path).readlines()
34 |     except IOError:
35 |         print "error in path " + resolv_path
36 |         lines = []
37 |     for line in lines:
38 | line = string.strip(line)
39 | if not line or line[0] == ';' or line[0] == '#':
40 | continue
41 | fields = string.split(line)
42 | if len(fields) < 2:
43 | continue
44 | if fields[0] == 'domain' and len(fields) > 1:
45 | defaults['domain'] = fields[1]
46 | if fields[0] == 'search':
47 | pass
48 | if fields[0] == 'options':
49 | pass
50 | if fields[0] == 'sortlist':
51 | pass
52 | if fields[0] == 'nameserver':
53 | defaults['server'].append(fields[1])
54 |
55 |
56 | def DiscoverNameServers():
57 | import sys
58 | if sys.platform in ('win32', 'nt'):
59 | import win32dns
60 | defaults['server'] = win32dns.RegistryResolve()
61 | else:
62 | return ParseResolvConf()
63 |
64 |
65 | class DnsRequest:
66 |
67 | """ high level Request object """
68 |
69 | def __init__(self, *name, **args):
70 | self.donefunc = None
71 | self.async = None
72 | self.defaults = {}
73 | self.argparse(name, args)
74 | self.defaults = self.args
75 |
76 | def argparse(self, name, args):
77 | if not name and 'name' in self.defaults:
78 | args['name'] = self.defaults['name']
79 | if isinstance(name, types.StringType):
80 | args['name'] = name
81 | else:
82 | if len(name) == 1:
83 | if name[0]:
84 | args['name'] = name[0]
85 | for i in defaults.keys():
86 | if i not in args:
87 | if i in self.defaults:
88 | args[i] = self.defaults[i]
89 | else:
90 | args[i] = defaults[i]
91 | if isinstance(args['server'], types.StringType):
92 | args['server'] = [args['server']]
93 | self.args = args
94 |
95 | def socketInit(self, a, b):
96 | self.s = socket.socket(a, b)
97 |
98 | def processUDPReply(self):
99 | import time
100 | import select
101 | if self.args['timeout'] > 0:
102 | r, w, e = select.select([self.s], [], [], self.args['timeout'])
103 | if not len(r):
104 | raise DNSError('Timeout')
105 | self.reply = self.s.recv(1024)
106 | self.time_finish = time.time()
107 | self.args['server'] = self.ns
108 | return self.processReply()
109 |
110 | def processTCPReply(self):
111 | import time
112 | import Lib
113 | self.f = self.s.makefile('r')
114 | header = self.f.read(2)
115 | if len(header) < 2:
116 | raise DNSError('EOF')
117 | count = Lib.unpack16bit(header)
118 | self.reply = self.f.read(count)
119 | if len(self.reply) != count:
120 | raise DNSError('incomplete reply')
121 | self.time_finish = time.time()
122 | self.args['server'] = self.ns
123 | return self.processReply()
124 |
125 | def processReply(self):
126 | import Lib
127 | self.args['elapsed'] = (self.time_finish - self.time_start) * 1000
128 | u = Lib.Munpacker(self.reply)
129 | r = Lib.DnsResult(u, self.args)
130 | r.args = self.args
131 | # self.args=None # mark this DnsRequest object as used.
132 | return r
133 | #### TODO TODO TODO ####
134 | # if protocol == 'tcp' and qtype == Type.AXFR:
135 | # while 1:
136 | # header = f.read(2)
137 | # if len(header) < 2:
138 | # print '========== EOF =========='
139 | # break
140 | # count = Lib.unpack16bit(header)
141 | # if not count:
142 | # print '========== ZERO COUNT =========='
143 | # break
144 | # print '========== NEXT =========='
145 | # reply = f.read(count)
146 | # if len(reply) != count:
147 | # print '*** Incomplete reply ***'
148 | # break
149 | # u = Lib.Munpacker(reply)
150 | # Lib.dumpM(u)
151 |
152 | def conn(self):
153 | self.s.connect((self.ns, self.port))
154 |
155 | def req(self, *name, **args):
156 | " needs a refactoring "
157 | import time
158 | import Lib
159 | self.argparse(name, args)
160 | # if not self.args:
161 | # raise DNSError,'reinitialize request before reuse'
162 | protocol = self.args['protocol']
163 | self.port = self.args['port']
164 | opcode = self.args['opcode']
165 | rd = self.args['rd']
166 | server = self.args['server']
167 | if isinstance(self.args['qtype'], types.StringType):
168 | try:
169 | qtype = getattr(Type, string.upper(self.args['qtype']))
170 | except AttributeError:
171 | raise DNSError('unknown query type')
172 | else:
173 | qtype = self.args['qtype']
174 | if 'name' not in self.args:
175 | print self.args
176 | raise DNSError('nothing to lookup')
177 | qname = self.args['name']
178 | if qtype == Type.AXFR:
179 | print 'Query type AXFR, protocol forced to TCP'
180 | protocol = 'tcp'
181 | # print 'QTYPE %d(%s)' % (qtype, Type.typestr(qtype))
182 | m = Lib.Mpacker()
183 | # jesus. keywords and default args would be good. TODO.
184 | m.addHeader(0,
185 | 0, opcode, 0, 0, rd, 0, 0, 0,
186 | 1, 0, 0, 0)
187 | m.addQuestion(qname, qtype, Class.IN)
188 | self.request = m.getbuf()
189 | try:
190 | if protocol == 'udp':
191 | self.sendUDPRequest(server)
192 | else:
193 | self.sendTCPRequest(server)
194 | except socket.error as reason:
195 | raise DNSError(reason)
196 | if self.async:
197 | return None
198 | else:
199 | return self.response
200 |
201 | def sendUDPRequest(self, server):
202 | "refactor me"
203 | self.response = None
204 | self.socketInit(socket.AF_INET, socket.SOCK_DGRAM)
205 | for self.ns in server:
206 | try:
207 | # TODO. Handle timeouts &c correctly (RFC)
208 | #self.s.connect((self.ns, self.port))
209 | self.conn()
210 | self.time_start = time.time()
211 | if not self.async:
212 | self.s.send(self.request)
213 | self.response = self.processUDPReply()
214 |             # 'except None:' trapped nothing; catch socket errors as intended
215 |             except socket.error:
216 |                 continue
217 | break
218 | if not self.response:
219 | if not self.async:
220 | raise DNSError('no working nameservers found')
221 |
222 | def sendTCPRequest(self, server):
223 | " do the work of sending a TCP request "
224 | import time
225 | import Lib
226 | self.response = None
227 | for self.ns in server:
228 | try:
229 | self.socketInit(socket.AF_INET, socket.SOCK_STREAM)
230 | self.time_start = time.time()
231 | self.conn()
232 | self.s.send(Lib.pack16bit(len(self.request)) + self.request)
233 | self.s.shutdown(1)
234 | self.response = self.processTCPReply()
235 | except socket.error:
236 | continue
237 | break
238 | if not self.response:
239 | raise DNSError('no working nameservers found')
240 |
241 | # class DnsAsyncRequest(DnsRequest):
242 |
243 |
244 | class DnsAsyncRequest(DnsRequest, asyncore.dispatcher_with_send):
245 |
246 | " an asynchronous request object. out of date, probably broken "
247 |
248 | def __init__(self, *name, **args):
249 | DnsRequest.__init__(self, *name, **args)
250 | # XXX todo
251 | if 'done' in args and args['done']:
252 | self.donefunc = args['done']
253 | else:
254 | self.donefunc = self.showResult
255 | # self.realinit(name,args) # XXX todo
256 | self.async = 1
257 |
258 | def conn(self):
259 | import time
260 | self.connect((self.ns, self.port))
261 | self.time_start = time.time()
262 | if 'start' in self.args and self.args['start']:
263 | asyncore.dispatcher.go(self)
264 |
265 | def socketInit(self, a, b):
266 | self.create_socket(a, b)
267 | asyncore.dispatcher.__init__(self)
268 | self.s = self
269 |
270 | def handle_read(self):
271 | if self.args['protocol'] == 'udp':
272 | self.response = self.processUDPReply()
273 | if self.donefunc:
274 | self.donefunc(*(self,))
275 |
276 | def handle_connect(self):
277 | self.send(self.request)
278 |
279 | def handle_write(self):
280 | pass
281 |
282 | def showResult(self, *s):
283 | self.response.show()
284 |
285 | #
286 | # $Log: Base.py,v $
287 | # Revision 1.12.2.4 2007/05/22 20:28:31 customdesigned
288 | # Missing import Lib
289 | #
290 | # Revision 1.12.2.3 2007/05/22 20:25:52 customdesigned
291 | # Use socket.inetntoa,inetaton.
292 | #
293 | # Revision 1.12.2.2 2007/05/22 20:21:46 customdesigned
294 | # Trap socket error
295 | #
296 | # Revision 1.12.2.1 2007/05/22 20:19:35 customdesigned
297 | # Skip bogus but non-empty lines in resolv.conf
298 | #
299 | # Revision 1.12 2002/04/23 06:04:27 anthonybaxter
300 | # attempt to refactor the DNSRequest.req method a little. after doing a bit
301 | # of this, I've decided to bite the bullet and just rewrite the puppy. will
302 | # be checkin in some design notes, then unit tests and then writing the sod.
303 | #
304 | # Revision 1.11 2002/03/19 13:05:02 anthonybaxter
305 | # converted to class based exceptions (there goes the python1.4 compatibility :)
306 | #
307 | # removed a quite gross use of 'eval()'.
308 | #
309 | # Revision 1.10 2002/03/19 12:41:33 anthonybaxter
310 | # tabnannied and reindented everything. 4 space indent, no tabs.
311 | # yay.
312 | #
313 | # Revision 1.9 2002/03/19 12:26:13 anthonybaxter
314 | # death to leading tabs.
315 | #
316 | # Revision 1.8 2002/03/19 10:30:33 anthonybaxter
317 | # first round of major bits and pieces. The major stuff here (summarised
318 | # from my local, off-net CVS server :/ this will cause some oddities with
319 | # the
320 | #
321 | # tests/testPackers.py:
322 | # a large slab of unit tests for the packer and unpacker code in DNS.Lib
323 | #
324 | # DNS/Lib.py:
325 | # placeholder for addSRV.
326 | # added 'klass' to addA, make it the same as the other A* records.
327 | # made addTXT check for being passed a string, turn it into a length 1 list.
328 | # explicitly check for adding a string of length > 255 (prohibited).
329 | # a bunch of cleanups from a first pass with pychecker
330 | # new code for pack/unpack. the bitwise stuff uses struct, for a smallish
331 | # (disappointly small, actually) improvement, while addr2bin is much
332 | # much faster now.
333 | #
334 | # DNS/Base.py:
335 | # added DiscoverNameServers. This automatically does the right thing
336 | # on unix/ win32. No idea how MacOS handles this. *sigh*
337 | # Incompatible change: Don't use ParseResolvConf on non-unix, use this
338 | # function, instead!
339 | # a bunch of cleanups from a first pass with pychecker
340 | #
341 | # Revision 1.5 2001/08/09 09:22:28 anthonybaxter
342 | # added what I hope is win32 resolver lookup support. I'll need to try
343 | # and figure out how to get the CVS checkout onto my windows machine to
344 | # make sure it works (wow, doing something other than games on the
345 | # windows machine :)
346 | #
347 | # Code from Wolfgang.Strobl@gmd.de
348 | # win32dns.py from
349 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66260
350 | #
351 | # Really, ParseResolvConf() should be renamed "FindNameServers" or
352 | # some such.
353 | #
354 | # Revision 1.4 2001/08/09 09:08:55 anthonybaxter
355 | # added identifying header to top of each file
356 | #
357 | # Revision 1.3 2001/07/19 07:20:12 anthony
358 | # Handle blank resolv.conf lines.
359 | # Patch from Bastian Kleineidam
360 | #
361 | # Revision 1.2 2001/07/19 06:57:07 anthony
362 | # cvs keywords added
363 | #
364 | #
365 |
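366 | # Typical lookup (sketch): populate the resolver list, then query:
367 | #   DiscoverNameServers()
368 | #   r = DnsRequest('example.com', qtype='A').req()
369 | #   print [a['data'] for a in r.answers]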
--------------------------------------------------------------------------------
/lib/theHarvester/COPYING:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.
5 | 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Library General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
--------------------------------------------------------------------------------
/lib/theHarvester/theHarvester.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import string
4 | import httplib
5 | import sys
6 | import os
7 | from socket import *
8 | import re
9 | import getopt
10 |
11 | try:
12 | import requests
13 | except ImportError:
14 |     print "Requests library not found, please install it before proceeding\n"
15 | sys.exit()
16 |
17 | from discovery import *
18 | from lib import htmlExport
19 | from lib import hostchecker
20 |
21 | print "\n*******************************************************************"
22 | print "* *"
23 | print "* | |_| |__ ___ /\ /\__ _ _ ____ _____ ___| |_ ___ _ __ *"
24 | print "* | __| '_ \ / _ \ / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *"
25 | print "* | |_| | | | __/ / __ / (_| | | \ V / __/\__ \ || __/ | *"
26 | print "* \__|_| |_|\___| \/ /_/ \__,_|_| \_/ \___||___/\__\___|_| *"
27 | print "* *"
28 | print "* TheHarvester Ver. 2.6 *"
29 | print "* Coded by Christian Martorella *"
30 | print "* Edge-Security Research *"
31 | print "* cmartorella@edge-security.com *"
32 | print "*******************************************************************\n\n"
33 |
34 |
35 | def usage():
36 |
37 |     comm = os.path.basename(sys.argv[0])
38 | 
39 |     if os.path.dirname(sys.argv[0]) == os.getcwd():
40 |         comm = "./" + comm
41 | 
42 |     print "Usage: theharvester options \n"
43 |     print " -d: Domain to search or company name"
44 |     print """ -b: data source: google, googleCSE, bing, bingapi, pgp, linkedin, yandex,
45 |           google-profiles, jigsaw, twitter, googleplus, yahoo, baidu, dogpilesearch, exalead, all\n"""
46 |     print " -s: Start in result number X (default: 0)"
47 |     print " -v: Verify host name via DNS resolution and search for virtual hosts"
48 |     print " -f: Save the results into an HTML and XML file"
49 |     print " -n: Perform a DNS reverse query on all ranges discovered"
50 |     print " -c: Perform a DNS brute force for the domain name"
51 |     print " -t: Perform a DNS TLD expansion discovery"
52 |     print " -e: Use this DNS server"
53 |     print " -l: Limit the number of results to work with (bing fetches results 50 at a time,"
54 |     print "     google 100 at a time, and pgp doesn't use this option)"
55 |     print " -h: Use the SHODAN database to query discovered hosts"
56 |     print "\nExamples:"
57 |     print " " + comm + " -d microsoft.com -l 500 -b google"
58 |     print " " + comm + " -d microsoft.com -b pgp"
59 |     print " " + comm + " -d microsoft -l 200 -b linkedin"
60 |     print " " + comm + " -d apple.com -b googleCSE -l 500 -s 300\n"
61 |
62 |
63 | def start(argv):
64 | if len(sys.argv) < 4:
65 | usage()
66 | sys.exit()
67 | try:
68 | opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcte:")
69 | except getopt.GetoptError:
70 | usage()
71 | sys.exit()
72 | start = 0
73 | host_ip = []
74 | filename = ""
75 | bingapi = "yes"
76 | dnslookup = False
77 | dnsbrute = False
78 | dnstld = False
79 | shodan = False
80 | vhost = []
81 | virtual = False
82 | limit = 100
83 | dnsserver = ""
84 | for opt, arg in opts:
85 | if opt == '-l':
86 | limit = int(arg)
87 | elif opt == '-d':
88 | word = arg
89 | elif opt == '-s':
90 | start = int(arg)
91 | elif opt == '-v':
92 | virtual = "basic"
93 | elif opt == '-f':
94 | filename = arg
95 | elif opt == '-n':
96 | dnslookup = True
97 | elif opt == '-c':
98 | dnsbrute = True
99 | elif opt == '-h':
100 | shodan = True
101 | elif opt == '-e':
102 | dnsserver = arg
103 | elif opt == '-t':
104 | dnstld = True
105 | elif opt == '-b':
106 | engine = arg
107 |             if engine not in ("google", "googleCSE", "linkedin", "pgp", "all", "google-profiles", "bing", "bingapi",
108 |                               "yandex", "jigsaw", "dogpilesearch", "twitter", "googleplus", "yahoo", "baidu", "exalead"):
109 |                 usage()
110 |                 print "Invalid search engine, try with: google, googleCSE, bing, bingapi, pgp, linkedin, yandex, jigsaw, dogpilesearch, twitter, googleplus, yahoo, baidu, exalead, google-profiles, all"
111 | sys.exit()
112 | else:
113 | pass
114 | if engine == "google":
115 | print "[-] Searching in Google:"
116 | search = googlesearch.search_google(word, limit, start)
117 | search.process()
118 | all_emails = search.get_emails()
119 | all_hosts = search.get_hostnames()
120 |
121 |     elif engine == "googleCSE":
122 | print "[-] Searching in Google Custom Search:"
123 | search = googleCSE.search_googleCSE(word, limit, start)
124 | search.process()
125 | search.store_results()
126 | all_emails = search.get_emails()
127 | all_hosts = search.get_hostnames()
128 |
129 |     elif engine == "exalead":
130 | print "[-] Searching in Exalead:"
131 | search = exaleadsearch.search_exalead(word, limit, start)
132 | search.process()
133 | all_emails = search.get_emails()
134 | all_hosts = search.get_hostnames()
135 |
136 | elif engine == "bing" or engine == "bingapi":
137 | print "[-] Searching in Bing:"
138 | search = bingsearch.search_bing(word, limit, start)
139 | if engine == "bingapi":
140 | bingapi = "yes"
141 | else:
142 | bingapi = "no"
143 | search.process(bingapi)
144 | all_emails = search.get_emails()
145 | all_hosts = search.get_hostnames()
146 |
147 | elif engine == "yandex": # Not working yet
148 | print "[-] Searching in Yandex:"
149 | search = yandexsearch.search_yandex(word, limit, start)
150 | search.process()
151 | all_emails = search.get_emails()
152 | all_hosts = search.get_hostnames()
153 |
154 | elif engine == "pgp":
155 | print "[-] Searching in PGP key server.."
156 | search = pgpsearch.search_pgp(word)
157 | search.process()
158 | all_emails = search.get_emails()
159 | all_hosts = search.get_hostnames()
160 |
161 | elif engine == "jigsaw":
162 | print "[-] Searching in Jigsaw.."
163 | search = jigsaw.search_jigsaw(word, limit)
164 | search.process()
165 | people = search.get_people()
166 | print "Users from Jigsaw:"
167 | print "====================="
168 | for user in people:
169 | print user
170 | sys.exit()
171 |
172 | elif engine == "dogpilesearch":
173 | print "[-] Searching in Dogpilesearch.."
174 | search = dogpilesearch.search_dogpile(word, limit)
175 | search.process()
176 | all_emails = search.get_emails()
177 | all_hosts = search.get_hostnames()
178 |
179 | elif engine == "yahoo":
180 | print "[-] Searching in Yahoo.."
181 | search = yahoosearch.search_yahoo(word, limit)
182 | search.process()
183 | all_emails = search.get_emails()
184 | all_hosts = search.get_hostnames()
185 |
186 | elif engine == "baidu":
187 | print "[-] Searching in Baidu.."
188 | search = baidusearch.search_baidu(word, limit)
189 | search.process()
190 | all_emails = search.get_emails()
191 | all_hosts = search.get_hostnames()
192 |
193 | elif engine == "googleplus":
194 | print "[-] Searching in Google+ .."
195 | search = googleplussearch.search_googleplus(word, limit)
196 | search.process()
197 | people = search.get_people()
198 | print "Users from Google+:"
199 | print "===================="
200 | for user in people:
201 | print user
202 | sys.exit()
203 |
204 | elif engine == "twitter":
205 | print "[-] Searching in Twitter .."
206 | search = twittersearch.search_twitter(word, limit)
207 | search.process()
208 | people = search.get_people()
209 | print "Users from Twitter:"
210 | print "===================="
211 | for user in people:
212 | print user
213 | sys.exit()
214 |
215 | elif engine == "linkedin":
216 | print "[-] Searching in Linkedin.."
217 | search = linkedinsearch.search_linkedin(word, limit)
218 | search.process()
219 | people = search.get_people()
220 | print "Users from Linkedin:"
221 | print "===================="
222 | for user in people:
223 | print user
224 | sys.exit()
225 | elif engine == "google-profiles":
226 | print "[-] Searching in Google profiles.."
227 | search = googlesearch.search_google(word, limit, start)
228 | search.process_profiles()
229 | people = search.get_profiles()
230 | print "Users from Google profiles:"
231 | print "---------------------------"
232 | for users in people:
233 | print users
234 | sys.exit()
235 | elif engine == "all":
236 | print "Full harvest.."
237 | all_emails = []
238 | all_hosts = []
239 | virtual = "basic"
240 | print "[-] Searching in Google.."
241 | search = googlesearch.search_google(word, limit, start)
242 | search.process()
243 | emails = search.get_emails()
244 | hosts = search.get_hostnames()
245 | all_emails.extend(emails)
246 | all_hosts.extend(hosts)
247 | print "[-] Searching in PGP Key server.."
248 | search = pgpsearch.search_pgp(word)
249 | search.process()
250 | emails = search.get_emails()
251 | hosts = search.get_hostnames()
252 | all_hosts.extend(hosts)
253 | all_emails.extend(emails)
254 | print "[-] Searching in Bing.."
255 | bingapi = "no"
256 | search = bingsearch.search_bing(word, limit, start)
257 | search.process(bingapi)
258 | emails = search.get_emails()
259 | hosts = search.get_hostnames()
260 | all_hosts.extend(hosts)
261 | all_emails.extend(emails)
262 | print "[-] Searching in Exalead.."
263 | search = exaleadsearch.search_exalead(word, limit, start)
264 | search.process()
265 | emails = search.get_emails()
266 | hosts = search.get_hostnames()
267 | all_hosts.extend(hosts)
268 | all_emails.extend(emails)
269 | #Results############################################################
270 | print "\n\n[+] Emails found:"
271 | print "------------------"
272 | if all_emails == []:
273 | print "No emails found"
274 | else:
275 | for emails in all_emails:
276 | print emails
277 |     full = []  # keep 'full' defined for the DNS/vhost/shodan stages even if no hosts resolve
278 | print "\n[+] Hosts found in search engines:"
279 | print "------------------------------------"
280 | if all_hosts == []:
281 | print "No hosts found"
282 | else:
283 | print "[-] Resolving hostnames IPs... "
284 | full_host = hostchecker.Checker(all_hosts)
285 | full = full_host.check()
286 | for host in full:
287 | ip = host.split(':')[0]
288 | print host
289 | if host_ip.count(ip.lower()):
290 | pass
291 | else:
292 | host_ip.append(ip.lower())
293 |
294 | #DNS reverse lookup#################################################
295 | dnsrev = []
296 | if dnslookup == True:
297 | print "\n[+] Starting active queries:"
298 | analyzed_ranges = []
299 | for x in full:
300 | ip = x.split(":")[0]
301 | range = ip.split(".")
302 | range[3] = "0/24"
303 | range = string.join(range, '.')
304 | if not analyzed_ranges.count(range):
305 |                 print "[-] Performing reverse lookup in: " + range
306 | a = dnssearch.dns_reverse(range, True)
307 | a.list()
308 | res = a.process()
309 | analyzed_ranges.append(range)
310 | else:
311 | continue
312 | for x in res:
313 | if x.count(word):
314 | dnsrev.append(x)
315 | if x not in full:
316 | full.append(x)
317 | print "Hosts found after reverse lookup:"
318 | print "---------------------------------"
319 | for xh in dnsrev:
320 | print xh
321 | #DNS Brute force####################################################
322 | dnsres = []
323 | if dnsbrute == True:
324 | print "\n[-] Starting DNS brute force:"
325 | a = dnssearch.dns_force(word, dnsserver, verbose=True)
326 | res = a.process()
327 | print "\n[+] Hosts found after DNS brute force:\n"
328 | for y in res:
329 | print y
330 | dnsres.append(y)
331 | if y not in full:
332 | full.append(y)
333 | #DNS TLD expansion###################################################
334 | dnstldres = []
335 | if dnstld == True:
336 | print "[-] Starting DNS TLD expansion:"
337 | a = dnssearch.dns_tld(word, dnsserver, verbose=True)
338 | res = a.process()
339 | print "\n[+] Hosts found after DNS TLD expansion:"
340 | print "=========================================="
341 | for y in res:
342 | print y
343 | dnstldres.append(y)
344 | if y not in full:
345 | full.append(y)
346 |
347 | #Virtual hosts search###############################################
348 | if virtual == "basic":
349 | print "[+] Virtual hosts:"
350 | print "=================="
351 | for l in host_ip:
352 | search = bingsearch.search_bing(l, limit, start)
353 | search.process_vhost()
354 | res = search.get_allhostnames()
355 | for x in res:
356 |                     x = re.sub(r'[[\<\/?]*[\w]*>]*','',x)  # strip leftover HTML tag fragments from the hostname
357 | x = re.sub('<','',x)
358 | x = re.sub('>','',x)
359 | print l + "\t" + x
360 | vhost.append(l + ":" + x)
361 | full.append(l + ":" + x)
362 | else:
363 | pass
364 | shodanres = []
365 | shodanvisited = []
366 | if shodan == True:
367 | print "[+] Shodan Database search:"
368 | for x in full:
369 | print x
370 | try:
371 | ip = x.split(":")[0]
372 | if not shodanvisited.count(ip):
373 | print "\tSearching for: " + x
374 | a = shodansearch.search_shodan(ip)
375 | shodanvisited.append(ip)
376 | results = a.run()
377 | for res in results:
378 | shodanres.append(
379 | x + "SAPO" + str(res['banner']) + "SAPO" + str(res['port']))
380 | except:
381 | pass
382 | print "[+] Shodan results:"
383 | print "==================="
384 | for x in shodanres:
385 | print x.split("SAPO")[0] + ":" + x.split("SAPO")[1]
386 | else:
387 | pass
388 |
389 | ###################################################################
390 | # Here i need to add explosion mode.
391 | # Tengo que sacar los TLD para hacer esto.
392 | recursion = None
393 | if recursion:
394 | start = 0
395 | for word in vhost:
396 | search = googlesearch.search_google(word, limit, start)
397 | search.process()
398 | emails = search.get_emails()
399 | hosts = search.get_hostnames()
400 | print emails
401 | print hosts
402 | else:
403 | pass
404 |
405 | if filename != "":
406 | try:
407 | print "[+] Saving files..."
408 | html = htmlExport.htmlExport(
409 | all_emails,
410 | full,
411 | vhost,
412 | dnsres,
413 | dnsrev,
414 | filename,
415 | word,
416 | shodanres,
417 | dnstldres)
418 | save = html.writehtml()
419 | except Exception as e:
420 | print e
421 | print "Error creating the file"
422 | try:
423 | filename = filename.split(".")[0] + ".xml"
424 | file = open(filename, 'w')
425 |             file.write('<theHarvester>')
426 |             for x in all_emails:
427 |                 file.write('<email>' + x + '</email>')
428 |             for x in all_hosts:
429 |                 file.write('<host>' + x + '</host>')
430 |             for x in vhost:
431 |                 file.write('<vhost>' + x + '</vhost>')
432 |             file.write('</theHarvester>')
433 | file.flush()
434 | file.close()
435 | print "Files saved!"
436 | except Exception as er:
437 |             print "Error saving XML file: " + str(er)
438 | sys.exit()
439 |
440 | if __name__ == "__main__":
441 | try:
442 | start(sys.argv[1:])
443 | except KeyboardInterrupt:
444 | print "Search interrupted by user.."
445 |     except Exception:
446 | sys.exit()
447 |
--------------------------------------------------------------------------------
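Note (illustrative, not part of the original tree): a minimal sketch of driving one
discovery module directly, bypassing the start() dispatcher in theHarvester.py above.
It assumes a Python 2 interpreter, to match the print statements used throughout,
and that lib/theHarvester is the working directory so the "from discovery import ..."
imports resolve; example.com is a placeholder domain.

    # Hypothetical standalone driver mirroring what start() does for '-b google'
    from discovery import googlesearch
    from lib import hostchecker

    search = googlesearch.search_google("example.com", 100, 0)  # word, limit, start
    search.process()
    print "Emails:", search.get_emails()

    # Checker.check() returns combined hostname/IP entries joined with ':',
    # the same format start() splits apart when it builds host_ip
    for entry in hostchecker.Checker(search.get_hostnames()).check():
        print entry
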
/lib/markup.py:
--------------------------------------------------------------------------------
1 | # This code is in the public domain, it comes
2 | # with absolutely no warranty and you can do
3 | # absolutely whatever you want with it.
4 |
5 | __date__ = '17 May 2007'
6 | __version__ = '1.7'
7 | __doc__ = """
8 | This is markup.py - a Python module that attempts to
9 | make it easier to generate HTML/XML from a Python program
10 | in an intuitive, lightweight, customizable and pythonic way.
11 |
12 | The code is in the public domain.
13 |
14 | Version: %s as of %s.
15 |
16 | Documentation and further info is at http://markup.sourceforge.net/
17 |
18 | Please send bug reports, feature requests, enhancement
19 | ideas or questions to nogradi at gmail dot com.
20 |
21 | Installation: drop markup.py somewhere into your Python path.
22 | """ % ( __version__, __date__ )
23 |
24 | import string
25 |
26 |
27 | class element:
28 |
29 | """This class handles the addition of a new element."""
30 |
31 | def __init__(self, tag, case='lower', parent=None):
32 | self.parent = parent
33 |
34 | if case == 'lower':
35 | self.tag = tag.lower()
36 | else:
37 | self.tag = tag.upper()
38 |
39 | def __call__(self, *args, **kwargs):
40 | if len(args) > 1:
41 | raise ArgumentError(self.tag)
42 |
43 | # if class_ was defined in parent it should be added to every element
44 | if self.parent is not None and self.parent.class_ is not None:
45 | if 'class_' not in kwargs:
46 | kwargs['class_'] = self.parent.class_
47 |
48 | if self.parent is None and len(args) == 1:
49 | x = [self.render(self.tag, False, myarg, mydict)
50 | for myarg, mydict in _argsdicts(args, kwargs)]
51 | return '\n'.join(x)
52 | elif self.parent is None and len(args) == 0:
53 | x = [self.render(self.tag, True, myarg, mydict)
54 | for myarg, mydict in _argsdicts(args, kwargs)]
55 | return '\n'.join(x)
56 |
57 | if self.tag in self.parent.twotags:
58 | for myarg, mydict in _argsdicts(args, kwargs):
59 | self.render(self.tag, False, myarg, mydict)
60 | elif self.tag in self.parent.onetags:
61 | if len(args) == 0:
62 | for myarg, mydict in _argsdicts(args, kwargs):
63 | # here myarg is always None, because len( args ) = 0
64 | self.render(self.tag, True, myarg, mydict)
65 | else:
66 | raise ClosingError(self.tag)
67 | elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
68 | raise DeprecationError(self.tag)
69 | else:
70 | raise InvalidElementError(self.tag, self.parent.mode)
71 |
72 | def render(self, tag, single, between, kwargs):
73 | """Append the actual tags to content."""
74 |
75 | out = "<%s" % tag
76 | for key, value in kwargs.iteritems():
77 | # when value is None that means stuff like <... checked>
78 | if value is not None:
79 | # strip this so class_ will mean class, etc.
80 | key = key.strip('_')
81 | # special cases, maybe change _ to - overall?
82 | if key == 'http_equiv':
83 | key = 'http-equiv'
84 | elif key == 'accept_charset':
85 | key = 'accept-charset'
86 | out = "%s %s=\"%s\"" % (out, key, escape(value))
87 | else:
88 | out = "%s %s" % (out, key)
89 | if between is not None:
90 |             out = "%s>%s</%s>" % (out, between, tag)
91 | else:
92 | if single:
93 | out = "%s />" % out
94 | else:
95 | out = "%s>" % out
96 | if self.parent is not None:
97 | self.parent.content.append(out)
98 | else:
99 | return out
100 |
101 | def close(self):
102 | """Append a closing tag unless element has only opening tag."""
103 |
104 | if self.tag in self.parent.twotags:
105 |             self.parent.content.append("</%s>" % self.tag)
106 | elif self.tag in self.parent.onetags:
107 | raise ClosingError(self.tag)
108 | elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
109 | raise DeprecationError(self.tag)
110 |
111 | def open(self, **kwargs):
112 | """Append an opening tag."""
113 |
114 | if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
115 | self.render(self.tag, False, None, kwargs)
116 |         elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
117 | raise DeprecationError(self.tag)
118 |
119 |
120 | class page:
121 |
122 | """This is our main class representing a document. Elements are added
123 | as attributes of an instance of this class."""
124 |
125 | def __init__(self, mode='strict_html', case='lower',
126 | onetags=None, twotags=None, separator='\n', class_=None):
127 |         """Stuff that affects the whole document.
128 |
129 | mode -- 'strict_html' for HTML 4.01 (default)
130 | 'html' alias for 'strict_html'
131 | 'loose_html' to allow some deprecated elements
132 | 'xml' to allow arbitrary elements
133 |
134 | case -- 'lower' element names will be printed in lower case (default)
135 | 'upper' they will be printed in upper case
136 |
137 | onetags -- list or tuple of valid elements with opening tags only
138 | twotags -- list or tuple of valid elements with both opening and closing tags
139 | these two keyword arguments may be used to select
140 | the set of valid elements in 'xml' mode
141 | invalid elements will raise appropriate exceptions
142 |
143 | separator -- string to place between added elements, defaults to newline
144 |
145 | class_ -- a class that will be added to every element if defined"""
146 |
147 | valid_onetags = [
148 | "AREA",
149 | "BASE",
150 | "BR",
151 | "COL",
152 | "FRAME",
153 | "HR",
154 | "IMG",
155 | "INPUT",
156 | "LINK",
157 | "META",
158 | "PARAM"]
159 | valid_twotags = [
160 | "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
161 | "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
162 | "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
163 | "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
164 | "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
165 | "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
166 | "TT", "UL", "VAR"]
167 | deprecated_onetags = ["BASEFONT", "ISINDEX"]
168 | deprecated_twotags = [
169 | "APPLET",
170 | "CENTER",
171 | "DIR",
172 | "FONT",
173 | "MENU",
174 | "S",
175 | "STRIKE",
176 | "U"]
177 |
178 | self.header = []
179 | self.content = []
180 | self.footer = []
181 | self.case = case
182 | self.separator = separator
183 |
184 | # init( ) sets it to True so we know that