├── lib
    ├── theHarvester
    │   ├── .gitignore
    │   ├── lib
    │   │   ├── __init__.py
    │   │   ├── hostchecker.py
    │   │   ├── htmlExport.py
    │   │   └── markup.py
    │   ├── discovery
    │   │   ├── shodan
    │   │   │   ├── __init__.py
    │   │   │   ├── wps.py
    │   │   │   └── api.py
    │   │   ├── DNS
    │   │   │   ├── Lib.py
    │   │   │   ├── Type.py
    │   │   │   ├── Opcode.py
    │   │   │   ├── Class.py
    │   │   │   ├── lazy.py
    │   │   │   ├── __init__.py
    │   │   │   ├── Status.py
    │   │   │   ├── win32dns.py
    │   │   │   └── Base.py
    │   │   ├── __init__.py
    │   │   ├── shodansearch.py
    │   │   ├── pgpsearch.py
    │   │   ├── googlesets.py
    │   │   ├── linkedinsearch.py
    │   │   ├── baidusearch.py
    │   │   ├── yahoosearch.py
    │   │   ├── dogpilesearch.py
    │   │   ├── twittersearch.py
    │   │   ├── googleplussearch.py
    │   │   ├── asksearch.py
    │   │   ├── jigsaw.py
    │   │   ├── yandexsearch.py
    │   │   ├── googlesearch.py
    │   │   ├── exaleadsearch.py
    │   │   ├── bingsearch.py
    │   │   ├── googleCSE.py
    │   │   ├── dnssearch-threads.py
    │   │   └── dnssearch.py
    │   ├── tests
    │   │   └── myparser_test.py
    │   ├── LICENSES
    │   ├── changelog.txt
    │   ├── README
    │   ├── myparser.py
    │   ├── COPYING
    │   └── theHarvester.py
    ├── hostchecker.py
    ├── htmlExport.py
    └── markup.py
├── README.md
├── .gitattributes
└── snoop.py


/lib/theHarvester/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.idea
3 | 


--------------------------------------------------------------------------------
/lib/theHarvester/lib/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["markup", "graphs", "hostchecker"]
2 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/__init__.py:
--------------------------------------------------------------------------------
1 | from api import WebAPI
2 | 
3 | __version__ = "0.5.0"
4 | 
5 | __all__ = ['WebAPI']
6 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Lib.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrismaddalena/DomainSnooper/master/lib/theHarvester/discovery/DNS/Lib.py


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Type.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chrismaddalena/DomainSnooper/master/lib/theHarvester/discovery/DNS/Type.py


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DomainSnooper
2 | Tool for checking a list of domains for the purpose of gathering emails and social media handles and checking for potential issues
3 | 
4 | This tool uses Troy Hunt's HaveIBeenPwned API and TheHarvester by laramies. TheHarvester is included in this repo to make things simple. Learn more here: https://github.com/laramies/theHarvester
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = ["bingsearch",
 2 |            "googlesearch",
 3 |            "googleplussearch",
 4 |            "pgpsearch",
 5 |            "linkedinsearch",
 6 |            "exaleadsearch",
 7 |            "yandexsearch",
 8 |            "googlesets",
 9 |            "dnssearch",
10 |            "shodansearch",
11 |            "jigsaw",
12 |            "twittersearch",
13 |            "dogpilesearch",
14 |            "baidusearch",
15 |            "yahoosearch",
16 |            "googleCSE"]
17 | 


--------------------------------------------------------------------------------
/lib/theHarvester/tests/myparser_test.py:
--------------------------------------------------------------------------------
 1 | #
 2 | # Unit tests for myparser.py
 3 | #
 4 | import myparser
 5 | 
 6 | import unittest
 7 | 
 8 | class TestMyParser(unittest.TestCase):
 9 | 
10 |   def test_emails(self):
11 |     word = 'domain.com'
12 |     results = '***a@domain***banotherdomain.com***c@domain.com***d@sub.domain.com***'
13 |     p = myparser.parser(results, word)
14 |     emails = sorted(p.emails())
15 |     self.assertEquals(emails, [ 'c@domain.com', 'd@sub.domain.com' ])
16 | 
17 | if __name__ == '__main__':
18 |   unittest.main()
19 | 


--------------------------------------------------------------------------------
/lib/hostchecker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | """
 4 | Created by laramies on 2008-08-21.
 5 | """
 6 | 
 7 | import sys
 8 | import socket
 9 | 
10 | 
11 | class Checker():
12 | 
13 |     def __init__(self, hosts):
14 |         self.hosts = hosts
15 |         self.realhosts = []
16 | 
17 |     def check(self):
18 |         for x in self.hosts:
19 |             try:
20 |                 res = socket.gethostbyname(x)
21 |                 self.realhosts.append(res + ":" + x)
22 |             except Exception as e:
23 |                 pass
24 |         return self.realhosts
25 | 


--------------------------------------------------------------------------------
/lib/theHarvester/lib/hostchecker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | """
 4 | Created by laramies on 2008-08-21.
 5 | """
 6 | 
 7 | import sys
 8 | import socket
 9 | 
10 | 
11 | class Checker():
12 | 
13 |     def __init__(self, hosts):
14 |         self.hosts = hosts
15 |         self.realhosts = []
16 | 
17 |     def check(self):
18 |         for x in self.hosts:
19 |             try:
20 |                 res = socket.gethostbyname(x)
21 |                 self.realhosts.append(res + ":" + x)
22 |             except Exception as e:
23 |                 pass
24 |         return self.realhosts
25 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodansearch.py:
--------------------------------------------------------------------------------
 1 | from shodan import WebAPI
 2 | import sys
 3 | 
 4 | 
 5 | class search_shodan():
 6 | 
 7 |     def __init__(self, host):
 8 |         self.host = host
 9 |         self.key = "oCiMsgM6rQWqiTvPxFHYcExlZgg7wvTt"
10 |         if self.key == "":
11 |             print "You need an API key in order to use SHODAN database. You can get one here: http://www.shodanhq.com/"
12 |             sys.exit()
13 |         self.api = WebAPI(self.key)
14 | 
15 |     def run(self):
16 |         try:
17 |             host = self.api.host(self.host)
18 |             return host['data']
19 |         except:
20 |             print "SHODAN empty reply or error in the call"
21 |             return "error"
22 | 


--------------------------------------------------------------------------------
/lib/theHarvester/LICENSES:
--------------------------------------------------------------------------------
 1 | Released under the GPL v 2.0.
 2 | 
 3 | If you did not recieve a copy of the GPL, try http://www.gnu.org/.
 4 | 
 5 | Copyright 2011 Christian Martorella 
 6 | 
 7 | theHarvester is free software; you can redistribute it and/or modify
 8 | it under the terms of the GNU General Public License as published by
 9 | the Free Software Foundation version 2 of the License.
10 | 
11 | theHarvester is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 | GNU General Public License for more details.
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16 | 
17 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/pgpsearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | 
 6 | 
 7 | class search_pgp:
 8 | 
 9 |     def __init__(self, word):
10 |         self.word = word
11 |         self.results = ""
12 |         self.server = "pgp.rediris.es:11371"
13 |         self.hostname = "pgp.rediris.es"
14 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 | 
16 |     def process(self):
17 |         h = httplib.HTTP(self.server)
18 |         h.putrequest('GET', "/pks/lookup?search=" + self.word + "&op=index")
19 |         h.putheader('Host', self.hostname)
20 |         h.putheader('User-agent', self.userAgent)
21 |         h.endheaders()
22 |         returncode, returnmsg, headers = h.getreply()
23 |         self.results = h.getfile().read()
24 | 
25 |     def get_emails(self):
26 |         rawres = myparser.parser(self.results, self.word)
27 |         return rawres.emails()
28 | 
29 |     def get_hostnames(self):
30 |         rawres = myparser.parser(self.results, self.word)
31 |         return rawres.hostnames()
32 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Opcode.py:
--------------------------------------------------------------------------------
 1 | """
 2 |  $Id: Opcode.py,v 1.6 2002/04/23 10:51:43 anthonybaxter Exp $
 3 | 
 4 |  This file is part of the pydns project.
 5 |  Homepage: http://pydns.sourceforge.net
 6 | 
 7 |  This code is covered by the standard Python License.
 8 | 
 9 |  Opcode values in message header. RFC 1035, 1996, 2136.
10 | """
11 | 
12 | 
13 | QUERY = 0
14 | IQUERY = 1
15 | STATUS = 2
16 | NOTIFY = 4
17 | UPDATE = 5
18 | 
19 | # Construct reverse mapping dictionary
20 | 
21 | _names = dir()
22 | opcodemap = {}
23 | for _name in _names:
24 |     if _name[0] != '_':
25 |         opcodemap[eval(_name)] = _name
26 | 
27 | 
28 | def opcodestr(opcode):
29 |     if opcode in opcodemap:
30 |         return opcodemap[opcode]
31 |     else:
32 |         return repr(opcode)
33 | 
34 | #
35 | # $Log: Opcode.py,v $
36 | # Revision 1.6  2002/04/23 10:51:43  anthonybaxter
37 | # Added UPDATE, NOTIFY.
38 | #
39 | # Revision 1.5  2002/03/19 12:41:33  anthonybaxter
40 | # tabnannied and reindented everything. 4 space indent, no tabs.
41 | # yay.
42 | #
43 | # Revision 1.4  2002/03/19 12:26:13  anthonybaxter
44 | # death to leading tabs.
45 | #
46 | # Revision 1.3  2001/08/09 09:08:55  anthonybaxter
47 | # added identifying header to top of each file
48 | #
49 | # Revision 1.2  2001/07/19 06:57:07  anthony
50 | # cvs keywords added
51 | #
52 | #
53 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googlesets.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 | import time
 7 | 
 8 | 
 9 | class search_google_labs:
10 | 
11 |     def __init__(self, list):
12 |         self.results = ""
13 |         self.totalresults = ""
14 |         self.server = "labs.google.com"
15 |         self.hostname = "labs.google.com"
16 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 |         id = 0
18 |         self.set = ""
19 |         for x in list:
20 |             id += 1
21 |             if id == 1:
22 |                 self.set = self.set + "q" + str(id) + "=" + str(x)
23 |             else:
24 |                 self.set = self.set + "&q" + str(id) + "=" + str(x)
25 | 
26 |     def do_search(self):
27 |         h = httplib.HTTP(self.server)
28 |         h.putrequest('GET', "/sets?hl=en&" + self.set)
29 |         h.putheader('Host', self.hostname)
30 |         h.putheader('User-agent', self.userAgent)
31 |         h.endheaders()
32 |         returncode, returnmsg, headers = h.getreply()
33 |         self.results = h.getfile().read()
34 |         self.totalresults += self.results
35 | 
36 |     def get_set(self):
37 |         rawres = myparser.parser(self.totalresults, list)
38 |         return rawres.set()
39 | 
40 |     def process(self):
41 |         self.do_search()
42 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/linkedinsearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import requests
 3 | import sys
 4 | import myparser
 5 | import re
 6 | 
 7 | 
 8 | class search_linkedin:
 9 | 
10 |     def __init__(self, word, limit):
11 |         self.word = word.replace(' ', '%20')
12 |         self.results = ""
13 |         self.totalresults = ""
14 |         self.server = "www.google.com"
15 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
16 |         self.quantity = "100"
17 |         self.limit = int(limit)
18 |         self.counter = 0
19 | 
20 |     def do_search(self):
21 |         try:
22 |             urly="http://"+ self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Alinkedin.com/in%20" + self.word
23 |         except Exception, e:
24 |             print e
25 |         try:
26 |             r=requests.get(urly)
27 |         except Exception,e:
28 |             print e
29 |         self.results = r.content
30 |         self.totalresults += self.results
31 | 
32 |     def get_people(self):
33 |         rawres = myparser.parser(self.totalresults, self.word)
34 |         return rawres.people_linkedin()
35 | 
36 |     def process(self):
37 |         while (self.counter < self.limit):
38 |             self.do_search()
39 |             self.counter += 100
40 |             print "\tSearching " + str(self.counter) + " results.."
41 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/baidusearch.py:
--------------------------------------------------------------------------------
 1 | import httplib
 2 | import myparser
 3 | import time
 4 | import sys
 5 | 
 6 | 
 7 | class search_baidu:
 8 | 
 9 |     def __init__(self, word, limit):
10 |         self.word = word
11 |         self.total_results = ""
12 |         self.server = "www.baidu.com"
13 |         self.hostname = "www.baidu.com"
14 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 |         self.limit = limit
16 |         self.counter = 0
17 | 
18 |     def do_search(self):
19 |         h = httplib.HTTP(self.server)
20 | 
21 |         h.putrequest('GET', "/s?wd=%40" + self.word
22 |                      + "&pn=" + str(self.counter))
23 |         h.putheader('Host', self.hostname)
24 |         h.putheader('User-agent', self.userAgent)
25 |         h.endheaders()
26 |         returncode, returnmsg, headers = h.getreply()
27 | 
28 |         self.total_results += h.getfile().read()
29 | 
30 |     def process(self):
31 |         while self.counter <= self.limit and self.counter <= 1000:
32 |             self.do_search()
33 |             time.sleep(1)
34 | 
35 |             print "\tSearching " + str(self.counter) + " results..."
36 |             self.counter += 10
37 | 
38 |     def get_emails(self):
39 |         rawres = myparser.parser(self.total_results, self.word)
40 |         return rawres.emails()
41 | 
42 |     def get_hostnames(self):
43 |         rawres = myparser.parser(self.total_results, self.word)
44 |         return rawres.hostnames()
45 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/yahoosearch.py:
--------------------------------------------------------------------------------
 1 | import httplib
 2 | import myparser
 3 | import time
 4 | import sys
 5 | 
 6 | 
 7 | class search_yahoo:
 8 | 
 9 |     def __init__(self, word, limit):
10 |         self.word = word
11 |         self.total_results = ""
12 |         self.server = "search.yahoo.com"
13 |         self.hostname = "search.yahoo.com"
14 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 |         self.limit = limit
16 |         self.counter = 0
17 | 
18 |     def do_search(self):
19 |         h = httplib.HTTP(self.server)
20 | 
21 |         h.putrequest('GET', "/search?p=\"%40" + self.word
22 |                      + "\"&b=" + str(self.counter) + "&pz=10")
23 |         h.putheader('Host', self.hostname)
24 |         h.putheader('User-agent', self.userAgent)
25 |         h.endheaders()
26 |         returncode, returnmsg, headers = h.getreply()
27 | 
28 |         self.total_results += h.getfile().read()
29 | 
30 |     def process(self):
31 |         while self.counter <= self.limit and self.counter <= 1000:
32 |             self.do_search()
33 |             time.sleep(1)
34 | 
35 |             print "\tSearching " + str(self.counter) + " results..."
36 |             self.counter += 10
37 | 
38 |     def get_emails(self):
39 |         rawres = myparser.parser(self.total_results, self.word)
40 |         return rawres.emails()
41 | 
42 |     def get_hostnames(self):
43 |         rawres = myparser.parser(self.total_results, self.word)
44 |         return rawres.hostnames()
45 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dogpilesearch.py:
--------------------------------------------------------------------------------
 1 | import httplib
 2 | import myparser
 3 | import time
 4 | import sys
 5 | 
 6 | 
 7 | class search_dogpile:
 8 | 
 9 |     def __init__(self, word, limit):
10 |         self.word = word
11 |         self.total_results = ""
12 |         self.server = "www.dogpile.com"
13 |         self.hostname = "www.dogpile.com"
14 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
15 |         self.limit = limit
16 |         self.counter = 0
17 | 
18 |     def do_search(self):
19 |         h = httplib.HTTP(self.server)
20 | 
21 |         # Dogpile is hardcoded to return 10 results
22 |         h.putrequest('GET', "/search/web?qsi=" + str(self.counter)
23 |                      + "&q=\"%40" + self.word + "\"")
24 |         h.putheader('Host', self.hostname)
25 |         h.putheader('User-agent', self.userAgent)
26 |         h.endheaders()
27 |         returncode, returnmsg, headers = h.getreply()
28 | 
29 |         self.total_results += h.getfile().read()
30 | 
31 |     def process(self):
32 |         while self.counter <= self.limit and self.counter <= 1000:
33 |             self.do_search()
34 |             time.sleep(1)
35 | 
36 |             print "\tSearching " + str(self.counter) + " results..."
37 |             self.counter += 10
38 | 
39 |     def get_emails(self):
40 |         rawres = myparser.parser(self.total_results, self.word)
41 |         return rawres.emails()
42 | 
43 |     def get_hostnames(self):
44 |         rawres = myparser.parser(self.total_results, self.word)
45 |         return rawres.hostnames()
46 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/twittersearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import requests
 3 | import sys
 4 | import myparser
 5 | import re
 6 | 
 7 | 
 8 | class search_twitter:
 9 | 
10 |     def __init__(self, word, limit):
11 |         self.word = word.replace(' ', '%20')
12 |         self.results = ""
13 |         self.totalresults = ""
14 |         self.server = "www.google.com"
15 |         self.hostname = "www.google.com"
16 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100116 Firefox/3.7"
17 |         self.quantity = "100"
18 |         self.limit = int(limit)
19 |         self.counter = 0
20 | 
21 |     def do_search(self):
22 |         try:
23 |             urly="https://"+ self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Atwitter.com%20intitle%3A%22on+Twitter%22%20" + self.word
24 |         except Exception, e:
25 |             print e
26 |         headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
27 |         try:
28 |             r=requests.get(urly,headers=headers)
29 |         except Exception,e:
30 |             print e
31 |         self.results = r.content
32 |         self.totalresults += self.results
33 | 
34 |     def get_people(self):
35 |         rawres = myparser.parser(self.totalresults, self.word)
36 |         return rawres.people_twitter()
37 | 
38 |     def process(self):
39 |         while (self.counter < self.limit):
40 |             self.do_search()
41 |             self.counter += 100
42 |             print "\tSearching " + str(self.counter) + " results.."
43 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Class.py:
--------------------------------------------------------------------------------
 1 | """
 2 | $Id: Class.py,v 1.6 2002/04/23 12:52:19 anthonybaxter Exp $
 3 | 
 4 |  This file is part of the pydns project.
 5 |  Homepage: http://pydns.sourceforge.net
 6 | 
 7 |  This code is covered by the standard Python License.
 8 | 
 9 |  CLASS values (section 3.2.4)
10 | """
11 | 
12 | 
13 | IN = 1          # the Internet
14 | CS = 2          # the CSNET class (Obsolete - used only for examples in
15 |                 # some obsolete RFCs)
16 | CH = 3          # the CHAOS class. When someone shows me python running on
17 |                 # a Symbolics Lisp machine, I'll look at implementing this.
18 | HS = 4          # Hesiod [Dyer 87]
19 | 
20 | # QCLASS values (section 3.2.5)
21 | 
22 | ANY = 255       # any class
23 | 
24 | 
25 | # Construct reverse mapping dictionary
26 | 
27 | _names = dir()
28 | classmap = {}
29 | for _name in _names:
30 |     if _name[0] != '_':
31 |         classmap[eval(_name)] = _name
32 | 
33 | 
34 | def classstr(klass):
35 |     if klass in classmap:
36 |         return classmap[klass]
37 |     else:
38 |         return repr(klass)
39 | 
40 | #
41 | # $Log: Class.py,v $
42 | # Revision 1.6  2002/04/23 12:52:19  anthonybaxter
43 | # cleanup whitespace.
44 | #
45 | # Revision 1.5  2002/03/19 12:41:33  anthonybaxter
46 | # tabnannied and reindented everything. 4 space indent, no tabs.
47 | # yay.
48 | #
49 | # Revision 1.4  2002/03/19 12:26:13  anthonybaxter
50 | # death to leading tabs.
51 | #
52 | # Revision 1.3  2001/08/09 09:08:55  anthonybaxter
53 | # added identifying header to top of each file
54 | #
55 | # Revision 1.2  2001/07/19 06:57:07  anthony
56 | # cvs keywords added
57 | #
58 | #
59 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googleplussearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import requests
 3 | import sys
 4 | import myparser
 5 | import re
 6 | 
 7 | 
 8 | class search_googleplus:
 9 | 
10 |     def __init__(self, word, limit):
11 |         self.word = word.replace(' ', '%20')
12 |         self.results = ""
13 |         self.totalresults = ""
14 |         self.server = "www.google.com"
15 |         self.hostname = "www.google.com"
16 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 |         self.quantity = "100"
18 |         self.limit = int(limit)
19 |         self.counter = 0
20 | 
21 |     def do_search(self):
22 |         try:
23 |             urly="https://" + self.server + "/search?num=100&start=" + str(self.counter) + "&hl=en&meta=&q=site%3Aplus.google.com%20intext%3A%22Works%20at%22%20" + self.word+ "%20-inurl%3Aphotos%20-inurl%3Aabout%20-inurl%3Aposts%20-inurl%3Aplusones"
24 |         except Exception, e:
25 |             print e
26 |         try:
27 |             headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:34.0) Gecko/20100101 Firefox/34.0'}
28 |             r=requests.get(urly,headers=headers)
29 |         except Exception,e:
30 |             print e
31 |         self.results = r.content 
32 |         self.totalresults += self.results
33 | 
34 |     def get_people(self):
35 |         rawres = myparser.parser(self.totalresults, self.word)
36 |         return rawres.people_googleplus()
37 | 
38 |     def process(self):
39 |         while (self.counter < self.limit):
40 |             self.do_search()
41 |             self.counter += 100
42 |             print "\tSearching " + str(self.counter) + " results.."
43 | 


--------------------------------------------------------------------------------
/lib/theHarvester/changelog.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | Changelog in 2.6:
 3 | -----------------
 4 | usage() improvement, CameronNemo.
 5 | Added Yahoo and Baidu search engines. Thanks to Tatanus
 6 | Added check for the existence of Requests library.
 7 | Fixed email regex to provide cleaner results. Thanks to Peter McAlpine
 8 | 
 9 | Changelog in 2.5:
10 | -----------------
11 | 
12 | 
13 | Changelog in 2.4:
14 | ------------------
15 | -Fixed Linkedin Parser
16 | -Fixed 123people
17 | -Added Dogpile Search engine (Marcus)
18 | -PEP8 compliant (Mario)
19 | -Fixed XML export (Marcus)
20 | -Expanded TLD list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt (Marcus)
21 | -DNS Bruteforce fixed (Tomas)
22 | -Added Google Custom Search Support - Need API Key to use it.
23 | 
24 | 
25 | 
26 | Changelog in 2.3:
27 | --------------
28 | -Fixed duplicates
29 | 
30 | Changelog in 2.2:
31 | ----------------
32 | -Added Jigsaw (www.jigsaw.com)
33 | -Added 123People (www.123people.com)
34 | -Added limit to google searches as the maximum results we can obtain is 1000
35 | -Removed SET, as service was discontinued by Google
36 | -Fixed parser to remove wrong results like emails starting with @
37 | 
38 | 
39 | Changelog in 2.1:
40 | ----------------
41 | -DNS Bruteforcer
42 | -DNS Reverse lookups
43 | -DNS TDL Expansion
44 | -SHODAN DB integration
45 | -HTML report
46 | -DNS server selection
47 | 
48 | 
49 | Changelog in 2.0:
50 | ----------------
51 | -Complete rewrite, more modular and easy to maintain
52 | -New sources (Exalead, Google-Profiles, Bing-Api)
53 | -Time delay between request, to prevent search engines from blocking our IP´s
54 | -You can start the search from the results page that you want, hence you can *resume* a search
55 | -Export to xml
56 | -All search engines harvesting
57 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/lazy.py:
--------------------------------------------------------------------------------
 1 | # $Id: lazy.py,v 1.5.2.1 2007/05/22 20:23:38 customdesigned Exp $
 2 | #
 3 | # This file is part of the pydns project.
 4 | # Homepage: http://pydns.sourceforge.net
 5 | #
 6 | # This code is covered by the standard Python License.
 7 | #
 8 | 
 9 | # routines for lazy people.
10 | import Base
11 | import string
12 | 
13 | 
14 | def revlookup(name):
15 |     "convenience routine for doing a reverse lookup of an address"
16 |     if Base.defaults['server'] == []:
17 |         Base.DiscoverNameServers()
18 |     a = string.split(name, '.')
19 |     a.reverse()
20 |     b = string.join(a, '.') + '.in-addr.arpa'
21 |     # this will only return one of any records returned.
22 |     return Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
23 | 
24 | 
25 | def mxlookup(name):
26 |     """
27 |     convenience routine for doing an MX lookup of a name. returns a
28 |     sorted list of (preference, mail exchanger) records
29 |     """
30 |     if Base.defaults['server'] == []:
31 |         Base.DiscoverNameServers()
32 |     a = Base.DnsRequest(name, qtype='mx').req().answers
33 |     l = sorted(map(lambda x: x['data'], a))
34 |     return l
35 | 
36 | #
37 | # $Log: lazy.py,v $
38 | # Revision 1.5.2.1  2007/05/22 20:23:38  customdesigned
39 | # Lazy call to DiscoverNameServers
40 | #
41 | # Revision 1.5  2002/05/06 06:14:38  anthonybaxter
42 | # reformat, move import to top of file.
43 | #
44 | # Revision 1.4  2002/03/19 12:41:33  anthonybaxter
45 | # tabnannied and reindented everything. 4 space indent, no tabs.
46 | # yay.
47 | #
48 | # Revision 1.3  2001/08/09 09:08:55  anthonybaxter
49 | # added identifying header to top of each file
50 | #
51 | # Revision 1.2  2001/07/19 06:57:07  anthony
52 | # cvs keywords added
53 | #
54 | #
55 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/asksearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 | 
 7 | 
 8 | class search_ask:
 9 | 
10 |     def __init__(self, word, limit):
11 |         self.word = word.replace(' ', '%20')
12 |         self.results = ""
13 |         self.totalresults = ""
14 |         self.server = "www.ask.com"
15 |         self.hostname = "www.ask.com"
16 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 |         self.quantity = "100"
18 |         self.limit = int(limit)
19 |         self.counter = 0
20 | 
21 |     def do_search(self):
22 |         h = httplib.HTTP(self.server)
23 |         h.putrequest(
24 |             'GET',
25 |             "/web?q=%40" +
26 |             self.word +
27 |             "&pu=100&page=" +
28 |             self.counter)
29 |         h.putheader('User-agent', self.userAgent)
30 |         h.endheaders()
31 |         returncode, returnmsg, headers = h.getreply()
32 |         self.results = h.getfile().read()
33 |         self.totalresults += self.results
34 | 
35 |     def check_next(self):
36 |         renext = re.compile('>  Next  <')
37 |         nextres = renext.findall(self.results)
38 |         if nextres != []:
39 |             nexty = "1"
40 |         else:
41 |             nexty = "0"
42 |         return nexty
43 | 
44 |     def get_people(self):
45 |         rawres = myparser.parser(self.totalresults, self.word)
46 |         return rawres.people_jigsaw()
47 | 
48 |     def process(self):
49 |         while (self.counter < self.limit):
50 |             self.do_search()
51 |             more = self.check_next()
52 |             if more == "1":
53 |                 self.counter += 100
54 |             else:
55 |                 break
56 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # $Id: __init__.py,v 1.8.2.2 2007/05/22 21:06:52 customdesigned Exp $
 3 | #
 4 | # This file is part of the pydns project.
 5 | # Homepage: http://pydns.sourceforge.net
 6 | #
 7 | # This code is covered by the standard Python License.
 8 | #
 9 | 
10 | # __init__.py for DNS class.
11 | 
12 | __version__ = '2.3.1'
13 | 
14 | import Type
15 | import Opcode
16 | import Status
17 | import Class
18 | from Base import DnsRequest, DNSError
19 | from Lib import DnsResult
20 | from Base import *
21 | from Lib import *
22 | Error = DNSError
23 | from lazy import *
24 | Request = DnsRequest
25 | Result = DnsResult
26 | 
27 | #
28 | # $Log: __init__.py,v $
29 | # Revision 1.8.2.2  2007/05/22 21:06:52  customdesigned
30 | # utf-8 in __init__.py
31 | #
32 | # Revision 1.8.2.1  2007/05/22 20:39:20  customdesigned
33 | # Release 2.3.1
34 | #
35 | # Revision 1.8  2002/05/06 06:17:49  anthonybaxter
36 | # found that the old README file called itself release 2.2. So make
37 | # this one 2.3...
38 | #
39 | # Revision 1.7  2002/05/06 06:16:15  anthonybaxter
40 | # make some sort of reasonable version string. releasewards ho!
41 | #
42 | # Revision 1.6  2002/03/19 13:05:02  anthonybaxter
43 | # converted to class based exceptions (there goes the python1.4 compatibility :)
44 | #
45 | # removed a quite gross use of 'eval()'.
46 | #
47 | # Revision 1.5  2002/03/19 12:41:33  anthonybaxter
48 | # tabnannied and reindented everything. 4 space indent, no tabs.
49 | # yay.
50 | #
51 | # Revision 1.4  2001/11/26 17:57:51  stroeder
52 | # Added __version__
53 | #
54 | # Revision 1.3  2001/08/09 09:08:55  anthonybaxter
55 | # added identifying header to top of each file
56 | #
57 | # Revision 1.2  2001/07/19 06:57:07  anthony
58 | # cvs keywords added
59 | #
60 | #
61 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/jigsaw.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 |                 # http://www.jigsaw.com/SearchAcrossCompanies.xhtml?opCode=refresh&rpage=4&mode=0&cnCountry=&order=0&orderby=0&cmName=accuvant&cnDead=false&cnExOwned=false&count=0&screenNameType=0&screenName=&omitScreenNameType=0&omitScreenName=&companyId=0&estimatedCount=277&rowsPerPage=50
 7 | 
 8 | 
 9 | class search_jigsaw:
10 | 
11 |     def __init__(self, word, limit):
12 |         self.word = word.replace(' ', '%20')
13 |         self.results = ""
14 |         self.totalresults = ""
15 |         self.server = "www.jigsaw.com"
16 |         self.hostname = "www.jigsaw.com"
17 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
18 |         self.quantity = "100"
19 |         self.limit = int(limit)
20 |         self.counter = 0
21 | 
22 |     def do_search(self):
23 |         h = httplib.HTTP(self.server)
24 |         h.putrequest(
25 |             'GET',
26 |             "/FreeTextSearch.xhtml?opCode=search&autoSuggested=True&freeText=" +
27 |             self.word)
28 |         h.putheader('User-agent', self.userAgent)
29 |         h.endheaders()
30 |         returncode, returnmsg, headers = h.getreply()
31 |         self.results = h.getfile().read()
32 |         self.totalresults += self.results
33 | 
34 |     def check_next(self):
35 |         renext = re.compile('>  Next  <')
36 |         nextres = renext.findall(self.results)
37 |         if nextres != []:
38 |             nexty = "1"
39 |         else:
40 |             nexty = "0"
41 |         return nexty
42 | 
43 |     def get_people(self):
44 |         rawres = myparser.parser(self.totalresults, self.word)
45 |         return rawres.people_jigsaw()
46 | 
47 |     def process(self):
48 |         while (self.counter < self.limit):
49 |             self.do_search()
50 |             more = self.check_next()
51 |             if more == "1":
52 |                 self.counter += 100
53 |             else:
54 |                 break
55 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/wps.py:
--------------------------------------------------------------------------------
 1 | """
 2 | WiFi Positioning System
 3 | 
 4 | Wrappers around the SkyHook and Google Locations APIs to resolve
 5 | wireless routers' MAC addresses (BSSID) to physical locations.
 6 | """
 7 | try:
 8 |     from json import dumps, loads
 9 | except:
10 |     from simplejson import dumps, loads
11 | from urllib2 import Request, urlopen
12 | from urllib import urlencode
13 | 
14 | 
15 | class Skyhook:
16 | 
17 |     """Not yet ready for production, use the GoogleLocation class instead."""
18 | 
19 |     def __init__(self, username='api', realm='shodan'):
20 |         self.username = username
21 |         self.realm = realm
22 |         self.url = 'https://api.skyhookwireless.com/wps2/location'
23 | 
24 |     def locate(self, mac):
25 |         # Remove the ':'
26 |         mac = mac.replace(':', '')
27 |         print mac
28 |         data = """<?xml version='1.0'?>
29 |         <LocationRQ xmlns='http://skyhookwireless.com/wps/2005' version='2.6' street-address-lookup='full'>
30 |           <authentication version='2.0'>
31 |             <simple>
32 |               <username>%s</username>
33 |               <realm>%s</realm>
34 |             </simple>
35 |           </authentication>
36 |           <access-point>
37 |             <mac>%s</mac>
38 |             <signal-strength>-50</signal-strength>
39 |           </access-point>
40 |         </LocationRQ>""" % (self.username, self.realm, mac)
41 |         request = Request(
42 |             url=self.url,
43 |             data=data,
44 |             headers={'Content-type': 'text/xml'})
45 |         response = urlopen(request)
46 |         result = response.read()
47 |         return result
48 | 
49 | 
50 | class GoogleLocation:
51 | 
52 |     def __init__(self):
53 |         self.url = 'http://www.google.com/loc/json'
54 | 
55 |     def locate(self, mac):
56 |         data = {
57 |             'version': '1.1.0',
58 |             'request_address': True,
59 |             'wifi_towers': [{
60 |                 'mac_address': mac,
61 |                 'ssid': 'g',
62 |                 'signal_strength': -72
63 |             }]
64 |         }
65 |         response = urlopen(self.url, dumps(data))
66 |         data = response.read()
67 |         return loads(data)
68 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Status.py:
--------------------------------------------------------------------------------
 1 | """
 2 |  $Id: Status.py,v 1.7 2002/04/23 12:52:19 anthonybaxter Exp $
 3 | 
 4 |  This file is part of the pydns project.
 5 |  Homepage: http://pydns.sourceforge.net
 6 | 
 7 |  This code is covered by the standard Python License.
 8 | 
 9 |  Status values in message header
10 | """
11 | 
12 | NOERROR = 0  # No Error                           [RFC 1035]
13 | FORMERR = 1  # Format Error                       [RFC 1035]
14 | SERVFAIL = 2  # Server Failure                     [RFC 1035]
15 | NXDOMAIN = 3  # Non-Existent Domain                [RFC 1035]
16 | NOTIMP = 4  # Not Implemented                    [RFC 1035]
17 | REFUSED = 5  # Query Refused                      [RFC 1035]
18 | YXDOMAIN = 6  # Name Exists when it should not     [RFC 2136]
19 | YXRRSET = 7  # RR Set Exists when it should not   [RFC 2136]
20 | NXRRSET = 8  # RR Set that should exist does not  [RFC 2136]
21 | NOTAUTH = 9  # Server Not Authoritative for zone  [RFC 2136]
22 | NOTZONE = 10  # Name not contained in zone         [RFC 2136]
23 | BADVERS = 16  # Bad OPT Version                    [RFC 2671]
24 | BADSIG = 16  # TSIG Signature Failure             [RFC 2845]
25 | BADKEY = 17  # Key not recognized                 [RFC 2845]
26 | BADTIME = 18  # Signature out of time window       [RFC 2845]
27 | BADMODE = 19  # Bad TKEY Mode                      [RFC 2930]
28 | BADNAME = 20  # Duplicate key name                 [RFC 2930]
29 | BADALG = 21  # Algorithm not supported            [RFC 2930]
30 | 
31 | # Construct reverse mapping dictionary
32 | 
33 | _names = dir()
34 | statusmap = {}
35 | for _name in _names:
36 |     if _name[0] != '_':
37 |         statusmap[eval(_name)] = _name
38 | 
39 | 
40 | def statusstr(status):
41 |     if status in statusmap:
42 |         return statusmap[status]
43 |     else:
44 |         return repr(status)
45 | 
46 | #
47 | # $Log: Status.py,v $
48 | # Revision 1.7  2002/04/23 12:52:19  anthonybaxter
49 | # cleanup whitespace.
50 | #
51 | # Revision 1.6  2002/04/23 10:57:57  anthonybaxter
52 | # update to complete the list of response codes.
53 | #
54 | # Revision 1.5  2002/03/19 12:41:33  anthonybaxter
55 | # tabnannied and reindented everything. 4 space indent, no tabs.
56 | # yay.
57 | #
58 | # Revision 1.4  2002/03/19 12:26:13  anthonybaxter
59 | # death to leading tabs.
60 | #
61 | # Revision 1.3  2001/08/09 09:08:55  anthonybaxter
62 | # added identifying header to top of each file
63 | #
64 | # Revision 1.2  2001/07/19 06:57:07  anthony
65 | # cvs keywords added
66 | #
67 | #
68 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/yandexsearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 | import time
 7 | 
 8 | 
 9 | class search_yandex:
10 | 
11 |     def __init__(self, word, limit, start):
12 |         self.word = word
13 |         self.results = ""
14 |         self.totalresults = ""
15 |         self.server = "yandex.com"
16 |         self.hostname = "yandex.com"
17 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
18 |         self.limit = limit
19 |         self.counter = start
20 | 
21 |     def do_search(self):
22 |         h = httplib.HTTP(self.server)
23 |         h.putrequest('GET', "/search?text=%40" + self.word +
24 |                      "&numdoc=50&lr=" + str(self.counter))
25 |         h.putheader('Host', self.hostname)
26 |         h.putheader('User-agent', self.userAgent)
27 |         h.endheaders()
28 |         returncode, returnmsg, headers = h.getreply()
29 |         self.results = h.getfile().read()
30 |         self.totalresults += self.results
31 |         print self.results
32 | 
33 |     def do_search_files(self, files):  # TODO
34 |         h = httplib.HTTP(self.server)
35 |         h.putrequest('GET', "/search?text=%40" + self.word +
36 |                      "&numdoc=50&lr=" + str(self.counter))
37 |         h.putheader('Host', self.hostname)
38 |         h.putheader('User-agent', self.userAgent)
39 |         h.endheaders()
40 |         returncode, returnmsg, headers = h.getreply()
41 |         self.results = h.getfile().read()
42 |         self.totalresults += self.results
43 | 
44 |     def check_next(self):
45 |         renext = re.compile('topNextUrl')
46 |         nextres = renext.findall(self.results)
47 |         if nextres != []:
48 |             nexty = "1"
49 |             print str(self.counter)
50 |         else:
51 |             nexty = "0"
52 |         return nexty
53 | 
54 |     def get_emails(self):
55 |         rawres = myparser.parser(self.totalresults, self.word)
56 |         return rawres.emails()
57 | 
58 |     def get_hostnames(self):
59 |         rawres = myparser.parser(self.totalresults, self.word)
60 |         return rawres.hostnames()
61 | 
62 |     def get_files(self):
63 |         rawres = myparser.parser(self.totalresults, self.word)
64 |         return rawres.fileurls(self.files)
65 | 
66 |     def process(self):
67 |         while self.counter <= self.limit:
68 |             self.do_search()
69 |             self.counter += 50
70 |             print "Searching " + str(self.counter) + " results..."
71 | 
72 |     def process_files(self, files):
73 |         while self.counter < self.limit:
74 |             self.do_search_files(files)
75 |             time.sleep(0.3)
76 |             self.counter += 50
77 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googlesearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import sys
 3 | import myparser
 4 | import re
 5 | import time
 6 | import requests
 7 | 
 8 | 
 9 | class search_google:
10 | 
11 |     def __init__(self, word, limit, start):
12 |         self.word = word
13 |         self.results = ""
14 |         self.totalresults = ""
15 |         self.server = "www.google.com"
16 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
17 |         self.quantity = "100"
18 |         self.limit = limit
19 |         self.counter = start
20 |   
21 |     def do_search(self):
22 |         try:
23 |             urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=%40\"" + self.word + "\""
24 |         except Exception, e:
25 |             print e
26 |         try:
27 |             r=requests.get(urly)
28 |         except Exception,e:
29 |             print e
30 |         self.results = r.content 
31 |         self.totalresults += self.results
32 | 
33 | 
34 |     def do_search_profiles(self):
35 |         try:
36 |             urly="http://" + self.server + "/search?num=" + self.quantity + "&start=" + str(self.counter) + "&hl=en&meta=&q=site:www.google.com%20intitle:\"Google%20Profile\"%20\"Companies%20I%27ve%20worked%20for\"%20\"at%20" + self.word + "\""
37 |         except Exception, e:
38 |             print e
39 |         try:
40 |             r=requests.get(urly)
41 |         except Exception,e:
42 |             print e
43 |         self.results = r.content 
44 | 
45 |         #'&hl=en&meta=&q=site:www.google.com%20intitle:"Google%20Profile"%20"Companies%20I%27ve%20worked%20for"%20"at%20' + self.word + '"')
46 |         self.totalresults += self.results
47 | 
48 |     def get_emails(self):
49 |         rawres = myparser.parser(self.totalresults, self.word)
50 |         return rawres.emails()
51 | 
52 |     def get_hostnames(self):
53 |         rawres = myparser.parser(self.totalresults, self.word)
54 |         return rawres.hostnames()
55 | 
56 |     def get_files(self):
57 |         rawres = myparser.parser(self.totalresults, self.word)
58 |         return rawres.fileurls(self.files)
59 | 
60 |     def get_profiles(self):
61 |         rawres = myparser.parser(self.totalresults, self.word)
62 |         return rawres.profiles()
63 | 
64 |     def process(self):
65 |         while self.counter <= self.limit and self.counter <= 1000:
66 |             self.do_search()
67 |             #more = self.check_next()
68 |             time.sleep(1)
69 |             print "\tSearching " + str(self.counter) + " results..."
70 |             self.counter += 100
71 | 
72 |             
73 |     def process_profiles(self):
74 |         while self.counter < self.limit:
75 |             self.do_search_profiles()
76 |             time.sleep(0.3)
77 |             self.counter += 100
78 |             print "\tSearching " + str(self.counter) + " results..."
79 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/exaleadsearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 | import time
 7 | 
 8 | 
 9 | class search_exalead:
10 | 
11 |     def __init__(self, word, limit, start):
12 |         self.word = word
13 |         self.files = "pdf"
14 |         self.results = ""
15 |         self.totalresults = ""
16 |         self.server = "www.exalead.com"
17 |         self.hostname = "www.exalead.com"
18 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/4.0"
19 |         self.limit = limit
20 |         self.counter = start
21 | 
22 |     def do_search(self):
23 |         h = httplib.HTTP(self.server)
24 |         h.putrequest('GET', "/search/web/results/?q=%40" + self.word +
25 |                      "&elements_per_page=50&start_index=" + str(self.counter))
26 |         h.putheader('Host', self.hostname)
27 |         h.putheader(
28 |             'Referer',
29 |             "http://" +
30 |             self.hostname +
31 |             "/search/web/results/?q=%40" +
32 |             self.word)
33 |         h.putheader('User-agent', self.userAgent)
34 |         h.endheaders()
35 |         returncode, returnmsg, headers = h.getreply()
36 |         self.results = h.getfile().read()
37 |         self.totalresults += self.results
38 | 
39 |     def do_search_files(self, files):
40 |         h = httplib.HTTP(self.server)
41 |         h.putrequest(
42 |             'GET',
43 |             "search/web/results/?q=" +
44 |             self.word +
45 |             "filetype:" +
46 |             self.files +
47 |             "&elements_per_page=50&start_index=" +
48 |             self.counter)
49 |         h.putheader('Host', self.hostname)
50 |         h.putheader('User-agent', self.userAgent)
51 |         h.endheaders()
52 |         returncode, returnmsg, headers = h.getreply()
53 |         self.results = h.getfile().read()
54 |         self.totalresults += self.results
55 | 
56 |     def check_next(self):
57 |         renext = re.compile('topNextUrl')
58 |         nextres = renext.findall(self.results)
59 |         if nextres != []:
60 |             nexty = "1"
61 |             print str(self.counter)
62 |         else:
63 |             nexty = "0"
64 |         return nexty
65 | 
66 |     def get_emails(self):
67 |         rawres = myparser.parser(self.totalresults, self.word)
68 |         return rawres.emails()
69 | 
70 |     def get_hostnames(self):
71 |         rawres = myparser.parser(self.totalresults, self.word)
72 |         return rawres.hostnames()
73 | 
74 |     def get_files(self):
75 |         rawres = myparser.parser(self.totalresults, self.word)
76 |         return rawres.fileurls(self.files)
77 | 
78 |     def process(self):
79 |         while self.counter <= self.limit:
80 |             self.do_search()
81 |             self.counter += 50
82 |             print "\tSearching " + str(self.counter) + " results..."
83 | 
84 |     def process_files(self, files):
85 |         while self.counter < self.limit:
86 |             self.do_search_files(files)
87 |             time.sleep(1)
88 |             more = self.check_next()
89 |             if more == "1":
90 |                 self.counter += 50
91 |             else:
92 |                 break
93 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/bingsearch.py:
--------------------------------------------------------------------------------
 1 | import string
 2 | import httplib
 3 | import sys
 4 | import myparser
 5 | import re
 6 | import time
 7 | 
 8 | 
 9 | class search_bing:
10 | 
11 |     def __init__(self, word, limit, start):
12 |         self.word = word.replace(' ', '%20')
13 |         self.results = ""
14 |         self.totalresults = ""
15 |         self.server = "www.bing.com"
16 |         self.apiserver = "api.search.live.net"
17 |         self.hostname = "www.bing.com"
18 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
19 |         self.quantity = "50"
20 |         self.limit = int(limit)
21 |         self.bingApi = ""
22 |         self.counter = start
23 | 
24 |     def do_search(self):
25 |         h = httplib.HTTP(self.server)
26 |         h.putrequest('GET', "/search?q=%40" + self.word +
27 |                      "&count=50&first=" + str(self.counter))
28 |         h.putheader('Host', self.hostname)
29 |         h.putheader('Cookie', 'SRCHHPGUSR=ADLT=DEMOTE&NRSLT=50')
30 |         h.putheader('Accept-Language', 'en-us,en')
31 |         h.putheader('User-agent', self.userAgent)
32 |         h.endheaders()
33 |         returncode, returnmsg, headers = h.getreply()
34 |         self.results = h.getfile().read()
35 |         self.totalresults += self.results
36 | 
37 |     def do_search_api(self):
38 |         h = httplib.HTTP(self.apiserver)
39 |         h.putrequest('GET', "/xml.aspx?Appid=" + self.bingApi + "&query=%40" +
40 |                      self.word + "&sources=web&web.count=40&web.offset=" + str(self.counter))
41 |         h.putheader('Host', "api.search.live.net")
42 |         h.putheader('User-agent', self.userAgent)
43 |         h.endheaders()
44 |         returncode, returnmsg, headers = h.getreply()
45 |         self.results = h.getfile().read()
46 |         self.totalresults += self.results
47 | 
48 |     def do_search_vhost(self):
49 |         h = httplib.HTTP(self.server)
50 |         h.putrequest('GET', "/search?q=ip:" + self.word +
51 |                      "&go=&count=50&FORM=QBHL&qs=n&first=" + str(self.counter))
52 |         h.putheader('Host', self.hostname)
53 |         h.putheader(
54 |             'Cookie', 'mkt=en-US;ui=en-US;SRCHHPGUSR=NEWWND=0&ADLT=DEMOTE&NRSLT=50')
55 |         h.putheader('Accept-Language', 'en-us,en')
56 |         h.putheader('User-agent', self.userAgent)
57 |         h.endheaders()
58 |         returncode, returnmsg, headers = h.getreply()
59 |         self.results = h.getfile().read()
60 |         self.totalresults += self.results
61 | 
62 |     def get_emails(self):
63 |         rawres = myparser.parser(self.totalresults, self.word)
64 |         return rawres.emails()
65 | 
66 |     def get_hostnames(self):
67 |         rawres = myparser.parser(self.totalresults, self.word)
68 |         return rawres.hostnames()
69 | 
70 |     def get_allhostnames(self):
71 |         rawres = myparser.parser(self.totalresults, self.word)
72 |         return rawres.hostnames_all()
73 | 
74 |     def process(self, api):
75 |         if api == "yes":
76 |             if self.bingApi == "":
77 |                 print "Please insert your API key in the discovery/bingsearch.py"
78 |                 sys.exit()
79 |         while (self.counter < self.limit):
80 |             if api == "yes":
81 |                 self.do_search_api()
82 |                 time.sleep(0.3)
83 |             else:
84 |                 self.do_search()
85 |                 time.sleep(1)
86 |             self.counter += 50
87 |             print "\tSearching " + str(self.counter) + " results..."
88 | 
89 |     def process_vhost(self):
90 |         # Maybe it is good to use other limit for this.
91 |         while (self.counter < self.limit):
92 |             self.do_search_vhost()
93 |             self.counter += 50
94 | 


--------------------------------------------------------------------------------
/lib/theHarvester/README:
--------------------------------------------------------------------------------
  1 | *******************************************************************
  2 | *                                                                 *
  3 | * | |_| |__   ___    /\  /\__ _ _ ____   _____  ___| |_ ___ _ __  *
  4 | * | __| '_ \ / _ \  / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *
  5 | * | |_| | | |  __/ / __  / (_| | |   \ V /  __/\__ \ ||  __/ |    *
  6 | *  \__|_| |_|\___| \/ /_/ \__,_|_|    \_/ \___||___/\__\___|_|    *
  7 | *                                                                 *
  8 | * TheHarvester Ver. 2.6                                          *
  9 | * Coded by Christian Martorella                                   *
 10 | * Edge-Security Research                                          *
 11 | * cmartorella@edge-security.com                                   *
 12 | *******************************************************************
 13 | 
 14 | What is this?
 15 | -------------
 16 | 
 17 | theHarvester is a tool for gathering e-mail accounts, subdomain names, virtual
 18 | hosts, open ports/ banners, and employee names from different public sources
 19 | (search engines, pgp key servers).
 20 | 
 21 | Is a really simple tool, but very effective for the early stages of a penetration
 22 | test or just to know the visibility of your company in the Internet.
 23 | 
 24 | The sources are:
 25 | 
 26 | Passive:
 27 | --------
 28 | -google: google search engine  - www.google.com
 29 | 
 30 | -googleCSE: google custom search engine
 31 | 
 32 | -google-profiles: google search engine, specific search for Google profiles
 33 | 
 34 | -bing: microsoft search engine  - www.bing.com
 35 | 
 36 | -bingapi: microsoft search engine, through the API (you need to add your Key in
 37 |           the discovery/bingsearch.py file)
 38 | 
 39 | -pgp: pgp key server - pgp.rediris.es
 40 | 
 41 | -linkedin: google search engine, specific search for Linkedin users
 42 | 
 43 | 
 44 | -vhost: Bing virtual hosts search
 45 | 
 46 | -twitter: twitter accounts related to an specific domain (uses google search)
 47 | 
 48 | -googleplus: users that works in target company (uses google search)
 49 | 
 50 | -yahoo: Yahoo search engine
 51 | 
 52 | -baidu: Baidu search engine
 53 | 
 54 | -shodan: Shodan Computer search engine, will search for ports and banner of the
 55 |          discovered hosts  (http://www.shodanhq.com/)
 56 | 
 57 | 
 58 | Active:
 59 | -------
 60 | -DNS brute force: this plugin will run a dictionary brute force enumeration
 61 | -DNS reverse lookup: reverse lookup of ip´s discovered in order to find hostnames
 62 | -DNS TDL expansion: TLD dictionary brute force enumeration
 63 | 
 64 | 
 65 | Modules that need API keys to work:
 66 | ----------------------------------
 67 | -googleCSE: You need to create a Google Custom Search engine(CSE), and add your
 68 |  Google API key and CSE ID in the plugin (discovery/googleCSE.py)
 69 | -shodan: You need to provide your API key in discovery/shodansearch.py
 70 | 
 71 | 
 72 | Dependencies:
 73 | ------------
 74 | -Requests library (http://docs.python-requests.org/en/latest/)
 75 | `pip install requests`
 76 | 
 77 | 
 78 | Changelog in 2.6:
 79 | ------------------
 80 | -Added Yahoo and Baidu search engines. Thanks to Tatanus
 81 | -Added check for the existence of Requests library.
 82 | -Fixed email regex to provide cleaner results. Thanks to Peter McAlpine
 83 | 
 84 | Changelog in 2.5:
 85 | -----------------
 86 | -Replaced httplib by Requests http library (for Google related)
 87 | -Fixed Google searches
 88 | 
 89 | 
 90 | Comments? Bugs? Requests?
 91 | ------------------------
 92 | cmartorella@edge-security.com
 93 | 
 94 | Updates:
 95 | --------
 96 | https://github.com/laramies/theHarvester
 97 | 
 98 | Thanks:
 99 | -------
100 | John Matherly -  SHODAN project
101 | Lee Baird for suggestions and bugs reporting
102 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/googleCSE.py:
--------------------------------------------------------------------------------
  1 | import string
  2 | import httplib
  3 | import sys
  4 | import myparser
  5 | import re
  6 | import time
  7 | 
  8 | 
  9 | class search_googleCSE:
 10 | 
 11 |     def __init__(self, word, limit, start):
 12 |         self.word = word
 13 |         self.files = "pdf"
 14 |         self.results = ""
 15 |         self.totalresults = ""
 16 |         self.server = "www.googleapis.com"
 17 |         self.hostname = "www.googleapis.com"
 18 |         self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
 19 |         self.quantity = "10"
 20 |         self.limit = limit
 21 |         self.counter = 1
 22 |         self.api_key = ""
 23 |         self.cse_id = ""
 24 |         self.lowRange = start 
 25 |         self.highRange = start+100
 26 | 
 27 |     def do_search(self):
 28 |         h = httplib.HTTPS(self.server)
 29 |         h.putrequest('GET', "/customsearch/v1?key=" + self.api_key +"&highRange=" + str(self.highRange) + "&lowRange=" + str(self.lowRange) + "&cx=" +self.cse_id +
 30 |                      "&start=" + str(self.counter) + "&q=%40\"" + self.word + "\"")
 31 |         h.putheader('Host', self.server)
 32 |         h.putheader('User-agent', self.userAgent)
 33 |         h.endheaders()
 34 |         returncode, returnmsg, headers = h.getreply()
 35 |         self.results = h.getfile().read()
 36 |         self.totalresults += self.results
 37 | 
 38 |     def do_search_files(self):
 39 |         h = httplib.HTTPS(self.server)
 40 |         h.putrequest('GET', "/customsearch/v1?key=" + self.api_key +"&highRange=" + str(self.highRange) + "&lowRange=" + str(self.lowRange) + "&cx=" +self.cse_id +
 41 |                      "&start=" + str(self.counter) + "&q=filetype:" + files +"%20site:" + self.word)
 42 |         h.putheader('Host', self.server)
 43 |         h.putheader('User-agent', self.userAgent)
 44 |         h.endheaders()
 45 |         returncode, returnmsg, headers = h.getreply()
 46 |         self.results = h.getfile().read()
 47 |         self.totalresults += self.results
 48 | 
 49 | 
 50 |     def check_next(self):
 51 |         renext = re.compile('>  Next  <')
 52 |         nextres = renext.findall(self.results)
 53 |         if nextres != []:
 54 |             nexty = "1"
 55 |         else:
 56 |             nexty = "0"
 57 |         return nexty
 58 | 
 59 |     def get_emails(self):
 60 |         rawres = myparser.parser(self.totalresults, self.word)
 61 |         return rawres.emails()
 62 | 
 63 |     def get_hostnames(self):
 64 |         rawres = myparser.parser(self.totalresults, self.word)
 65 |         return rawres.hostnames()
 66 | 
 67 |     def get_files(self):
 68 |         rawres = myparser.parser(self.totalresults, self.word)
 69 |         return rawres.fileurls(self.files)
 70 | 
 71 |    
 72 |     def process(self):
 73 |         tracker=self.counter + self.lowRange
 74 |         while tracker <= self.limit:
 75 |             self.do_search()
 76 |             #time.sleep(1)
 77 |             ESC=chr(27)
 78 |             sys.stdout.write(ESC + '[2K' + ESC+'[G')
 79 |             sys.stdout.write("\r\t" + "Searching  " + str(self.counter+self.lowRange) + " results ..." )
 80 |             sys.stdout.flush()
 81 |             #print "\tSearching " + str(self.counter+self.lowRange) + " results...\t\t\t\t\t\r"
 82 |             if self.counter == 101:
 83 |                 self.counter = 1
 84 |                 self.lowRange +=100
 85 |                 self.highRange +=100
 86 |             else:
 87 |                 self.counter += 10
 88 |             tracker=self.counter + self.lowRange
 89 |         
 90 |     def store_results(self):
 91 |              filename = "debug_results.txt"
 92 |              file = open(filename, 'w')
 93 |              file.write(self.totalresults)
 94 | 
 95 | 
 96 |     def process_files(self, files):
 97 |         while self.counter <= self.limit:
 98 |             self.do_search_files(files)
 99 |             time.sleep(1)
100 |             self.counter += 100
101 |             print "\tSearching " + str(self.counter) + " results..."
102 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dnssearch-threads.py:
--------------------------------------------------------------------------------
  1 | import IPy
  2 | import DNS
  3 | import string
  4 | import socket
  5 | import sys
  6 | 
  7 | 
  8 | class dns_reverse():
  9 | 
 10 |     def __init__(self, range, verbose=True):
 11 |         self.range = range
 12 |         self.iplist = ''
 13 |         self.results = []
 14 |         self.verbose = verbose
 15 |         try:
 16 |             DNS.ParseResolvConf("/etc/resolv.conf")
 17 |             nameserver = DNS.defaults['server'][0]
 18 |         except:
 19 |             print "Error in DNS resolvers"
 20 |             sys.exit()
 21 | 
 22 |     def run(self, host):
 23 |         a = string.split(host, '.')
 24 |         a.reverse()
 25 |         b = string.join(a, '.') + '.in-addr.arpa'
 26 |         nameserver = DNS.defaults['server'][0]
 27 |         if self.verbose:
 28 |             ESC = chr(27)
 29 |             sys.stdout.write(ESC + '[2K' + ESC + '[G')
 30 |             sys.stdout.write("\r" + host)
 31 |             sys.stdout.flush()
 32 |         try:
 33 |             name = DNS.Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
 34 |             return host + ":" + name
 35 |         except:
 36 |             pass
 37 | 
 38 |     def get_ip_list(self, ips):
 39 |         """Generates the list of ips to reverse"""
 40 |         try:
 41 |             list = IPy.IP(ips)
 42 |         except:
 43 |             print "Error in IP format, check the input and try again. (Eg. 192.168.1.0/24)"
 44 |             sys.exit()
 45 |         name = []
 46 |         for x in list:
 47 |             name.append(str(x))
 48 |         return name
 49 | 
 50 |     def list(self):
 51 |         self.iplist = self.get_ip_list(self.range)
 52 |         return self.iplist
 53 | 
 54 |     def process(self):
 55 |         for x in self.iplist:
 56 |             host = self.run(x)
 57 |             if host is not None:
 58 |                 self.results.append(host)
 59 |         return self.results
 60 | 
 61 | 
 62 | class dns_force():
 63 | 
 64 |     def __init__(self, domain, dnsserver, verbose=False):
 65 |         self.domain = domain
 66 |         self.server = dnsserver
 67 |         self.file = "dns-names.txt"
 68 |         self.subdo = False
 69 |         self.verbose = verbose
 70 |         try:
 71 |             f = open(self.file, "r")
 72 |         except:
 73 |             print "Error opening dns dictionary file"
 74 |             sys.exit()
 75 |         self.list = f.readlines()
 76 | 
 77 |     def getdns(self, domain):
 78 |         DNS.ParseResolvConf("/etc/resolv.conf")
 79 |         nameserver = DNS.defaults['server'][0]
 80 |         dom = domain
 81 |         if self.subdo == True:
 82 |             dom = domain.split(".")
 83 |             dom.pop(0)
 84 |             rootdom = ".".join(dom)
 85 |         else:
 86 |             rootdom = dom
 87 |         if self.server == False:
 88 |             r = DNS.Request(rootdom, qtype='SOA').req()
 89 |             primary, email, serial, refresh, retry, expire, minimum = r.answers[
 90 |                 0]['data']
 91 |             test = DNS.Request(rootdom, qtype='NS', server=primary, aa=1).req()
 92 |         if test.header['status'] != "NOERROR":
 93 |             print "Error"
 94 |             sys.exit()
 95 |         self.nameserver = test.answers[0]['data']
 96 |         return self.nameserver
 97 | 
 98 |     def run(self, host):
 99 |         self.nameserver = self.getdns(self.domain)
100 |         hostname = str(host.split("\n")[0]) + "." + str(self.domain)
101 |         # nameserver=DNS.defaults['server'][0]
102 |         if self.verbose:
103 |             ESC = chr(27)
104 |             sys.stdout.write(ESC + '[2K' + ESC + '[G')
105 |             sys.stdout.write("\r" + hostname)
106 |             sys.stdout.flush()
107 |         try:
108 |             test = DNS.Request(
109 |                 hostname,
110 |                 qtype='a',
111 |                 server=self.nameserver).req(
112 |             )
113 |             hostip = test.answers[0]['data']
114 |             return hostip + ":" + hostname
115 |         except Exception as e:
116 |             pass
117 | 
118 |     def process(self):
119 |         results = []
120 |         for x in self.list:
121 |             host = self.run(x)
122 |             if host is not None:
123 |                 results.append(host)
124 |         return results
125 | 


--------------------------------------------------------------------------------
/lib/htmlExport.py:
--------------------------------------------------------------------------------
  1 | from lib import markup
  2 | from lib import graphs
  3 | import re
  4 | 
  5 | 
  6 | class htmlExport():
  7 | 
  8 |     def __init__(self, users, hosts, vhosts, dnsres,
  9 |                  dnsrev, file, domain, shodan, tldres):
 10 |         self.users = users
 11 |         self.hosts = hosts
 12 |         self.vhost = vhosts
 13 |         self.fname = file
 14 |         self.dnsres = dnsres
 15 |         self.dnsrev = dnsrev
 16 |         self.domain = domain
 17 |         self.shodan = shodan
 18 |         self.tldres = tldres
 19 |         self.style = ""
 20 | 
 21 |     def styler(self):
 22 |         a = """<style type='text/css'>body {
 23 | 			 background: #FFFFFF  top no-repeat;
 24 | 		 }
 25 | 
 26 | 		h1 { font-family: arial, Times New Roman, times-roman, georgia, serif;
 27 | 			color: #680000;
 28 | 			margin: 0;
 29 | 			padding: 0px 0px 6px 0px;
 30 | 			font-size: 51px;
 31 | 			line-height: 44px;
 32 | 			letter-spacing: -2px;
 33 | 			font-weight: bold;
 34 | 		}
 35 | 
 36 | 		h3 { font-family: arial, Times New Roman, times-roman, georgia, serif;
 37 | 			color: #444;
 38 | 			margin: 0;
 39 | 			padding: 0px 0px 6px 0px;
 40 | 			font-size: 30px;
 41 | 			line-height: 44px;
 42 | 			letter-spacing: -2px;
 43 | 			font-weight: bold;
 44 | 		}
 45 | 
 46 | 		li { font-family: arial, Times New Roman, times-roman, georgia, serif;
 47 | 			color: #444;
 48 | 			margin: 0;
 49 | 			padding: 0px 0px 6px 0px;
 50 | 			font-size: 15px;
 51 | 			line-height: 15px;
 52 | 			letter-spacing: 0.4px;
 53 | 
 54 | 		}
 55 | 
 56 | 		h2{
 57 | 		font-family: arial, Times New Roman, times-roman, georgia, serif;
 58 | 				font-size: 48px;
 59 | 				line-height: 40px;
 60 | 				letter-spacing: -1px;
 61 | 				color: #680000 ;
 62 | 				margin: 0 0 0 0;
 63 | 				padding: 0 0 0 0;
 64 | 				font-weight: 100;
 65 | 
 66 | 		}
 67 | 
 68 | 		pre {
 69 | 		overflow: auto;
 70 | 		padding-left: 15px;
 71 | 		padding-right: 15px;
 72 | 		font-size: 11px;
 73 | 		line-height: 15px;
 74 | 		margin-top: 10px;
 75 | 		width: 93%;
 76 | 		display: block;
 77 | 		background-color: #eeeeee;
 78 | 		color: #000000;
 79 | 		max-height: 300px;
 80 | 		}
 81 | 		</style>
 82 | 		"""
 83 |         self.style = a
 84 | 
 85 |     def writehtml(self):
 86 |         page = markup.page()
 87 |         # page.init (title="theHarvester
 88 |         # Results",css=('edge.css'),footer="Edge-security 2011")A
 89 |         page.html()
 90 |         self.styler()
 91 |         page.head(self.style)
 92 |         page.body()
 93 |         page.h1("theHarvester results")
 94 |         page.h2("for :" + self.domain)
 95 |         page.h3("Dashboard:")
 96 |         graph = graphs.BarGraph('vBar')
 97 |         graph.values = [len(
 98 |             self.users),
 99 |             len(self.hosts),
100 |             len(self.vhost),
101 |             len(self.tldres),
102 |             len(self.shodan)]
103 |         graph.labels = ['Emails', 'hosts', 'Vhost', 'TLD', 'Shodan']
104 |         graph.showValues = 1
105 |         page.body(graph.create())
106 |         page.h3("E-mails names found:")
107 |         if self.users != []:
108 |             page.ul(class_="userslist")
109 |             page.li(self.users, class_="useritem")
110 |             page.ul.close()
111 |         else:
112 |             page.h2("No emails found")
113 |         page.h3("Hosts found:")
114 |         if self.hosts != []:
115 |             page.ul(class_="softlist")
116 |             page.li(self.hosts, class_="softitem")
117 |             page.ul.close()
118 |         else:
119 |             page.h2("No hosts found")
120 |         if self.tldres != []:
121 |             page.h3("TLD domains found in TLD expansion:")
122 |             page.ul(class_="tldlist")
123 |             page.li(self.tldres, class_="tlditem")
124 |             page.ul.close()
125 |         if self.dnsres != []:
126 |             page.h3("Hosts found in DNS brute force:")
127 |             page.ul(class_="dnslist")
128 |             page.li(self.dnsres, class_="dnsitem")
129 |             page.ul.close()
130 |         if self.dnsrev != []:
131 |             page.h3("Hosts found with reverse lookup :")
132 |             page.ul(class_="dnsrevlist")
133 |             page.li(self.dnsrev, class_="dnsrevitem")
134 |             page.ul.close()
135 |         if self.vhost != []:
136 |             page.h3("Virtual hosts found:")
137 |             page.ul(class_="pathslist")
138 |             page.li(self.vhost, class_="pathitem")
139 |             page.ul.close()
140 |         if self.shodan != []:
141 |             shodanalysis = []
142 |             page.h3("Shodan results:")
143 |             for x in self.shodan:
144 |                 res = x.split("SAPO")
145 |                 page.h3(res[0])
146 |                 page.a("Port :" + res[2])
147 |                 page.pre(res[1])
148 |                 page.pre.close()
149 |                 ban = res[1]
150 |                 reg_server = re.compile('Server:.*')
151 |                 temp = reg_server.findall(res[1])
152 |                 if temp != []:
153 |                     shodanalysis.append(res[0] + ":" + temp[0])
154 |             if shodanalysis != []:
155 |                 page.h3("Server technologies:")
156 |                 repeated = []
157 |                 for x in shodanalysis:
158 |                     if x not in repeated:
159 |                         page.pre(x)
160 |                         page.pre.close()
161 |                         repeated.append(x)
162 |         page.body.close()
163 |         page.html.close()
164 |         file = open(self.fname, 'w')
165 |         for x in page.content:
166 |             try:
167 |                 file.write(x)
168 |             except:
169 |                 print "Exception" + x  # send to logs
170 |                 pass
171 |         file.close
172 |         return "ok"
173 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/win32dns.py:
--------------------------------------------------------------------------------
  1 | """
  2 |  $Id: win32dns.py,v 1.3.2.1 2007/05/22 20:26:49 customdesigned Exp $
  3 | 
  4 |  Extract a list of TCP/IP name servers from the registry 0.1
  5 |     0.1 Strobl 2001-07-19
  6 |  Usage:
  7 |     RegistryResolve() returns a list of ip numbers (dotted quads), by
  8 |     scouring the registry for addresses of name servers
  9 | 
 10 |  Tested on Windows NT4 Server SP6a, Windows 2000 Pro SP2 and
 11 |  Whistler Pro (XP) Build 2462 and Windows ME
 12 |  ... all having a different registry layout wrt name servers :-/
 13 | 
 14 |  Todo:
 15 | 
 16 |    Program doesn't check whether an interface is up or down
 17 | 
 18 |  (c) 2001 Copyright by Wolfgang Strobl ws@mystrobl.de,
 19 |           License analog to the current Python license
 20 | """
 21 | 
 22 | import string
 23 | import re
 24 | import _winreg
 25 | 
 26 | 
 27 | def binipdisplay(s):
 28 |     "convert a binary array of ip adresses to a python list"
 29 |     if len(s) % 4 != 0:
 30 |         raise EnvironmentError  # well ...
 31 |     ol = []
 32 |     for i in range(len(s) / 4):
 33 |         s1 = s[:4]
 34 |         s = s[4:]
 35 |         ip = []
 36 |         for j in s1:
 37 |             ip.append(str(ord(j)))
 38 |         ol.append(string.join(ip, '.'))
 39 |     return ol
 40 | 
 41 | 
 42 | def stringdisplay(s):
 43 |     '''convert "d.d.d.d,d.d.d.d" to ["d.d.d.d","d.d.d.d"].
 44 |        also handle u'd.d.d.d d.d.d.d', as reporting on SF
 45 |     '''
 46 |     import re
 47 |     return map(str, re.split("[ ,]", s))
 48 | 
 49 | 
 50 | def RegistryResolve():
 51 |     nameservers = []
 52 |     x = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
 53 |     try:
 54 |         y = _winreg.OpenKey(x,
 55 |                             r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters")
 56 |     except EnvironmentError:  # so it isn't NT/2000/XP
 57 |         # windows ME, perhaps?
 58 |         try:  # for Windows ME
 59 |             y = _winreg.OpenKey(x,
 60 |                                 r"SYSTEM\CurrentControlSet\Services\VxD\MSTCP")
 61 |             nameserver, dummytype = _winreg.QueryValueEx(y, 'NameServer')
 62 |             if nameserver and not (nameserver in nameservers):
 63 |                 nameservers.extend(stringdisplay(nameserver))
 64 |         except EnvironmentError:
 65 |             pass
 66 |         return nameservers  # no idea
 67 |     try:
 68 |         nameserver = _winreg.QueryValueEx(y, "DhcpNameServer")[0].split()
 69 |     except:
 70 |         nameserver = _winreg.QueryValueEx(y, "NameServer")[0].split()
 71 |     if nameserver:
 72 |         nameservers = nameserver
 73 |     nameserver = _winreg.QueryValueEx(y, "NameServer")[0]
 74 |     _winreg.CloseKey(y)
 75 |     try:  # for win2000
 76 |         y = _winreg.OpenKey(x,
 77 |                             r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\DNSRegisteredAdapters")
 78 |         for i in range(1000):
 79 |             try:
 80 |                 n = _winreg.EnumKey(y, i)
 81 |                 z = _winreg.OpenKey(y, n)
 82 |                 dnscount, dnscounttype = _winreg.QueryValueEx(z,
 83 |                                                               'DNSServerAddressCount')
 84 |                 dnsvalues, dnsvaluestype = _winreg.QueryValueEx(z,
 85 |                                                                 'DNSServerAddresses')
 86 |                 nameservers.extend(binipdisplay(dnsvalues))
 87 |                 _winreg.CloseKey(z)
 88 |             except EnvironmentError:
 89 |                 break
 90 |         _winreg.CloseKey(y)
 91 |     except EnvironmentError:
 92 |         pass
 93 | #
 94 |     try:  # for whistler
 95 |         y = _winreg.OpenKey(x,
 96 |                             r"SYSTEM\CurrentControlSet\Services\Tcpip\Parameters\Interfaces")
 97 |         for i in range(1000):
 98 |             try:
 99 |                 n = _winreg.EnumKey(y, i)
100 |                 z = _winreg.OpenKey(y, n)
101 |                 try:
102 |                     nameserver, dummytype = _winreg.QueryValueEx(
103 |                         z, 'NameServer')
104 |                     if nameserver and not (nameserver in nameservers):
105 |                         nameservers.extend(stringdisplay(nameserver))
106 |                 except EnvironmentError:
107 |                     pass
108 |                 _winreg.CloseKey(z)
109 |             except EnvironmentError:
110 |                 break
111 |         _winreg.CloseKey(y)
112 |     except EnvironmentError:
113 |         # print "Key Interfaces not found, just do nothing"
114 |         pass
115 | #
116 |     _winreg.CloseKey(x)
117 |     return nameservers
118 | 
119 | if __name__ == "__main__":
120 |     print "Name servers:", RegistryResolve()
121 | 
122 | #
123 | # $Log: win32dns.py,v $
124 | # Revision 1.3.2.1  2007/05/22 20:26:49  customdesigned
125 | # Fix win32 nameserver discovery.
126 | #
127 | # Revision 1.3  2002/05/06 06:15:31  anthonybaxter
128 | # apparently some versions of windows return servers as unicode
129 | # string with space sep, rather than strings with comma sep.
130 | # *sigh*
131 | #
132 | # Revision 1.2  2002/03/19 12:41:33  anthonybaxter
133 | # tabnannied and reindented everything. 4 space indent, no tabs.
134 | # yay.
135 | #
136 | # Revision 1.1  2001/08/09 09:22:28  anthonybaxter
137 | # added what I hope is win32 resolver lookup support. I'll need to try
138 | # and figure out how to get the CVS checkout onto my windows machine to
139 | # make sure it works (wow, doing something other than games on the
140 | # windows machine :)
141 | #
142 | # Code from Wolfgang.Strobl@gmd.de
143 | # win32dns.py from
144 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66260
145 | #
146 | # Really, ParseResolvConf() should be renamed "FindNameServers" or
147 | # some such.
148 | #
149 | #
150 | 


--------------------------------------------------------------------------------
/lib/theHarvester/lib/htmlExport.py:
--------------------------------------------------------------------------------
  1 | from lib import markup
  2 | from lib import graphs
  3 | import re
  4 | 
  5 | 
  6 | class htmlExport():
  7 | 
  8 |     def __init__(self, users, hosts, vhosts, dnsres,
  9 |                  dnsrev, file, domain, shodan, tldres):
 10 |         self.users = users
 11 |         self.hosts = hosts
 12 |         self.vhost = vhosts
 13 |         self.fname = file
 14 |         self.dnsres = dnsres
 15 |         self.dnsrev = dnsrev
 16 |         self.domain = domain
 17 |         self.shodan = shodan
 18 |         self.tldres = tldres
 19 |         self.style = ""
 20 | 
 21 |     def styler(self):
 22 |         a = """<style type='text/css'>body {
 23 | 			 background: #FFFFFF  top no-repeat;
 24 | 		 }
 25 | 
 26 | 		h1 { font-family: arial, Times New Roman, times-roman, georgia, serif;
 27 | 			color: #680000;
 28 | 			margin: 0;
 29 | 			padding: 0px 0px 6px 0px;
 30 | 			font-size: 51px;
 31 | 			line-height: 44px;
 32 | 			letter-spacing: -2px;
 33 | 			font-weight: bold;
 34 | 		}
 35 | 
 36 | 		h3 { font-family: arial, Times New Roman, times-roman, georgia, serif;
 37 | 			color: #444;
 38 | 			margin: 0;
 39 | 			padding: 0px 0px 6px 0px;
 40 | 			font-size: 30px;
 41 | 			line-height: 44px;
 42 | 			letter-spacing: -2px;
 43 | 			font-weight: bold;
 44 | 		}
 45 | 
 46 | 		li { font-family: arial, Times New Roman, times-roman, georgia, serif;
 47 | 			color: #444;
 48 | 			margin: 0;
 49 | 			padding: 0px 0px 6px 0px;
 50 | 			font-size: 15px;
 51 | 			line-height: 15px;
 52 | 			letter-spacing: 0.4px;
 53 | 
 54 | 		}
 55 | 
 56 | 		h2{
 57 | 		font-family: arial, Times New Roman, times-roman, georgia, serif;
 58 | 				font-size: 48px;
 59 | 				line-height: 40px;
 60 | 				letter-spacing: -1px;
 61 | 				color: #680000 ;
 62 | 				margin: 0 0 0 0;
 63 | 				padding: 0 0 0 0;
 64 | 				font-weight: 100;
 65 | 
 66 | 		}
 67 | 
 68 | 		pre {
 69 | 		overflow: auto;
 70 | 		padding-left: 15px;
 71 | 		padding-right: 15px;
 72 | 		font-size: 11px;
 73 | 		line-height: 15px;
 74 | 		margin-top: 10px;
 75 | 		width: 93%;
 76 | 		display: block;
 77 | 		background-color: #eeeeee;
 78 | 		color: #000000;
 79 | 		max-height: 300px;
 80 | 		}
 81 | 		</style>
 82 | 		"""
 83 |         self.style = a
 84 | 
 85 |     def writehtml(self):
 86 |         page = markup.page()
 87 |         # page.init (title="theHarvester
 88 |         # Results",css=('edge.css'),footer="Edge-security 2011")A
 89 |         page.html()
 90 |         self.styler()
 91 |         page.head(self.style)
 92 |         page.body()
 93 |         page.h1("theHarvester results")
 94 |         page.h2("for :" + self.domain)
 95 |         page.h3("Dashboard:")
 96 |         graph = graphs.BarGraph('vBar')
 97 |         graph.values = [len(
 98 |             self.users),
 99 |             len(self.hosts),
100 |             len(self.vhost),
101 |             len(self.tldres),
102 |             len(self.shodan)]
103 |         graph.labels = ['Emails', 'hosts', 'Vhost', 'TLD', 'Shodan']
104 |         graph.showValues = 1
105 |         page.body(graph.create())
106 |         page.h3("E-mails names found:")
107 |         if self.users != []:
108 |             page.ul(class_="userslist")
109 |             page.li(self.users, class_="useritem")
110 |             page.ul.close()
111 |         else:
112 |             page.h2("No emails found")
113 |         page.h3("Hosts found:")
114 |         if self.hosts != []:
115 |             page.ul(class_="softlist")
116 |             page.li(self.hosts, class_="softitem")
117 |             page.ul.close()
118 |         else:
119 |             page.h2("No hosts found")
120 |         if self.tldres != []:
121 |             page.h3("TLD domains found in TLD expansion:")
122 |             page.ul(class_="tldlist")
123 |             page.li(self.tldres, class_="tlditem")
124 |             page.ul.close()
125 |         if self.dnsres != []:
126 |             page.h3("Hosts found in DNS brute force:")
127 |             page.ul(class_="dnslist")
128 |             page.li(self.dnsres, class_="dnsitem")
129 |             page.ul.close()
130 |         if self.dnsrev != []:
131 |             page.h3("Hosts found with reverse lookup :")
132 |             page.ul(class_="dnsrevlist")
133 |             page.li(self.dnsrev, class_="dnsrevitem")
134 |             page.ul.close()
135 |         if self.vhost != []:
136 |             page.h3("Virtual hosts found:")
137 |             page.ul(class_="pathslist")
138 |             page.li(self.vhost, class_="pathitem")
139 |             page.ul.close()
140 |         if self.shodan != []:
141 |             shodanalysis = []
142 |             page.h3("Shodan results:")
143 |             for x in self.shodan:
144 |                 res = x.split("SAPO")
145 |                 page.h3(res[0])
146 |                 page.a("Port :" + res[2])
147 |                 page.pre(res[1])
148 |                 page.pre.close()
149 |                 ban = res[1]
150 |                 reg_server = re.compile('Server:.*')
151 |                 temp = reg_server.findall(res[1])
152 |                 if temp != []:
153 |                     shodanalysis.append(res[0] + ":" + temp[0])
154 |             if shodanalysis != []:
155 |                 page.h3("Server technologies:")
156 |                 repeated = []
157 |                 for x in shodanalysis:
158 |                     if x not in repeated:
159 |                         page.pre(x)
160 |                         page.pre.close()
161 |                         repeated.append(x)
162 |         page.body.close()
163 |         page.html.close()
164 |         file = open(self.fname, 'w')
165 |         for x in page.content:
166 |             try:
167 |                 file.write(x)
168 |             except:
169 |                 print "Exception" + x  # send to logs
170 |                 pass
171 |         file.close
172 |         return "ok"
173 | 


--------------------------------------------------------------------------------
/snoop.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | """
  5 | Small tool to take in a list of domains and spit out emails and potential issues
  6 | Work smarter, not harder
  7 | 
  8 | Chris Maddalena
  9 | """
 10 | 
 11 | import sys
 12 | import os
 13 | #from lib import *
 14 | import pwnedcheck
 15 | import urllib2
 16 | 
 17 | sys.path.append('lib/theharvester/')
 18 | from theHarvester import *
 19 | 
 20 | def main():
 21 | 	# Clear the terminal window
 22 | 	os.system('cls' if os.name == 'nt' else 'clear')
 23 | 	# Main menu display
 24 | 	try:
 25 | 		domainList = sys.argv[1]
 26 | 	except Exception as e:
 27 | 		print "ERROR: You must supply only an input text file!"
 28 | 		print "ERROR: %s" % e
 29 | 
 30 | 	print "[+] Trying to read %s" % domainList
 31 | 	try:
 32 | 		with open(domainList, 'r') as domains:
 33 | 			for domain in domains:
 34 | 				print "[+] Checking %s" % domain.rstrip()
 35 | 				harvest(domain)
 36 | 	except Exception as e:
 37 | 		print "[!] Could not open your file, %s" % domainList
 38 | 		print "ERROR: %s" % e
 39 | 
 40 | # Number of commands
 41 | total = 2 # Tests
 42 | harvesterDomains = 6 # Search engines used with theHarvester
 43 | # Headers for use with urllib2
 44 | user_agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)"
 45 | headers = { 'User-Agent' : user_agent }
 46 | 
 47 | def harvest(domain):
 48 | 
 49 | 	domain = domain.rstrip()
 50 | 	harvestLimit = 100
 51 | 	harvestStart = 0
 52 | 	# Create drectory for client reports and report
 53 | 	if not os.path.exists("reports/%s" % domain):
 54 | 		try:
 55 | 			os.makedirs("reports/%s" % domain)
 56 | 		except Exception as e:
 57 | 			print "[!] Could not create reports directory!"
 58 | 			print "ERROR: %s" % e
 59 | 
 60 | 	file = "reports/%s/%s" % (domain, domain + ".txt")
 61 | 
 62 | 	print "[+] Running The Harvester (1/%s)" % total
 63 | 	# Search through most of Harvester's supported engines
 64 | 	# No Baidu because it always seems to hang or take way too long
 65 | 	print "[-] Harvesting Google (1/%s)" % harvesterDomains
 66 | 	search = googlesearch.search_google(domain,harvestLimit,harvestStart)
 67 | 	search.process()
 68 | 	googleHarvest = search.get_emails()
 69 | 	print "[-] Harvesting LinkedIn (2/%s)" % harvesterDomains
 70 | 	search = linkedinsearch.search_linkedin(domain,harvestLimit)
 71 | 	search.process()
 72 | 	linkHarvest = search.get_people()
 73 | 	print "[-] Harvesting Twitter (3/%s)" % harvesterDomains
 74 | 	search = twittersearch.search_twitter(domain,harvestLimit)
 75 | 	search.process()
 76 | 	twitHarvest = search.get_people()
 77 | 	print "[-] Harvesting Yahoo (4/%s)" % harvesterDomains
 78 | 	search = yahoosearch.search_yahoo(domain,harvestLimit)
 79 | 	search.process()
 80 | 	yahooHarvest = search.get_emails()
 81 | 	print "[-] Harvesting Bing (5/%s)" % harvesterDomains
 82 | 	search = bingsearch.search_bing(domain,harvestLimit,#harvestStart)
 83 | 	search.process('no')
 84 | 	bingHarvest = search.get_emails()
 85 | 	print "[-] Harvesting Jigsaw (6/%s)" % harvesterDomains
 86 | 	search = jigsaw.search_jigsaw(domain,harvestLimit)
 87 | 	search.process()
 88 | 	jigsawHarvest = search.get_people()
 89 | 
 90 | 	# Combine lists and strip out duplicate findings for unique lists
 91 | 	totalEmails = googleHarvest #+ bingHarvest + yahooHarvest
 92 | 	temp = []
 93 | 	for email in totalEmails:
 94 | 		email = email.lower()
 95 | 		temp.append(email)
 96 | 	unique = set(temp)
 97 | 	uniqueEmails = list(unique)
 98 | 	# Do the same with people, but keep Twitter handles separate
 99 | 	totalPeople = linkHarvest + jigsawHarvest
100 | 	unique = set(totalPeople)
101 | 	uniquePeople = list(unique)
102 | 	# Process Twitter handles to kill duplicates
103 | 	handles = []
104 | 	for twit in twitHarvest:
105 | 		# Split handle from account description and strip rogue periods
106 | 		handle = twit.split(' ')[0]
107 | 		handle = handle.rstrip('.')
108 | 		handles.append(handle.lower())
109 | 	unique = set(handles)
110 | 	uniqueTwitter = list(unique)
111 | 
112 | 	print "[+] Harvester found a total of %s emails and %s names across all engines" % (len(uniqueEmails),len(uniquePeople) + len(uniqueTwitter))
113 | 	print "[+] Running emails through HaveIBeenPwned and writing report (2/%s)" % total
114 | 	with open(file, 'w') as report:
115 | 		report.write("### Email & People Report for %s ###\n" % domain)
116 | 		report.write("---THEHARVESTER Results---\n")
117 | 		report.write("Emails checked with HaveIBeenPwned for breaches and pastes\n")
118 | 		for email in uniqueEmails:
119 | 			# Make sure we drop that @domain.com result Harvester always includes
120 | 			if email == '@' + domain:
121 | 				pass
122 | 			else:
123 | 				report.write('\n' + 'Email: ' + email + '\n')
124 | 				report.write('Pwned: ')
125 | 				# Check haveibeenpwned data breaches
126 | 				try:
127 | 					pwned = pwnedcheck.check(email)
128 | 				except:
129 | 					print "[!] Could not parse JSON. Moving on..."
130 | 				# If no results for breaches we return None
131 | 				if not pwned:
132 | 					report.write('None' + '\n')
133 | 				else:
134 | 					report.write('\n')
135 | 					for pwn in pwned:
136 | 						report.write('+ ' + pwn + '\n')
137 | 				# Check haveibeenpwned for pastes from Pastebin, Pastie, Slexy, Ghostbin, QuickLeak, JustPaste, and AdHocUrl
138 | 				url = "https://haveibeenpwned.com/api/v2/pasteaccount/" + email
139 | 				page = urllib2.Request(url, None, headers)
140 | 				# We must use Try because an empty result is like a 404 and causes an error
141 | 				try:
142 | 					source = urllib2.urlopen(page).read()
143 | 					report.write("Pastes: " + source + "\n")
144 | 				except:
145 | 					report.write("Pastes: No pastes\n")
146 | 
147 | 		report.write("\n---PEOPLE Results---\n")
148 | 		report.write("Names and social media accounts (Twitter and LinkedIn)\n\n")
149 | 		for person in uniquePeople:
150 | 			report.write('Name: ' + person + '\n')
151 | 		for twit in uniqueTwitter:
152 | 			# Drop the lonely @ Harvester often includes
153 | 			if twit == '@':
154 | 				pass
155 | 			else:
156 | 				report.write('Twitter: ' + twit + '\n')
157 | 
158 | 	report.close()
159 | 
160 | 
161 | if __name__ == "__main__":
162 | 	main()
163 | 


--------------------------------------------------------------------------------
/lib/theHarvester/myparser.py:
--------------------------------------------------------------------------------
  1 | import string
  2 | import re
  3 | 
  4 | 
  5 | class parser:
  6 | 
  7 |     def __init__(self, results, word):
  8 |         self.results = results
  9 |         self.word = word
 10 |         self.temp = []
 11 | 
 12 |     def genericClean(self):
 13 |         self.results = re.sub('<em>', '', self.results)
 14 |         self.results = re.sub('<b>', '', self.results)
 15 |         self.results = re.sub('</b>', '', self.results)
 16 |         self.results = re.sub('</em>', '', self.results)
 17 |         self.results = re.sub('%2f', ' ', self.results)
 18 |         self.results = re.sub('%3a', ' ', self.results)
 19 |         self.results = re.sub('<strong>', '', self.results)
 20 |         self.results = re.sub('</strong>', '', self.results)
 21 | 
 22 |         for e in ('>', ':', '=', '<', '/', '\\', ';', '&', '%3A', '%3D', '%3C'):
 23 |             self.results = string.replace(self.results, e, ' ')
 24 | 
 25 |     def urlClean(self):
 26 |         self.results = re.sub('<em>', '', self.results)
 27 |         self.results = re.sub('</em>', '', self.results)
 28 |         self.results = re.sub('%2f', ' ', self.results)
 29 |         self.results = re.sub('%3a', ' ', self.results)
 30 | 
 31 |         for e in ('<', '>', ':', '=', ';', '&', '%3A', '%3D', '%3C'):
 32 |             self.results = string.replace(self.results, e, ' ')
 33 | 
 34 |     def emails(self):
 35 |         self.genericClean()
 36 |         reg_emails = re.compile(
 37 |             '[a-zA-Z0-9.-_]*' +
 38 |             '@' +
 39 |             '(?:[a-zA-Z0-9.-]*\.)?' +
 40 |             self.word)
 41 |         self.temp = reg_emails.findall(self.results)
 42 |         emails = self.unique()
 43 |         return emails
 44 | 
 45 |     def fileurls(self, file):
 46 |         urls = []
 47 |         reg_urls = re.compile('<a href="(.*?)"')
 48 |         self.temp = reg_urls.findall(self.results)
 49 |         allurls = self.unique()
 50 |         for x in allurls:
 51 |             if x.count('webcache') or x.count('google.com') or x.count('search?hl'):
 52 |                 pass
 53 |             else:
 54 |                 urls.append(x)
 55 |         return urls
 56 | 
 57 |     def people_googleplus(self):
 58 |         self.results = re.sub('</b>', '', self.results)
 59 |         self.results = re.sub('<b>', '', self.results)
 60 |         reg_people = re.compile('>[a-zA-Z0-9._ ]* - Google\+')
 61 |         #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
 62 |         self.temp = reg_people.findall(self.results)
 63 |         resul = []
 64 |         for x in self.temp:
 65 |             y = string.replace(x, ' | LinkedIn', '')
 66 |             y = string.replace(y, ' profiles ', '')
 67 |             y = string.replace(y, 'LinkedIn', '')
 68 |             y = string.replace(y, '"', '')
 69 |             y = string.replace(y, '>', '')
 70 |             if y != " ":
 71 |                 resul.append(y)
 72 |         return resul
 73 | 
 74 | 
 75 | 
 76 |     def people_twitter(self):
 77 |         reg_people = re.compile('(@[a-zA-Z0-9._ -]*)')
 78 |         #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
 79 |         self.temp = reg_people.findall(self.results)
 80 |         users = self.unique()
 81 |         resul = []
 82 |         for x in users:
 83 |             y = string.replace(x, ' | LinkedIn', '')
 84 |             y = string.replace(y, ' profiles ', '')
 85 |             y = string.replace(y, 'LinkedIn', '')
 86 |             y = string.replace(y, '"', '')
 87 |             y = string.replace(y, '>', '')
 88 |             if y != " ":
 89 |                 resul.append(y)
 90 |         return resul
 91 | 
 92 |     def people_linkedin(self):
 93 |         reg_people = re.compile('">[a-zA-Z0-9._ -]* \| LinkedIn')
 94 |         #reg_people = re.compile('">[a-zA-Z0-9._ -]* profiles | LinkedIn')
 95 |         self.temp = reg_people.findall(self.results)
 96 |         resul = []
 97 |         for x in self.temp:
 98 |             y = string.replace(x, ' | LinkedIn', '')
 99 |             y = string.replace(y, ' profiles ', '')
100 |             y = string.replace(y, 'LinkedIn', '')
101 |             y = string.replace(y, '"', '')
102 |             y = string.replace(y, '>', '')
103 |             if y != " ":
104 |                 resul.append(y)
105 |         return resul
106 | 
107 |     def profiles(self):
108 |         reg_people = re.compile('">[a-zA-Z0-9._ -]* - <em>Google Profile</em>')
109 |         self.temp = reg_people.findall(self.results)
110 |         resul = []
111 |         for x in self.temp:
112 |             y = string.replace(x, ' <em>Google Profile</em>', '')
113 |             y = string.replace(y, '-', '')
114 |             y = string.replace(y, '">', '')
115 |             if y != " ":
116 |                 resul.append(y)
117 |         return resul
118 | 
119 |     def people_jigsaw(self):
120 |         res = []
121 |         #reg_people = re.compile("'tblrow' title='[a-zA-Z0-9.-]*'><span class='nowrap'/>")
122 |         reg_people = re.compile(
123 |             "href=javascript:showContact\('[0-9]*'\)>[a-zA-Z0-9., ]*</a></span>")
124 |         self.temp = reg_people.findall(self.results)
125 |         for x in self.temp:
126 |             a = x.split('>')[1].replace("</a", "")
127 |             res.append(a)
128 |         return res
129 | 
130 |     def hostnames(self):
131 |         self.genericClean()
132 |         reg_hosts = re.compile('[a-zA-Z0-9.-]*\.' + self.word)
133 |         self.temp = reg_hosts.findall(self.results)
134 |         hostnames = self.unique()
135 |         return hostnames
136 | 
137 |     def set(self):
138 |         reg_sets = re.compile('>[a-zA-Z0-9]*</a></font>')
139 |         self.temp = reg_sets.findall(self.results)
140 |         sets = []
141 |         for x in self.temp:
142 |             y = string.replace(x, '>', '')
143 |             y = string.replace(y, '</a</font', '')
144 |             sets.append(y)
145 |         return sets
146 | 
147 |     def hostnames_all(self):
148 |         reg_hosts = re.compile('<cite>(.*?)</cite>')
149 |         temp = reg_hosts.findall(self.results)
150 |         for x in temp:
151 |             if x.count(':'):
152 |                 res = x.split(':')[1].split('/')[2]
153 |             else:
154 |                 res = x.split("/")[0]
155 |             self.temp.append(res)
156 |         hostnames = self.unique()
157 |         return hostnames
158 | 
159 |     def unique(self):
160 |         self.new = []
161 |         for x in self.temp:
162 |             if x not in self.new:
163 |                 self.new.append(x)
164 |         return self.new
165 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/shodan/api.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     from json import dumps, loads
  3 | except:
  4 |     from simplejson import dumps, loads
  5 | from urllib2 import urlopen
  6 | from urllib import urlencode
  7 | 
  8 | __all__ = ['WebAPI']
  9 | 
 10 | 
 11 | class WebAPIError(Exception):
 12 | 
 13 |     def __init__(self, value):
 14 |         self.value = value
 15 | 
 16 |     def __str__(self):
 17 |         return self.value
 18 | 
 19 | 
 20 | class WebAPI:
 21 | 
 22 |     """Wrapper around the SHODAN webservices API"""
 23 | 
 24 |     class DatalossDb:
 25 | 
 26 |         def __init__(self, parent):
 27 |             self.parent = parent
 28 | 
 29 |         def search(self, **kwargs):
 30 |             """Search the Dataloss DB archive.
 31 | 
 32 |             Arguments:
 33 |             name          -- Name of the affected company/ organisation
 34 | 
 35 |             arrest        -- whether the incident resulted in an arrest
 36 |             breaches      -- the type of breach that occurred (Hack, MissingLaptop etc.)
 37 |             country       -- country where the incident took place
 38 |             ext           -- whether an external, third party was affected
 39 |             ext_names     -- the name of the third party company that was affected
 40 |             lawsuit       -- whether the incident resulted in a lawsuit
 41 |             records       -- the number of records that were lost/ stolen
 42 |             recovered     -- whether the affected items were recovered
 43 |             sub_types     -- the sub-categorization of the affected company/ organization
 44 |             source        -- whether the incident occurred from inside or outside the organization
 45 |             stocks        -- stock symbol of the affected company
 46 |             types         -- the basic type of organization (government, business, educational)
 47 |             uid           -- unique ID for the incident
 48 | 
 49 |             Returns:
 50 |             A dictionary with 2 main items: matches (list) and total (int).
 51 | 
 52 |             """
 53 |             return self.parent._request('datalossdb/search', dict(**kwargs))
 54 | 
 55 |     class Exploits:
 56 | 
 57 |         def __init__(self, parent):
 58 |             self.parent = parent
 59 | 
 60 |         def search(self, query, sources=[],
 61 |                    cve=None, osvdb=None, msb=None, bid=None):
 62 |             """Search the entire Shodan Exploits archive using the same query syntax
 63 |             as the website.
 64 | 
 65 |             Arguments:
 66 |             query    -- exploit search query; same syntax as website
 67 | 
 68 |             Optional arguments:
 69 |             sources  -- metasploit, cve, osvdb, exploitdb, or packetstorm
 70 |             cve      -- CVE identifier (ex. 2010-0432)
 71 |             osvdb    -- OSVDB identifier (ex. 11666)
 72 |             msb      -- Microsoft Security Bulletin ID (ex. MS05-030)
 73 |             bid      -- Bugtraq identifier (ex. 13951)
 74 | 
 75 |             """
 76 |             if sources:
 77 |                 query += ' source:' + ','.join(sources)
 78 |             if cve:
 79 |                 query += ' cve:%s' % (str(cve).strip())
 80 |             if osvdb:
 81 |                 query += ' osvdb:%s' % (str(osvdb).strip())
 82 |             if msb:
 83 |                 query += ' msb:%s' % (str(msb).strip())
 84 |             if bid:
 85 |                 query += ' bid:%s' % (str(bid).strip())
 86 |             return self.parent._request('search_exploits', {'q': query})
 87 | 
 88 |     class ExploitDb:
 89 | 
 90 |         def __init__(self, parent):
 91 |             self.parent = parent
 92 | 
 93 |         def download(self, id):
 94 |             """Download the exploit code from the ExploitDB archive.
 95 | 
 96 |             Arguments:
 97 |             id    -- ID of the ExploitDB entry
 98 | 
 99 |             Returns:
100 |             A dictionary with the following fields:
101 |             filename        -- Name of the file
102 |             content-type    -- Mimetype
103 |             data            -- Contents of the file
104 | 
105 |             """
106 |             return self.parent._request('exploitdb/download', {'id': id})
107 | 
108 |         def search(self, query, **kwargs):
109 |             """Search the ExploitDB archive.
110 | 
111 |             Arguments:
112 |             query     -- Search terms
113 | 
114 |             Optional arguments:
115 |             author    -- Name of the exploit submitter
116 |             platform  -- Target platform (e.g. windows, linux, hardware etc.)
117 |             port      -- Service port number
118 |             type      -- Any, dos, local, papers, remote, shellcode and webapps
119 | 
120 |             Returns:
121 |             A dictionary with 2 main items: matches (list) and total (int).
122 |             Each item in 'matches' is a dictionary with the following elements:
123 | 
124 |             id
125 |             author
126 |             date
127 |             description
128 |             platform
129 |             port
130 |             type
131 | 
132 |             """
133 |             return (
134 |                 self.parent._request(
135 |                     'exploitdb/search', dict(q=query, **kwargs))
136 |             )
137 | 
138 |     class Msf:
139 | 
140 |         def __init__(self, parent):
141 |             self.parent = parent
142 | 
143 |         def download(self, id):
144 |             """Download a metasploit module given the fullname (id) of it.
145 | 
146 |             Arguments:
147 |             id        -- fullname of the module (ex. auxiliary/admin/backupexec/dump)
148 | 
149 |             Returns:
150 |             A dictionary with the following fields:
151 |             filename        -- Name of the file
152 |             content-type    -- Mimetype
153 |             data            -- File content
154 |             """
155 |             return self.parent._request('msf/download', {'id': id})
156 | 
157 |         def search(self, query, **kwargs):
158 |             """Search for a Metasploit module.
159 |             """
160 |             return self.parent._request('msf/search', dict(q=query, **kwargs))
161 | 
162 |     def __init__(self, key):
163 |         """Initializes the API object.
164 | 
165 |         Arguments:
166 |         key -- your API key
167 | 
168 |         """
169 |         self.api_key = key
170 |         self.base_url = 'http://www.shodanhq.com/api/'
171 |         self.dataloss = self.DatalossDb(self)
172 |         self.exploits = self.Exploits(self)
173 |         self.exploitdb = self.ExploitDb(self)
174 |         self.msf = self.Msf(self)
175 | 
176 |     def _request(self, function, params):
177 |         """General-purpose function to create web requests to SHODAN.
178 | 
179 |         Arguments:
180 |         function  -- name of the function you want to execute
181 |         params      -- dictionary of parameters for the function
182 | 
183 |         Returns
184 |         A JSON string containing the function's results.
185 | 
186 |         """
187 |         # Add the API key parameter automatically
188 |         params['key'] = self.api_key
189 | 
190 |         # Send the request
191 |         data = urlopen(
192 |             self.base_url +
193 |             function +
194 |             '?' +
195 |             urlencode(
196 |                 params)).read(
197 |         )
198 | 
199 |         # Parse the text into JSON
200 |         data = loads(data)
201 | 
202 |         # Raise an exception if an error occurred
203 |         if data.get('error', None):
204 |             raise WebAPIError(data['error'])
205 | 
206 |         # Return the data
207 |         return data
208 | 
209 |     def fingerprint(self, banner):
210 |         """Determine the software based on the banner.
211 | 
212 |         Arguments:
213 |         banner  - HTTP banner
214 | 
215 |         Returns:
216 |         A list of software that matched the given banner.
217 |         """
218 |         return self._request('fingerprint', {'banner': banner})
219 | 
220 |     def host(self, ip):
221 |         """Get all available information on an IP.
222 | 
223 |         Arguments:
224 |         ip    -- IP of the computer
225 | 
226 |         Returns:
227 |         All available information SHODAN has on the given IP,
228 |         subject to API key restrictions.
229 | 
230 |         """
231 |         return self._request('host', {'ip': ip})
232 | 
233 |     def search(self, query):
234 |         """Search the SHODAN database.
235 | 
236 |         Arguments:
237 |         query    -- search query; identical syntax to the website
238 | 
239 |         Returns:
240 |         A dictionary with 3 main items: matches, countries and total.
241 |         Visit the website for more detailed information.
242 | 
243 |         """
244 |         return self._request('search', {'q': query})
245 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/dnssearch.py:
--------------------------------------------------------------------------------
  1 | import IPy
  2 | import DNS
  3 | import string
  4 | import socket
  5 | import sys
  6 | 
  7 | 
  8 | class dns_reverse():
  9 | 
 10 |     def __init__(self, range, verbose=True):
 11 |         self.range = range
 12 |         self.iplist = ''
 13 |         self.results = []
 14 |         self.verbose = verbose
 15 |         try:
 16 |             DNS.ParseResolvConf("/etc/resolv.conf")
 17 |             nameserver = DNS.defaults['server'][0]
 18 |         except:
 19 |             print "Error in DNS resolvers"
 20 |             sys.exit()
 21 | 
 22 |     def run(self, host):
 23 |         a = string.split(host, '.')
 24 |         a.reverse()
 25 |         b = string.join(a, '.') + '.in-addr.arpa'
 26 |         nameserver = DNS.defaults['server'][0]
 27 |         if self.verbose:
 28 |             ESC = chr(27)
 29 |             sys.stdout.write(ESC + '[2K' + ESC + '[G')
 30 |             sys.stdout.write("\r\t" + host)
 31 |             sys.stdout.flush()
 32 |         try:
 33 |             name = DNS.Base.DnsRequest(b, qtype='ptr').req().answers[0]['data']
 34 |             return host + ":" + name
 35 |         except:
 36 |             pass
 37 | 
 38 |     def get_ip_list(self, ips):
 39 |         """Generates the list of ips to reverse"""
 40 |         try:
 41 |             list = IPy.IP(ips)
 42 |         except:
 43 |             print "Error in IP format, check the input and try again. (Eg. 192.168.1.0/24)"
 44 |             sys.exit()
 45 |         name = []
 46 |         for x in list:
 47 |             name.append(str(x))
 48 |         return name
 49 | 
 50 |     def list(self):
 51 |         self.iplist = self.get_ip_list(self.range)
 52 |         return self.iplist
 53 | 
 54 |     def process(self):
 55 |         for x in self.iplist:
 56 |             host = self.run(x)
 57 |             if host is not None:
 58 |                 self.results.append(host)
 59 |         return self.results
 60 | 
 61 | 
 62 | class dns_force():
 63 | 
 64 |     def __init__(self, domain, dnsserver, verbose=False):
 65 |         self.domain = domain
 66 |         self.nameserver = dnsserver
 67 |         self.file = "dns-names.txt"
 68 |         self.subdo = False
 69 |         self.verbose = verbose
 70 |         try:
 71 |             f = open(self.file, "r")
 72 |         except:
 73 |             print "Error opening dns dictionary file"
 74 |             sys.exit()
 75 |         self.list = f.readlines()
 76 | 
 77 |     def getdns(self, domain):
 78 |         DNS.ParseResolvConf("/etc/resolv.conf")
 79 |         # nameserver=DNS.defaults['server'][0]
 80 |         dom = domain
 81 |         if self.subdo == True:
 82 |             dom = domain.split(".")
 83 |             dom.pop(0)
 84 |             rootdom = ".".join(dom)
 85 |         else:
 86 |             rootdom = dom
 87 |         if self.nameserver == "":
 88 |             try:
 89 |                 r = DNS.Request(rootdom, qtype='SOA').req()
 90 |                 primary, email, serial, refresh, retry, expire, minimum = r.answers[
 91 |                     0]['data']
 92 |                 test = DNS.Request(
 93 |                     rootdom,
 94 |                     qtype='NS',
 95 |                     server=primary,
 96 |                     aa=1).req()
 97 |             except Exception as e:
 98 |                 print e
 99 | 
100 |             if test.header['status'] != "NOERROR":
101 |                 print "Error"
102 |                 sys.exit()
103 |             self.nameserver = test.answers[0]['data']
104 |         elif self.nameserver == "local":
105 |             self.nameserver = nameserver
106 |         return self.nameserver
107 | 
108 |     def run(self, host):
109 |         if self.nameserver == "":
110 |             self.nameserver = self.getdns(self.domain)
111 |             print "Using DNS server: " + self.nameserver
112 | 
113 |         hostname = str(host.split("\n")[0]) + "." + str(self.domain)
114 |         if self.verbose:
115 |             ESC = chr(27)
116 |             sys.stdout.write(ESC + '[2K' + ESC + '[G')
117 |             sys.stdout.write("\r" + hostname)
118 |             sys.stdout.flush()
119 |         try:
120 |             test = DNS.Request(
121 |                 hostname,
122 |                 qtype='a',
123 |                 server=self.nameserver).req(
124 |             )
125 |             hostip = test.answers[0]['data']
126 |             return hostip + ":" + hostname
127 |         except Exception as e:
128 |             pass
129 | 
130 |     def process(self):
131 |         results = []
132 |         for x in self.list:
133 |             host = self.run(x)
134 |             if host is not None:
135 |                 results.append(host)
136 |         return results
137 | 
138 | 
139 | class dns_tld():
140 | 
141 |     def __init__(self, domain, dnsserver, verbose=False):
142 |         self.domain = domain
143 |         self.nameserver = dnsserver
144 |         self.subdo = False
145 |         self.verbose = verbose
146 |         # Updated from http://data.iana.org/TLD/tlds-alpha-by-domain.txt
147 |         self.tlds = [
148 |             "ac", "academy", "ad", "ae", "aero", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "arpa", "as",
149 |             "asia", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bike", "biz", "bj",
150 |             "bm", "bn", "bo", "br", "bs", "bt", "builders", "buzz", "bv", "bw", "by", "bz", "ca", "cab", "camera",
151 |             "camp", "careers", "cat", "cc", "cd", "center", "ceo", "cf", "cg", "ch", "ci", "ck", "cl", "clothing",
152 |             "cm", "cn", "co", "codes", "coffee", "com", "company", "computer", "construction", "contractors", "coop",
153 |             "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "diamonds", "directory", "dj", "dk", "dm", "do",
154 |             "domains", "dz", "ec", "edu", "education", "ee", "eg", "email", "enterprises", "equipment", "er", "es",
155 |             "estate", "et", "eu", "farm", "fi", "fj", "fk", "florist", "fm", "fo", "fr", "ga", "gallery", "gb", "gd",
156 |             "ge", "gf", "gg", "gh", "gi", "gl", "glass", "gm", "gn", "gov", "gp", "gq", "gr", "graphics", "gs", "gt",
157 |             "gu", "guru", "gw", "gy", "hk", "hm", "hn", "holdings", "holiday", "house", "hr", "ht", "hu", "id", "ie",
158 |             "il", "im", "immobilien", "in", "info", "institute", "int", "international", "io", "iq", "ir", "is", "it",
159 |             "je", "jm", "jo", "jobs", "jp", "kaufen", "ke", "kg", "kh", "ki", "kitchen", "kiwi", "km", "kn", "kp",
160 |             "kr", "kw", "ky", "kz", "la", "land", "lb", "lc", "li", "lighting", "limo", "lk", "lr", "ls", "lt", "lu",
161 |             "lv", "ly", "ma", "management", "mc", "md", "me", "menu", "mg", "mh", "mil", "mk", "ml", "mm", "mn", "mo",
162 |             "mobi", "mp", "mq", "mr", "ms", "mt", "mu", "museum", "mv", "mw", "mx", "my", "mz", "na", "name", "nc",
163 |             "ne", "net", "nf", "ng", "ni", "ninja", "nl", "no", "np", "nr", "nu", "nz", "om", "onl", "org", "pa", "pe",
164 |             "pf", "pg", "ph", "photography", "photos", "pk", "pl", "plumbing", "pm", "pn", "post", "pr", "pro", "ps",
165 |             "pt", "pw", "py", "qa", "re", "recipes", "repair", "ro", "rs", "ru", "ruhr", "rw", "sa", "sb", "sc", "sd",
166 |             "se", "sexy", "sg", "sh", "shoes", "si", "singles", "sj", "sk", "sl", "sm", "sn", "so", "solar",
167 |             "solutions", "sr", "st", "su", "support", "sv", "sx", "sy", "systems", "sz", "tattoo", "tc", "td",
168 |             "technology", "tel", "tf", "tg", "th", "tips", "tj", "tk", "tl", "tm", "tn", "to", "today", "tp", "tr",
169 |             "training", "travel", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "uno", "us", "uy", "uz", "va", "vc",
170 |             "ve", "ventures", "vg", "vi", "viajes", "vn", "voyage", "vu", "wang", "wf", "wien", "ws", "xxx", "ye",
171 |             "yt", "za", "zm", "zw"]
172 | 
173 |     def getdns(self, domain):
174 |         # DNS.ParseResolvConf("/etc/resolv.conf")
175 |         # nameserver=DNS.defaults['server'][0]
176 |         dom = domain
177 |         if self.subdo == True:
178 |             dom = domain.split(".")
179 |             dom.pop(0)
180 |             rootdom = ".".join(dom)
181 |         else:
182 |             rootdom = dom
183 |         if self.nameserver == False:
184 |             r = DNS.Request(rootdom, qtype='SOA').req()
185 |             primary, email, serial, refresh, retry, expire, minimum = r.answers[
186 |                 0]['data']
187 |             test = DNS.Request(rootdom, qtype='NS', server=primary, aa=1).req()
188 |             if test.header['status'] != "NOERROR":
189 |                 print "Error"
190 |                 sys.exit()
191 |             self.nameserver = test.answers[0]['data']
192 |         elif self.nameserver == "local":
193 |             self.nameserver = nameserver
194 |         return self.nameserver
195 | 
196 |     def run(self, tld):
197 |         self.nameserver = self.getdns(self.domain)
198 |         hostname = self.domain.split(".")[0] + "." + tld
199 |         if self.verbose:
200 |             ESC = chr(27)
201 |             sys.stdout.write(ESC + '[2K' + ESC + '[G')
202 |             sys.stdout.write("\r\tSearching for: " + hostname)
203 |             sys.stdout.flush()
204 |         try:
205 |             test = DNS.Request(
206 |                 hostname,
207 |                 qtype='a',
208 |                 server=self.nameserver).req(
209 |             )
210 |             hostip = test.answers[0]['data']
211 |             return hostip + ":" + hostname
212 |         except Exception as e:
213 |             pass
214 | 
215 |     def process(self):
216 |         results = []
217 |         for x in self.tlds:
218 |             host = self.run(x)
219 |             if host is not None:
220 |                 results.append(host)
221 |         return results
222 | 


--------------------------------------------------------------------------------
/lib/theHarvester/discovery/DNS/Base.py:
--------------------------------------------------------------------------------
  1 | """
  2 | $Id: Base.py,v 1.12.2.4 2007/05/22 20:28:31 customdesigned Exp $
  3 | 
  4 | This file is part of the pydns project.
  5 | Homepage: http://pydns.sourceforge.net
  6 | 
  7 | This code is covered by the standard Python License.
  8 | 
  9 |     Base functionality. Request and Response classes, that sort of thing.
 10 | """
 11 | 
 12 | import socket
 13 | import string
 14 | import types
 15 | import time
 16 | import Type
 17 | import Class
 18 | import Opcode
 19 | import asyncore
 20 | 
 21 | 
 22 | class DNSError(Exception):
 23 |     pass
 24 | 
 25 | defaults = {'protocol': 'udp', 'port': 53, 'opcode': Opcode.QUERY,
 26 |             'qtype': Type.A, 'rd': 1, 'timing': 1, 'timeout': 30}
 27 | 
 28 | defaults['server'] = []
 29 | 
 30 | 
 31 | def ParseResolvConf(resolv_path):
 32 |     global defaults
 33 |     try:
 34 |         lines = open(resolv_path).readlines()
 35 |     except:
 36 |         print "error in path" + resolv_path
 37 |     for line in lines:
 38 |         line = string.strip(line)
 39 |         if not line or line[0] == ';' or line[0] == '#':
 40 |             continue
 41 |         fields = string.split(line)
 42 |         if len(fields) < 2:
 43 |             continue
 44 |         if fields[0] == 'domain' and len(fields) > 1:
 45 |             defaults['domain'] = fields[1]
 46 |         if fields[0] == 'search':
 47 |             pass
 48 |         if fields[0] == 'options':
 49 |             pass
 50 |         if fields[0] == 'sortlist':
 51 |             pass
 52 |         if fields[0] == 'nameserver':
 53 |             defaults['server'].append(fields[1])
 54 | 
 55 | 
 56 | def DiscoverNameServers():
 57 |     import sys
 58 |     if sys.platform in ('win32', 'nt'):
 59 |         import win32dns
 60 |         defaults['server'] = win32dns.RegistryResolve()
 61 |     else:
 62 |         return ParseResolvConf()
 63 | 
 64 | 
 65 | class DnsRequest:
 66 | 
 67 |     """ high level Request object """
 68 | 
 69 |     def __init__(self, *name, **args):
 70 |         self.donefunc = None
 71 |         self.async = None
 72 |         self.defaults = {}
 73 |         self.argparse(name, args)
 74 |         self.defaults = self.args
 75 | 
 76 |     def argparse(self, name, args):
 77 |         if not name and 'name' in self.defaults:
 78 |             args['name'] = self.defaults['name']
 79 |         if isinstance(name, types.StringType):
 80 |             args['name'] = name
 81 |         else:
 82 |             if len(name) == 1:
 83 |                 if name[0]:
 84 |                     args['name'] = name[0]
 85 |         for i in defaults.keys():
 86 |             if i not in args:
 87 |                 if i in self.defaults:
 88 |                     args[i] = self.defaults[i]
 89 |                 else:
 90 |                     args[i] = defaults[i]
 91 |         if isinstance(args['server'], types.StringType):
 92 |             args['server'] = [args['server']]
 93 |         self.args = args
 94 | 
 95 |     def socketInit(self, a, b):
 96 |         self.s = socket.socket(a, b)
 97 | 
 98 |     def processUDPReply(self):
 99 |         import time
100 |         import select
101 |         if self.args['timeout'] > 0:
102 |             r, w, e = select.select([self.s], [], [], self.args['timeout'])
103 |             if not len(r):
104 |                 raise DNSError('Timeout')
105 |         self.reply = self.s.recv(1024)
106 |         self.time_finish = time.time()
107 |         self.args['server'] = self.ns
108 |         return self.processReply()
109 | 
110 |     def processTCPReply(self):
111 |         import time
112 |         import Lib
113 |         self.f = self.s.makefile('r')
114 |         header = self.f.read(2)
115 |         if len(header) < 2:
116 |             raise DNSError('EOF')
117 |         count = Lib.unpack16bit(header)
118 |         self.reply = self.f.read(count)
119 |         if len(self.reply) != count:
120 |             raise DNSError('incomplete reply')
121 |         self.time_finish = time.time()
122 |         self.args['server'] = self.ns
123 |         return self.processReply()
124 | 
125 |     def processReply(self):
126 |         import Lib
127 |         self.args['elapsed'] = (self.time_finish - self.time_start) * 1000
128 |         u = Lib.Munpacker(self.reply)
129 |         r = Lib.DnsResult(u, self.args)
130 |         r.args = self.args
131 |         # self.args=None  # mark this DnsRequest object as used.
132 |         return r
133 |         #### TODO TODO TODO ####
134 | #        if protocol == 'tcp' and qtype == Type.AXFR:
135 | #            while 1:
136 | #                header = f.read(2)
137 | #                if len(header) < 2:
138 | #                    print '========== EOF =========='
139 | #                    break
140 | #                count = Lib.unpack16bit(header)
141 | #                if not count:
142 | #                    print '========== ZERO COUNT =========='
143 | #                    break
144 | #                print '========== NEXT =========='
145 | #                reply = f.read(count)
146 | #                if len(reply) != count:
147 | #                    print '*** Incomplete reply ***'
148 | #                    break
149 | #                u = Lib.Munpacker(reply)
150 | #                Lib.dumpM(u)
151 | 
152 |     def conn(self):
153 |         self.s.connect((self.ns, self.port))
154 | 
155 |     def req(self, *name, **args):
156 |         " needs a refactoring "
157 |         import time
158 |         import Lib
159 |         self.argparse(name, args)
160 |         # if not self.args:
161 |         #    raise DNSError,'reinitialize request before reuse'
162 |         protocol = self.args['protocol']
163 |         self.port = self.args['port']
164 |         opcode = self.args['opcode']
165 |         rd = self.args['rd']
166 |         server = self.args['server']
167 |         if isinstance(self.args['qtype'], types.StringType):
168 |             try:
169 |                 qtype = getattr(Type, string.upper(self.args['qtype']))
170 |             except AttributeError:
171 |                 raise DNSError('unknown query type')
172 |         else:
173 |             qtype = self.args['qtype']
174 |         if 'name' not in self.args:
175 |             print self.args
176 |             raise DNSError('nothing to lookup')
177 |         qname = self.args['name']
178 |         if qtype == Type.AXFR:
179 |             print 'Query type AXFR, protocol forced to TCP'
180 |             protocol = 'tcp'
181 |         # print 'QTYPE %d(%s)' % (qtype, Type.typestr(qtype))
182 |         m = Lib.Mpacker()
183 |         # jesus. keywords and default args would be good. TODO.
184 |         m.addHeader(0,
185 |                     0, opcode, 0, 0, rd, 0, 0, 0,
186 |                     1, 0, 0, 0)
187 |         m.addQuestion(qname, qtype, Class.IN)
188 |         self.request = m.getbuf()
189 |         try:
190 |             if protocol == 'udp':
191 |                 self.sendUDPRequest(server)
192 |             else:
193 |                 self.sendTCPRequest(server)
194 |         except socket.error as reason:
195 |             raise DNSError(reason)
196 |         if self.async:
197 |             return None
198 |         else:
199 |             return self.response
200 | 
201 |     def sendUDPRequest(self, server):
202 |         "refactor me"
203 |         self.response = None
204 |         self.socketInit(socket.AF_INET, socket.SOCK_DGRAM)
205 |         for self.ns in server:
206 |             try:
207 |                 # TODO. Handle timeouts &c correctly (RFC)
208 |                 #self.s.connect((self.ns, self.port))
209 |                 self.conn()
210 |                 self.time_start = time.time()
211 |                 if not self.async:
212 |                     self.s.send(self.request)
213 |                     self.response = self.processUDPReply()
214 |             # except socket.error:
215 |             except None:
216 |                 continue
217 |             break
218 |         if not self.response:
219 |             if not self.async:
220 |                 raise DNSError('no working nameservers found')
221 | 
222 |     def sendTCPRequest(self, server):
223 |         " do the work of sending a TCP request "
224 |         import time
225 |         import Lib
226 |         self.response = None
227 |         for self.ns in server:
228 |             try:
229 |                 self.socketInit(socket.AF_INET, socket.SOCK_STREAM)
230 |                 self.time_start = time.time()
231 |                 self.conn()
232 |                 self.s.send(Lib.pack16bit(len(self.request)) + self.request)
233 |                 self.s.shutdown(1)
234 |                 self.response = self.processTCPReply()
235 |             except socket.error:
236 |                 continue
237 |             break
238 |         if not self.response:
239 |             raise DNSError('no working nameservers found')
240 | 
241 | # class DnsAsyncRequest(DnsRequest):
242 | 
243 | 
244 | class DnsAsyncRequest(DnsRequest, asyncore.dispatcher_with_send):
245 | 
246 |     " an asynchronous request object. out of date, probably broken "
247 | 
248 |     def __init__(self, *name, **args):
249 |         DnsRequest.__init__(self, *name, **args)
250 |         # XXX todo
251 |         if 'done' in args and args['done']:
252 |             self.donefunc = args['done']
253 |         else:
254 |             self.donefunc = self.showResult
255 |         # self.realinit(name,args) # XXX todo
256 |         self.async = 1
257 | 
258 |     def conn(self):
259 |         import time
260 |         self.connect((self.ns, self.port))
261 |         self.time_start = time.time()
262 |         if 'start' in self.args and self.args['start']:
263 |             asyncore.dispatcher.go(self)
264 | 
265 |     def socketInit(self, a, b):
266 |         self.create_socket(a, b)
267 |         asyncore.dispatcher.__init__(self)
268 |         self.s = self
269 | 
270 |     def handle_read(self):
271 |         if self.args['protocol'] == 'udp':
272 |             self.response = self.processUDPReply()
273 |             if self.donefunc:
274 |                 self.donefunc(*(self,))
275 | 
276 |     def handle_connect(self):
277 |         self.send(self.request)
278 | 
279 |     def handle_write(self):
280 |         pass
281 | 
282 |     def showResult(self, *s):
283 |         self.response.show()
284 | 
285 | #
286 | # $Log: Base.py,v $
287 | # Revision 1.12.2.4  2007/05/22 20:28:31  customdesigned
288 | # Missing import Lib
289 | #
290 | # Revision 1.12.2.3  2007/05/22 20:25:52  customdesigned
291 | # Use socket.inetntoa,inetaton.
292 | #
293 | # Revision 1.12.2.2  2007/05/22 20:21:46  customdesigned
294 | # Trap socket error
295 | #
296 | # Revision 1.12.2.1  2007/05/22 20:19:35  customdesigned
297 | # Skip bogus but non-empty lines in resolv.conf
298 | #
299 | # Revision 1.12  2002/04/23 06:04:27  anthonybaxter
300 | # attempt to refactor the DNSRequest.req method a little. after doing a bit
301 | # of this, I've decided to bite the bullet and just rewrite the puppy. will
302 | # be checkin in some design notes, then unit tests and then writing the sod.
303 | #
304 | # Revision 1.11  2002/03/19 13:05:02  anthonybaxter
305 | # converted to class based exceptions (there goes the python1.4 compatibility :)
306 | #
307 | # removed a quite gross use of 'eval()'.
308 | #
309 | # Revision 1.10  2002/03/19 12:41:33  anthonybaxter
310 | # tabnannied and reindented everything. 4 space indent, no tabs.
311 | # yay.
312 | #
313 | # Revision 1.9  2002/03/19 12:26:13  anthonybaxter
314 | # death to leading tabs.
315 | #
316 | # Revision 1.8  2002/03/19 10:30:33  anthonybaxter
317 | # first round of major bits and pieces. The major stuff here (summarised
318 | # from my local, off-net CVS server :/ this will cause some oddities with
319 | # the
320 | #
321 | # tests/testPackers.py:
322 | #   a large slab of unit tests for the packer and unpacker code in DNS.Lib
323 | #
324 | # DNS/Lib.py:
325 | #   placeholder for addSRV.
326 | #   added 'klass' to addA, make it the same as the other A* records.
327 | #   made addTXT check for being passed a string, turn it into a length 1 list.
328 | #   explicitly check for adding a string of length > 255 (prohibited).
329 | #   a bunch of cleanups from a first pass with pychecker
330 | #   new code for pack/unpack. the bitwise stuff uses struct, for a smallish
331 | #     (disappointly small, actually) improvement, while addr2bin is much
332 | #     much faster now.
333 | #
334 | # DNS/Base.py:
335 | #   added DiscoverNameServers. This automatically does the right thing
336 | #     on unix/ win32. No idea how MacOS handles this.  *sigh*
337 | #     Incompatible change: Don't use ParseResolvConf on non-unix, use this
338 | #     function, instead!
339 | #   a bunch of cleanups from a first pass with pychecker
340 | #
341 | # Revision 1.5  2001/08/09 09:22:28  anthonybaxter
342 | # added what I hope is win32 resolver lookup support. I'll need to try
343 | # and figure out how to get the CVS checkout onto my windows machine to
344 | # make sure it works (wow, doing something other than games on the
345 | # windows machine :)
346 | #
347 | # Code from Wolfgang.Strobl@gmd.de
348 | # win32dns.py from
349 | # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66260
350 | #
351 | # Really, ParseResolvConf() should be renamed "FindNameServers" or
352 | # some such.
353 | #
354 | # Revision 1.4  2001/08/09 09:08:55  anthonybaxter
355 | # added identifying header to top of each file
356 | #
357 | # Revision 1.3  2001/07/19 07:20:12  anthony
358 | # Handle blank resolv.conf lines.
359 | # Patch from Bastian Kleineidam
360 | #
361 | # Revision 1.2  2001/07/19 06:57:07  anthony
362 | # cvs keywords added
363 | #
364 | #
365 | 


--------------------------------------------------------------------------------
/lib/theHarvester/COPYING:
--------------------------------------------------------------------------------
  1 |                    GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
  5 |                        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Library General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 


--------------------------------------------------------------------------------
/lib/theHarvester/theHarvester.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import string
  4 | import httplib
  5 | import sys
  6 | import os
  7 | from socket import *
  8 | import re
  9 | import getopt
 10 | 
 11 | try:
 12 |     import requests
 13 | except:
 14 |     print "Request library not found, please install it before proceeding\n"
 15 |     sys.exit()
 16 | 
 17 | from discovery import *
 18 | from lib import htmlExport
 19 | from lib import hostchecker
 20 | 
 21 | print "\n*******************************************************************"
 22 | print "*                                                                 *"
 23 | print "* | |_| |__   ___    /\  /\__ _ _ ____   _____  ___| |_ ___ _ __  *"
 24 | print "* | __| '_ \ / _ \  / /_/ / _` | '__\ \ / / _ \/ __| __/ _ \ '__| *"
 25 | print "* | |_| | | |  __/ / __  / (_| | |   \ V /  __/\__ \ ||  __/ |    *"
 26 | print "*  \__|_| |_|\___| \/ /_/ \__,_|_|    \_/ \___||___/\__\___|_|    *"
 27 | print "*                                                                 *"
 28 | print "* TheHarvester Ver. 2.6                                           *"
 29 | print "* Coded by Christian Martorella                                   *"
 30 | print "* Edge-Security Research                                          *"
 31 | print "* cmartorella@edge-security.com                                   *"
 32 | print "*******************************************************************\n\n"
 33 | 
 34 | 
 35 | def usage():
 36 | 
 37 |     comm = os.path.basename(sys.argv[0])
 38 | 
 39 |     if os.path.dirname(sys.argv[0]) == os.getcwd():
 40 |         comm = "./" + comm
 41 | 
 42 |     print "Usage: theharvester options \n"
 43 |     print "       -d: Domain to search or company name"
 44 |     print """       -b: data source: google, googleCSE, bing, bingapi, pgp, linkedin,
 45 |                         google-profiles, jigsaw, twitter, googleplus, all\n"""
 46 |     print "       -s: Start in result number X (default: 0)"
 47 |     print "       -v: Verify host name via dns resolution and search for virtual hosts"
 48 |     print "       -f: Save the results into an HTML and XML file"
 49 |     print "       -n: Perform a DNS reverse query on all ranges discovered"
 50 |     print "       -c: Perform a DNS brute force for the domain name"
 51 |     print "       -t: Perform a DNS TLD expansion discovery"
 52 |     print "       -e: Use this DNS server"
 53 |     print "       -l: Limit the number of results to work with(bing goes from 50 to 50 results,"
 54 |     print "            google 100 to 100, and pgp doesn't use this option)"
 55 |     print "       -h: use SHODAN database to query discovered hosts"
 56 |     print "\nExamples:"
 57 |     print "        " + comm + " -d microsoft.com -l 500 -b google"
 58 |     print "        " + comm + " -d microsoft.com -b pgp"
 59 |     print "        " + comm + " -d microsoft -l 200 -b linkedin"
 60 |     print "        " + comm + " -d apple.com -b googleCSE -l 500 -s 300\n"
 61 | 
 62 | 
 63 | def start(argv):
 64 |     if len(sys.argv) < 4:
 65 |         usage()
 66 |         sys.exit()
 67 |     try:
 68 |         opts, args = getopt.getopt(argv, "l:d:b:s:vf:nhcte:")
 69 |     except getopt.GetoptError:
 70 |         usage()
 71 |         sys.exit()
 72 |     start = 0
 73 |     host_ip = []
 74 |     filename = ""
 75 |     bingapi = "yes"
 76 |     dnslookup = False
 77 |     dnsbrute = False
 78 |     dnstld = False
 79 |     shodan = False
 80 |     vhost = []
 81 |     virtual = False
 82 |     limit = 100
 83 |     dnsserver = ""
 84 |     for opt, arg in opts:
 85 |         if opt == '-l':
 86 |             limit = int(arg)
 87 |         elif opt == '-d':
 88 |             word = arg
 89 |         elif opt == '-s':
 90 |             start = int(arg)
 91 |         elif opt == '-v':
 92 |             virtual = "basic"
 93 |         elif opt == '-f':
 94 |             filename = arg
 95 |         elif opt == '-n':
 96 |             dnslookup = True
 97 |         elif opt == '-c':
 98 |             dnsbrute = True
 99 |         elif opt == '-h':
100 |             shodan = True
101 |         elif opt == '-e':
102 |             dnsserver = arg
103 |         elif opt == '-t':
104 |             dnstld = True
105 |         elif opt == '-b':
106 |             engine = arg
107 |             if engine not in ("google","googleCSE" , "linkedin", "pgp", "all", "google-profiles", "bing", "bingapi",
108 |                               "yandex", "jigsaw", "dogpilesearch", "twitter", "googleplus", "yahoo", "baidu"):
109 |                 usage()
110 |                 print "Invalid search engine, try with: bing, google, linkedin, pgp, jigsaw, bingapi, google-profiles, dogpilesearch, twitter, googleplus, yahoo, baidu"
111 |                 sys.exit()
112 |             else:
113 |                 pass
114 |     if engine == "google":
115 |         print "[-] Searching in Google:"
116 |         search = googlesearch.search_google(word, limit, start)
117 |         search.process()
118 |         all_emails = search.get_emails()
119 |         all_hosts = search.get_hostnames()
120 | 
121 |     if engine == "googleCSE":
122 |         print "[-] Searching in Google Custom Search:"
123 |         search = googleCSE.search_googleCSE(word, limit, start)
124 |         search.process()
125 |         search.store_results()
126 |         all_emails = search.get_emails()
127 |         all_hosts = search.get_hostnames()
128 | 
129 |     if engine == "exalead":
130 |         print "[-] Searching in Exalead:"
131 |         search = exaleadsearch.search_exalead(word, limit, start)
132 |         search.process()
133 |         all_emails = search.get_emails()
134 |         all_hosts = search.get_hostnames()
135 | 
136 |     elif engine == "bing" or engine == "bingapi":
137 |         print "[-] Searching in Bing:"
138 |         search = bingsearch.search_bing(word, limit, start)
139 |         if engine == "bingapi":
140 |             bingapi = "yes"
141 |         else:
142 |             bingapi = "no"
143 |         search.process(bingapi)
144 |         all_emails = search.get_emails()
145 |         all_hosts = search.get_hostnames()
146 | 
147 |     elif engine == "yandex":  # Not working yet
148 |         print "[-] Searching in Yandex:"
149 |         search = yandexsearch.search_yandex(word, limit, start)
150 |         search.process()
151 |         all_emails = search.get_emails()
152 |         all_hosts = search.get_hostnames()
153 | 
154 |     elif engine == "pgp":
155 |         print "[-] Searching in PGP key server.."
156 |         search = pgpsearch.search_pgp(word)
157 |         search.process()
158 |         all_emails = search.get_emails()
159 |         all_hosts = search.get_hostnames()
160 | 
161 |     elif engine == "jigsaw":
162 |         print "[-] Searching in Jigsaw.."
163 |         search = jigsaw.search_jigsaw(word, limit)
164 |         search.process()
165 |         people = search.get_people()
166 |         print "Users from Jigsaw:"
167 |         print "====================="
168 |         for user in people:
169 |             print user
170 |         sys.exit()
171 | 
172 |     elif engine == "dogpilesearch":
173 |         print "[-] Searching in Dogpilesearch.."
174 |         search = dogpilesearch.search_dogpile(word, limit)
175 |         search.process()
176 |         all_emails = search.get_emails()
177 |         all_hosts = search.get_hostnames()
178 | 
179 |     elif engine == "yahoo":
180 |         print "[-] Searching in Yahoo.."
181 |         search = yahoosearch.search_yahoo(word, limit)
182 |         search.process()
183 |         all_emails = search.get_emails()
184 |         all_hosts = search.get_hostnames()
185 | 
186 |     elif engine == "baidu":
187 |         print "[-] Searching in Baidu.."
188 |         search = baidusearch.search_baidu(word, limit)
189 |         search.process()
190 |         all_emails = search.get_emails()
191 |         all_hosts = search.get_hostnames()
192 | 
193 |     elif engine == "googleplus":
194 |         print "[-] Searching in Google+ .."
195 |         search = googleplussearch.search_googleplus(word, limit)
196 |         search.process()
197 |         people = search.get_people()
198 |         print "Users from Google+:"
199 |        	print "===================="
200 |        	for user in people:
201 |             print user
202 |         sys.exit()
203 | 
204 |     elif engine == "twitter":
205 |         print "[-] Searching in Twitter .."
206 |         search = twittersearch.search_twitter(word, limit)
207 |         search.process()
208 |         people = search.get_people()
209 |         print "Users from Twitter:"
210 |        	print "===================="
211 |        	for user in people:
212 |             print user
213 |         sys.exit()
214 | 
215 |     elif engine == "linkedin":
216 |         print "[-] Searching in Linkedin.."
217 |         search = linkedinsearch.search_linkedin(word, limit)
218 |         search.process()
219 |         people = search.get_people()
220 |         print "Users from Linkedin:"
221 |        	print "===================="
222 |        	for user in people:
223 |             print user
224 |         sys.exit()
225 |     elif engine == "google-profiles":
226 |         print "[-] Searching in Google profiles.."
227 |         search = googlesearch.search_google(word, limit, start)
228 |         search.process_profiles()
229 |         people = search.get_profiles()
230 |         print "Users from Google profiles:"
231 |         print "---------------------------"
232 |         for users in people:
233 |             print users
234 |         sys.exit()
235 |     elif engine == "all":
236 |         print "Full harvest.."
237 |         all_emails = []
238 |         all_hosts = []
239 |         virtual = "basic"
240 |         print "[-] Searching in Google.."
241 |         search = googlesearch.search_google(word, limit, start)
242 |         search.process()
243 |         emails = search.get_emails()
244 |         hosts = search.get_hostnames()
245 |         all_emails.extend(emails)
246 |         all_hosts.extend(hosts)
247 |         print "[-] Searching in PGP Key server.."
248 |         search = pgpsearch.search_pgp(word)
249 |         search.process()
250 |         emails = search.get_emails()
251 |         hosts = search.get_hostnames()
252 |         all_hosts.extend(hosts)
253 |         all_emails.extend(emails)
254 |         print "[-] Searching in Bing.."
255 |         bingapi = "no"
256 |         search = bingsearch.search_bing(word, limit, start)
257 |         search.process(bingapi)
258 |         emails = search.get_emails()
259 |         hosts = search.get_hostnames()
260 |         all_hosts.extend(hosts)
261 |         all_emails.extend(emails)
262 |         print "[-] Searching in Exalead.."
263 |         search = exaleadsearch.search_exalead(word, limit, start)
264 |         search.process()
265 |         emails = search.get_emails()
266 |         hosts = search.get_hostnames()
267 |         all_hosts.extend(hosts)
268 |         all_emails.extend(emails)
269 |     #Results############################################################
270 |     print "\n\n[+] Emails found:"
271 |     print "------------------"
272 |     if all_emails == []:
273 |         print "No emails found"
274 |     else:
275 |         for emails in all_emails:
276 |             print emails
277 | 
278 |     print "\n[+] Hosts found in search engines:"
279 |     print "------------------------------------"
280 |     if all_hosts == []:
281 |         print "No hosts found"
282 |     else:
283 |         print "[-] Resolving hostnames IPs... "
284 |         full_host = hostchecker.Checker(all_hosts)
285 |         full = full_host.check()
286 |         for host in full:
287 |             ip = host.split(':')[0]
288 |             print host
289 |             if host_ip.count(ip.lower()):
290 |                 pass
291 |             else:
292 |                 host_ip.append(ip.lower())
293 | 
294 |     #DNS reverse lookup#################################################
295 |     dnsrev = []
296 |     if dnslookup == True:
297 |         print "\n[+] Starting active queries:"
298 |         analyzed_ranges = []
299 |         for x in full:
300 |             ip = x.split(":")[0]
301 |             range = ip.split(".")
302 |             range[3] = "0/24"
303 |             range = string.join(range, '.')
304 |             if not analyzed_ranges.count(range):
305 |                 print "[-]Performing reverse lookup in :" + range
306 |                 a = dnssearch.dns_reverse(range, True)
307 |                 a.list()
308 |                 res = a.process()
309 |                 analyzed_ranges.append(range)
310 |             else:
311 |                 continue
312 |             for x in res:
313 |                 if x.count(word):
314 |                     dnsrev.append(x)
315 |                     if x not in full:
316 |                         full.append(x)
317 |         print "Hosts found after reverse lookup:"
318 |         print "---------------------------------"
319 |         for xh in dnsrev:
320 |             print xh
321 |     #DNS Brute force####################################################
322 |     dnsres = []
323 |     if dnsbrute == True:
324 |         print "\n[-] Starting DNS brute force:"
325 |         a = dnssearch.dns_force(word, dnsserver, verbose=True)
326 |         res = a.process()
327 |         print "\n[+] Hosts found after DNS brute force:\n"
328 |         for y in res:
329 |             print y
330 |             dnsres.append(y)
331 |             if y not in full:
332 |                 full.append(y)
333 |     #DNS TLD expansion###################################################
334 |     dnstldres = []
335 |     if dnstld == True:
336 |         print "[-] Starting DNS TLD expansion:"
337 |         a = dnssearch.dns_tld(word, dnsserver, verbose=True)
338 |         res = a.process()
339 |         print "\n[+] Hosts found after DNS TLD expansion:"
340 |         print "=========================================="
341 |         for y in res:
342 |             print y
343 |             dnstldres.append(y)
344 |             if y not in full:
345 |                 full.append(y)
346 | 
347 |     #Virtual hosts search###############################################
348 |     if virtual == "basic":
349 |         print "[+] Virtual hosts:"
350 |         print "=================="
351 |         for l in host_ip:
352 |             search = bingsearch.search_bing(l, limit, start)
353 |             search.process_vhost()
354 |             res = search.get_allhostnames()
355 |             for x in res:
356 |                 x = re.sub(r'[[\<\/?]*[\w]*>]*','',x)
357 |                 x = re.sub('<','',x)
358 |                 x = re.sub('>','',x)
359 |                 print l + "\t" + x
360 |                 vhost.append(l + ":" + x)
361 |                 full.append(l + ":" + x)
362 |     else:
363 |         pass
364 |     shodanres = []
365 |     shodanvisited = []
366 |     if shodan == True:
367 |         print "[+] Shodan Database search:"
368 |         for x in full:
369 |             print x
370 |             try:
371 |                 ip = x.split(":")[0]
372 |                 if not shodanvisited.count(ip):
373 |                     print "\tSearching for: " + x
374 |                     a = shodansearch.search_shodan(ip)
375 |                     shodanvisited.append(ip)
376 |                     results = a.run()
377 |                     for res in results:
378 |                         shodanres.append(
379 |                             x + "SAPO" + str(res['banner']) + "SAPO" + str(res['port']))
380 |             except:
381 |                 pass
382 |         print "[+] Shodan results:"
383 |         print "==================="
384 |         for x in shodanres:
385 |             print x.split("SAPO")[0] + ":" + x.split("SAPO")[1]
386 |     else:
387 |         pass
388 | 
389 |     ###################################################################
390 |     # Here i need to add explosion mode.
391 |     # Tengo que sacar los TLD para hacer esto.
392 |     recursion = None
393 |     if recursion:
394 |         start = 0
395 |         for word in vhost:
396 |             search = googlesearch.search_google(word, limit, start)
397 |             search.process()
398 |             emails = search.get_emails()
399 |             hosts = search.get_hostnames()
400 |             print emails
401 |             print hosts
402 |     else:
403 |         pass
404 | 
405 |     if filename != "":
406 |         try:
407 |             print "[+] Saving files..."
408 |             html = htmlExport.htmlExport(
409 |                 all_emails,
410 |                 full,
411 |                 vhost,
412 |                 dnsres,
413 |                 dnsrev,
414 |                 filename,
415 |                 word,
416 |                 shodanres,
417 |                 dnstldres)
418 |             save = html.writehtml()
419 |         except Exception as e:
420 |             print e
421 |             print "Error creating the file"
422 |         try:
423 |             filename = filename.split(".")[0] + ".xml"
424 |             file = open(filename, 'w')
425 |             file.write('<?xml version="1.0" encoding="UTF-8"?><theHarvester>')
426 |             for x in all_emails:
427 |                 file.write('<email>' + x + '</email>')
428 |             for x in all_hosts:
429 |                 file.write('<host>' + x + '</host>')
430 |             for x in vhost:
431 |                 file.write('<vhost>' + x + '</vhost>')
432 |             file.write('</theHarvester>')
433 |             file.flush()
434 |             file.close()
435 |             print "Files saved!"
436 |         except Exception as er:
437 |             print "Error saving XML file: " + er
438 |         sys.exit()
439 | 
440 | if __name__ == "__main__":
441 |     try:
442 |         start(sys.argv[1:])
443 |     except KeyboardInterrupt:
444 |         print "Search interrupted by user.."
445 |     except:
446 |         sys.exit()
447 | 


--------------------------------------------------------------------------------
/lib/markup.py:
--------------------------------------------------------------------------------
  1 | # This code is in the public domain, it comes
  2 | # with absolutely no warranty and you can do
  3 | # absolutely whatever you want with it.
  4 | 
  5 | __date__ = '17 May 2007'
  6 | __version__ = '1.7'
  7 | __doc__ = """
  8 | This is markup.py - a Python module that attempts to
  9 | make it easier to generate HTML/XML from a Python program
 10 | in an intuitive, lightweight, customizable and pythonic way.
 11 | 
 12 | The code is in the public domain.
 13 | 
 14 | Version: %s as of %s.
 15 | 
 16 | Documentation and further info is at http://markup.sourceforge.net/
 17 | 
 18 | Please send bug reports, feature requests, enhancement
 19 | ideas or questions to nogradi at gmail dot com.
 20 | 
 21 | Installation: drop markup.py somewhere into your Python path.
 22 | """ % ( __version__, __date__ )
 23 | 
 24 | import string
 25 | 
 26 | 
 27 | class element:
 28 | 
 29 |     """This class handles the addition of a new element."""
 30 | 
 31 |     def __init__(self, tag, case='lower', parent=None):
 32 |         self.parent = parent
 33 | 
 34 |         if case == 'lower':
 35 |             self.tag = tag.lower()
 36 |         else:
 37 |             self.tag = tag.upper()
 38 | 
 39 |     def __call__(self, *args, **kwargs):
 40 |         if len(args) > 1:
 41 |             raise ArgumentError(self.tag)
 42 | 
 43 |         # if class_ was defined in parent it should be added to every element
 44 |         if self.parent is not None and self.parent.class_ is not None:
 45 |             if 'class_' not in kwargs:
 46 |                 kwargs['class_'] = self.parent.class_
 47 | 
 48 |         if self.parent is None and len(args) == 1:
 49 |             x = [self.render(self.tag, False, myarg, mydict)
 50 |                  for myarg, mydict in _argsdicts(args, kwargs)]
 51 |             return '\n'.join(x)
 52 |         elif self.parent is None and len(args) == 0:
 53 |             x = [self.render(self.tag, True, myarg, mydict)
 54 |                  for myarg, mydict in _argsdicts(args, kwargs)]
 55 |             return '\n'.join(x)
 56 | 
 57 |         if self.tag in self.parent.twotags:
 58 |             for myarg, mydict in _argsdicts(args, kwargs):
 59 |                 self.render(self.tag, False, myarg, mydict)
 60 |         elif self.tag in self.parent.onetags:
 61 |             if len(args) == 0:
 62 |                 for myarg, mydict in _argsdicts(args, kwargs):
 63 |                     # here myarg is always None, because len( args ) = 0
 64 |                     self.render(self.tag, True, myarg, mydict)
 65 |             else:
 66 |                 raise ClosingError(self.tag)
 67 |         elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
 68 |             raise DeprecationError(self.tag)
 69 |         else:
 70 |             raise InvalidElementError(self.tag, self.parent.mode)
 71 | 
 72 |     def render(self, tag, single, between, kwargs):
 73 |         """Append the actual tags to content."""
 74 | 
 75 |         out = "<%s" % tag
 76 |         for key, value in kwargs.iteritems():
 77 |             # when value is None that means stuff like <... checked>
 78 |             if value is not None:
 79 |                 # strip this so class_ will mean class, etc.
 80 |                 key = key.strip('_')
 81 |                 # special cases, maybe change _ to - overall?
 82 |                 if key == 'http_equiv':
 83 |                     key = 'http-equiv'
 84 |                 elif key == 'accept_charset':
 85 |                     key = 'accept-charset'
 86 |                 out = "%s %s=\"%s\"" % (out, key, escape(value))
 87 |             else:
 88 |                 out = "%s %s" % (out, key)
 89 |         if between is not None:
 90 |             out = "%s>%s</%s>" % (out, between, tag)
 91 |         else:
 92 |             if single:
 93 |                 out = "%s />" % out
 94 |             else:
 95 |                 out = "%s>" % out
 96 |         if self.parent is not None:
 97 |             self.parent.content.append(out)
 98 |         else:
 99 |             return out
100 | 
101 |     def close(self):
102 |         """Append a closing tag unless element has only opening tag."""
103 | 
104 |         if self.tag in self.parent.twotags:
105 |             self.parent.content.append("</%s>" % self.tag)
106 |         elif self.tag in self.parent.onetags:
107 |             raise ClosingError(self.tag)
108 |         elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
109 |             raise DeprecationError(self.tag)
110 | 
111 |     def open(self, **kwargs):
112 |         """Append an opening tag."""
113 | 
114 |         if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
115 |             self.render(self.tag, False, None, kwargs)
116 |         elif self.mode == 'strict_html' and self.tag in self.parent.deptags:
117 |             raise DeprecationError(self.tag)
118 | 
119 | 
120 | class page:
121 | 
122 |     """This is our main class representing a document. Elements are added
123 |     as attributes of an instance of this class."""
124 | 
125 |     def __init__(self, mode='strict_html', case='lower',
126 |                  onetags=None, twotags=None, separator='\n', class_=None):
127 |         """Stuff that effects the whole document.
128 | 
129 |         mode -- 'strict_html'   for HTML 4.01 (default)
130 |                 'html'          alias for 'strict_html'
131 |                 'loose_html'    to allow some deprecated elements
132 |                 'xml'           to allow arbitrary elements
133 | 
134 |         case -- 'lower'         element names will be printed in lower case (default)
135 |                 'upper'         they will be printed in upper case
136 | 
137 |         onetags --              list or tuple of valid elements with opening tags only
138 |         twotags --              list or tuple of valid elements with both opening and closing tags
139 |                                 these two keyword arguments may be used to select
140 |                                 the set of valid elements in 'xml' mode
141 |                                 invalid elements will raise appropriate exceptions
142 | 
143 |         separator --            string to place between added elements, defaults to newline
144 | 
145 |         class_ --               a class that will be added to every element if defined"""
146 | 
147 |         valid_onetags = [
148 |             "AREA",
149 |             "BASE",
150 |             "BR",
151 |             "COL",
152 |             "FRAME",
153 |             "HR",
154 |             "IMG",
155 |             "INPUT",
156 |             "LINK",
157 |             "META",
158 |             "PARAM"]
159 |         valid_twotags = [
160 |             "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
161 |             "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
162 |             "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
163 |             "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
164 |             "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
165 |             "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
166 |             "TT", "UL", "VAR"]
167 |         deprecated_onetags = ["BASEFONT", "ISINDEX"]
168 |         deprecated_twotags = [
169 |             "APPLET",
170 |             "CENTER",
171 |             "DIR",
172 |             "FONT",
173 |             "MENU",
174 |             "S",
175 |             "STRIKE",
176 |             "U"]
177 | 
178 |         self.header = []
179 |         self.content = []
180 |         self.footer = []
181 |         self.case = case
182 |         self.separator = separator
183 | 
184 |         # init( ) sets it to True so we know that </body></html> has to be
185 |         # printed at the end
186 |         self._full = False
187 |         self.class_ = class_
188 | 
189 |         if mode == 'strict_html' or mode == 'html':
190 |             self.onetags = valid_onetags
191 |             self.onetags += map(string.lower, self.onetags)
192 |             self.twotags = valid_twotags
193 |             self.twotags += map(string.lower, self.twotags)
194 |             self.deptags = deprecated_onetags + deprecated_twotags
195 |             self.deptags += map(string.lower, self.deptags)
196 |             self.mode = 'strict_html'
197 |         elif mode == 'loose_html':
198 |             self.onetags = valid_onetags + deprecated_onetags
199 |             self.onetags += map(string.lower, self.onetags)
200 |             self.twotags = valid_twotags + deprecated_twotags
201 |             self.twotags += map(string.lower, self.twotags)
202 |             self.mode = mode
203 |         elif mode == 'xml':
204 |             if onetags and twotags:
205 |                 self.onetags = onetags
206 |                 self.twotags = twotags
207 |             elif (onetags and not twotags) or (twotags and not onetags):
208 |                 raise CustomizationError()
209 |             else:
210 |                 self.onetags = russell()
211 |                 self.twotags = russell()
212 |             self.mode = mode
213 |         else:
214 |             raise ModeError(mode)
215 | 
216 |     def __getattr__(self, attr):
217 |         if attr.startswith("__") and attr.endswith("__"):
218 |             raise AttributeError(attr)
219 |         return element(attr, case=self.case, parent=self)
220 | 
221 |     def __str__(self):
222 | 
223 |         if self._full and (self.mode == 'strict_html' or self.mode == 'loose_html'):
224 |             end = ['</body>', '</html>']
225 |         else:
226 |             end = []
227 | 
228 |         return (
229 |             self.separator.join(
230 |                 self.header +
231 |                 self.content +
232 |                 self.footer +
233 |                 end)
234 |         )
235 | 
236 |     def __call__(self, escape=False):
237 |         """Return the document as a string.
238 | 
239 |         escape --   False   print normally
240 |                     True    replace < and > by &lt; and &gt;
241 |                             the default escape sequences in most browsers"""
242 | 
243 |         if escape:
244 |             return _escape(self.__str__())
245 |         else:
246 |             return self.__str__()
247 | 
248 |     def add(self, text):
249 |         """This is an alias to addcontent."""
250 |         self.addcontent(text)
251 | 
252 |     def addfooter(self, text):
253 |         """Add some text to the bottom of the document"""
254 |         self.footer.append(text)
255 | 
256 |     def addheader(self, text):
257 |         """Add some text to the top of the document"""
258 |         self.header.append(text)
259 | 
260 |     def addcontent(self, text):
261 |         """Add some text to the main part of the document"""
262 |         self.content.append(text)
263 | 
264 |     def init(self, lang='en', css=None, metainfo=None, title=None, header=None,
265 |              footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None):
266 |         """This method is used for complete documents with appropriate
267 |         doctype, encoding, title, etc information. For an HTML/XML snippet
268 |         omit this method.
269 | 
270 |         lang --     language, usually a two character string, will appear
271 |                     as <html lang='en'> in html mode (ignored in xml mode)
272 | 
273 |         css --      Cascading Style Sheet filename as a string or a list of
274 |                     strings for multiple css files (ignored in xml mode)
275 | 
276 |         metainfo -- a dictionary in the form { 'name':'content' } to be inserted
277 |                     into meta element(s) as <meta name='name' content='content'>
278 |                     (ignored in xml mode)
279 | 
280 |         bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
281 |                     as attributes of the <body> element as <body key='value' ... >
282 |                     (ignored in xml mode)
283 | 
284 |         script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
285 | 
286 |         title --    the title of the document as a string to be inserted into
287 |                     a title element as <title>my title</title> (ignored in xml mode)
288 | 
289 |         header --   some text to be inserted right after the <body> element
290 |                     (ignored in xml mode)
291 | 
292 |         footer --   some text to be inserted right before the </body> element
293 |                     (ignored in xml mode)
294 | 
295 |         charset --  a string defining the character set, will be inserted into a
296 |                     <meta http-equiv='Content-Type' content='text/html; charset=myset'>
297 |                     element (ignored in xml mode)
298 | 
299 |         encoding -- a string defining the encoding, will be put into to first line of
300 |                     the document as <?xml version='1.0' encoding='myencoding' ?> in
301 |                     xml mode (ignored in html mode)
302 | 
303 |         doctype --  the document type string, defaults to
304 |                     <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
305 |                     in html mode (ignored in xml mode)"""
306 | 
307 |         self._full = True
308 | 
309 |         if self.mode == 'strict_html' or self.mode == 'loose_html':
310 |             if doctype is None:
311 |                 doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
312 |             self.header.append(doctype)
313 |             self.html(lang=lang)
314 |             self.head()
315 |             if charset is not None:
316 |                 self.meta(
317 |                     http_equiv='Content-Type',
318 |                     content="text/html; charset=%s" %
319 |                     charset)
320 |             if metainfo is not None:
321 |                 self.metainfo(metainfo)
322 |             if css is not None:
323 |                 self.css(css)
324 |             if title is not None:
325 |                 self.title(title)
326 |             if script is not None:
327 |                 self.scripts(script)
328 |             self.head.close()
329 |             if bodyattrs is not None:
330 |                 self.body(**bodyattrs)
331 |             else:
332 |                 self.body()
333 |             if header is not None:
334 |                 self.content.append(header)
335 |             if footer is not None:
336 |                 self.footer.append(footer)
337 | 
338 |         elif self.mode == 'xml':
339 |             if doctype is None:
340 |                 if encoding is not None:
341 |                     doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
342 |                 else:
343 |                     doctype = "<?xml version='1.0' ?>"
344 |             self.header.append(doctype)
345 | 
346 |     def css(self, filelist):
347 |         """This convenience function is only useful for html.
348 |         It adds css stylesheet(s) to the document via the <link> element."""
349 | 
350 |         if isinstance(filelist, basestring):
351 |             self.link(
352 |                 href=filelist,
353 |                 rel='stylesheet',
354 |                 type='text/css',
355 |                 media='all')
356 |         else:
357 |             for file in filelist:
358 |                 self.link(
359 |                     href=file,
360 |                     rel='stylesheet',
361 |                     type='text/css',
362 |                     media='all')
363 | 
364 |     def metainfo(self, mydict):
365 |         """This convenience function is only useful for html.
366 |         It adds meta information via the <meta> element, the argument is
367 |         a dictionary of the form { 'name':'content' }."""
368 | 
369 |         if isinstance(mydict, dict):
370 |             for name, content in mydict.iteritems():
371 |                 self.meta(name=name, content=content)
372 |         else:
373 |             raise TypeError(
374 |                 "Metainfo should be called with a dictionary argument of name:content pairs.")
375 | 
376 |     def scripts(self, mydict):
377 |         """Only useful in html, mydict is dictionary of src:type pairs will
378 |         be rendered as <script type='text/type' src=src></script>"""
379 | 
380 |         if isinstance(mydict, dict):
381 |             for src, type in mydict.iteritems():
382 |                 self.script('', src=src, type='text/%s' % type)
383 |         else:
384 |             raise TypeError(
385 |                 "Script should be given a dictionary of src:type pairs.")
386 | 
387 | 
388 | class _oneliner:
389 | 
390 |     """An instance of oneliner returns a string corresponding to one element.
391 |     This class can be used to write 'oneliners' that return a string
392 |     immediately so there is no need to instantiate the page class."""
393 | 
394 |     def __init__(self, case='lower'):
395 |         self.case = case
396 | 
397 |     def __getattr__(self, attr):
398 |         if attr.startswith("__") and attr.endswith("__"):
399 |             raise AttributeError(attr)
400 |         return element(attr, case=self.case, parent=None)
401 | 
402 | oneliner = _oneliner(case='lower')
403 | upper_oneliner = _oneliner(case='upper')
404 | 
405 | 
406 | def _argsdicts(args, mydict):
407 |     """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1."""
408 | 
409 |     if len(args) == 0:
410 |         args = None,
411 |     elif len(args) == 1:
412 |         args = _totuple(args[0])
413 |     else:
414 |         raise Exception("We should have never gotten here.")
415 | 
416 |     mykeys = mydict.keys()
417 |     myvalues = map(_totuple, mydict.values())
418 | 
419 |     maxlength = max(map(len, [args] + myvalues))
420 | 
421 |     for i in xrange(maxlength):
422 |         thisdict = {}
423 |         for key, value in zip(mykeys, myvalues):
424 |             try:
425 |                 thisdict[key] = value[i]
426 |             except IndexError:
427 |                 thisdict[key] = value[-1]
428 |         try:
429 |             thisarg = args[i]
430 |         except IndexError:
431 |             thisarg = args[-1]
432 | 
433 |         yield thisarg, thisdict
434 | 
435 | 
436 | def _totuple(x):
437 |     """Utility stuff to convert string, int, float, None or anything to a usable tuple."""
438 | 
439 |     if isinstance(x, basestring):
440 |         out = x,
441 |     elif isinstance(x, (int, float)):
442 |         out = str(x),
443 |     elif x is None:
444 |         out = None,
445 |     else:
446 |         out = tuple(x)
447 | 
448 |     return out
449 | 
450 | 
451 | def escape(text, newline=False):
452 |     """Escape special html characters."""
453 | 
454 |     if isinstance(text, basestring):
455 |         if '&' in text:
456 |             text = text.replace('&', '&amp;')
457 |         if '>' in text:
458 |             text = text.replace('>', '&gt;')
459 |         if '<' in text:
460 |             text = text.replace('<', '&lt;')
461 |         if '\"' in text:
462 |             text = text.replace('\"', '&quot;')
463 |         if '\'' in text:
464 |             text = text.replace('\'', '&quot;')
465 |         if newline:
466 |             if '\n' in text:
467 |                 text = text.replace('\n', '<br>')
468 | 
469 |     return text
470 | 
471 | _escape = escape
472 | 
473 | 
474 | def unescape(text):
475 |     """Inverse of escape."""
476 | 
477 |     if isinstance(text, basestring):
478 |         if '&amp;' in text:
479 |             text = text.replace('&amp;', '&')
480 |         if '&gt;' in text:
481 |             text = text.replace('&gt;', '>')
482 |         if '&lt;' in text:
483 |             text = text.replace('&lt;', '<')
484 |         if '&quot;' in text:
485 |             text = text.replace('&quot;', '\"')
486 | 
487 |     return text
488 | 
489 | 
490 | class dummy:
491 | 
492 |     """A dummy class for attaching attributes."""
493 |     pass
494 | 
495 | doctype = dummy()
496 | doctype.frameset = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Frameset//EN' 'http://www.w3.org/TR/html4/frameset.dtd'>"
497 | doctype.strict = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' 'http://www.w3.org/TR/html4/strict.dtd'>"
498 | doctype.loose = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>"
499 | 
500 | 
501 | class russell:
502 | 
503 |     """A dummy class that contains anything."""
504 | 
505 |     def __contains__(self, item):
506 |         return True
507 | 
508 | 
509 | class MarkupError(Exception):
510 | 
511 |     """All our exceptions subclass this."""
512 | 
513 |     def __str__(self):
514 |         return self.message
515 | 
516 | 
517 | class ClosingError(MarkupError):
518 | 
519 |     def __init__(self, tag):
520 |         self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag
521 | 
522 | 
523 | class OpeningError(MarkupError):
524 | 
525 |     def __init__(self, tag):
526 |         self.message = "The element '%s' can not be opened." % tag
527 | 
528 | 
529 | class ArgumentError(MarkupError):
530 | 
531 |     def __init__(self, tag):
532 |         self.message = "The element '%s' was called with more than one non-keyword argument." % tag
533 | 
534 | 
535 | class InvalidElementError(MarkupError):
536 | 
537 |     def __init__(self, tag, mode):
538 |         self.message = "The element '%s' is not valid for your mode '%s'." % (
539 |             tag,
540 |             mode)
541 | 
542 | 
543 | class DeprecationError(MarkupError):
544 | 
545 |     def __init__(self, tag):
546 |         self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag
547 | 
548 | 
549 | class ModeError(MarkupError):
550 | 
551 |     def __init__(self, mode):
552 |         self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode
553 | 
554 | 
555 | class CustomizationError(MarkupError):
556 | 
557 |     def __init__(self):
558 |         self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
559 | 
560 | if __name__ == '__main__':
561 |     print __doc__
562 | 


--------------------------------------------------------------------------------
/lib/theHarvester/lib/markup.py:
--------------------------------------------------------------------------------
  1 | # This code is in the public domain, it comes
  2 | # with absolutely no warranty and you can do
  3 | # absolutely whatever you want with it.
  4 | 
  5 | __date__ = '17 May 2007'
  6 | __version__ = '1.7'
  7 | __doc__ = """
  8 | This is markup.py - a Python module that attempts to
  9 | make it easier to generate HTML/XML from a Python program
 10 | in an intuitive, lightweight, customizable and pythonic way.
 11 | 
 12 | The code is in the public domain.
 13 | 
 14 | Version: %s as of %s.
 15 | 
 16 | Documentation and further info is at http://markup.sourceforge.net/
 17 | 
 18 | Please send bug reports, feature requests, enhancement
 19 | ideas or questions to nogradi at gmail dot com.
 20 | 
 21 | Installation: drop markup.py somewhere into your Python path.
 22 | """ % ( __version__, __date__ )
 23 | 
 24 | import string
 25 | 
 26 | 
 27 | class element:
 28 | 
 29 |     """This class handles the addition of a new element."""
 30 | 
 31 |     def __init__(self, tag, case='lower', parent=None):
 32 |         self.parent = parent
 33 | 
 34 |         if case == 'lower':
 35 |             self.tag = tag.lower()
 36 |         else:
 37 |             self.tag = tag.upper()
 38 | 
 39 |     def __call__(self, *args, **kwargs):
 40 |         if len(args) > 1:
 41 |             raise ArgumentError(self.tag)
 42 | 
 43 |         # if class_ was defined in parent it should be added to every element
 44 |         if self.parent is not None and self.parent.class_ is not None:
 45 |             if 'class_' not in kwargs:
 46 |                 kwargs['class_'] = self.parent.class_
 47 | 
 48 |         if self.parent is None and len(args) == 1:
 49 |             x = [self.render(self.tag, False, myarg, mydict)
 50 |                  for myarg, mydict in _argsdicts(args, kwargs)]
 51 |             return '\n'.join(x)
 52 |         elif self.parent is None and len(args) == 0:
 53 |             x = [self.render(self.tag, True, myarg, mydict)
 54 |                  for myarg, mydict in _argsdicts(args, kwargs)]
 55 |             return '\n'.join(x)
 56 | 
 57 |         if self.tag in self.parent.twotags:
 58 |             for myarg, mydict in _argsdicts(args, kwargs):
 59 |                 self.render(self.tag, False, myarg, mydict)
 60 |         elif self.tag in self.parent.onetags:
 61 |             if len(args) == 0:
 62 |                 for myarg, mydict in _argsdicts(args, kwargs):
 63 |                     # here myarg is always None, because len( args ) = 0
 64 |                     self.render(self.tag, True, myarg, mydict)
 65 |             else:
 66 |                 raise ClosingError(self.tag)
 67 |         elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
 68 |             raise DeprecationError(self.tag)
 69 |         else:
 70 |             raise InvalidElementError(self.tag, self.parent.mode)
 71 | 
 72 |     def render(self, tag, single, between, kwargs):
 73 |         """Append the actual tags to content."""
 74 | 
 75 |         out = "<%s" % tag
 76 |         for key, value in kwargs.iteritems():
 77 |             # when value is None that means stuff like <... checked>
 78 |             if value is not None:
 79 |                 # strip this so class_ will mean class, etc.
 80 |                 key = key.strip('_')
 81 |                 # special cases, maybe change _ to - overall?
 82 |                 if key == 'http_equiv':
 83 |                     key = 'http-equiv'
 84 |                 elif key == 'accept_charset':
 85 |                     key = 'accept-charset'
 86 |                 out = "%s %s=\"%s\"" % (out, key, escape(value))
 87 |             else:
 88 |                 out = "%s %s" % (out, key)
 89 |         if between is not None:
 90 |             out = "%s>%s</%s>" % (out, between, tag)
 91 |         else:
 92 |             if single:
 93 |                 out = "%s />" % out
 94 |             else:
 95 |                 out = "%s>" % out
 96 |         if self.parent is not None:
 97 |             self.parent.content.append(out)
 98 |         else:
 99 |             return out
100 | 
101 |     def close(self):
102 |         """Append a closing tag unless element has only opening tag."""
103 | 
104 |         if self.tag in self.parent.twotags:
105 |             self.parent.content.append("</%s>" % self.tag)
106 |         elif self.tag in self.parent.onetags:
107 |             raise ClosingError(self.tag)
108 |         elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
109 |             raise DeprecationError(self.tag)
110 | 
111 |     def open(self, **kwargs):
112 |         """Append an opening tag."""
113 | 
114 |         if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
115 |             self.render(self.tag, False, None, kwargs)
116 |         elif self.mode == 'strict_html' and self.tag in self.parent.deptags:
117 |             raise DeprecationError(self.tag)
118 | 
119 | 
120 | class page:
121 | 
122 |     """This is our main class representing a document. Elements are added
123 |     as attributes of an instance of this class."""
124 | 
125 |     def __init__(self, mode='strict_html', case='lower',
126 |                  onetags=None, twotags=None, separator='\n', class_=None):
127 |         """Stuff that effects the whole document.
128 | 
129 |         mode -- 'strict_html'   for HTML 4.01 (default)
130 |                 'html'          alias for 'strict_html'
131 |                 'loose_html'    to allow some deprecated elements
132 |                 'xml'           to allow arbitrary elements
133 | 
134 |         case -- 'lower'         element names will be printed in lower case (default)
135 |                 'upper'         they will be printed in upper case
136 | 
137 |         onetags --              list or tuple of valid elements with opening tags only
138 |         twotags --              list or tuple of valid elements with both opening and closing tags
139 |                                 these two keyword arguments may be used to select
140 |                                 the set of valid elements in 'xml' mode
141 |                                 invalid elements will raise appropriate exceptions
142 | 
143 |         separator --            string to place between added elements, defaults to newline
144 | 
145 |         class_ --               a class that will be added to every element if defined"""
146 | 
147 |         valid_onetags = [
148 |             "AREA",
149 |             "BASE",
150 |             "BR",
151 |             "COL",
152 |             "FRAME",
153 |             "HR",
154 |             "IMG",
155 |             "INPUT",
156 |             "LINK",
157 |             "META",
158 |             "PARAM"]
159 |         valid_twotags = [
160 |             "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
161 |             "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
162 |             "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
163 |             "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
164 |             "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
165 |             "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
166 |             "TT", "UL", "VAR"]
167 |         deprecated_onetags = ["BASEFONT", "ISINDEX"]
168 |         deprecated_twotags = [
169 |             "APPLET",
170 |             "CENTER",
171 |             "DIR",
172 |             "FONT",
173 |             "MENU",
174 |             "S",
175 |             "STRIKE",
176 |             "U"]
177 | 
178 |         self.header = []
179 |         self.content = []
180 |         self.footer = []
181 |         self.case = case
182 |         self.separator = separator
183 | 
184 |         # init( ) sets it to True so we know that </body></html> has to be
185 |         # printed at the end
186 |         self._full = False
187 |         self.class_ = class_
188 | 
189 |         if mode == 'strict_html' or mode == 'html':
190 |             self.onetags = valid_onetags
191 |             self.onetags += map(string.lower, self.onetags)
192 |             self.twotags = valid_twotags
193 |             self.twotags += map(string.lower, self.twotags)
194 |             self.deptags = deprecated_onetags + deprecated_twotags
195 |             self.deptags += map(string.lower, self.deptags)
196 |             self.mode = 'strict_html'
197 |         elif mode == 'loose_html':
198 |             self.onetags = valid_onetags + deprecated_onetags
199 |             self.onetags += map(string.lower, self.onetags)
200 |             self.twotags = valid_twotags + deprecated_twotags
201 |             self.twotags += map(string.lower, self.twotags)
202 |             self.mode = mode
203 |         elif mode == 'xml':
204 |             if onetags and twotags:
205 |                 self.onetags = onetags
206 |                 self.twotags = twotags
207 |             elif (onetags and not twotags) or (twotags and not onetags):
208 |                 raise CustomizationError()
209 |             else:
210 |                 self.onetags = russell()
211 |                 self.twotags = russell()
212 |             self.mode = mode
213 |         else:
214 |             raise ModeError(mode)
215 | 
216 |     def __getattr__(self, attr):
217 |         if attr.startswith("__") and attr.endswith("__"):
218 |             raise AttributeError(attr)
219 |         return element(attr, case=self.case, parent=self)
220 | 
221 |     def __str__(self):
222 | 
223 |         if self._full and (self.mode == 'strict_html' or self.mode == 'loose_html'):
224 |             end = ['</body>', '</html>']
225 |         else:
226 |             end = []
227 | 
228 |         return (
229 |             self.separator.join(
230 |                 self.header +
231 |                 self.content +
232 |                 self.footer +
233 |                 end)
234 |         )
235 | 
236 |     def __call__(self, escape=False):
237 |         """Return the document as a string.
238 | 
239 |         escape --   False   print normally
240 |                     True    replace < and > by &lt; and &gt;
241 |                             the default escape sequences in most browsers"""
242 | 
243 |         if escape:
244 |             return _escape(self.__str__())
245 |         else:
246 |             return self.__str__()
247 | 
248 |     def add(self, text):
249 |         """This is an alias to addcontent."""
250 |         self.addcontent(text)
251 | 
252 |     def addfooter(self, text):
253 |         """Add some text to the bottom of the document"""
254 |         self.footer.append(text)
255 | 
256 |     def addheader(self, text):
257 |         """Add some text to the top of the document"""
258 |         self.header.append(text)
259 | 
260 |     def addcontent(self, text):
261 |         """Add some text to the main part of the document"""
262 |         self.content.append(text)
263 | 
264 |     def init(self, lang='en', css=None, metainfo=None, title=None, header=None,
265 |              footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None):
266 |         """This method is used for complete documents with appropriate
267 |         doctype, encoding, title, etc information. For an HTML/XML snippet
268 |         omit this method.
269 | 
270 |         lang --     language, usually a two character string, will appear
271 |                     as <html lang='en'> in html mode (ignored in xml mode)
272 | 
273 |         css --      Cascading Style Sheet filename as a string or a list of
274 |                     strings for multiple css files (ignored in xml mode)
275 | 
276 |         metainfo -- a dictionary in the form { 'name':'content' } to be inserted
277 |                     into meta element(s) as <meta name='name' content='content'>
278 |                     (ignored in xml mode)
279 | 
280 |         bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
281 |                     as attributes of the <body> element as <body key='value' ... >
282 |                     (ignored in xml mode)
283 | 
284 |         script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
285 | 
286 |         title --    the title of the document as a string to be inserted into
287 |                     a title element as <title>my title</title> (ignored in xml mode)
288 | 
289 |         header --   some text to be inserted right after the <body> element
290 |                     (ignored in xml mode)
291 | 
292 |         footer --   some text to be inserted right before the </body> element
293 |                     (ignored in xml mode)
294 | 
295 |         charset --  a string defining the character set, will be inserted into a
296 |                     <meta http-equiv='Content-Type' content='text/html; charset=myset'>
297 |                     element (ignored in xml mode)
298 | 
299 |         encoding -- a string defining the encoding, will be put into to first line of
300 |                     the document as <?xml version='1.0' encoding='myencoding' ?> in
301 |                     xml mode (ignored in html mode)
302 | 
303 |         doctype --  the document type string, defaults to
304 |                     <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
305 |                     in html mode (ignored in xml mode)"""
306 | 
307 |         self._full = True
308 | 
309 |         if self.mode == 'strict_html' or self.mode == 'loose_html':
310 |             if doctype is None:
311 |                 doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
312 |             self.header.append(doctype)
313 |             self.html(lang=lang)
314 |             self.head()
315 |             if charset is not None:
316 |                 self.meta(
317 |                     http_equiv='Content-Type',
318 |                     content="text/html; charset=%s" %
319 |                     charset)
320 |             if metainfo is not None:
321 |                 self.metainfo(metainfo)
322 |             if css is not None:
323 |                 self.css(css)
324 |             if title is not None:
325 |                 self.title(title)
326 |             if script is not None:
327 |                 self.scripts(script)
328 |             self.head.close()
329 |             if bodyattrs is not None:
330 |                 self.body(**bodyattrs)
331 |             else:
332 |                 self.body()
333 |             if header is not None:
334 |                 self.content.append(header)
335 |             if footer is not None:
336 |                 self.footer.append(footer)
337 | 
338 |         elif self.mode == 'xml':
339 |             if doctype is None:
340 |                 if encoding is not None:
341 |                     doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
342 |                 else:
343 |                     doctype = "<?xml version='1.0' ?>"
344 |             self.header.append(doctype)
345 | 
346 |     def css(self, filelist):
347 |         """This convenience function is only useful for html.
348 |         It adds css stylesheet(s) to the document via the <link> element."""
349 | 
350 |         if isinstance(filelist, basestring):
351 |             self.link(
352 |                 href=filelist,
353 |                 rel='stylesheet',
354 |                 type='text/css',
355 |                 media='all')
356 |         else:
357 |             for file in filelist:
358 |                 self.link(
359 |                     href=file,
360 |                     rel='stylesheet',
361 |                     type='text/css',
362 |                     media='all')
363 | 
364 |     def metainfo(self, mydict):
365 |         """This convenience function is only useful for html.
366 |         It adds meta information via the <meta> element, the argument is
367 |         a dictionary of the form { 'name':'content' }."""
368 | 
369 |         if isinstance(mydict, dict):
370 |             for name, content in mydict.iteritems():
371 |                 self.meta(name=name, content=content)
372 |         else:
373 |             raise TypeError(
374 |                 "Metainfo should be called with a dictionary argument of name:content pairs.")
375 | 
376 |     def scripts(self, mydict):
377 |         """Only useful in html, mydict is dictionary of src:type pairs will
378 |         be rendered as <script type='text/type' src=src></script>"""
379 | 
380 |         if isinstance(mydict, dict):
381 |             for src, type in mydict.iteritems():
382 |                 self.script('', src=src, type='text/%s' % type)
383 |         else:
384 |             raise TypeError(
385 |                 "Script should be given a dictionary of src:type pairs.")
386 | 
387 | 
388 | class _oneliner:
389 | 
390 |     """An instance of oneliner returns a string corresponding to one element.
391 |     This class can be used to write 'oneliners' that return a string
392 |     immediately so there is no need to instantiate the page class."""
393 | 
394 |     def __init__(self, case='lower'):
395 |         self.case = case
396 | 
397 |     def __getattr__(self, attr):
398 |         if attr.startswith("__") and attr.endswith("__"):
399 |             raise AttributeError(attr)
400 |         return element(attr, case=self.case, parent=None)
401 | 
402 | oneliner = _oneliner(case='lower')
403 | upper_oneliner = _oneliner(case='upper')
404 | 
405 | 
406 | def _argsdicts(args, mydict):
407 |     """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1."""
408 | 
409 |     if len(args) == 0:
410 |         args = None,
411 |     elif len(args) == 1:
412 |         args = _totuple(args[0])
413 |     else:
414 |         raise Exception("We should have never gotten here.")
415 | 
416 |     mykeys = mydict.keys()
417 |     myvalues = map(_totuple, mydict.values())
418 | 
419 |     maxlength = max(map(len, [args] + myvalues))
420 | 
421 |     for i in xrange(maxlength):
422 |         thisdict = {}
423 |         for key, value in zip(mykeys, myvalues):
424 |             try:
425 |                 thisdict[key] = value[i]
426 |             except IndexError:
427 |                 thisdict[key] = value[-1]
428 |         try:
429 |             thisarg = args[i]
430 |         except IndexError:
431 |             thisarg = args[-1]
432 | 
433 |         yield thisarg, thisdict
434 | 
435 | 
436 | def _totuple(x):
437 |     """Utility stuff to convert string, int, float, None or anything to a usable tuple."""
438 | 
439 |     if isinstance(x, basestring):
440 |         out = x,
441 |     elif isinstance(x, (int, float)):
442 |         out = str(x),
443 |     elif x is None:
444 |         out = None,
445 |     else:
446 |         out = tuple(x)
447 | 
448 |     return out
449 | 
450 | 
451 | def escape(text, newline=False):
452 |     """Escape special html characters."""
453 | 
454 |     if isinstance(text, basestring):
455 |         if '&' in text:
456 |             text = text.replace('&', '&amp;')
457 |         if '>' in text:
458 |             text = text.replace('>', '&gt;')
459 |         if '<' in text:
460 |             text = text.replace('<', '&lt;')
461 |         if '\"' in text:
462 |             text = text.replace('\"', '&quot;')
463 |         if '\'' in text:
464 |             text = text.replace('\'', '&quot;')
465 |         if newline:
466 |             if '\n' in text:
467 |                 text = text.replace('\n', '<br>')
468 | 
469 |     return text
470 | 
471 | _escape = escape
472 | 
473 | 
474 | def unescape(text):
475 |     """Inverse of escape."""
476 | 
477 |     if isinstance(text, basestring):
478 |         if '&amp;' in text:
479 |             text = text.replace('&amp;', '&')
480 |         if '&gt;' in text:
481 |             text = text.replace('&gt;', '>')
482 |         if '&lt;' in text:
483 |             text = text.replace('&lt;', '<')
484 |         if '&quot;' in text:
485 |             text = text.replace('&quot;', '\"')
486 | 
487 |     return text
488 | 
489 | 
490 | class dummy:
491 | 
492 |     """A dummy class for attaching attributes."""
493 |     pass
494 | 
495 | doctype = dummy()
496 | doctype.frameset = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Frameset//EN' 'http://www.w3.org/TR/html4/frameset.dtd'>"
497 | doctype.strict = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' 'http://www.w3.org/TR/html4/strict.dtd'>"
498 | doctype.loose = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>"
499 | 
500 | 
501 | class russell:
502 | 
503 |     """A dummy class that contains anything."""
504 | 
505 |     def __contains__(self, item):
506 |         return True
507 | 
508 | 
509 | class MarkupError(Exception):
510 | 
511 |     """All our exceptions subclass this."""
512 | 
513 |     def __str__(self):
514 |         return self.message
515 | 
516 | 
517 | class ClosingError(MarkupError):
518 | 
519 |     def __init__(self, tag):
520 |         self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag
521 | 
522 | 
523 | class OpeningError(MarkupError):
524 | 
525 |     def __init__(self, tag):
526 |         self.message = "The element '%s' can not be opened." % tag
527 | 
528 | 
529 | class ArgumentError(MarkupError):
530 | 
531 |     def __init__(self, tag):
532 |         self.message = "The element '%s' was called with more than one non-keyword argument." % tag
533 | 
534 | 
535 | class InvalidElementError(MarkupError):
536 | 
537 |     def __init__(self, tag, mode):
538 |         self.message = "The element '%s' is not valid for your mode '%s'." % (
539 |             tag,
540 |             mode)
541 | 
542 | 
543 | class DeprecationError(MarkupError):
544 | 
545 |     def __init__(self, tag):
546 |         self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag
547 | 
548 | 
549 | class ModeError(MarkupError):
550 | 
551 |     def __init__(self, mode):
552 |         self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode
553 | 
554 | 
555 | class CustomizationError(MarkupError):
556 | 
557 |     def __init__(self):
558 |         self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
559 | 
560 | if __name__ == '__main__':
561 |     print __doc__
562 | 


--------------------------------------------------------------------------------