--help' for more information on a specific module.
84 | """
85 | # Everything starts here
86 | pass
87 |
88 | # The OSINT module -- This is the primary module that does all the stuff
89 | # Basic, required arguments
90 | @odin.command(name='osint',short_help="The full OSINT suite of tools will be run (see README).")
91 | @click.option('-o','--organization',help='The target client, such as "ABC Company," to use for \
92 | report titles and searches for domains and cloud storage buckets.',required=True)
93 | @click.option('-d','--domain',help="The target's primary domain, such as example.com. Use \
94 | whatever the target uses for email and their main website. Provide additional domains in a scope \
95 | file using --scope-file.",required=True)
96 | # Optional arguments
97 | @click.option('-sf','--scope-file',type=click.Path(exists=True,readable=True,\
98 | resolve_path=True),help="A text file containing additional domain names you want to include. IP \
99 | addresses can also be provided, if necessary. List each one on a new line.",required=False)
100 | @click.option('--whoxy-limit',default=10,help="The maximum number of domains discovered via \
101 | reverse WHOIS that ODIN will resolve and use when searching services like Censys and Shodan. \
102 | You may get hundreds of results from reverse WHOIS, so this is intended to save time and \
103 | API credits. The default is 10 domains; setting it much above 20 or 30 is not recommended. \
104 | It is preferable to perform a search using a tool like Vincent Yiu's DomLink and then provide \
105 | the newly discovered domains in your scope file with --scope-file.")
106 | @click.option('--typo',is_flag=True,help="Generate a list of lookalike domain names for the \
107 | provided domain (--domain), check if they have been registered, and then check those domains \
108 | against URLVoid and Cymon.io to see if the domains or associated IP addresses have been \
109 | flagged as malicious.")
110 | # File searching arguments
111 | @click.option('--files',is_flag=True,help="Use Google to search for files under the provided \
112 | domain (--domain), download the files, and extract metadata.")
113 | @click.option('-e','--ext',default="all",help="File extensions to look for with --files. \
114 | Default is 'all' or you can pick from key, pdf, doc, docx, xls, xlsx, and ppt.")
115 | # Cloud-related arguments
116 | @click.option('-w','--aws',help="A list of additional keywords to be used when searching for \
117 | cloud storage buckets.",type=click.Path(exists=True,readable=True,resolve_path=True))
118 | @click.option('-wf','--aws-fixes',help="A list of strings to be added to the start and end of \
119 | the cloud storage bucket names.",type=click.Path(exists=True,readable=True,resolve_path=True))
120 | # Reporting-related arguments
121 | @click.option('--html',is_flag=True,help="Create an HTML report at the end for easy browsing.")
122 | @click.option('--graph',is_flag=True,help="Create a Neo4j graph database from the completed \
123 | SQLite3 database.")
124 | @click.option('--nuke',is_flag=True,help="Clear the Neo4j project before converting the \
125 | database. This is only used with --graph.")
126 | @click.option('--screenshots',is_flag=True,help="Attempt to take screenshots of discovered \
127 | web services.")
128 | @click.option('--unsafe',is_flag=True,help="Adding this flag will spawn the headless Chrome \
129 | browser with the --no-sandbox command line flag. This is NOT recommended for any users who are \
130 | NOT running ODIN on a Kali Linux VM as root. Chrome will not run as the root user on Kali \
131 | without this option.")
132 | # Pass the above arguments on to your osint function
133 | @click.pass_context
134 |
135 | def osint(self,organization,domain,files,ext,scope_file,aws,aws_fixes,html,
136 | screenshots,graph,nuke,whoxy_limit,typo,unsafe):
137 | """
138 | The OSINT toolkit:
139 |
140 | This is ODIN's primary module. ODIN will take the target organization, domain, and other data
141 | provided and hunt for information. On the human side, ODIN looks for employee names,
142 | email addresses, and social media profiles. Names and emails are cross-referenced with
143 | HaveIBeenPwned, Twitter's API, and search engines to collect additional information.
144 |
145 | ODIN also uses various tools and APIs to collect information on the provided IP addresses
146 | and domain names, including things like DNS and IP address history.
147 |
148 | View the wiki for the full details, reporting information, and lists of API keys.
149 |
150 | Note: If providing any IP addresses in a scope file, acceptable IP addresses/ranges include:
151 |
152 | * Single Address: 8.8.8.8
153 |
154 | * Basic CIDR: 8.8.8.0/24
155 |
156 | * Nmap-friendly Range: 8.8.8.8-10
157 |
158 | * Underscores? OK: 8.8.8.8_8.8.8.10
159 | """
160 | click.clear()
161 | click.secho(asciis.print_art(),fg="magenta")
162 | click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
163 | click.secho("[+] OSINT Module Selected: ODIN will run all recon modules.",fg="green")
164 | # Perform prep work for reporting
165 | setup_reports(organization)
166 | report_path = "reports/{}/".format(organization)
167 | output_report = report_path + "OSINT_DB.db"
168 | if __name__ == "__main__":
169 | # Create manager server to handle variables shared between jobs
170 | manager = Manager()
171 | ip_list = manager.list()
172 | domain_list = manager.list()
173 | rev_domain_list = manager.list()
174 | # Create reporter object and generate lists of everything, just IP addresses, and just domains
175 | browser = helpers.setup_headless_chrome(unsafe)
176 | report = reporter.Reporter(organization,report_path,output_report,browser)
177 | report.create_tables()
178 | scope,ip_list,domain_list = report.prepare_scope(ip_list,domain_list,scope_file,domain)
179 | # Create some jobs and put Python to work!
180 | # Job queue 1 is for the initial phase
181 | jobs = []
182 | # Job queue 2 is used for jobs using data from job queue 1
183 | more_jobs = []
184 | # Job queue 3 is used for jobs that take a while and use the progress bar, e.g. AWS enumeration
185 | even_more_jobs = []
186 | # Phase 1 jobs
187 | company_info = Process(name="Company Info Collector",
188 | target=report.create_company_info_table,
189 | args=(domain,))
190 | jobs.append(company_info)
191 | employee_report = Process(name="Employee Hunter",
192 | target=report.create_people_table,
193 | args=(domain_list,rev_domain_list,organization))
194 | jobs.append(employee_report)
195 | domain_report = Process(name="Domain and IP Hunter",
196 | target=report.create_domain_report_table,
197 | args=(organization,scope,ip_list,domain_list,rev_domain_list,whoxy_limit))
198 | jobs.append(domain_report)
199 | # Phase 2 jobs
200 | shodan_report = Process(name="Shodan Hunter",
201 | target=report.create_shodan_table,
202 | args=(ip_list,domain_list))
203 | more_jobs.append(shodan_report)
204 | if typo:
205 | lookalike_report = Process(name="Lookalike Domain Reviewer",
206 | target=report.create_lookalike_table,
207 | args=(organization,domain))
208 | more_jobs.append(lookalike_report)
209 | if screenshots:
210 | take_screenshots = Process(name="Screenshot Snapper",
211 | target=report.capture_web_snapshots,
212 | args=(report_path,browser))
213 | more_jobs.append(take_screenshots)
214 | if files:
215 | files_report = Process(name="File Hunter",
216 | target=report.create_metadata_table,
217 | args=(domain,ext,report_path))
218 | more_jobs.append(files_report)
219 | # Phase 3 jobs
220 | cloud_report = Process(name="Cloud Hunter",
221 | target=report.create_cloud_table,
222 | args=(organization,domain,aws,aws_fixes))
223 | even_more_jobs.append(cloud_report)
224 | # Process the lists of jobs in phases, starting with phase 1
225 | click.secho("[+] Beginning initial discovery phase! This could take some time...",fg="green")
226 | for job in jobs:
227 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
228 | job.start()
229 | for job in jobs:
230 | job.join()
231 | # Wait for phase 1 and then begin phase 2 jobs
232 | click.secho("[+] Initial discovery is complete! Proceeding with additional queries...",fg="green")
233 | for job in more_jobs:
234 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
235 | job.start()
236 | for job in more_jobs:
237 | job.join()
238 | # Wait for phase 2 and then begin phase 3 jobs
239 | click.secho("[+] Final phase: checking the cloud and web services...",fg="green")
240 | for job in even_more_jobs:
241 | click.secho("[+] Starting new process: {}".format(job.name),fg="green")
242 | job.start()
243 | for job in even_more_jobs:
244 | job.join()
245 | # All jobs are done, so close out the SQLite3 database connection
246 | report.close_out_reporting()
247 | click.secho("[+] Job's done! Your results are in {} and can be viewed and queried with \
248 | any SQLite browser.".format(output_report),fg="green")
249 | # Perform additional tasks depending on the user's command line options
250 | if graph:
251 | graph_reporter = grapher.Grapher(output_report)
252 | click.secho("[+] Loading ODIN database file {} for conversion to Neo4j".format(output_report),fg="green")
253 | if nuke:
254 | if click.confirm(click.style("[!] You set the --nuke option. This wipes out all nodes for a \
255 | fresh start. Proceed?",fg="red"),default=True):
256 | try:
257 | graph_reporter.clear_neo4j_database()
258 | click.secho("[+] Database successfully wiped!\n",fg="green")
259 | except Exception as error:
260 | click.secho("[!] Failed to clear the database! Check the Neo4j console and \
261 | your configuration and try running grapher.py again.",fg="red")
262 | click.secho("L.. Details: {}".format(error),fg="red")
263 | else:
264 | click.secho("[!] You can convert your database to a graph database later. \
265 | Run lib/grapher.py with the appropriate options.",fg="red")
266 | try:
267 | graph_reporter.convert()
268 | except Exception as error:
269 | click.secho("[!] Failed to convert the database! Check the Neo4j console and \
270 | your configuration and try running grapher.py again.",fg="red")
271 | click.secho("L.. Details: {}".format(error),fg="red")
272 | if html:
273 | click.secho("\n[+] Creating the HTML report using {}.".format(output_report),fg="green")
274 | try:
275 | html_reporter = htmlreporter.HTMLReporter(organization,report_path + "/html_report/",output_report)
276 | html_reporter.generate_full_report()
277 | except Exception as error:
278 | click.secho("[!] Failed to create the HTML report!",fg="red")
279 | click.secho("L.. Details: {}".format(error),fg="red")
280 |
281 | # The VERIFY module -- No OSINT, just a way to check ownership of a list of IPs
282 | @odin.command(name='verify',short_help="This module assists with verifying ownership of a list \
283 | of IP addresses. This returns a CSV file with SSL cert, WHOIS, and other data for verification.")
284 | @click.option('-o','--organization',help='The target client, such as "ABC Company," to use for \
285 | report titles and some keyword searches.',required=True)
286 | @click.option('-sf','--scope-file',help="Name of the file with your IP addresses.",\
287 | type=click.Path(exists=True,readable=True,resolve_path=True),required=True)
288 | @click.option('-r','--report',default="Verification.csv",help="Output file (CSV) for the \
289 | findings.")
290 | # Pass the above arguments on to your verify function
291 | @click.pass_context
292 |
293 | def verify(self,organization,scope_file,report):
294 | """
295 | The Verify module:
296 |
297 | Uses reverse DNS, ARIN, and SSL/TLS certificate information to help you verify ownership of a
298 | list of IP addresses.
299 |
300 | This is only for verifying IP addresses. Domains may not have public ownership information
301 | available. Compare the IP ownership information from ARIN and certificate information to what
302 | you know about the presumed owner to determine ownership.
303 |
304 | Acceptable IP addresses/ranges include:
305 |
306 | * Single Address: 8.8.8.8
307 |
308 | * Basic CIDR: 8.8.8.0/24
309 |
310 | * Nmap-friendly Range: 8.8.8.8-10
311 |
312 | * Underscores? OK: 8.8.8.8_8.8.8.10
313 | """
314 | click.secho(asciis.print_art(),fg="magenta")
315 | click.secho("\tRelease v{}, {}".format(VERSION,CODENAME),fg="magenta")
316 | click.secho("[+] Scope Verification Module Selected: ODIN will attempt to verify who owns \
317 | the provided IP addresses.",fg="green")
318 | setup_reports(organization)
319 | report_path = "reports/{}/{}".format(organization,report)
320 | expanded_scope = []
321 | results = {}
322 | try:
323 | verification.prepare_scope(scope_file,expanded_scope)
324 | verification.perform_whois(expanded_scope,results)
325 | verification.print_output(results,report_path)
326 | except Exception as error:
327 | click.secho("[!] Verification failed!",fg="red")
328 | click.secho("L.. Details: {}".format(error),fg="red")
329 | click.secho("[+] Job's done! Your identity report is in {}.".format(report_path),fg="green")
330 |
331 | if __name__ == "__main__":
332 | odin()
333 |
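
For reference, here is a minimal sketch of how the CLI defined above might be driven programmatically, using Click's built-in test runner instead of a shell. The module name (odin) and the scope file name (scope.txt) are assumptions for illustration; the scope file must exist because of the click.Path(exists=True) check, and the verify command performs live lookups when invoked.

    # Hypothetical sketch -- not part of odin.py. Assumes the CLI above is importable
    # as `odin` and that scope.txt exists (one domain or IP entry per line, using any
    # of the formats listed in the docstrings, e.g. 8.8.8.0/24 or 8.8.8.8-10).
    from click.testing import CliRunner

    from odin import odin  # the Click group the commands above are registered on

    runner = CliRunner()
    # Roughly equivalent to: python3 odin.py verify -o "ABC Company" -sf scope.txt
    result = runner.invoke(odin, ["verify", "-o", "ABC Company", "-sf", "scope.txt"])
    print(result.output)
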
--------------------------------------------------------------------------------
/lib/subdomains.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | This module contains everything needed to hunt for subdomains, including collecting certificate
6 | data from Censys.io and crt.sh for a given domain name.
7 |
8 | The original crt.sh code is from PaulSec's unofficial crt.sh API. That project can be
9 | found here:
10 |
11 | https://github.com/PaulSec/crt.sh
12 | """
13 |
14 | import re
15 | import json
16 | import base64
17 | from time import sleep
18 |
19 | import click
20 | import requests
21 | import censys.certificates
22 | from bs4 import BeautifulSoup
23 |
24 | from . import helpers
25 |
26 |
27 | class CertSearcher(object):
28 | """Class for searching crt.sh and Censys.io for certificates and parsing the results."""
29 | # Set a timeout, in seconds, for the web requests
30 | requests_timeout = 10
31 | # The user-agent and endpoint URIs used for the web requests
32 | crtsh_base_uri = "https://crt.sh/?q={}&output=json"
33 | user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
34 |
35 | def __init__(self):
36 | """Everything that should be initiated with a new object goes here."""
37 | try:
38 | censys_api_id = helpers.config_section_map("Censys")["api_id"]
39 | censys_api_secret = helpers.config_section_map("Censys")["api_secret"]
40 | self.censys_cert_search = censys.certificates.CensysCertificates(api_id=censys_api_id,api_secret=censys_api_secret)
41 | except censys.base.CensysUnauthorizedException:
42 | self.censys_cert_search = None
43 | click.secho("[!] Censys reported your API information is invalid, so Censys searches will be skipped.",fg="yellow")
44 | click.secho("L.. You provided ID %s & Secret %s" % (censys_api_id,censys_api_secret),fg="yellow")
45 | except Exception as error:
46 | self.censys_cert_search = None
47 | click.secho("[!] Did not find a Censys API ID/secret.",fg="yellow")
48 | click.secho("L.. Details: {}".format(error),fg="yellow")
49 |
50 | def search_crtsh(self,domain,wildcard=True):
51 | """Collect certificate information from crt.sh for the target domain name. This returns
52 | a list of JSON objects with certificate information that includes the issuer, the entry and
53 | validity timestamps, and the certificate's name.
54 |
55 | Parameters:
56 | domain Domain to search for on crt.sh
57 | wildcard Whether or not to prepend a wildcard to the domain (default: True)
58 |
59 | Return a list of objects, like so:
60 | {
61 | "issuer_ca_id": 16418,
62 | "issuer_name": "C=US, O=Let's Encrypt, CN=Let's Encrypt Authority X3",
63 | "name_value": "hatch.uber.com",
64 | "min_cert_id": 325717795,
65 | "min_entry_timestamp": "2018-02-08T16:47:39.089",
66 | "not_before": "2018-02-08T15:47:39"
67 | }
68 | """
69 | headers = {"User-Agent":self.user_agent}
70 | if wildcard:
71 | domain = "%25.{}".format(domain)
72 | try:
73 | req = requests.get(self.crtsh_base_uri.format(domain),headers=headers,timeout=self.requests_timeout)
74 | if req.ok:
75 | try:
76 | content = req.content.decode("utf-8")
77 | data = json.loads("[{}]".format(content.replace('}{','},{')))
78 | return data
79 | except:
80 | pass
81 | except requests.exceptions.Timeout:
82 | click.secho("\n[!] The connection to crt.sh timed out!",fg="red")
83 | except requests.exceptions.TooManyRedirects:
84 | click.secho("\n[!] The connection to crt.sh encountered too many redirects!",fg="red")
85 | except requests.exceptions.RequestException as error:
86 | click.secho("\n[!] The connection to crt.sh encountered an error!",fg="red")
87 | click.secho("L.. Details: {}".format(error),fg="red")
88 | return None
89 |
90 | def search_censys_certificates(self,target):
91 | """Collect certificate information from Censys for the target domain name. This returns
92 | a dictionary of certificate information that includes the issuer, subject, and a hash
93 | Censys uses for the /view/ API calls to fetch additional information.
94 |
95 | A Censys API key is required.
96 |
97 | Parameters:
98 | target The domain name, e.g. apple.com, to be looked up on Censys.
99 | """
100 | if self.censys_cert_search is None:
101 | pass
102 | else:
103 | try:
104 | # Use the `parsed.names` filter to avoid unwanted domains
105 | query = "parsed.names: %s" % target
106 | results = self.censys_cert_search.search(query,fields=['parsed.names',
107 | 'parsed.signature_algorithm.name','parsed.signature.self_signed',
108 | 'parsed.validity.start','parsed.validity.end','parsed.fingerprint_sha256',
109 | 'parsed.subject_dn','parsed.issuer_dn'])
110 | return results
111 | except censys.base.CensysRateLimitExceededException:
112 | click.secho("\n[!] Censys reports your account has run out of API credits.",fg="red")
113 | return None
114 | except Exception as error:
115 | click.secho("\n[!] Error collecting Censys certificate data for {}.".format(target),fg="red")
116 | click.secho("L.. Details: {}".format(error),fg="red")
117 | return None
118 |
119 | def parse_cert_subdomain(self,subject_dn):
120 | """Accepts the Censys certificate data and parses the individual certificate's domain.
121 |
122 | Parameters:
123 | subject_dn Accepts the subject_dn field from a Censys search result.
124 | """
125 | if "," in subject_dn:
126 | pos = subject_dn.find('CN=')+3
127 | else:
128 | pos = 3
129 | tmp = subject_dn[pos:]
130 | if "," in tmp:
131 | pos = tmp.find(",")
132 | tmp = tmp[:pos]
133 | return tmp
134 |
135 | def filter_subdomains(self,domain,subdomains):
136 | """Filter out uninteresting domains that may be returned from certificates. These are
137 | domains unrelated to the true target. For example, a search for blizzard.com on Censys
138 | can return iran-blizzard.ir, an unwanted and unrelated domain.
139 |
140 | Credit to christophetd for this nice bit of code:
141 | https://github.com/christophetd/censys-subdomain-finder/blob/master/censys_subdomain_finder.py#L31
142 |
143 | Parameters:
144 | domain The base domain to be used for filtering subdomains, e.g. apple.com
145 | subdomains A list of collected subdomains to filter
146 | """
147 | return [ subdomain for subdomain in subdomains if '*' not in subdomain and subdomain.endswith(domain) ]
148 |
149 |
150 | class SubdomainCollector(object):
151 | """Class for scraping DNS Dumpster and NetCraft to discover subdomains."""
152 | # Set a timeout, in seconds, for the web requests
153 | requests_timeout = 10
154 | # The user-agent and endpoint URIs used for the web requests
155 | dnsdumpster_uri = "https://dnsdumpster.com/"
156 | findsubdomains_uri = "https://findsubdomains.com/subdomains-of/{}"
157 | netcraft_uri = "http://searchdns.netcraft.com/?host={}"
158 | netcraft_history_uri = "http://toolbar.netcraft.com/site_report?url={}"
159 | user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
160 |
161 | def __init__(self,webdriver=None):
162 | """Everything that should be initiated with a new object goes here.
163 |
164 | Parameters:
165 | webdriver A selenium webdriver object to be used for automated web browsing
166 | """
167 | self.browser = webdriver
168 | self.browser.set_page_load_timeout(10)
169 |
170 | def check_dns_dumpster(self,domain):
171 | """Collect subdomains known to DNS Dumpster for the provided domain. This is based on
172 | PaulSec's unofficial DNS Dumpster API available on GitHub.
173 |
174 | Parameters:
175 | domain The domain to search for on DNS Dumpster
176 | """
177 | results = {}
178 | cookies = {}
179 | # Disable SSL warnings and create a session for web browsing
180 | requests.packages.urllib3.disable_warnings()
181 | session = requests.session()
182 | # Try connecting to DNS Dumpster
183 | # This is all in one try/except because request 1 must succeed for request 2 to work
184 | try:
185 | # Make a request to stash the CSRF token and setup cookies and headers for the next request
186 | request = session.get(self.dnsdumpster_uri,verify=False,timeout=self.requests_timeout)
187 | csrf_token = session.cookies['csrftoken']
188 | cookies['csrftoken'] = session.cookies['csrftoken']
189 | headers = {"Referer": self.dnsdumpster_uri}
190 | data = {"csrfmiddlewaretoken": csrf_token,"targetip":domain}
191 | # Now make a POST to DNS Dumpster with the new cookies and headers to perform the search
192 | request = session.post(self.dnsdumpster_uri,cookies=cookies,data=data,headers=headers,timeout=self.requests_timeout)
193 | # Check if a 200 OK was returned
194 | if request.ok:
195 | soup = BeautifulSoup(request.content,"lxml")
196 | tables = soup.findAll("table")
197 | results['domain'] = domain
198 | results['dns_records'] = {}
199 | results['dns_records']['dns'] = self._retrieve_results(tables[0])
200 | results['dns_records']['mx'] = self._retrieve_results(tables[1])
201 | results['dns_records']['txt'] = self._retrieve_txt_record(tables[2])
202 | results['dns_records']['host'] = self._retrieve_results(tables[3])
203 | # Try to fetch the network mapping image
204 | try:
205 | val = soup.find('img',attrs={'class': 'img-responsive'})['src']
206 | tmp_url = "{}{}".format(self.dnsdumpster_uri,val)
207 | image_data = base64.b64encode(requests.get(tmp_url,timeout=self.requests_timeout).content)
208 | except Exception:
209 | image_data = None
210 | finally:
211 | results['image_data'] = image_data
212 | else:
213 | click.secho("\n[!] The DNS Dumpster request returned a {} status code!".format(request.status_code),fg="red")
214 | except requests.exceptions.Timeout:
215 | click.secho("\n[!] The connection to DNS Dumpster timed out!",fg="red")
216 | except requests.exceptions.TooManyRedirects:
217 | click.secho("\n[!] The connection to DNS Dumpster encountered too many redirects!",fg="red")
218 | except requests.exceptions.RequestException as error:
219 | click.secho("\n[!] The connection to DNS Dumpster encountered an error!",fg="red")
220 | click.secho("L.. Details: {}".format(error),fg="red")
221 | return results
222 |
223 | def _retrieve_results(self,table):
224 | """Used by check_dns_dumpster() to extract the results from the HTML.
225 |
226 | Parameters:
227 | table The HTML table pulled from DNS Dumpster results
228 | """
229 | results = []
230 | trs = table.findAll('tr')
231 | for tr in trs:
232 | tds = tr.findAll('td')
233 | pattern_ip = r'([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3})'
234 | ip = re.findall(pattern_ip,tds[1].text)[0]
235 | domain = tds[0].text.replace('\n','').split(' ')[0]
236 | header = ' '.join(tds[0].text.replace('\n','').split(' ')[1:])
237 | reverse_dns = tds[1].find('span',attrs={}).text
238 | additional_info = tds[2].text
239 | country = tds[2].find('span',attrs={}).text
240 | autonomous_system = additional_info.split(' ')[0]
241 | provider = ' '.join(additional_info.split(' ')[1:])
242 | provider = provider.replace(country,'')
243 | data = {'domain':domain,
244 | 'ip':ip,
245 | 'reverse_dns':reverse_dns,
246 | 'as':autonomous_system,
247 | 'provider':provider,
248 | 'country':country,
249 | 'header':header}
250 | results.append(data)
251 | return results
252 |
253 | def _retrieve_txt_record(self,table):
254 | """Used by check_dns_dumpster() to extracts the domain's DNS TXT records.
255 |
256 | Parameters:
257 | table The HTML table pulled from DNS Dumpster results
258 | """
259 | results = []
260 | for td in table.findAll('td'):
261 | results.append(td.text)
262 | return results
263 |
264 | def check_netcraft(self,domain):
265 | """Collect subdomains known to NetCraft for the provided domain. NetCraft blocks scripted
266 | requests by requiring cookies and JavaScript for all browsers, so Selenium is required.
267 |
268 | This is based on code from the DataSploit project, but updated to work with today's
269 | NetCraft and Python 3.
270 |
271 | Parameters:
272 | domain The domain to look up on NetCraft
273 | """
274 | results = []
275 | target_dom_name = domain.split(".")
276 | self.browser.get(self.netcraft_uri.format(domain))
277 | link_regx = re.compile(r'<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">')  # Pattern follows the DataSploit original; may need updating for current NetCraft markup
278 | links_list = link_regx.findall(self.browser.page_source)
279 | for x in links_list:
280 | dom_name = x.split("/")[2].split(".")
281 | if (dom_name[len(dom_name) - 1] == target_dom_name[1]) and \
282 | (dom_name[len(dom_name) - 2] == target_dom_name[0]):
283 | results.append(x.split("/")[2])
284 | num_regex = re.compile('Found (.*) site')
285 | num_subdomains = num_regex.findall(self.browser.page_source)
286 | if not num_subdomains:
287 | num_regex = re.compile('First (.*) sites returned')
288 | num_subdomains = num_regex.findall(self.browser.page_source)
289 | if num_subdomains:
290 | if num_subdomains[0] != str(0):
291 | num_pages = int(num_subdomains[0]) // 20 + 1
292 | if num_pages > 1:
293 | last_regex = re.compile(
294 | r'<td align="left">\s*<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">%s.</a></td>' % (20))
295 | last_item = last_regex.findall(self.browser.page_source)[0].split("/")[2]
296 | next_page = 21
297 | for x in range(2,num_pages):
298 | url = "http://searchdns.netcraft.com/?host=%s&last=%s&from=%s&restriction=/site%%20contains" % (domain,last_item,next_page)
299 | self.browser.get(url)
300 | link_regx = re.compile(
301 | r'<td align="left">\s*<a href="http://toolbar\.netcraft\.com/site_report\?url=(.*)">')
302 | links_list = link_regx.findall(self.browser.page_source)
303 | for y in links_list:
304 | dom_name1 = y.split("/")[2].split(".")
305 | if (dom_name1[len(dom_name1) - 1] == target_dom_name[1]) and \
306 | (dom_name1[len(dom_name1) - 2] == target_dom_name[0]):
307 | results.append(y.split("/")[2])
308 | last_item = links_list[len(links_list) - 1].split("/")[2]
309 | next_page = 20 * x + 1
310 | else:
311 | pass
312 | return results
313 |
314 | def fetch_netcraft_domain_history(self,domain):
315 | """Fetch a domain's IP address history from NetCraft.
316 |
317 | Parameters:
318 | domain The domain to look up on NetCraft
319 | """
320 | # TODO: See if the "Last Seen" and other data can be easily collected here
321 | ip_history = []
322 | sleep(1)
323 | self.browser.get(self.netcraft_history_uri.format(domain))
324 | soup = BeautifulSoup(self.browser.page_source,'html.parser')
325 | urls_parsed = soup.findAll('a',href=re.compile(r".*netblock\?q.*"))
326 | for url in urls_parsed:
327 | if urls_parsed.index(url) != 0:
328 | result = [str(url).split('=')[2].split(">")[1].split("<")[0],\
329 | str(url.parent.findNext('td')).strip("<td>").strip("</td>")]
330 | ip_history.append(result)
331 | return ip_history
332 |
333 | def query_subdomainof(self,domain):
334 | """Look-up the given domain on findsubdomains.com and parse the results to get a list of
335 | subdomains.
336 |
337 | Parameters:
338 | domain The base domain for the subdomains query
339 | """
340 | subdomains = []
341 | headers = { 'User-Agent': self.user_agent }
342 | request = requests.get(self.findsubdomains_uri.format(domain),headers=headers,timeout=self.requests_timeout)
343 | soup = BeautifulSoup(request.content,"lxml")
344 | subdomain_links = soup.findAll('a',{'class': 'aggregated-link'})
345 | for subdomain in subdomain_links:
346 | if subdomain.string.strip() != domain:
347 | subdomains.append(subdomain.string.strip())
348 | unique_subdomains = list(set(subdomains))
349 | return unique_subdomains
350 |
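
To show how the two classes above are meant to fit together, a hypothetical usage sketch follows. It is not part of subdomains.py; it assumes the package layout shown in the file headers, Censys API credentials in the keys config, and a Selenium webdriver returned by helpers.setup_headless_chrome().

    # Hypothetical usage sketch -- not part of subdomains.py.
    from lib import helpers
    from lib.subdomains import CertSearcher, SubdomainCollector

    domain = "example.com"
    searcher = CertSearcher()

    candidates = set()
    # crt.sh returns a list of dicts (or None); "name_value" holds the certificate's name
    crtsh_results = searcher.search_crtsh(domain, wildcard=True)
    if crtsh_results:
        for cert in crtsh_results:
            candidates.add(cert["name_value"])

    # Censys results are keyed by the requested fields; parse_cert_subdomain() pulls the CN
    # out of a subject_dn string, e.g. "C=US, O=Example, CN=mail.example.com" -> "mail.example.com"
    censys_results = searcher.search_censys_certificates(domain)
    if censys_results:
        for result in censys_results:
            candidates.add(searcher.parse_cert_subdomain(result["parsed.subject_dn"]))

    # Drop wildcard entries and anything that does not end with the target domain
    subdomain_names = searcher.filter_subdomains(domain, list(candidates))

    # DNS Dumpster and NetCraft scraping need a browser, so pass in a Selenium webdriver
    # (the boolean mirrors odin.py's --unsafe flag)
    browser = helpers.setup_headless_chrome(False)
    collector = SubdomainCollector(browser)
    dumpster_data = collector.check_dns_dumpster(domain)
    netcraft_hosts = collector.check_netcraft(domain)
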
--------------------------------------------------------------------------------