├── .gitignore ├── README.md ├── common.py ├── core.py ├── dnslib.py ├── dnstwist.py ├── examples └── example_tld.csv ├── gfyp_db.py ├── requirements.txt └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | /db.* 2 | /*.db 3 | /*.pyc 4 | /*.log 5 | /*.bak 6 | /*.csv 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GFYP - Go Find Your Phishers 2 | 3 | This tool augments [dnstwist](https://github.com/elceef/dnstwist) with a database that tracks identified phishing sites over times, and provides email alerts when new ones are discovered. 4 | 5 | ## Installation 6 | 7 | $ pip install -r requirements.txt 8 | 9 | ## Configuration 10 | 11 | 1. Initialize database with `python util.py build` 12 | 2. Either add your SMTP credentials by hard-coding them in core.py, or set the 13 | following environment variables: 14 | * `GFYP_EMAIL_USERNAME` 15 | * `GFYP_EMAIL_PASSWORD` 16 | * `GFYP_EMAIL_SMTPSERVER` 17 | 18 | Ex. 19 | 20 | $ export GFYP_EMAIL_USERNAME=alice@example.com 21 | $ export GFYP_EMAIL_PASSWORD=ilovemallory 22 | $ export GFYP_EMAIL_SMTPSERVER=smtp.example.com 23 | 24 | ## Usage 25 | 26 | # add domain to list for which to hunt phishing domains 27 | python util.py add (domain name) (email address) [optional: path to csv containing additional TLDs to check] 28 | # start searching process 29 | python core.py # or set it as a cron job to regular reports 30 | 31 | ## Troubleshooting 32 | 33 | ### GMail 34 | 35 | If using GMail as an SMTP provider, you may first need to log into GMail in the web interface and enable the "Allow less secure apps" option in the "Sign-in & security" section. 36 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | """Shared functions and constants""" 2 | import sys 3 | import logging 4 | 5 | LOG_FILENAME = 'gfyp.log' 6 | 7 | BOLD = "" 8 | END = "" 9 | if sys.platform != 'win32' and sys.stdout.isatty(): 10 | BOLD = "\033[1m" 11 | END = "\033[0m" 12 | 13 | def pretty_print(string): 14 | """For *nix systems, augment TTY output. For others, strip such syntax.""" 15 | string = string.replace('$BOLD$', BOLD) 16 | string = string.replace('$END$', END) 17 | print(string) 18 | 19 | def log(msg, level=logging.INFO): 20 | """Add a string to the log file.""" 21 | logging.basicConfig(filename=LOG_FILENAME, 22 | format='%(asctime)s:%(levelname)s:%(message)s', 23 | level=logging.INFO) 24 | if level == logging.DEBUG: 25 | logging.debug(msg) 26 | elif level == logging.INFO: 27 | logging.info(msg) 28 | elif level == logging.WARNING: 29 | logging.warning(msg) 30 | elif level == logging.ERROR: 31 | logging.error(msg) 32 | elif level == logging.CRITICAL: 33 | logging.critical(msg) 34 | else: 35 | raise ValueError(str(level)) 36 | -------------------------------------------------------------------------------- /core.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Checks database for new phishing entries and executes alerts.""" 16 | 17 | import os 18 | import sys 19 | import smtplib 20 | import logging 21 | 22 | from dnslib import dnslib #dnslib.py 23 | import gfyp_db #gfyp_db.py 24 | from common import pretty_print, log #common.py 25 | 26 | #SET EMAIL SETTINGS HERE IF NOT USING ENVIRONMENT VARIABLES 27 | EMAIL_USERNAME = None 28 | EMAIL_PASSWORD = None 29 | EMAIL_SMTPSERVER = None 30 | 31 | def send_email(smtp_auth, recipient, subject, body): 32 | """Send email via SMTP. 33 | Args: 34 | smtp_auth (dict): Contains 'username' (str), 'password' (str), and 35 | 'server' (str). 36 | recipient (str): The email address to send to 37 | subject (str) 38 | body (str) 39 | 40 | http://stackoverflow.com/questions/10147455/trying-to-send-email-gmail-as-mail-provider-using-python 41 | """ 42 | email_to = [recipient] 43 | 44 | #Sending message, first construct actual message 45 | message = ("From: %s\nTo: %s\nSubject: %s\n\n%s" % 46 | (smtp_auth['username'], ", ".join(email_to), subject, body)) 47 | try: 48 | server_ssl = smtplib.SMTP_SSL(smtp_auth['server'], 465) 49 | server_ssl.ehlo() 50 | server_ssl.login(smtp_auth['username'], smtp_auth['password']) 51 | server_ssl.sendmail(smtp_auth['username'], email_to, message) 52 | server_ssl.close() 53 | except Exception as err: 54 | msg = "Failed to send mail: %s" % str(err) 55 | log(msg, logging.ERROR) 56 | sys.exit(msg) 57 | 58 | msg = "Email sent to %s." % recipient 59 | print(msg) 60 | log(msg) 61 | 62 | def check_and_send_alert(smtp_auth, alert_email, domain, escape_alert=False, 63 | db_con=None): 64 | """Consult DB whether an alert needs to be sent for domain, and send one. 65 | Args: 66 | smtp_auth (dict): Credentials for SMTP server, including 'username', 67 | 'password', and 'server'. 68 | alert_email (str) 69 | domain (str) 70 | escape_alert (bool): Whether or not to escape periods in the email body 71 | in order to avoid spam filtering. (Default: False) 72 | db_con (None or `gfyp_db.DatabaseConnection`): This can optionally 73 | provide a database connection to reuse. Otherwise, a new one will 74 | be created. 75 | """ 76 | msg = "Now checking %s - %s" % (alert_email, domain) 77 | print(msg) 78 | log(msg) 79 | close_db = False 80 | if db_con is None: 81 | db_con = gfyp_db.DatabaseConnection() 82 | close_db = True 83 | body = "" 84 | dns_check = dnslib() 85 | entries = dns_check.checkDomain(domain) 86 | msg = "DNSTwist found %d variant domains from %s." % (len(entries), domain) 87 | print(msg) 88 | log(msg) 89 | num_new_entries = 0 90 | for domain_found, domain_info in entries: 91 | found_entries = db_con.get_matching_found_domains(domain_found) 92 | entries_iter = found_entries.fetchall() 93 | 94 | if len(entries_iter) == 0: 95 | db_con.add_discovered_domain(domain_found, domain_info) 96 | body += "\r\n\r\n%s - %s" % (domain_found, domain_info) 97 | num_new_entries += 1 98 | 99 | if body != "": 100 | recipient = alert_email 101 | subject = 'GFYP - New Entries for %s' % domain 102 | if escape_alert: 103 | body = body.replace('.', '[.]') 104 | send_email(smtp_auth, recipient, subject, body) 105 | 106 | msg = "Found %d new domain variants from %s" % (num_new_entries, domain) 107 | print(msg) 108 | log(msg) 109 | 110 | if close_db: 111 | db_con.conn.close() 112 | 113 | def main(): 114 | """Description: Search for new domain variants and email alerts for new ones. 115 | """ 116 | args = get_args() 117 | #Get configuration from env variables or fallback to hard-coded values 118 | smtp_auth = dict() 119 | smtp_auth['username'] = os.getenv('GFYP_EMAIL_USERNAME', EMAIL_USERNAME) 120 | smtp_auth['password'] = os.getenv('GFYP_EMAIL_PASSWORD', EMAIL_PASSWORD) 121 | smtp_auth['server'] = os.getenv('GFYP_EMAIL_SMTPSERVER', EMAIL_SMTPSERVER) 122 | for key, value in list(smtp_auth.items()): 123 | if value is None: 124 | msg = "Fatal error: Email setting '%s' has not been set." % key 125 | log(msg, logging.ERROR) 126 | sys.exit(msg) 127 | 128 | if any([EMAIL_USERNAME, EMAIL_PASSWORD, EMAIL_SMTPSERVER]): 129 | msg = ("WARNING: You have hard-coded credentials into a code file. Do " 130 | "not commit it to a public Git repo!") 131 | print(msg) 132 | log(msg, logging.WARNING) 133 | 134 | with gfyp_db.DatabaseConnection() as db_con: 135 | if db_con.is_db_current(): 136 | domain_entries = db_con.get_watch_entries() 137 | 138 | if len(domain_entries) == 0: 139 | msg = ("No domains have been added for watching/alerts. Use " 140 | "util.py to add domains.") 141 | print(msg) 142 | log(msg) 143 | 144 | for row in domain_entries: 145 | alert_email = row[0] 146 | domain = row[1] 147 | check_and_send_alert( 148 | smtp_auth, alert_email, domain, 149 | escape_alert=args['escape_alert'], db_con=db_con) 150 | else: 151 | msg = "GFYP database is not current. Please run 'python util.py migrate' to update to the current schema" 152 | print(msg) 153 | log(msg,logging.ERROR) 154 | 155 | def usage(): 156 | """Print usage info.""" 157 | usage_str = ( 158 | "GFYP Core - Find domain variants and send alerts\n" 159 | "usage: python core.py [$BOLD$-escapealert$END$]\n" 160 | "Options:\n" 161 | " $BOLD$-escapealert$END$ - Escape periods in email alert to avoid " 162 | "spam filter") 163 | pretty_print(usage_str) 164 | sys.exit() 165 | 166 | def get_args(): 167 | """Get command line arguments. 168 | 169 | Current arguments: 170 | * escape_alert (bool): Whether to escape periods in alert email. 171 | """ 172 | args = dict() 173 | args['escape_alert'] = False 174 | if len(sys.argv) == 1: 175 | return args 176 | elif len(sys.argv) == 2: 177 | if sys.argv[1] == '-escapealert': 178 | args['escape_alert'] = True 179 | else: 180 | log("Invalid arguments: %s" % sys.argv, logging.ERROR) 181 | usage() 182 | else: 183 | log("Invalid arguments: %s" % sys.argv, logging.ERROR) 184 | usage() 185 | return args 186 | 187 | if __name__ == "__main__": 188 | main() 189 | -------------------------------------------------------------------------------- /dnslib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # dnstwist 4 | # 5 | # Generate and resolve domain variations to detect typo squatting, 6 | # phishing and corporate espionage. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | #Note: this was originally code from dnstwist (https://github.com/elceef/dnstwist). I've since moved it out to an unmodified form that is accompanying this code. 21 | #See dnstwist.py 22 | 23 | from dnstwist import DomainFuzz 24 | try: 25 | import dns.resolver 26 | module_dnspython = True 27 | except: 28 | module_dnspython = False 29 | pass 30 | try: 31 | import whois 32 | module_whois = True 33 | except: 34 | module_whois = False 35 | pass 36 | 37 | class dnslib: 38 | 39 | def __init__(self): 40 | #Putting this here for now as a placeholder for the dynamic fuzzing code I'm working on. 41 | self.domains = [] 42 | 43 | def checkDomain(self,dnsEntryName): 44 | """TODO: describe arguments and return value!""" 45 | fuzzer = DomainFuzz(dnsEntryName.lower()) 46 | fuzzer.generate() 47 | domains = fuzzer.domains 48 | 49 | total_hits = 0 50 | 51 | for i in range(0, len(domains)): 52 | if module_dnspython: 53 | resolv = dns.resolver.Resolver() 54 | resolv.lifetime = 1 55 | resolv.timeout = 1 56 | 57 | try: 58 | ns = resolv.query(domains[i]['domain-name'], 'NS') 59 | domains[i]['ns'] = str(ns[0])[:-1].lower() 60 | except: 61 | pass 62 | 63 | if 'ns' in domains[i]: 64 | try: 65 | ns = resolv.query(domains[i]['domain-name'], 'A') 66 | domains[i]['a'] = str(ns[0]) 67 | except: 68 | pass 69 | 70 | try: 71 | ns = resolv.query(domains[i]['domain-name'], 'AAAA') 72 | domains[i]['aaaa'] = str(ns[0]) 73 | except: 74 | pass 75 | 76 | try: 77 | mx = resolv.query(domains[i]['domain-name'], 'MX') 78 | domains[i]['mx'] = str(mx[0].exchange)[:-1].lower() 79 | except: 80 | pass 81 | 82 | if 'ns' in domains[i] or 'a' in domains[i]: 83 | try: 84 | whoisdb = whois.query(domains[i]['domain-name']) 85 | domains[i]['created'] = str(whoisdb.creation_date).replace(' ', 'T') 86 | domains[i]['updated'] = str(whoisdb.last_updated).replace(' ', 'T') 87 | except: 88 | pass 89 | 90 | returnDomains = [] 91 | for i in domains: 92 | info = '' 93 | 94 | if 'a' in i: 95 | info += i['a'] 96 | if 'country' in i: 97 | info += '/' + i['country'] 98 | if 'banner-http' in i: 99 | info += ' HTTP:"%s"' % i['banner-http'] 100 | elif 'ns' in i: 101 | info += 'NS:' + i['ns'] 102 | 103 | if 'aaaa' in i: 104 | info += ' ' + i['aaaa'] 105 | 106 | if 'mx' in i: 107 | info += ' MX:' + i['mx'] 108 | if 'banner-smtp' in i: 109 | info += ' SMTP:"%s"' % i['banner-smtp'] 110 | 111 | if 'created' in i and 'updated' in i and i['created'] == i['updated']: 112 | info += ' Created/Updated:' + i['created'] 113 | else: 114 | if 'created' in i: 115 | info += ' Created:' + i['created'] 116 | if 'updated' in i: 117 | info += ' Updated:' + i['updated'] 118 | 119 | if info: 120 | returnDomains.append([i['domain-name'],info]) 121 | 122 | return returnDomains 123 | -------------------------------------------------------------------------------- /dnstwist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # dnstwist 4 | # 5 | # Generate and resolve domain variations to detect typo squatting, 6 | # phishing and corporate espionage. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | __author__ = 'Marcin Ulikowski' 21 | __version__ = '1.02' 22 | __email__ = 'marcin@ulikowski.pl' 23 | 24 | import re 25 | import sys 26 | import socket 27 | import signal 28 | import time 29 | import argparse 30 | import threading 31 | from random import randint 32 | from os import path 33 | import smtplib 34 | import json 35 | 36 | try: 37 | import queue 38 | except ImportError: 39 | if sys.version_info[0] == 2: 40 | import Queue as queue 41 | else: 42 | import queue as queue 43 | 44 | try: 45 | import dns.resolver 46 | MODULE_DNSPYTHON = True 47 | except ImportError: 48 | MODULE_DNSPYTHON = False 49 | pass 50 | 51 | try: 52 | import GeoIP 53 | MODULE_GEOIP = True 54 | except ImportError: 55 | MODULE_GEOIP = False 56 | pass 57 | 58 | try: 59 | import whois 60 | MODULE_WHOIS = True 61 | except ImportError: 62 | MODULE_WHOIS = False 63 | pass 64 | 65 | try: 66 | import ssdeep 67 | MODULE_SSDEEP = True 68 | except ImportError: 69 | MODULE_SSDEEP = False 70 | 71 | try: 72 | import requests 73 | MODULE_REQUESTS = True 74 | except ImportError: 75 | MODULE_REQUESTS = False 76 | pass 77 | 78 | DIR = path.abspath(path.dirname(sys.argv[0])) 79 | DIR_DB = 'database' 80 | FILE_GEOIP = path.join(DIR, DIR_DB, 'GeoIP.dat') 81 | FILE_TLD = path.join(DIR, DIR_DB, 'effective_tld_names.dat') 82 | 83 | DB_GEOIP = path.exists(FILE_GEOIP) 84 | DB_TLD = path.exists(FILE_TLD) 85 | 86 | REQUEST_TIMEOUT_DNS = 5 87 | REQUEST_TIMEOUT_HTTP = 5 88 | REQUEST_TIMEOUT_SMTP = 5 89 | THREAD_COUNT_DEFAULT = 10 90 | 91 | if sys.platform != 'win32' and sys.stdout.isatty(): 92 | FG_RND = '\x1b[3%dm' % randint(1, 8) 93 | FG_RED = '\x1b[31m' 94 | FG_YEL = '\x1b[33m' 95 | FG_GRE = '\x1b[32m' 96 | FG_MAG = '\x1b[35m' 97 | FG_CYA = '\x1b[36m' 98 | FG_BLU = '\x1b[34m' 99 | FG_RST = '\x1b[39m' 100 | ST_BRI = '\x1b[1m' 101 | ST_RST = '\x1b[0m' 102 | else: 103 | FG_RND = '' 104 | FG_RED = '' 105 | FG_YEL = '' 106 | FG_GRE = '' 107 | FG_MAG = '' 108 | FG_CYA = '' 109 | FG_BLU = '' 110 | FG_RST = '' 111 | ST_BRI = '' 112 | ST_RST = '' 113 | 114 | 115 | def p_cli(data): 116 | global args 117 | if not args.csv and not args.json: 118 | sys.stdout.write(data) 119 | sys.stdout.flush() 120 | 121 | 122 | def p_err(data): 123 | sys.stderr.write(path.basename(sys.argv[0]) + ': ' + data) 124 | sys.stderr.flush() 125 | 126 | 127 | def p_csv(data): 128 | global args 129 | if args.csv: 130 | sys.stdout.write(data) 131 | 132 | 133 | def p_json(data): 134 | global args 135 | if args.json: 136 | sys.stdout.write(data) 137 | 138 | 139 | def bye(code): 140 | sys.stdout.write(FG_RST + ST_RST) 141 | sys.exit(code) 142 | 143 | 144 | def sigint_handler(signal, frame): 145 | sys.stdout.write('\nStopping threads... ') 146 | sys.stdout.flush() 147 | for worker in threads: 148 | worker.stop() 149 | time.sleep(1) 150 | sys.stdout.write('Done\n') 151 | bye(0) 152 | 153 | 154 | class UrlParser(): 155 | 156 | def __init__(self, url): 157 | if '://' not in url: 158 | self.url = 'http://' + url 159 | else: 160 | self.url = url 161 | self.scheme = '' 162 | self.authority = '' 163 | self.domain = '' 164 | self.path = '' 165 | self.query = '' 166 | 167 | self.__parse() 168 | 169 | def __parse(self): 170 | re_rfc3986_enhanced = re.compile( 171 | r''' 172 | ^ 173 | (?:(?P[^:/?#\s]+):)? 174 | (?://(?P[^/?#\s]*))? 175 | (?P[^?#\s]*) 176 | (?:\?(?P[^#\s]*))? 177 | (?:\#(?P[^\s]*))? 178 | $ 179 | ''', re.MULTILINE | re.VERBOSE 180 | ) 181 | 182 | m_uri = re_rfc3986_enhanced.match(self.url) 183 | 184 | if m_uri: 185 | if m_uri.group('scheme'): 186 | if m_uri.group('scheme').startswith('http'): 187 | self.scheme = m_uri.group('scheme') 188 | else: 189 | self.scheme = 'http' 190 | if m_uri.group('authority'): 191 | self.authority = m_uri.group('authority') 192 | self.domain = self.authority.split(':')[0].lower() 193 | if not self.__validate_domain(self.domain): 194 | raise ValueError('Invalid domain name.') 195 | if m_uri.group('path'): 196 | self.path = m_uri.group('path') 197 | if m_uri.group('query'): 198 | if len(m_uri.group('query')): 199 | self.query = '?' + m_uri.group('query') 200 | 201 | def __validate_domain(self, domain): 202 | if len(domain) > 255: 203 | return False 204 | if domain[-1] == '.': 205 | domain = domain[:-1] 206 | allowed = re.compile('\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\Z', re.IGNORECASE) 207 | return allowed.match(domain) 208 | 209 | def get_full_uri(self): 210 | return self.scheme + '://' + self.domain + self.path + self.query 211 | 212 | 213 | class DomainFuzz(): 214 | 215 | def __init__(self, domain): 216 | self.domain, self.tld = self.__domain_tld(domain) 217 | self.domains = [] 218 | self.qwerty = { 219 | '1': '2q', '2': '3wq1', '3': '4ew2', '4': '5re3', '5': '6tr4', '6': '7yt5', '7': '8uy6', '8': '9iu7', '9': '0oi8', '0': 'po9', 220 | 'q': '12wa', 'w': '3esaq2', 'e': '4rdsw3', 'r': '5tfde4', 't': '6ygfr5', 'y': '7uhgt6', 'u': '8ijhy7', 'i': '9okju8', 'o': '0plki9', 'p': 'lo0', 221 | 'a': 'qwsz', 's': 'edxzaw', 'd': 'rfcxse', 'f': 'tgvcdr', 'g': 'yhbvft', 'h': 'ujnbgy', 'j': 'ikmnhu', 'k': 'olmji', 'l': 'kop', 222 | 'z': 'asx', 'x': 'zsdc', 'c': 'xdfv', 'v': 'cfgb', 'b': 'vghn', 'n': 'bhjm', 'm': 'njk' 223 | } 224 | self.qwertz = { 225 | '1': '2q', '2': '3wq1', '3': '4ew2', '4': '5re3', '5': '6tr4', '6': '7zt5', '7': '8uz6', '8': '9iu7', '9': '0oi8', '0': 'po9', 226 | 'q': '12wa', 'w': '3esaq2', 'e': '4rdsw3', 'r': '5tfde4', 't': '6zgfr5', 'z': '7uhgt6', 'u': '8ijhz7', 'i': '9okju8', 'o': '0plki9', 'p': 'lo0', 227 | 'a': 'qwsy', 's': 'edxyaw', 'd': 'rfcxse', 'f': 'tgvcdr', 'g': 'zhbvft', 'h': 'ujnbgz', 'j': 'ikmnhu', 'k': 'olmji', 'l': 'kop', 228 | 'y': 'asx', 'x': 'ysdc', 'c': 'xdfv', 'v': 'cfgb', 'b': 'vghn', 'n': 'bhjm', 'm': 'njk' 229 | } 230 | self.azerty = { 231 | '1': '2a', '2': '3za1', '3': '4ez2', '4': '5re3', '5': '6tr4', '6': '7yt5', '7': '8uy6', '8': '9iu7', '9': '0oi8', '0': 'po9', 232 | 'a': '2zq1', 'z': '3esqa2', 'e': '4rdsz3', 'r': '5tfde4', 't': '6ygfr5', 'y': '7uhgt6', 'u': '8ijhy7', 'i': '9okju8', 'o': '0plki9', 'p': 'lo0m', 233 | 'q': 'zswa', 's': 'edxwqz', 'd': 'rfcxse', 'f': 'tgvcdr', 'g': 'yhbvft', 'h': 'ujnbgy', 'j': 'iknhu', 'k': 'olji', 'l': 'kopm', 'm': 'lp', 234 | 'w': 'sxq', 'x': 'zsdc', 'c': 'xdfv', 'v': 'cfgb', 'b': 'vghn', 'n': 'bhj' 235 | } 236 | self.keyboards = [ self.qwerty, self.qwertz, self.azerty ] 237 | 238 | def __domain_tld(self, domain): 239 | domain = domain.rsplit('.', 2) 240 | 241 | if len(domain) == 2: 242 | return domain[0], domain[1] 243 | 244 | if DB_TLD: 245 | cc_tld = {} 246 | re_tld = re.compile('^[a-z]{2,4}\.[a-z]{2}$', re.IGNORECASE) 247 | 248 | for line in open(FILE_TLD): 249 | line = line[:-1] 250 | if re_tld.match(line): 251 | sld, tld = line.split('.') 252 | if not tld in cc_tld: 253 | cc_tld[tld] = [] 254 | cc_tld[tld].append(sld) 255 | 256 | sld_tld = cc_tld.get(domain[2]) 257 | if sld_tld: 258 | if domain[1] in sld_tld: 259 | return domain[0], domain[1] + '.' + domain[2] 260 | 261 | return domain[0] + '.' + domain[1], domain[2] 262 | 263 | def __validate_domain(self, domain): 264 | if len(domain) > 255: 265 | return False 266 | if domain[-1] == '.': 267 | domain = domain[:-1] 268 | allowed = re.compile('\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,}\Z', re.IGNORECASE) 269 | return allowed.match(domain) 270 | 271 | def __filter_domains(self): 272 | seen = set() 273 | filtered = [] 274 | 275 | for d in self.domains: 276 | if self.__validate_domain(d['domain-name']) and d['domain-name'] not in seen: 277 | seen.add(d['domain-name']) 278 | filtered.append(d) 279 | 280 | self.domains = filtered 281 | 282 | def __bitsquatting(self): 283 | result = [] 284 | masks = [1, 2, 4, 8, 16, 32, 64, 128] 285 | for i in range(0, len(self.domain)): 286 | c = self.domain[i] 287 | for j in range(0, len(masks)): 288 | b = chr(ord(c) ^ masks[j]) 289 | o = ord(b) 290 | if (o >= 48 and o <= 57) or (o >= 97 and o <= 122) or o == 45: 291 | result.append(self.domain[:i] + b + self.domain[i+1:]) 292 | 293 | return result 294 | 295 | def __homoglyph(self): 296 | glyphs = { 297 | 'd': ['b', 'cl', 'dl', 'di'], 'm': ['n', 'nn', 'rn', 'rr'], 'l': ['1', 'i'], 298 | 'o': ['0'], 'k': ['lk', 'ik', 'lc'], 'h': ['lh', 'ih'], 'w': ['vv'], 299 | 'n': ['m', 'r'], 'b': ['d', 'lb', 'ib'], 'i': ['1', 'l'], 'g': ['q'], 'q': ['g'] 300 | } 301 | result = [] 302 | 303 | for ws in range(0, len(self.domain)): 304 | for i in range(0, (len(self.domain)-ws)+1): 305 | win = self.domain[i:i+ws] 306 | 307 | j = 0 308 | while j < ws: 309 | c = win[j] 310 | if c in glyphs: 311 | win_copy = win 312 | for g in glyphs[c]: 313 | win = win.replace(c, g) 314 | result.append(self.domain[:i] + win + self.domain[i+ws:]) 315 | win = win_copy 316 | j += 1 317 | 318 | return list(set(result)) 319 | 320 | def __hyphenation(self): 321 | result = [] 322 | 323 | for i in range(1, len(self.domain)): 324 | if self.domain[i] not in ['-', '.'] and self.domain[i-1] not in ['-', '.']: 325 | result.append(self.domain[:i] + '-' + self.domain[i:]) 326 | 327 | return result 328 | 329 | def __insertion(self): 330 | result = [] 331 | 332 | for i in range(1, len(self.domain)-1): 333 | for keys in self.keyboards: 334 | if self.domain[i] in keys: 335 | for c in keys[self.domain[i]]: 336 | result.append(self.domain[:i] + c + self.domain[i] + self.domain[i+1:]) 337 | result.append(self.domain[:i] + self.domain[i] + c + self.domain[i+1:]) 338 | 339 | return list(set(result)) 340 | 341 | def __omission(self): 342 | result = [] 343 | 344 | for i in range(0, len(self.domain)): 345 | result.append(self.domain[:i] + self.domain[i+1:]) 346 | 347 | n = re.sub(r'(.)\1+', r'\1', self.domain) 348 | 349 | if n not in result and n != self.domain: 350 | result.append(n) 351 | 352 | return list(set(result)) 353 | 354 | def __repetition(self): 355 | result = [] 356 | 357 | for i in range(0, len(self.domain)): 358 | if self.domain[i].isalpha(): 359 | result.append(self.domain[:i] + self.domain[i] + self.domain[i] + self.domain[i+1:]) 360 | 361 | return list(set(result)) 362 | 363 | def __replacement(self): 364 | result = [] 365 | 366 | for i in range(0, len(self.domain)): 367 | for keys in self.keyboards: 368 | if self.domain[i] in keys: 369 | for c in keys[self.domain[i]]: 370 | result.append(self.domain[:i] + c + self.domain[i+1:]) 371 | 372 | return list(set(result)) 373 | 374 | def __subdomain(self): 375 | result = [] 376 | 377 | for i in range(1, len(self.domain)): 378 | if self.domain[i] not in ['-', '.'] and self.domain[i-1] not in ['-', '.']: 379 | result.append(self.domain[:i] + '.' + self.domain[i:]) 380 | 381 | return result 382 | 383 | def __transposition(self): 384 | result = [] 385 | 386 | for i in range(0, len(self.domain)-1): 387 | if self.domain[i+1] != self.domain[i]: 388 | result.append(self.domain[:i] + self.domain[i+1] + self.domain[i] + self.domain[i+2:]) 389 | 390 | return result 391 | 392 | def __addition(self): 393 | result = [] 394 | 395 | for i in range(97, 123): 396 | result.append(self.domain + chr(i)) 397 | 398 | return result 399 | 400 | def generate(self): 401 | self.domains.append({ 'fuzzer': 'Original*', 'domain-name': self.domain + '.' + self.tld }) 402 | 403 | for domain in self.__addition(): 404 | self.domains.append({ 'fuzzer': 'Addition', 'domain-name': domain + '.' + self.tld }) 405 | for domain in self.__bitsquatting(): 406 | self.domains.append({ 'fuzzer': 'Bitsquatting', 'domain-name': domain + '.' + self.tld }) 407 | for domain in self.__homoglyph(): 408 | self.domains.append({ 'fuzzer': 'Homoglyph', 'domain-name': domain + '.' + self.tld }) 409 | for domain in self.__hyphenation(): 410 | self.domains.append({ 'fuzzer': 'Hyphenation', 'domain-name': domain + '.' + self.tld }) 411 | for domain in self.__insertion(): 412 | self.domains.append({ 'fuzzer': 'Insertion', 'domain-name': domain + '.' + self.tld }) 413 | for domain in self.__omission(): 414 | self.domains.append({ 'fuzzer': 'Omission', 'domain-name': domain + '.' + self.tld }) 415 | for domain in self.__repetition(): 416 | self.domains.append({ 'fuzzer': 'Repetition', 'domain-name': domain + '.' + self.tld }) 417 | for domain in self.__replacement(): 418 | self.domains.append({ 'fuzzer': 'Replacement', 'domain-name': domain + '.' + self.tld }) 419 | for domain in self.__subdomain(): 420 | self.domains.append({ 'fuzzer': 'Subdomain', 'domain-name': domain + '.' + self.tld }) 421 | for domain in self.__transposition(): 422 | self.domains.append({ 'fuzzer': 'Transposition', 'domain-name': domain + '.' + self.tld }) 423 | 424 | if not self.domain.startswith('www.'): 425 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': 'ww' + self.domain + '.' + self.tld }) 426 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': 'www' + self.domain + '.' + self.tld }) 427 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': 'www-' + self.domain + '.' + self.tld }) 428 | if '.' in self.tld: 429 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': self.domain + '.' + self.tld.split('.')[-1] }) 430 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': self.domain + self.tld }) 431 | if '.' not in self.tld: 432 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': self.domain + self.tld + '.' + self.tld }) 433 | if self.tld != 'com' and '.' not in self.tld: 434 | self.domains.append({ 'fuzzer': 'Various', 'domain-name': self.domain + '-' + self.tld + '.com' }) 435 | 436 | self.__filter_domains() 437 | 438 | 439 | class DomainDict(DomainFuzz): 440 | 441 | def __init__(self, domain): 442 | DomainFuzz.__init__(self, domain) 443 | 444 | self.dictionary = [] 445 | 446 | def load_dict(self, file): 447 | if path.exists(file): 448 | for word in open(file): 449 | word = word.strip('\n') 450 | if word.isalpha() and word not in self.dictionary: 451 | self.dictionary.append(word) 452 | 453 | def __dictionary(self): 454 | result = [] 455 | 456 | domain = self.domain.rsplit('.', 1) 457 | if len(domain) > 1: 458 | prefix = domain[0] + '.' 459 | name = domain[1] 460 | else: 461 | prefix = '' 462 | name = domain[0] 463 | 464 | for word in self.dictionary: 465 | result.append(prefix + name + '-' + word) 466 | result.append(prefix + name + word) 467 | result.append(prefix + word + '-' + name) 468 | result.append(prefix + word + name) 469 | 470 | return result 471 | 472 | def generate(self): 473 | for domain in self.__dictionary(): 474 | self.domains.append({ 'fuzzer': 'Dictionary', 'domain-name': domain + '.' + self.tld }) 475 | 476 | 477 | class DomainThread(threading.Thread): 478 | 479 | def __init__(self, queue): 480 | threading.Thread.__init__(self) 481 | self.jobs = queue 482 | self.kill_received = False 483 | 484 | self.ssdeep_orig = '' 485 | self.domain_orig = '' 486 | 487 | self.uri_scheme = 'http' 488 | self.uri_path = '' 489 | self.uri_query = '' 490 | 491 | self.option_extdns = False 492 | self.option_geoip = False 493 | self.option_whois = False 494 | self.option_ssdeep = False 495 | self.option_banners = False 496 | self.option_mxcheck = False 497 | 498 | def __banner_http(self, ip, vhost): 499 | try: 500 | http = socket.socket() 501 | http.settimeout(1) 502 | http.connect((ip, 80)) 503 | http.send('HEAD / HTTP/1.1\r\nHost: %s\r\nUser-agent: Mozilla/5.0\r\n\r\n' % str(vhost)) 504 | response = http.recv(1024) 505 | http.close() 506 | except Exception: 507 | pass 508 | else: 509 | sep = '\r\n' if '\r\n' in response else '\n' 510 | headers = response.split(sep) 511 | for field in headers: 512 | if field.startswith('Server: '): 513 | return field[8:] 514 | banner = headers[0].split(' ') 515 | if len(banner) > 1: 516 | return 'HTTP %s' % banner[1] 517 | 518 | def __banner_smtp(self, mx): 519 | try: 520 | smtp = socket.socket() 521 | smtp.settimeout(1) 522 | smtp.connect((mx, 25)) 523 | response = smtp.recv(1024) 524 | smtp.close() 525 | except Exception: 526 | pass 527 | else: 528 | sep = '\r\n' if '\r\n' in response else '\n' 529 | hello = response.split(sep)[0] 530 | if hello.startswith('220'): 531 | return hello[4:].strip() 532 | return hello[:40] 533 | 534 | def __mxcheck(self, mx, from_domain, to_domain): 535 | from_addr = 'randombob' + str(randint(1, 9)) + '@' + from_domain 536 | to_addr = 'randomalice' + str(randint(1, 9)) + '@' + to_domain 537 | try: 538 | smtp = smtplib.SMTP(mx, 25, timeout=REQUEST_TIMEOUT_SMTP) 539 | smtp.sendmail(from_addr, to_addr, 'And that\'s how the cookie crumbles') 540 | smtp.quit() 541 | except Exception: 542 | return False 543 | else: 544 | return True 545 | 546 | def stop(self): 547 | self.kill_received = True 548 | 549 | def run(self): 550 | while not self.kill_received: 551 | domain = self.jobs.get() 552 | 553 | if self.option_extdns: 554 | resolv = dns.resolver.Resolver() 555 | resolv.lifetime = REQUEST_TIMEOUT_DNS 556 | resolv.timeout = REQUEST_TIMEOUT_DNS 557 | 558 | try: 559 | ans = resolv.query(domain['domain-name'], 'SOA') 560 | domain['dns-ns'] = str(sorted(ans)[0]).split(' ')[0][:-1].lower() 561 | except Exception: 562 | pass 563 | 564 | if 'dns-ns' in domain: 565 | try: 566 | ans = resolv.query(domain['domain-name'], 'A') 567 | domain['dns-a'] = str(sorted(ans)[0]) 568 | except Exception: 569 | pass 570 | 571 | try: 572 | ans = resolv.query(domain['domain-name'], 'AAAA') 573 | domain['dns-aaaa'] = str(sorted(ans)[0]) 574 | except Exception: 575 | pass 576 | 577 | try: 578 | ans = resolv.query(domain['domain-name'], 'MX') 579 | mx = str(sorted(ans)[0].exchange)[:-1].lower() 580 | if mx: domain['dns-mx'] = mx 581 | except Exception: 582 | pass 583 | else: 584 | try: 585 | ip = socket.getaddrinfo(domain['domain-name'], 80) 586 | except Exception: 587 | pass 588 | else: 589 | for j in ip: 590 | if '.' in j[4][0]: 591 | domain['dns-a'] = j[4][0] 592 | break 593 | for j in ip: 594 | if ':' in j[4][0]: 595 | domain['dns-aaaa'] = j[4][0] 596 | break 597 | 598 | if self.option_mxcheck: 599 | if 'dns-mx' in domain: 600 | if domain['domain-name'] is not self.domain_orig: 601 | if self.__mxcheck(domain['dns-mx'], self.domain_orig, domain['domain-name']): 602 | domain['mx-spy'] = True 603 | 604 | if self.option_whois: 605 | if 'dns-ns' in domain or 'dns-a' in domain: 606 | try: 607 | whoisdb = whois.query(domain['domain-name']) 608 | domain['whois-created'] = str(whoisdb.creation_date).replace(' ', 'T') 609 | domain['whois-updated'] = str(whoisdb.last_updated).replace(' ', 'T') 610 | except Exception: 611 | pass 612 | 613 | if self.option_geoip: 614 | if 'dns-a' in domain: 615 | gi = GeoIP.open(FILE_GEOIP, GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE) 616 | try: 617 | country = gi.country_name_by_addr(domain['dns-a']) 618 | except Exception: 619 | pass 620 | else: 621 | if country: 622 | domain['geoip-country'] = country.split(',')[0] 623 | 624 | if self.option_banners: 625 | if 'dns-a' in domain: 626 | banner = self.__banner_http(domain['dns-a'], domain['domain-name']) 627 | if banner: 628 | domain['banner-http'] = banner 629 | if 'dns-mx' in domain: 630 | banner = self.__banner_smtp(domain['dns-mx']) 631 | if banner: 632 | domain['banner-smtp'] = banner 633 | 634 | if self.option_ssdeep: 635 | if 'dns-a' in domain: 636 | try: 637 | req = requests.get(self.uri_scheme + '://' + domain['domain-name'] + self.uri_path + self.uri_query, timeout=REQUEST_TIMEOUT_HTTP, headers={'User-Agent': 'Mozilla/5.0 (dnstwist)'}) 638 | #ssdeep_fuzz = ssdeep.hash(req.text.replace(' ', '').replace('\n', '')) 639 | ssdeep_fuzz = ssdeep.hash(req.text) 640 | except Exception: 641 | pass 642 | else: 643 | if req.status_code / 100 == 2: 644 | domain['ssdeep-score'] = ssdeep.compare(self.ssdeep_orig, ssdeep_fuzz) 645 | 646 | self.jobs.task_done() 647 | 648 | 649 | def generate_json(domains): 650 | return json.dumps(domains, indent=4, sort_keys=True) 651 | 652 | 653 | def generate_csv(domains): 654 | output = 'fuzzer,domain-name,dns-a,dns-aaaa,dns-mx,dns-ns,geoip-country,whois-created,whois-updated,ssdeep-score\n' 655 | 656 | for domain in domains: 657 | output += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (domain.get('fuzzer'), domain.get('domain-name'), domain.get('dns-a', ''), 658 | domain.get('dns-aaaa', ''), domain.get('dns-mx', ''), domain.get('dns-ns', ''), domain.get('geoip-country', ''), 659 | domain.get('whois-created', ''), domain.get('whois-updated', ''), str(domain.get('ssdeep-score', ''))) 660 | 661 | return output 662 | 663 | 664 | def generate_cli(domains): 665 | output = '' 666 | 667 | width_fuzzer = max([len(d['fuzzer']) for d in domains]) + 2 668 | width_domain = max([len(d['domain-name']) for d in domains]) + 2 669 | 670 | for domain in domains: 671 | info = '' 672 | 673 | if 'dns-a' in domain: 674 | info += domain['dns-a'] 675 | if 'geoip-country' in domain: 676 | info += FG_CYA + '/' + domain['geoip-country'] + FG_RST 677 | info += ' ' 678 | 679 | if 'dns-aaaa' in domain: 680 | info += domain['dns-aaaa'] + ' ' 681 | 682 | if 'dns-ns' in domain: 683 | info += '%sNS:%s%s%s ' % (FG_YEL, FG_CYA, domain['dns-ns'], FG_RST) 684 | 685 | if 'dns-mx' in domain: 686 | if 'mx-spy' in domain: 687 | info += '%sSPYING-MX:%s%s' % (FG_YEL, domain['dns-mx'], FG_RST) 688 | else: 689 | info += '%sMX:%s%s%s ' % (FG_YEL, FG_CYA, domain['dns-mx'], FG_RST) 690 | 691 | if 'banner-http' in domain: 692 | info += '%sHTTP:%s"%s"%s ' % (FG_YEL, FG_CYA, domain['banner-http'], FG_RST) 693 | 694 | if 'banner-smtp' in domain: 695 | info += '%sSMTP:%s"%s"%s ' % (FG_YEL, FG_CYA, domain['banner-smtp'], FG_RST) 696 | 697 | if 'whois-created' in domain and 'whois-updated' in domain: 698 | if domain['whois-created'] == domain['whois-updated']: 699 | info += '%sCreated/Updated:%s%s%s ' % (FG_YEL, FG_CYA, domain['whois-created'], FG_RST) 700 | else: 701 | if 'whois-created' in domain: 702 | info += '%sCreated:%s%s%s ' % (FG_YEL, FG_CYA, domain['whois-created'], FG_RST) 703 | if 'whois-updated' in domain: 704 | info += '%sUpdated:%s%s%s ' % (FG_YEL, FG_CYA, domain['whois-updated'], FG_RST) 705 | 706 | if 'ssdeep-score' in domain: 707 | if domain['ssdeep-score'] > 0: 708 | info += '%sSSDEEP:%d%%%s ' % (FG_YEL, domain['ssdeep-score'], FG_RST) 709 | 710 | info = info.strip() 711 | 712 | if not info: 713 | info = '-' 714 | 715 | output += '%s%s%s %s %s\n' % (FG_BLU, domain['fuzzer'].ljust(width_fuzzer), FG_RST, domain['domain-name'].ljust(width_domain), info) 716 | 717 | return output 718 | 719 | 720 | def main(): 721 | signal.signal(signal.SIGINT, sigint_handler) 722 | 723 | parser = argparse.ArgumentParser( 724 | usage='%s [OPTION]... DOMAIN' % sys.argv[0], 725 | add_help = True, 726 | description= 727 | '''Find similar-looking domain names that adversaries can use to attack you. ''' 728 | '''Can detect typosquatters, phishing attacks, fraud and corporate espionage. ''' 729 | '''Useful as an additional source of targeted threat intelligence.''' 730 | ) 731 | 732 | parser.add_argument('domain', help='domain name or URL to check') 733 | parser.add_argument('-c', '--csv', action='store_true', help='print output in CSV format') 734 | parser.add_argument('-j', '--json', action='store_true', help='print output in JSON format') 735 | parser.add_argument('-r', '--registered', action='store_true', help='show only registered domain names') 736 | parser.add_argument('-w', '--whois', action='store_true', help='perform lookup for WHOIS creation/update time (slow)') 737 | parser.add_argument('-g', '--geoip', action='store_true', help='perform lookup for GeoIP location') 738 | parser.add_argument('-b', '--banners', action='store_true', help='determine HTTP and SMTP service banners') 739 | parser.add_argument('-s', '--ssdeep', action='store_true', help='fetch web pages and compare their fuzzy hashes to evaluate similarity') 740 | parser.add_argument('-m', '--mxcheck', action='store_true', help='check if MX host can be used to intercept e-mails') 741 | parser.add_argument('-d', '--dictionary', type=str, metavar='FILE', help='generate additional domains using dictionary FILE') 742 | parser.add_argument('-t', '--threads', type=int, metavar='NUMBER', default=THREAD_COUNT_DEFAULT, help='start specified NUMBER of threads (default: %d)' % THREAD_COUNT_DEFAULT) 743 | 744 | if len(sys.argv) < 2: 745 | sys.stdout.write('%sdnstwist %s by <%s>%s\n\n' % (ST_BRI, __version__, __email__, ST_RST)) 746 | parser.print_help() 747 | bye(0) 748 | 749 | global args 750 | args = parser.parse_args() 751 | 752 | if args.csv and args.json: 753 | p_err('error: cannot use both CSV and JSON as output\n') 754 | bye(-1) 755 | 756 | if args.threads < 1: 757 | args.threads = THREAD_COUNT_DEFAULT 758 | 759 | try: 760 | url = UrlParser(args.domain) 761 | except ValueError as err: 762 | p_err('error: %s\n' % err) 763 | bye(-1) 764 | 765 | dfuzz = DomainFuzz(url.domain) 766 | dfuzz.generate() 767 | domains = dfuzz.domains 768 | 769 | if args.dictionary: 770 | if not path.exists(args.dictionary): 771 | p_err('error: dictionary not found: %s\n' % args.dictionary) 772 | bye(-1) 773 | ddict = DomainDict(url.domain) 774 | ddict.load_dict(args.dictionary) 775 | ddict.generate() 776 | domains += ddict.domains 777 | 778 | if not DB_TLD: 779 | p_err('error: missing TLD database file: %s\n' % FILE_TLD) 780 | bye(-1) 781 | if not DB_GEOIP and args.geoip: 782 | p_err('error: missing GeoIP database file: %\n' % FILE_GEOIP) 783 | bye(-1) 784 | 785 | if not MODULE_DNSPYTHON: 786 | p_err('notice: missing module: dnspython (DNS features limited)\n') 787 | if not MODULE_GEOIP and args.geoip: 788 | p_err('notice: missing module: GeoIP (geographical location not available)\n') 789 | if not MODULE_WHOIS and args.whois: 790 | p_err('notice: missing module: whois (WHOIS database not accessible)\n') 791 | if not MODULE_SSDEEP and args.ssdeep: 792 | p_err('notice: missing module: ssdeep (fuzzy hashes not available)\n') 793 | if not MODULE_REQUESTS and args.ssdeep: 794 | p_err('notice: missing module: Requests (web page downloads not possible)\n') 795 | 796 | p_cli(FG_RND + ST_BRI + 797 | ''' _ _ _ _ 798 | __| |_ __ ___| |___ _(_)___| |_ 799 | / _` | '_ \/ __| __\ \ /\ / / / __| __| 800 | | (_| | | | \__ \ |_ \ V V /| \__ \ |_ 801 | \__,_|_| |_|___/\__| \_/\_/ |_|___/\__| {%s} 802 | 803 | ''' % __version__ + FG_RST + ST_RST) 804 | 805 | if MODULE_WHOIS and args.whois: 806 | p_cli('Disabling multithreaded job distribution in order to query WHOIS servers\n') 807 | args.threads = 1 808 | 809 | if args.ssdeep and MODULE_SSDEEP and MODULE_REQUESTS: 810 | p_cli('Fetching content from: ' + url.get_full_uri() + ' ... ') 811 | try: 812 | req = requests.get(url.get_full_uri(), timeout=REQUEST_TIMEOUT_HTTP, headers={'User-Agent': 'Mozilla/5.0 (dnstwist)'}) 813 | except requests.exceptions.ConnectionError: 814 | p_cli('Connection error\n') 815 | args.ssdeep = False 816 | pass 817 | except requests.exceptions.HTTPError: 818 | p_cli('Invalid HTTP response\n') 819 | args.ssdeep = False 820 | pass 821 | except requests.exceptions.Timeout: 822 | p_cli('Timeout (%d seconds)\n' % REQUEST_TIMEOUT_HTTP) 823 | args.ssdeep = False 824 | pass 825 | except Exception: 826 | p_cli('Failed!\n') 827 | args.ssdeep = False 828 | pass 829 | else: 830 | p_cli('%d %s (%.1f Kbytes)\n' % (req.status_code, req.reason, float(len(req.text))/1000)) 831 | if req.status_code / 100 == 2: 832 | #ssdeep_orig = ssdeep.hash(req.text.replace(' ', '').replace('\n', '')) 833 | ssdeep_orig = ssdeep.hash(req.text) 834 | else: 835 | args.ssdeep = False 836 | 837 | p_cli('Processing %d domain variants ' % len(domains)) 838 | 839 | jobs = queue.Queue() 840 | 841 | global threads 842 | threads = [] 843 | 844 | for i in range(len(domains)): 845 | jobs.put(domains[i]) 846 | 847 | for i in range(args.threads): 848 | worker = DomainThread(jobs) 849 | worker.setDaemon(True) 850 | 851 | worker.uri_scheme = url.scheme 852 | worker.uri_path = url.path 853 | worker.uri_query = url.query 854 | 855 | worker.domain_orig = url.domain 856 | 857 | if MODULE_DNSPYTHON: 858 | worker.option_extdns = True 859 | if MODULE_WHOIS and args.whois: 860 | worker.option_whois = True 861 | if MODULE_GEOIP and DB_GEOIP and args.geoip: 862 | worker.option_geoip = True 863 | if args.banners: 864 | worker.option_banners = True 865 | if args.ssdeep and MODULE_REQUESTS and MODULE_SSDEEP and 'ssdeep_orig' in locals(): 866 | worker.option_ssdeep = True 867 | worker.ssdeep_orig = ssdeep_orig 868 | if args.mxcheck: 869 | worker.option_mxcheck = True 870 | 871 | worker.start() 872 | threads.append(worker) 873 | 874 | qperc = 0 875 | while not jobs.empty(): 876 | p_cli('.') 877 | qcurr = 100 * (len(domains) - jobs.qsize()) / len(domains) 878 | if qcurr - 20 >= qperc: 879 | qperc = qcurr 880 | p_cli('%u%%' % qperc) 881 | time.sleep(1) 882 | 883 | for worker in threads: 884 | worker.stop() 885 | 886 | hits_total = sum('dns-ns' in d or 'dns-a' in d for d in domains) 887 | hits_percent = 100 * hits_total / len(domains) 888 | p_cli(' %d hits (%d%%)\n\n' % (hits_total, hits_percent)) 889 | time.sleep(1) 890 | 891 | if args.registered: 892 | domains_registered = [] 893 | for d in domains: 894 | if 'dns-ns' in d or 'dns-a' in d: 895 | domains_registered.append(d) 896 | domains = domains_registered 897 | del domains_registered 898 | 899 | if args.csv: 900 | p_csv(generate_csv(domains)) 901 | elif args.json: 902 | p_json(generate_json(domains)) 903 | else: 904 | p_cli(generate_cli(domains)) 905 | 906 | bye(0) 907 | 908 | 909 | if __name__ == '__main__': 910 | main() 911 | -------------------------------------------------------------------------------- /examples/example_tld.csv: -------------------------------------------------------------------------------- 1 | club 2 | tk 3 | info 4 | xxx 5 | -------------------------------------------------------------------------------- /gfyp_db.py: -------------------------------------------------------------------------------- 1 | """Functions for interacting with the database.""" 2 | import sqlite3 3 | import logging 4 | from common import log #common.py 5 | 6 | DB_FILENAME_DEFAULT = 'db.db' 7 | DB_SCHEMA_VERSION = 1 8 | 9 | class DatabaseConnection(object): 10 | """A connection to the database. 11 | 12 | Usage: 13 | with gfyp_db.DatabaseConnection() as db_con: 14 | db_con.foo() 15 | ... 16 | """ 17 | 18 | def __init__(self, filename=DB_FILENAME_DEFAULT): 19 | self.filename = filename 20 | self.conn = sqlite3.connect(self.filename) 21 | 22 | def __enter__(self): 23 | return self 24 | 25 | def __exit__(self, exec_type, exec_value, exec_traceback): 26 | self.conn.close() 27 | 28 | def _create_table(self, stmt): 29 | try: 30 | self.sql_execute(stmt) 31 | except sqlite3.OperationalError as err: 32 | msg = "Error creating table: %s" % str(err) 33 | print(msg) 34 | log(msg, logging.ERROR) 35 | return True 36 | return False 37 | 38 | def table_init(self): 39 | """Initialize database with required tables. 40 | 41 | Return: bool: Whether any errors were encounterd. 42 | """ 43 | stmt1 = "CREATE TABLE lookupTable(emailAddy text, domainName text UNIQUE)" 44 | stmt2 = "CREATE TABLE foundDomains(domainName text, info text, notes text,dateFound integer)" 45 | stmt3 = "PRAGMA user_version = %s" % str(DB_SCHEMA_VERSION) 46 | return self._create_table(stmt1) or self._create_table(stmt2) or self._create_table(stmt3) 47 | 48 | def add_watch_entry(self, domain_name, alert_email): 49 | """Add a domain to monitor for phishing variants.""" 50 | stmt = "INSERT INTO lookupTable VALUES (?, ?)" 51 | arglist = (alert_email, domain_name) 52 | try: 53 | num_changes = self.sql_execute(stmt, arglist) 54 | msg = "" 55 | log_level = logging.INFO 56 | msg = "Added domain %s for monitoring." % domain_name 57 | except sqlite3.IntegrityError: 58 | msg = "No domains added for monitoring! Already present? - %s" % domain_name 59 | log_level = logging.WARNING 60 | 61 | print(msg) 62 | log(msg, log_level) 63 | 64 | def delete_watch_entry(self, domain_name): 65 | """Delete all entries for monitoring for specified domain.""" 66 | stmt = "DELETE FROM lookupTable WHERE lookupTable.domainName = ?" 67 | arglist = (domain_name,) 68 | num_changes = self.sql_execute(stmt, arglist) 69 | msg = "" 70 | log_level = logging.INFO 71 | if num_changes == 0: 72 | msg = "No domains removed from monitoring! Not already present?" 73 | log_level = logging.WARNING 74 | else: 75 | msg = ("Removed %s from monitoring and removed %d alert%s." % 76 | (domain_name, num_changes, 's' if num_changes != 1 else '')) 77 | print(msg) 78 | log(msg, log_level) 79 | 80 | def get_watch_entries(self): 81 | """Get a list of (email, domain) entries monitored for phishing variants. 82 | 83 | Returns: List of tuples (alert_email, domain) of length >= 0 84 | """ 85 | cur = self.conn.cursor() 86 | domain_entries = cur.execute('SELECT * FROM lookupTable').fetchall() 87 | return domain_entries 88 | 89 | def get_all_found_domains(self): 90 | """Get list of (domain, info) previously found. 91 | 92 | Returns: List of tuples (domain, info) of length >= 0 93 | """ 94 | cur = self.conn.cursor() 95 | info_entries = cur.execute("SELECT * FROM foundDomains") 96 | return info_entries 97 | 98 | def delete_found_domain(self, domain_name): 99 | """Delete all entries from list of domains found that match domain.""" 100 | stmt = "DELETE FROM foundDomains WHERE foundDomains.domainName = ?" 101 | arglist = (domain_name,) 102 | num_changes = self.sql_execute(stmt, arglist) 103 | msg = "" 104 | log_level = logging.INFO 105 | if num_changes == 0: 106 | msg = "No domains removed from list! Not previously found?" 107 | log_level = logging.WARNING 108 | elif num_changes == 1: 109 | msg = ("Removed domain %s from list of previously discovered domains." % 110 | domain_name) 111 | else: 112 | msg = "Made more database changes (%d) than expected!" % num_changes 113 | log_level = logging.ERROR 114 | 115 | print(msg) 116 | log(msg, log_level) 117 | 118 | def get_matching_found_domains(self, domain_name): 119 | """Get list of (domain, info) previously found matching specified domain. 120 | 121 | Returns: List of tuples (domain, info) of length >= 0 122 | """ 123 | cur = self.conn.cursor() 124 | stmt = "SELECT * FROM foundDomains WHERE domainName = ?" 125 | arglist = (domain_name,) 126 | info_entries = cur.execute(stmt, arglist) 127 | return info_entries 128 | 129 | def get_found_domains_last_seven_days(self): 130 | """Get list of found domain entries from last seven days""" 131 | cur = self.conn.cursor() 132 | stmt = "SELECT * from foundDomains where strftime('%s','now') - dateFound < 604800" 133 | info_entries = cur.execute(stmt) 134 | return info_entries 135 | 136 | def add_discovered_domain(self, domain_name, domain_info): 137 | """Add a new domain to list of domains discovered by dnstwist. 138 | Args: 139 | domain_name (str): The name of the domain, e.g. 'example.com'. 140 | domain_info (str): Additiona DNS information from DNS lookup. 141 | """ 142 | cur = self.conn.cursor() 143 | stmt = "INSERT INTO foundDomains VALUES (?, ?, ?, strftime('%s','now'))" 144 | arglist = (domain_name, domain_info,"") 145 | cur.execute(stmt, arglist) 146 | self.conn.commit() 147 | 148 | def is_db_current(self): 149 | """Gets the user_version from the db and checks to see if it matches the current schema number""" 150 | cur = self.conn.cursor() 151 | stmt = "PRAGMA user_version" 152 | info = cur.execute(stmt) 153 | return DB_SCHEMA_VERSION == (info.fetchone())[0] 154 | 155 | def get_version(self): 156 | """Gets the user_version from the db and returns the value""" 157 | cur = self.conn.cursor() 158 | stmt = "PRAGMA user_version" 159 | info = cur.execute(stmt) 160 | return (info.fetchone())[0] 161 | 162 | def add_note(self,domain_name, note): 163 | """Adds a note to a discovered domain""" 164 | cur = self.conn.cursor() 165 | stmt = "UPDATE foundDomains SET notes = ? WHERE domainName = ?" 166 | arglist = (note,domain_name) 167 | cur.execute(stmt,arglist) 168 | self.conn.commit() 169 | 170 | def sql_execute(self, stmt, arglist=None): 171 | """Execute the SQL statement and return number of db changes.""" 172 | cur = self.conn.cursor() 173 | if arglist is not None: 174 | cur.execute(stmt, arglist) 175 | else: 176 | cur.execute(stmt) 177 | self.conn.commit() 178 | num_changes = self.conn.total_changes 179 | return num_changes 180 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | db-sqlite3 2 | dnspython 3 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | """A utility for maintaining the GFYP database.""" 3 | 4 | import sys 5 | import csv 6 | import logging 7 | import shutil 8 | from datetime import datetime 9 | 10 | import gfyp_db #gfyp_db.py 11 | from common import pretty_print, log #common.py 12 | 13 | def usage(): 14 | """Print usage info.""" 15 | usage_str = ( 16 | "GFYP Utilities\n" 17 | "usage: python util.py <$BOLD$command$END$> [command parameters space " 18 | "separated]\n" 19 | "Commands:\n" 20 | " $BOLD$usage$END$ - prints this message\n" 21 | " $BOLD$build$END$ - creates a blank database named db.db\n" 22 | " $BOLD$add$END$ (domain name) (email address) [optional: filename of csv file containing additional tlds] - inserts a new " 23 | "domain(s) to monitor into db.db\n" 24 | " $BOLD$removemonitor$END$ (domain name) - removes a domain from " 25 | "being monitored\n" 26 | " $BOLD$removeentry$END$ (domain name) - removes an identified " 27 | "domain from the found entries\n" 28 | " $BOLD$dump$END$ (file name) - Writes the contents of the found " 29 | "domain name table into the file in CSV format\n" 30 | " $BOLD$migrate$END$ - Upgrades the GFYP database to the most " 31 | "recent schema format\n" 32 | " $BOLD$addnote$END$ (domain name) (note in quotes)- Add a note " 33 | "to a discovered domain entry") 34 | pretty_print(usage_str) 35 | sys.exit() 36 | 37 | def dump(): 38 | """Write database to CSV file.""" 39 | filename = sys.argv[2] 40 | with gfyp_db.DatabaseConnection() as db_con: 41 | with open(filename, 'wb') as csvfile: 42 | csvoutput = csv.writer(csvfile) 43 | found_entries = db_con.get_all_found_domains() 44 | entries_iter = found_entries.fetchall() 45 | for entry in entries_iter: 46 | csvoutput.writerow(entry) 47 | print(("Wrote %d entries to '%s'." % (len(entries_iter), filename))) 48 | 49 | def build(): 50 | """Create tables.""" 51 | with gfyp_db.DatabaseConnection() as db_con: 52 | is_err = db_con.table_init() 53 | err_msg = ", but with errors" 54 | msg = "Database is initalized%s." % (err_msg if is_err else '') 55 | print(msg) 56 | log_level = logging.ERROR if is_err else logging.INFO 57 | log(msg, log_level) 58 | 59 | def add_domain(): 60 | """Inserts a new domain to monitor""" 61 | if len(sys.argv) != 4 and len(sys.argv) != 5: 62 | log("Incorrect number of arguments for adding domain: %s" % sys.argv, 63 | logging.ERROR) 64 | usage() 65 | email_notif_addr = sys.argv[3] 66 | domain_list = [] 67 | domain_list.append(sys.argv[2]) 68 | if len(sys.argv) == 5: 69 | #Looks like a TLD file is present, add them as well 70 | baseName = ((sys.argv[2]).rsplit('.'))[0] 71 | with open(sys.argv[4],'rb') as csvfile: 72 | csvreader = csv.reader(csvfile) 73 | for tld in csvreader: 74 | domain_list.append(baseName+"."+tld[0]) 75 | with gfyp_db.DatabaseConnection() as db_con: 76 | for domain in domain_list: 77 | db_con.add_watch_entry(domain, email_notif_addr) 78 | 79 | def remove_domain(): 80 | """Removes a domain from being monitored""" 81 | domain_name = sys.argv[2] 82 | with gfyp_db.DatabaseConnection() as db_con: 83 | db_con.delete_watch_entry(domain_name) 84 | 85 | def remove_entry(): 86 | """Removes an identified domain from the list of found entries""" 87 | domain_name = sys.argv[2] 88 | 89 | with gfyp_db.DatabaseConnection() as db_con: 90 | db_con.delete_found_domain(domain_name) 91 | 92 | def migrate(): 93 | """Update the database to the current schema version""" 94 | is_curr = False 95 | db_ver = 0 96 | 97 | with gfyp_db.DatabaseConnection() as db_con: 98 | is_curr = db_con.is_db_current() 99 | db_ver = db_con.get_version() 100 | 101 | if not is_curr: 102 | dst = "db.bak.%s" % str(datetime.now()) 103 | msg = "Updated database to most recent version - Existing database stored as: %s" % dst 104 | shutil.move("db.db",dst) 105 | build() 106 | if db_ver == 0: 107 | # Case db_ver == 0: Needs to be modified to account for UNIQUE monitor domains 108 | with gfyp_db.DatabaseConnection() as db_con: 109 | with gfyp_db.DatabaseConnection(filename=dst) as old_db_con: 110 | existing_watch_entries = old_db_con.get_watch_entries() 111 | for entry in existing_watch_entries: 112 | db_con.add_watch_entry(entry[1],entry[0]) 113 | 114 | existing_found_entries = old_db_con.get_all_found_domains() 115 | entries_iter = existing_found_entries.fetchall() 116 | for entry in entries_iter: 117 | db_con.add_discovered_domain(entry[0],entry[1]) 118 | else: 119 | msg = "Database is currently at the most recent schema version. No changes necessary." 120 | 121 | print(msg) 122 | log(msg,logging.INFO) 123 | 124 | def addnote(): 125 | """Add a note for a found domain""" 126 | domain_name = sys.argv[2] 127 | note = sys.argv[3] 128 | with gfyp_db.DatabaseConnection() as db_con: 129 | db_con.add_note(domain_name,note) 130 | 131 | FUNCTIONS = {'build': build, 132 | 'usage': usage, 133 | 'add': add_domain, 134 | 'removeentry' : remove_entry, 135 | 'removemonitor' : remove_domain, 136 | 'dump' : dump, 137 | 'migrate' : migrate, 138 | 'addnote' : addnote} 139 | 140 | if __name__ == "__main__": 141 | if len(sys.argv) < 2 or sys.argv[1] not in FUNCTIONS: 142 | log("Invalid arguments: %s" % sys.argv, logging.ERROR) 143 | usage() 144 | else: 145 | # python util.py (utility function argument) (function parameters space separated) 146 | FUNCTIONS[sys.argv[1]]() 147 | --------------------------------------------------------------------------------