├── LICENSE ├── README.md ├── opdb-sample.db ├── opdb.ini ├── pyopdb.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 openphish 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This module lets you integrate the OpenPhish Phishing Database into existing systems and build custom tools. The API module interacts with a local, periodically updated copy of the OpenPhish Database. It also includes a simple command-line interface that allows you to quickly interact with the database for everyday tasks, such as searching for a URL. 2 | 3 | In order to use the OPDB API you need to have a valid production or free trial license. Details: https://openphish.com/phishing_database.html 4 | 5 | For more information and documentation, visit the project Wiki: https://github.com/openphish/pyopdb/wiki 6 | -------------------------------------------------------------------------------- /opdb-sample.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openphish/pyopdb/16a3e8d08a5448d606d6fd2aa7499f305d6d5b33/opdb-sample.db -------------------------------------------------------------------------------- /opdb.ini: -------------------------------------------------------------------------------- 1 | [license] 2 | access_key = 3 | secret_key = 4 | type = 5 | 6 | [settings] 7 | local_db_path = ./opdb.db 8 | -------------------------------------------------------------------------------- /pyopdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sqlite3 3 | import argparse 4 | import configparser 5 | import os 6 | import urllib.parse 7 | import ntpath 8 | import re 9 | import json 10 | import hashlib 11 | import tempfile 12 | import tarfile 13 | import shutil 14 | import boto3 15 | import requests 16 | import ipaddress 17 | 18 | def api_result(success: bool, data: dict=None, msg: str=None): 19 | return {"success": success, "message": msg, "data": data} 20 | 21 | def prepare_url(url: str): 22 | """Prepares URL for querying the url_norm field""" 23 | if not url: 24 | return None 25 | 26 | scheme, host, path, params, _, _ = urllib.parse.urlparse(url) 27 | # URLs that do not start with http are considered invalid 28 | if ':' in url and not url.lower().startswith('http'): 29 | return None 30 | 31 | # URLs without a scheme or a host are invalid 32 | if not scheme or not host: 33 | return None 34 | 35 | # Encode IDNA hostnames 36 | host = host.encode('idna').decode('ascii') 37 | 38 | path = re.sub("/{2,}", "/", path) 39 | direcotory, page = ntpath.split(path) 40 | page_name, page_ext = os.path.splitext(page) 41 | # replace common index pages with '/' 42 | if page and page_name.lower() in ['index', 'default']: 43 | if direcotory != '/': 44 | path = direcotory + '/' 45 | else: 46 | path = '/' 47 | 48 | if not path: 49 | path = '/' 50 | elif not page_ext and path[-1] != '/': 51 | path = path + '/' 52 | 53 | parts = [scheme, host, path, params, None, None] 54 | return requests.utils.requote_uri(urllib.parse.urlunparse(parts)) 55 | 56 | class OPDB(): 57 | def __init__(self, cfg_file="./opdb.ini"): 58 | self._cfg_file = cfg_file 59 | self._config = dict() 60 | self._localdb_hash = None 61 | self._db_connection = None 62 | self._db_cursor = None 63 | 64 | self._load_config() 65 | self._load_db() 66 | 67 | @classmethod 68 | def _checksum(self, path): 69 | h = hashlib.sha256() 70 | with open(path, "rb") as f: 71 | for block in iter(lambda: f.read(4096), b""): 72 | h.update(block) 73 | return h.hexdigest() 74 | 75 | def _load_db(self): 76 | """Load local database""" 77 | self._db_path = self._config.get('settings', {}).get('local_db_path') 78 | if not self._db_path: 79 | raise ValueError("local_db_path is not set in config file") 80 | 81 | if not os.path.exists(self._db_path): 82 | return 83 | 84 | self._localdb_hash = self._checksum(self._db_path) 85 | self._db_connection = sqlite3.connect(self._db_path) 86 | self._db_cursor = self._db_connection.cursor() 87 | 88 | def _load_config(self): 89 | """Read configuration file""" 90 | if not os.path.isfile(self._cfg_file): 91 | raise FileNotFoundError(self._cfg_file) 92 | elif not os.access(self._cfg_file, os.R_OK): 93 | raise PermissionError("Cannot read {}".format(self._cfg_file)) 94 | elif os.stat(self._cfg_file).st_size == 0: 95 | raise IOError("Config file {} is empty".format(self._cfg_file)) 96 | 97 | config = configparser.ConfigParser() 98 | config.read(self._cfg_file) 99 | sections = config.sections() 100 | for section in sections: 101 | self._config[section] = dict(config.items(section)) 102 | 103 | def update(self): 104 | """Update the local database with a remote copy""" 105 | dblicense = self._config.get('license', {}) 106 | if not dblicense: 107 | return api_result(False, msg="No license section in config file") 108 | 109 | api_key = dblicense.get('access_key') 110 | secret_key = dblicense.get('secret_key') 111 | if not api_key or not secret_key: 112 | return api_result(False, msg="API keys are not set in config file") 113 | 114 | try: 115 | bucket, level = dblicense.get('type', '').split(':') 116 | except ValueError: 117 | return api_result(False, msg="Invalid license type") 118 | 119 | if not bucket or not level: 120 | return api_result(False, msg="Invalid license type") 121 | 122 | remote_db_path = "{}.db.tgz".format(level) 123 | s3_client = boto3.client( 124 | 's3', 125 | aws_access_key_id=api_key, 126 | aws_secret_access_key=secret_key) 127 | 128 | # Get metadata of remote database 129 | try: 130 | response = s3_client.head_object(Bucket=bucket, Key=remote_db_path) 131 | except Exception as e: 132 | return api_result(False, msg=str(e)) 133 | if not response or response.get("ResponseMetadata") is None: 134 | return api_result(False, msg="Invalid HEAD response from server") 135 | 136 | remote_hash = (response.get("ResponseMetadata") 137 | .get("HTTPHeaders", {}) 138 | .get("x-amz-meta-opdb-checksum", None)) 139 | if not remote_hash: 140 | return api_result(False, msg="No checksum in headers from server") 141 | 142 | # Check if local database needs to be updated 143 | if self._localdb_hash == remote_hash: 144 | return api_result(True, msg="DB is up to date") 145 | 146 | # Download the new database to a temporary file 147 | temp = tempfile.NamedTemporaryFile(prefix="opdb_") 148 | s3_client.download_file(bucket, remote_db_path, temp.name) 149 | if not os.path.exists(temp.name): 150 | return api_result(False, msg="Failed to download remote db") 151 | 152 | # Extract the new database and replace the existing one 153 | with tempfile.TemporaryDirectory() as tmpdirname: 154 | try: 155 | tar = tarfile.open(temp.name, 'r:gz') 156 | tar.extractall(tmpdirname) 157 | except Exception as e: 158 | return api_result(False, error=str(e)) 159 | new_db = os.path.join(tmpdirname, level + ".db") 160 | # Verify checksum against remote checksum 161 | if self._checksum(new_db) != remote_hash: 162 | return api_result(False, 163 | msg="Failed to verify remote db integrity") 164 | shutil.move(new_db, self._db_path) 165 | 166 | # Reload the database 167 | self._load_db() 168 | 169 | return api_result(True, msg="DB updated successfully") 170 | 171 | def run_query(self, q: str, *args): 172 | """Run a custom query""" 173 | if not self._db_connection or not self._db_cursor: 174 | return api_result(False, msg="OPDB is not initialized") 175 | 176 | if not q: 177 | return api_result(False, msg="Invalid query") 178 | 179 | try: 180 | self._db_cursor.execute(q, args) 181 | except sqlite3.OperationalError as e: 182 | return api_result(False, msg=str(e)) 183 | 184 | return api_result(True, data=self._db_cursor.fetchall()) 185 | 186 | def check_url(opdb: OPDB, url: str): 187 | """Basic search to check if a URL is phishing""" 188 | if not isinstance(opdb, OPDB) or not url: 189 | raise TypeError("Invalid arguments") 190 | 191 | if not url.lower().startswith("http"): 192 | return api_result(False, msg="URL must start with http/https") 193 | 194 | url = prepare_url(url) 195 | if not url: 196 | return api_result(False, msg="URL is invalid") 197 | 198 | query = ("SELECT isotime, brand FROM phishing_urls WHERE url_norm = ? " 199 | "ORDER BY isotime DESC LIMIT 1") 200 | result = opdb.run_query(query, url) 201 | if not result["success"]: 202 | return result 203 | 204 | if not result["data"]: 205 | return api_result(True, msg="Not a phishing URL") 206 | 207 | url_entry = result["data"][0] 208 | response = {"discovery_date": url_entry[0], "brand": url_entry[1]} 209 | return api_result(True, data=response, msg="Phishing URL") 210 | 211 | def check_ip(opdb: OPDB, ip: str): 212 | """Basic search to check if phishing URLs exist on an IP address""" 213 | if not isinstance(opdb, OPDB) or not ip: 214 | raise TypeError("Invalid arguments") 215 | 216 | try: 217 | ipaddr = ipaddress.ip_address(ip) 218 | if ipaddr.version != 4: 219 | return api_result(False, msg="IP is not IPv4") 220 | except ValueError: 221 | return api_result(False, msg="Invalid IP address") 222 | 223 | result = opdb.run_query( 224 | "SELECT DISTINCT url FROM phishing_urls WHERE ip = ?", ip) 225 | if not result["success"]: 226 | return result 227 | 228 | if not result["data"]: 229 | return api_result(True, msg="No phishing URLs") 230 | 231 | return api_result(True, data={"url": [r[0] for r in result["data"]]}) 232 | 233 | def check_hostname(opdb: OPDB, hostname: str): 234 | """Basic fuzzy match to check for phishing URLs on a hostname""" 235 | if not isinstance(opdb, OPDB) or not hostname: 236 | raise TypeError("Invalid arguments") 237 | 238 | result = opdb.run_query( 239 | "SELECT DISTINCT host FROM phishing_urls WHERE host LIKE ?", 240 | "%{}%".format(hostname)) 241 | if not result["success"]: 242 | return result 243 | 244 | if not result["data"]: 245 | return api_result(True, msg="No URLs on hostname") 246 | 247 | return api_result(True, data={"hosts": [r[0] for r in result["data"]]}) 248 | 249 | if __name__ == "__main__": 250 | cli_handler = argparse.ArgumentParser() 251 | cli_handler.add_argument( 252 | "--checkurl", 253 | help="check if URL is phishing", 254 | type=str, 255 | nargs=1) 256 | cli_handler.add_argument( 257 | "--checkip", 258 | help="check for phishing URLs on IPv4", 259 | type=str, 260 | nargs=1) 261 | cli_handler.add_argument( 262 | "--checkhost", 263 | help="check for phishing URLs on hostname", 264 | type=str, 265 | nargs=1) 266 | cli_handler.add_argument( 267 | "--update", 268 | help="update phishing db", 269 | action="store_true" 270 | ) 271 | cli_handler.add_argument( 272 | "--config", 273 | help="config file path", 274 | type=str 275 | ) 276 | 277 | result = None 278 | args = cli_handler.parse_args() 279 | if args.config: 280 | opdb = OPDB(cfg_file=args.config) 281 | else: 282 | opdb = OPDB() 283 | 284 | if args.checkurl: 285 | result = check_url(opdb, args.checkurl[0]) 286 | elif args.checkip: 287 | result = check_ip(opdb, args.checkip[0]) 288 | elif args.checkhost: 289 | result = check_hostname(opdb, args.checkhost[0]) 290 | elif args.update: 291 | result = opdb.update() 292 | else: 293 | cli_handler.print_help() 294 | 295 | if result: 296 | print(json.dumps(result, indent=2)) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.16.13 2 | requests>=2.24.0 3 | --------------------------------------------------------------------------------