├── .gitignore
├── requirements.txt
├── checkpwnedemails.conf_example
├── setup_virtualenv.ps1
├── setup_virtualenv.sh
├── LICENSE
├── README.md
└── checkpwnedemails.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.conf
2 | *.pyc


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests


--------------------------------------------------------------------------------
/checkpwnedemails.conf_example:
--------------------------------------------------------------------------------
1 | [hibp]
2 | HIBP_APIKEY=api_key_goes_here
3 | HIBP_RATELIMIT=6.0


--------------------------------------------------------------------------------
/setup_virtualenv.ps1:
--------------------------------------------------------------------------------
 1 | $GITEMP=".\.gitignore_temp"
 2 | $PYTHON3=(Get-Command python).Path
 3 | 
 4 | Rename-Item .\.gitignore $GITEMP
 5 | iex "$($PYTHON3) -m virtualenv ."
 6 | Remove-Item .\.gitignore
 7 | Rename-Item $GITEMP .gitignore
 8 | .\Scripts\activate
 9 | pip3 install -r requirements.txt
10 | deactivate


--------------------------------------------------------------------------------
/setup_virtualenv.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | GITEMP=".gitignore_temp"
 4 | PYTHON3=$(which python3)
 5 | 
 6 | mv .gitignore $GITEMP
 7 | virtualenv -p $PYTHON3 .
 8 | mv $GITEMP .gitignore
 9 | source bin/activate
10 | pip3 install -r requirements.txt
11 | deactivate
12 | cp checkpwnedemails.conf_example checkpwnedemails.conf


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Alexan Mardigian
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # checkpwnedemails
 2 | 
 3 | This Python3 script will check if a single email address, or a text file listing several email addresses, has been compromised in a data breach (pwned).  This script uses the haveibeenpwned API to compare the email address(es), provided by the user, to the haveibeenpwned database to check if they have been pwned or not.
 4 | 
 5 | ## Prerequisites
  6 | This script requires:
 7 | * python3 (version 3.6 or later)
 8 | * python3-pip
 9 | * virtualenv
10 | 
11 | The prerequisites can be installed on a Debian based linux machine, like so:
12 | 
13 | `sudo apt-get install git python3 python3-pip && sudo pip3 install virtualenv`
14 | 
15 | ## Setup
16 | Once those prerequisites have been installed, git clone this repo, cd into it, and set up the virtual environment:
17 | 
18 | `cd /path/to/checkpwnedemails && ./setup_virtualenv.sh`
19 | 
20 | Or, if you're installing it on a Windows machine via Powershell:
21 | 
 22 | `cd \path\to\checkpwnedemails; .\setup_virtualenv.ps1`
23 | 
24 | setup_virtualenv.sh (as well as setup_virtualenv.ps1) will set checkpwnedemails as the virtual environment, activate it, and call pip3 to download and install all the python3 dependencies for this script.  These python dependencies are listed in requirements.txt.
25 | 
26 | ## API Key 
27 | [As of the HaveIBeenPwned v3 update](https://www.troyhunt.com/authentication-and-the-have-i-been-pwned-api/), you will need an API key to run checkpwnedemails.py.  You can get one [here](https://haveibeenpwned.com/API/Key).
28 | 
 29 | Once you have acquired an API key, copy and paste it into the checkpwnedemails.conf file on the line that says 'HIBP_APIKEY='.  If checkpwnedemails.conf does not exist yet, create it by copying checkpwnedemails.conf_example.
30 | 
31 | ## Rate Limit
 32 | [As of this HaveIBeenPwned update](https://www.troyhunt.com/the-have-i-been-pwned-api-now-has-different-rate-limits-and-annual-billing/), the rate limit defined in the checkpwnedemails.conf file will depend on your pricing tier.  HIBP_RATELIMIT is the number of seconds the script waits between requests.  For example, if you bought the 50 RPM (requests per minute) tier, set HIBP_RATELIMIT to 1.2.  `60 / 50 = 1.2 seconds`
33 | 
34 | ## Usage
35 | 
36 | To start, activate the python virtualenv.  On linux:
37 | 
38 | `cd /path/to/checkpwnedemails/ && source bin/activate`
39 | 
40 | Or on Windows Powershell:
41 | 
42 | `cd \path\to\checkpwnedemails\; .\Scripts\activate`
43 | 
44 | To check a single email address:
45 | 
46 | `python3 checkpwnedemails.py -s email_address`
47 | 
 48 | To check multiple email addresses:
49 | 
50 | `python3 checkpwnedemails.py -i text_file_listing_email_addresses`
51 | 
 52 | By default, the results will be printed to standard output.  However, if the -o option is provided, the results will instead be written to tab-delimited text files (one for breaches, one for pastebins) for later use.
53 | 
54 | For more options:
55 | 
56 | `python3 checkpwnedemails.py -h`
57 | 


--------------------------------------------------------------------------------
/checkpwnedemails.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import json
  3 | from argparse import ArgumentParser, Namespace
  4 | from configparser import ConfigParser
  5 | from os.path import exists
  6 | from time import sleep
  7 | from typing import List
  8 | 
  9 | import requests
 10 | 
__author__ = "Alexan Mardigian"
__version__ = "3.0"

# Positions of the fields inside each result tuple built by get_results():
# (email address, been pwned?, json data).
EMAILINDEX = 0
PWNEDINDEX = 1
DATAINDEX = 2

# Service names that get_results() appends to the HIBP API base URL.
BREACHED = "breachedaccount"
PASTEBIN = "pasteaccount"
 20 | 
 21 | def get_args() -> Namespace:
 22 |     parser = ArgumentParser()
 23 |     parser.add_argument('-b', action="store_true", dest='only_breaches',
 24 |                         help='Return results for breaches only.')
 25 |     parser.add_argument('-c', default='checkpwnedemails.conf',
 26 | 	                    dest='config_path',
 27 | 						help='Path to configuration file.')
 28 |     parser.add_argument('-i', dest='input_path',
 29 |                         help='Path to text file that lists email addresses.')
 30 |     parser.add_argument('-n', action="store_true", dest='names_only',
 31 |                         help='Return the name of the breach(es) only.')
 32 |     parser.add_argument('-o', dest='output_path',
 33 |                         help='Path to output (tab deliminated) text file.')
 34 |     parser.add_argument('-p', action="store_true", dest='only_pwned',
 35 |                         help='Print only the pwned email addresses.')
 36 |     parser.add_argument('-s', dest="single_email",
 37 |                         help='Send query for just one email address.')
 38 |     parser.add_argument('-t', action="store_true", dest='only_pastebins',
 39 |                         help='Return results for pastebins only.')
 40 | 
 41 |     # If no arguments were provided, then print help and exit.
 42 |     if len(sys.argv) == 1:
 43 |         parser.print_help()
 44 |         sys.exit(1)
 45 | 
 46 |     return parser.parse_args()
 47 | 
 48 | def read_config_file(filename:str) -> ConfigParser:
 49 |     if not exists(filename):
 50 |         raise FileNotFoundError(f"Config file {filename} not found.")
 51 | 
 52 |     config = ConfigParser()
 53 |     config.read(filename)
 54 |         
 55 |     if 'hibp' not in config.keys():
 56 |         raise KeyError(f"The [hibp] is missing in {filename}.")
 57 |             
 58 |     return config['hibp']
 59 | 
 60 | def clean_list(strings: List[str]) -> List[str]:
 61 |     """
 62 |     Returns a list of strings stripped of trailing '\n' character.
 63 |     """
 64 |     return [str(x).strip() for x in strings]
 65 | 
 66 | 
 67 | def printHTTPErrorOutput(http_error_code: int,
 68 |                          hibp_api_key: str, email: str = None) -> None:
 69 |     """
 70 |     This function will print the appropriate output string based on the
 71 |     HTTP error code what was passed in. If an invalid HIBP API key was used
 72 |     (error code 401), then checkpwnedemails.py will stop running.
 73 |     """
 74 |     ERROR_CODE_OUTPUT = {
 75 |         400: f"HTTP Error 400. {email} does not appear to be a valid email address.",
 76 |         401: f"HTTP Error 401.  Unauthorised - the API key provided {hibp_api_key} was not valid.",
 77 |         403: "HTTP Error 403.  Forbidden - no user agent has been specified in the request.",
 78 |         429: "HTTP Error 429.  Too many requests; the rate limit has been exceeded.",
 79 |         503: "HTTP Error 503.  Service unavailable."
 80 |     }
 81 |     default_output = f"HTTP Error {http_error_code}"
 82 |     print(ERROR_CODE_OUTPUT.get(http_error_code, default_output))
 83 | 
 84 |     if http_error_code == 401:
 85 |         sys.exit(1)
 86 | 
 87 | 
 88 | def get_results(emails: List[str], service: str,
 89 |                 opts: Namespace, config: ConfigParser) -> List:
 90 |     """
 91 |     Returns results from the HIBP API, if any.
 92 |     """
 93 |     hibp_api_key = config['hibp_apikey']
 94 |     URL_BASE = "https://haveibeenpwned.com/api/v3/"
 95 |     HEADERS = {
 96 |         "User-Agent": "checkpwnedemails",
 97 |         "hibp-api-key": hibp_api_key,
 98 |     }
 99 |     results = []  # list of tuples (email address, been pwned?, json data)
100 | 
101 |     for email in emails:
102 |         email = email.strip()
103 |         data = []
104 |         names_only = str(opts.names_only).lower()
105 | 
106 |         try:
107 |             url = f'{URL_BASE}{service}/{email}?truncateResponse={names_only}'
108 |             response = requests.get(headers=HEADERS, url=url)
109 |             is_pwned = True
110 | 
111 |             # Before parsing the response (for JSON), check if any content was returned.
112 |             # Otherwise, a json.decoder.JSONDecodeError will be thrown because we were trying
113 |             # to parse JSON from an empty response.
114 |             if response.content:
115 |                 data = response.json()
116 |             else:
117 |                 # No results came back for this email.
118 |                 # According to HIBP, this email was not pwned.
119 |                 data = None
120 |                 is_pwned = False
121 | 
122 |             results.append((email, is_pwned, data))
123 |         except requests.exceptions.HTTPError as e:
124 |             if e.code == 404 and not opts.only_pwned:
125 |                 # No results came back for this email.
126 |                 # According to HIBP, this email was not pwned.
127 |                 results.append((email, False, data))
128 |             elif e.code != 404:
129 |                 printHTTPErrorOutput(e.code, hibp_api_key, email)
130 | 
131 |         sleep(float(config['hibp_ratelimit']))  # For rate limiting.
132 | 
133 |     return results
134 | 
135 | 
def print_results(results: List, not_pwned_msg: str) -> None:
    """
    Prints every result to standard output.

    results:       list of result tuples (email address, been pwned?,
                   json data).
    not_pwned_msg: %-style template, taking the email address, printed for
                   addresses that were not pwned.

    For pwned addresses a short banner is printed, followed by the JSON data
    indented by four spaces.
    """
    for entry in results:
        payload = entry[DATAINDEX]
        address = entry[EMAILINDEX]
        was_pwned = entry[PWNEDINDEX]

        if was_pwned:
            print(f"\n{address} pwned!\n==========")
            print(json.dumps(payload, indent=4))
            continue

        print(not_pwned_msg % address)
145 | 
146 | 
def clean_and_encode(dlist: List) -> List[str]:
    """
    Converts every item in dlist into a string that is safe to use as one
    field of a tab delimited line.

    Tabs, carriage returns and newlines inside a value are replaced by a
    space each, because they would otherwise split the value over several
    columns or rows.
    """
    # In Python 3 str() does not encode, so no UnicodeEncodeError handling
    # is needed; unicode text is kept as it is.
    separators_to_space = str.maketrans({'\t': ' ', '\r': ' ', '\n': ' '})

    return [str(d).translate(separators_to_space) for d in dlist]
160 | 
161 | 
def tab_delimited_string(data: tuple) -> str:
    """
    Formats one result tuple as tab delimited text.

    data: a result tuple (email address, been pwned?, json data).

    Returns one line per breach/pastebin entry, each starting with the email
    address and the pwned flag.  If there is no json data, a single line with
    just those two fields is returned.  If the json data is a single object
    carrying 'statusCode' and 'message' (an error reply rather than a list of
    entries), a single line with those two values is returned.
    """
    begining_sub_str = f'{data[EMAILINDEX]}\t{str(data[PWNEDINDEX])}'
    payload = data[DATAINDEX]

    if not payload:
        return begining_sub_str

    if isinstance(payload, dict):
        # Emit exactly one row; iterating over the dict would produce one
        # duplicate row per key.
        statusCode = payload.get('statusCode')
        message = payload.get('message')
        return f'{begining_sub_str}\t{statusCode}\t{message}'

    output = []
    for bp in payload:  # bp stands for breaches/pastebins
        s = '\t'.join(clean_and_encode(bp.values()))
        output.append(f'{begining_sub_str}\t{s}')

    return '\n'.join(output)
181 | 
182 | 
def write_results_to_file(results: tuple, opts: Namespace) -> None:
    """
    Writes the results to tab delimited text files.

    results: tuple of result lists, in the same order as the files that are
             written: breaches first, then pastebins (or just the one list
             selected by opts.only_breaches / opts.only_pastebins).
    opts:    parsed command line options; only_breaches, only_pastebins and
             output_path are used.

    The file names are derived from opts.output_path by dropping its file
    extension (if any) and appending "_breaches.txt" / "_pastes.txt".
    Each file starts with a header line.
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    from os.path import splitext

    BREACHESTXT = "_breaches.txt"
    PASTESTXT = "_pastes.txt"
    BREACH_HEADER = (
        "Email Address", "Is Pwned", "Name", "Title", "Domain", "Breach Date",
        "Added Date", "Modified Date", "Pwn Count", "Description", "Logo Path",
        "Data Classes", "Is Verified", "Is Fabricated", "Is Sensitive",
        "Is Retired", "Is SpamList"
    )
    PASTES_HEADER = ("Email Address", "Is Pwned", "ID",
                     "Source", "Title", "Date", "Email Count"
                     )
    files = []
    file_headers = {
        BREACHESTXT: "\t".join(BREACH_HEADER),
        PASTESTXT:   "\t".join(PASTES_HEADER)
    }

    if opts.only_breaches:
        files.append(BREACHESTXT)
    elif opts.only_pastebins:
        files.append(PASTESTXT)
    else:
        files.append(BREACHESTXT)
        files.append(PASTESTXT)

    # splitext only looks at the last path component, so dots in directory
    # names ("./out", "../out", "run.v1/out") do not truncate the path.
    filename, _extension = splitext(opts.output_path)

    for result, f in zip(results, files):
        with open(filename + f, 'w', encoding='utf-8') as outfile:
            outfile.write(file_headers[f] + '\n')

            for r in result:
                outfile.write(tab_delimited_string(r) + '\n')
220 | 
221 | 
def main() -> None:
    """
    Entry point of the script.

    Parses the command line, reads the configuration, queries the HIBP API
    for the requested email address(es) and either prints the results or
    writes them to tab delimited files (when -o was given).

    Exits with status 1 if the configuration cannot be used, if the input
    file cannot be read, or if no email address was provided.
    """
    opts = get_args()
    try:
        config = read_config_file(opts.config_path)
    except (FileNotFoundError, KeyError) as e:
        # str() of a KeyError wraps the message in quotes, so print the
        # message argument itself.
        print(e.args[0] if e.args else e)
        sys.exit(1)

    emails = None
    if opts.single_email:
        emails = (opts.single_email,)
    elif opts.input_path:
        try:
            with open(opts.input_path, 'r') as emails_file:
                emails = tuple(clean_list(emails_file.readlines()))
        except OSError as e:
            print(f"Could not read input file {opts.input_path}: {e}")
            sys.exit(1)
    else:
        print("\nNo email addresses were provided.")
        print("Please provide a single email address (using -s) or a list of email addresses (using -i).\n")
        sys.exit(1)

    breaches = []
    pastebins = []

    # 'results' holds one list per output file, in the order in which
    # write_results_to_file() pairs them with file names: breaches first,
    # then pastebins.  Empty lists must stay in place, otherwise pastebin
    # results would end up in the breaches file.
    if opts.only_breaches:
        breaches = get_results(emails, BREACHED, opts, config)
        results = (breaches,)
    elif opts.only_pastebins:
        pastebins = get_results(emails, PASTEBIN, opts, config)
        results = (pastebins,)
    else:
        breaches = get_results(emails, BREACHED, opts, config)
        pastebins = get_results(emails, PASTEBIN, opts, config)
        results = (breaches, pastebins)

    if not opts.output_path:
        print_results(breaches, "Email address %s not pwned.  Yay!")
        print_results(
            pastebins, "Email address %s was not found in any pastebins.  Yay!"
        )
    else:
        write_results_to_file(results, opts)
267 | 
# Run the command line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
270 | 


--------------------------------------------------------------------------------