├── LICENSE ├── README.md └── abeebus.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Richard Davis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Abeebus 2 | ## This project is no longer actively maintained. Consider using [SharpAbeebus](https://github.com/13Cubed/SharpAbeebus) instead! It's a modern C# .NET 8 rewrite that is much, much faster and includes new features. 3 | **Abeebus is a GeoIP lookup utility utilizing [ipinfo.io](https://ipinfo.io) services. 
This script is very useful for parsing email headers, log files, and any other arbitrary data for IPv4 addresses, and then obtaining GeoIP data for each of those addresses.** 4 | 5 | **Video Demo:** 6 | 7 | [![Parse Email Headers and Files for GeoIP Location Data](https://img.youtube.com/vi/egv63oso8Qc/0.jpg)](https://www.youtube.com/watch?v=egv63oso8Qc) 8 | 9 | For any given file(s), Abeebus will: 10 | 11 | - Extract valid IPv4 addresses (e.g., "CSI: Cyber" addresses like 951.27.9.840 will not match) 12 | - Ignore duplicates 13 | - Ignore bogon addresses, the loopback network, link local addresses, and RFC 1918 (private) addresses 14 | 15 | For each remaining address, Abeebus will provide the following data as available from ipinfo.io: 16 | 17 | **- IP Address, Hostname, Country, Region, City, Postal Code, Latitude, Longitude, ASN, Count** 18 | 19 | By default, Abeebus will display the data to stdout in the following format: 20 | 21 | ``` 22 | IP Address | Hostname | Country | Region | City | Postal Code | Latitude | Longitude | ASN | Count 23 | 52.73.116.225 | ec2-52-73-116-225.compute-1.amazonaws.com | US | Virginia | Ashburn | 20149 | 39.0437 | -77.4875 | AS14618 Amazon.com Inc. | 5 24 | ``` 25 | - Using the "**-w**" option, you can provide a filename to which Abeebus will output the data in CSV format (useful for working with large data sets in **Timeline Explorer**, **Microsoft Excel**, or **LibreOffice Calc**): 26 | 27 | ``` 28 | IP Address,Hostname,Country,Region,City,Postal Code,Latitude,Longitude,ASN,Count 29 | 52.73.116.225,ec2-52-73-116-225.compute-1.amazonaws.com,US,Virginia,Ashburn,20149,39.0437,-77.4875,AS14618 Amazon.com Inc.,5 30 | ``` 31 | - Using the "**-a**" option, you can provide an **ipinfo.io API** key if you have large datasets to process. 
#!/usr/bin/python3
# Abeebus 2.0 - A GeoIP lookup utility utilizing ipinfo.io services.
# Copyright 2020 13Cubed. All rights reserved. Written by: Richard Davis
"""
Scrape IPv4 addresses out of arbitrary files and report ipinfo.io GeoIP data.

Abeebus requires Python 3 (no external libraries needed).
"""

import argparse
import csv
import json
import re
import sys
from collections import Counter
from urllib.request import urlopen

# Dotted-quad IPv4 address whose four octets are each in the range 0-255.
_IPV4_PATTERN = re.compile(
    r'\b(?:(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\.){3}'
    r'(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])\b'
)

# Prefixes of addresses that are never looked up: 0.0.0.0/8, the loopback
# network, link local addresses, and the RFC 1918 (private) ranges.  Only
# ever applied to strings already matched by _IPV4_PATTERN, so checking the
# leading octets is sufficient.
_NON_PUBLIC_PATTERN = re.compile(
    r'^(?:0|10|127|169\.254|172\.(?:1[6-9]|2[0-9]|3[0-1])|192\.168)\.'
)

_CSV_HEADER = 'IP Address,Hostname,Country,Region,City,Postal Code,Latitude,Longitude,ASN,Count'

_MISSING = 'N/A'


def _cleanField(value):
    """Return value with commas stripped, or 'N/A' if it is absent/empty."""
    cleaned = value.replace(',', '') if isinstance(value, str) else ''
    return cleaned or _MISSING


def _splitLocation(value):
    """Split a 'latitude,longitude' string into exactly two column values."""
    parts = [part.strip() for part in value.split(',')] if isinstance(value, str) else []
    if len(parts) == 2 and all(parts):
        return parts
    # Missing, empty or malformed: still emit two columns so that every row
    # keeps the same width as the header.
    return [_MISSING, _MISSING]


def _formatRecord(rawData, count):
    """Build one 10-column CSV row from a decoded JSON reply and a count."""
    if not isinstance(rawData, dict):
        rawData = {}

    fields = [_cleanField(rawData.get(key))
              for key in ('ip', 'hostname', 'country', 'region', 'city', 'postal')]
    fields.extend(_splitLocation(rawData.get('loc')))
    fields.append(_cleanField(rawData.get('org')))
    fields.append(str(count))
    return ','.join(fields)


def _recordCount(record):
    """Return the trailing 'Count' column of a CSV row as an integer."""
    return int(record.rsplit(',', 1)[1])


def _lookup(address, apiToken):
    """
    Query ipinfo.io for one address and return the decoded JSON reply.
    Any failure is reported on stdout and terminates the program with
    exit status 1.
    """
    url = 'https://ipinfo.io/' + address + '/json'
    if apiToken:
        url += '/?token=' + apiToken

    try:
        with urlopen(url) as response:
            return json.loads(response.read().decode())
    except Exception:
        # Network, HTTP, decoding and JSON errors are all fatal here, but
        # KeyboardInterrupt / SystemExit are deliberately not intercepted.
        if apiToken:
            print('\n\nIs your API key valid?')

        print('Error parsing address:', address)
        sys.exit(1)


def getData(filenames, apiToken):
    """
    The given files are scraped for IPv4 addresses, and each distinct
    address that is not in a non-public range is looked up at ipinfo.io.

    filenames -- iterable of paths to read (decoded as ISO-8859-1)
    apiToken  -- ipinfo.io API token, or an empty string for none

    Returns a list of CSV-formatted strings: the column header first, then
    one row per address, ordered from highest to lowest occurrence count
    (ties keep the order in which the addresses were first seen).

    Exits with status 1 if a file cannot be opened or a lookup fails.
    """
    addressCounts = Counter()

    for filename in filenames:
        try:
            f = open(filename, 'r', encoding='ISO-8859-1')
        except IOError:
            print('Could not find the specified file:', filename)
            sys.exit(1)

        with f:
            addressCounts.update(_IPV4_PATTERN.findall(f.read()))

    # Counter keys are already unique and keep first-seen order
    publicAddresses = [address for address in addressCounts
                       if not _NON_PUBLIC_PATTERN.match(address)]
    total = len(publicAddresses)
    results = []

    for i, address in enumerate(publicAddresses):
        progressBar(i, total, status='Getting Results')
        rawData = _lookup(address, apiToken)
        results.append(_formatRecord(rawData, addressCounts[address]))

    # Sort once, after every lookup has finished
    results.sort(key=_recordCount, reverse=True)
    results.insert(0, _CSV_HEADER)

    return results


def printData(results):
    """Print the CSV-formatted rows to stdout as a ' | ' separated table."""
    rows = list(csv.reader(results))
    if not rows:
        return

    # Width of each column is the width of its longest cell
    widths = [0] * max(len(row) for row in rows)
    for row in rows:
        for i, cell in enumerate(row):
            widths[i] = max(widths[i], len(cell))

    for row in rows:
        print(' | '.join(cell.ljust(width) for cell, width in zip(row, widths)))


def writeData(results, outfile):
    """
    Write each result string to outfile, one per line, encoded as UTF-8.
    Exits with status 1 if the file cannot be opened for writing.
    """
    try:
        f = open(outfile, 'w', encoding='utf-8')
    except IOError:
        print('Could not write the specified file:', outfile)
        sys.exit(1)

    with f:
        # While Unicode characters will not be displayed via stdout, they will be written to the file
        f.writelines(result + '\n' for result in results)


def progressBar(count, total, status=''):
    """
    Draw a 60-character progress bar for count out of total on stdout.
    The line ends with a carriage return so the next call overwrites it.
    A total of 0 is drawn as complete instead of dividing by zero.
    """
    # From https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
    bar_len = 60
    fraction = count / float(total) if total else 1.0
    filled_len = int(round(bar_len * fraction))

    percents = round(100.0 * fraction, 1)
    bar = '#' * filled_len + '.' * (bar_len - filled_len)

    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()


def main():
    """Parse the command line, run the lookups, then print or save them."""
    parser = argparse.ArgumentParser(description='Abeebus 2.0 - A GeoIP lookup utility utilizing ipinfo.io services.', usage='abeebus.py filename(s) [-w outfile] [-a token]', add_help=False)
    parser.add_argument('filenames', nargs="*")
    parser.add_argument('-w', '--write', help='Write output to CSV file instead of stdout', required=False)
    parser.add_argument('-a', '--api-token', help='Specify ipinfo.io API token', required=False)
    parser.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='Show this help message and exit')
    args = parser.parse_args()

    # Make sure at least one filename was provided
    if not args.filenames:
        parser.print_usage()
        parser.exit()

    output = getData(args.filenames, args.api_token or '')

    if args.write:
        writeData(output, args.write)
    else:
        printData(output)

    print('\nCopyright (C) 2020 13Cubed. All rights reserved.')


if __name__ == '__main__':
    main()