├── .gitignore ├── LICENSE ├── README.md ├── devprof.py ├── devprof.txt ├── sample_ports_for_profile.json └── sample_profiles_for_ip.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 
21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dprof 2 | Device profile: Define acceptable amounts of traffic for your devices and see a report of outliers. 3 | -------------------------------------------------------------------------------- /devprof.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """This program imports bro/zeek logs that watch a given network and reports on systems whose volume of traffic (in payload bytes) exceeds a profile assigned to that system.""" 3 | 4 | 5 | import os #For directory listing and others 6 | import sys 7 | import gzip #To read gzip compressed files 8 | import re #For regular expression parsing 9 | import json #To load user configuration 10 | import math #For float comparison 11 | try: 12 | import ipaddress #IP address/network objects and functions 13 | except ImportError: 14 | print("Missing ipaddress module; perhaps 'sudo port install py-ipaddress', 'sudo yum install python-ipaddress' or 'sudo -H pip install ipaddress' ? 
#======== Functions ========
def fail(fail_message):
    """Report a fatal problem on stderr and terminate the program.

    fail_message is converted with str(), written to stderr followed by
    ', exiting.', and the process then exits with status 1 (sys.exit raises
    SystemExit, so this function never returns normally).
    """

    sys.stderr.write(str(fail_message) + ', exiting.\n')
    sys.stderr.flush()
    sys.exit(1)


def Debug(DebugStr):
    """Write a note to stderr, but only when the module-level Devel flag is set."""

    if Devel:
        #str() so that passing a path object or number does not raise TypeError on the concatenation.
        sys.stderr.write(str(DebugStr) + '\n')
        sys.stderr.flush()


def write_object(filename, generic_object):
    """Write the string generic_object to filename, encoded as UTF-8.

    Any failure is announced on stderr and then re-raised to the caller.
    """

    try:
        with open(filename, "wb") as write_h:
            write_h.write(generic_object.encode('utf-8'))
    except Exception:
        #Trailing newline added; without it this message ran into whatever was printed next.
        sys.stderr.write("Problem writing " + str(filename) + ", skipping.\n")
        raise


def load_json(json_filename, default_content):
    """Return the parsed contents of json_filename, creating the file if it is missing.

    If the file exists it must contain valid json; a parse failure is reported
    on stderr and the json.JSONDecodeError is re-raised.  If the file does not
    exist, default_content is written to it as json (via write_object) and
    default_content is returned.
    """

    if os.path.exists(json_filename):
        with open(json_filename) as json_h:
            try:
                return json.load(json_h)
            except json.JSONDecodeError:
                sys.stderr.write("Unable to load " + json_filename + " .  Please check that it contains valid json.\n")
                sys.stderr.flush()
                raise

    write_object(json_filename, json.dumps(default_content))
    sys.stderr.write("No configuration file " + json_filename + ' .  Using empty configuration.\n')
    sys.stderr.flush()

    return default_content


def LoadMacData(MacFile):
    """Load Ethernet Mac address prefixes from standard locations (from ettercap, nmap, wireshark, and/or arp-scan).

    Two line layouts are recognized: "AA:BB:CC<sep>Vendor" and
    "AABBCC<space or tab>Vendor".  Prefixes already present in the global
    EtherManuf dictionary are left alone, so the first file loaded wins.
    Returns True if the file was read, False if it is missing or unreadable.
    """
    global EtherManuf

    More = ' more' if EtherManuf else ''
    LoadCount = 0

    if not os.path.isfile(MacFile):
        Debug("Unable to load " + str(MacFile))
        return False

    try:
        #The with block closes the handle even if reading fails part way through.
        #errors='replace' keeps one undecodable byte in a vendor name from aborting the whole load.
        with open(MacFile, 'r', errors='replace') as MacHandle:
            for line in MacHandle:
                if (len(line) >= 8) and (line[2] == ':') and (line[5] == ':'):
                    #uppercase incoming strings just in case one of the files uses lowercase
                    MacHeader = line[:8].upper()
                    Manuf = line[8:].strip()
                elif (len(line) >= 7) and (re.search('^[0-9A-F]{6}[ \t]', line) is not None):
                    MacHeader = str.upper(line[0:2] + ':' + line[2:4] + ':' + line[4:6])
                    Manuf = line[7:].strip()
                else:
                    continue

                if MacHeader not in EtherManuf:
                    EtherManuf[MacHeader] = Manuf
                    LoadCount += 1
    except OSError:
        Debug("Unable to load " + str(MacFile))
        return False

    if '00:00:00' in EtherManuf:
        del EtherManuf['00:00:00']		#Not really Xerox
        LoadCount -= 1
    Debug(str(LoadCount) + More + " mac prefixes loaded from " + str(MacFile))
    return True


def tree_file_listing(top_level_dir):
    """Returns a set of files in a directory tree (recursively).

    Each entry is the path of a file joined onto top_level_dir.  An empty set
    is returned when top_level_dir is not a directory.
    """

    ret_file_list = set()

    if os.path.isdir(top_level_dir):
        #os.walk already descends into every subdirectory, so no explicit recursion is needed here.
        #followlinks=True keeps files reachable through symlinked directories in the listing.
        for top_level, _dirs, files in os.walk(top_level_dir, followlinks=True):
            for one_file in files:
                ret_file_list.add(os.path.join(top_level, one_file))

    return ret_file_list



#Columns that must appear in a conn log's "#fields" header for the file to be usable.
_REQUIRED_CONN_FIELDS = ('id.orig_h', 'id.orig_p', 'id.resp_p', 'proto', 'service', 'orig_bytes', 'resp_bytes')

#Recognized size suffixes.  We use KB/MB... as 1000^N, and KiB/MiB... as 1024^N .  See https://en.wikipedia.org/wiki/Binary_prefix
_BYTE_SUFFIX_MULTIPLIERS = (
    ('KiB', 1024),
    ('MiB', 1048576),
    ('GiB', 1073741824),
    ('TiB', 1099511627776),
    ('PiB', 1125899906842624),
    ('KB', 1000),
    ('MB', 1000000),
    ('GB', 1000000000),
    ('TB', 1000000000000),
    ('PB', 1000000000000000),
)


def _open_conn_log(log_path):
    """Return a text-mode handle for log_path, decompressing it if it starts with the gzip magic bytes."""

    with open(log_path, 'rb') as probe_h:
        is_gzipped = probe_h.read(2) == b'\x1f\x8b'

    if is_gzipped:
        return gzip.open(log_path, 'rt')
    return open(log_path, 'r')


def _tally_conn_record(fields, field_pos):
    """Add one split conn log record to originator_stats and ips_of_mac.

    Raises IndexError if the record has fewer columns than the header
    promised, or ValueError if a byte count is not an integer.  Every column
    is read and parsed before anything is stored, so a bad record leaves the
    global tables untouched.
    """

    src_ip = fields[field_pos['id.orig_h']]
    proto = fields[field_pos['proto']]
    service = fields[field_pos['service']]

    #With icmp, the "type" is stored in id.orig_p.
    port_column = 'id.orig_p' if proto == "icmp" else 'id.resp_p'
    stripped_conn_key = ':' + proto + ':' + service
    conn_key = fields[field_pos[port_column]] + stripped_conn_key

    payload_bytes = 0
    for byte_column in ('orig_bytes', 'resp_bytes'):
        raw_count = fields[field_pos[byte_column]]
        if raw_count != '-':					#'-' is how the log marks "no value"
            payload_bytes += int(raw_count)

    #These are the mac addresses the packet came from/went to, which might belong to the
    #endpoint itself or to a router in between.
    mac_ip_pairs = []
    if 'orig_l2_addr' in field_pos:
        mac_ip_pairs.append((fields[field_pos['orig_l2_addr']].upper(), src_ip))
    if 'resp_l2_addr' in field_pos and 'id.resp_h' in field_pos:
        mac_ip_pairs.append((fields[field_pos['resp_l2_addr']].upper(), fields[field_pos['id.resp_h']]))

    #At this point we're left with a conn_key like "1484:tcp:ftp-data"
    #If the profile for this IP lists this specific string, great.  If not, and the profile has the
    #port-less form ":tcp:ftp-data", account the traffic under that shortened key instead.
    profile_for_src_ip = ports_for_identifier(src_ip)
    if conn_key not in profile_for_src_ip and stripped_conn_key in profile_for_src_ip:
        conn_key = stripped_conn_key

    stats_for_src = originator_stats.setdefault(src_ip, {})
    stats_for_src[conn_key] = stats_for_src.get(conn_key, 0) + payload_bytes

    for one_mac, one_ip in mac_ip_pairs:
        if not one_mac.startswith('33:33'):			#ipv6 multicast
            ips_of_mac.setdefault(one_mac, set()).add(str(one_ip))


def load_file_into_originator_stats(incoming_log):
    """Loads in a single file of Bro/Zeek logs into originator_stats and ips_of_mac.

    The file may be gzip compressed or plain text.  files_successfully_loaded
    is incremented unless the file could not be read, its "#fields" header
    lacks one of the required columns, or data shows up before any header.
    Individual malformed records are reported through Debug() and skipped.
    """

    global originator_stats
    global ips_of_mac
    global files_successfully_loaded

    #We unconditionally reload this for each new file (and each "#fields" line), as the format may change between files.
    field_pos = {}		#Key: name of field.  Value: position where that field can be found.

    load_completed = True

    try:
        with _open_conn_log(incoming_log) as bro_h:
            for raw_line in bro_h:
                #Only the line ending is removed; stripping tabs would shift columns when the last field is empty.
                line = raw_line.rstrip('\r\n')
                if line.startswith('#fields'):
                    all_names = line.split('\t')[1:]		#[1:] drops the "#fields" label at the far left.
                    field_pos = {one_name: position for position, one_name in enumerate(all_names)}
                    #Sample field_pos loaded
                    #{'duration': 8, 'id.resp_h': 4, 'orig_bytes': 9, 'orig_pkts': 16, 'local_orig': 12, 'tunnel_parents': 20, 'history': 15, 'service': 7, 'proto': 6, 'id.orig_p': 3, 'resp_ip_bytes': 19, 'missed_bytes': 14, 'resp_bytes': 10, 'conn_state': 11, 'id.resp_p': 5, 'orig_ip_bytes': 17, 'id.orig_h': 2, 'ts': 0, 'local_resp': 13, 'uid': 1, 'resp_pkts': 18}
                    if any(needed not in field_pos for needed in _REQUIRED_CONN_FIELDS):	#We require at least these fields, give up on this file if not present
                        Debug(str(incoming_log) + " is missing crucial field.")
                        load_completed = False
                        break
                elif line and not line.startswith('#'):
                    if not field_pos:
                        #Without a header we cannot know which column is which, so the file is unusable.
                        Debug(str(incoming_log) + " has data before any #fields header.")
                        load_completed = False
                        break
                    try:
                        _tally_conn_record(line.split('\t'), field_pos)
                    except (IndexError, ValueError):
                        Debug("Skipping malformed line in " + str(incoming_log))
    except (OSError, EOFError, UnicodeDecodeError) as read_problem:
        Debug("Unable to read " + str(incoming_log) + ": " + str(read_problem))
        load_completed = False

    if load_completed:
        files_successfully_loaded = files_successfully_loaded + 1


def merge_two_ranges(first_range, second_range):
    """Return the most restrictive combination of two [min, max] ranges.

    When a port is specified in two different profiles (such as
    "22:tcp:ssh": [None, 1000000] and "22:tcp:ssh": [1000, 100000000]) we keep
    the larger of the two minimums and the smaller of the two maximums.  Both
    arguments and the returned list are 2 element lists whose values are a
    number or None (which is "null" in a json file); None means "no limit" and
    only survives if both inputs have None in that position.
    """

    lower_bounds = [one_bound for one_bound in (first_range[0], second_range[0]) if one_bound is not None]
    upper_bounds = [one_bound for one_bound in (first_range[1], second_range[1]) if one_bound is not None]

    lower_limit = max(lower_bounds) if lower_bounds else None
    upper_limit = min(upper_bounds) if upper_bounds else None

    return [lower_limit, upper_limit]


def normalize_bytes(byte_limit):
    """Turn a size such as "nnnMB" into a plain number of bytes.

    Accepts None (returned as None), an int or float (returned unchanged), or
    a string holding a number with an optional suffix.  KB, MB, GB, TB and PB
    are powers of 1000; KiB, MiB, GiB, TiB and PiB are powers of 1024.
    Fractional amounts such as "1.5GB" are accepted and truncated to a whole
    number of bytes.  Anything else ends the program through fail().
    """

    if byte_limit is None:
        return None

    if isinstance(byte_limit, str):
        number_text = byte_limit
        multiplier = 1
        for one_suffix, suffix_multiplier in _BYTE_SUFFIX_MULTIPLIERS:
            if byte_limit.endswith(one_suffix):
                number_text = byte_limit[:-len(one_suffix)]
                multiplier = suffix_multiplier
                break

        try:
            return int(number_text) * multiplier		#Exact integer arithmetic whenever possible
        except ValueError:
            pass
        try:
            return int(float(number_text) * multiplier)
        except (ValueError, OverflowError):
            fail("Unrecognized value: " + str(byte_limit))
            return None

    if isinstance(byte_limit, (int, float)):
        return byte_limit

    fail("Unrecognized value: " + str(byte_limit))
    return None


def create_ports_for_ip(user_profiles_for_ip, user_ports_for_profile):
    """Generate and return ports_for_ip based on profiles_for_ip and user_ports_for_profile.

    Returns a 2-tuple (ports_for_ip, networks_for_ip).  The first maps each
    listed identifier without a '/' to {portspec: [min, max]}; the second does
    the same for subnets, keyed by ipaddress network objects.  When several
    profiles (or several entries) cover the same identifier and portspec, the
    ranges are combined with merge_two_ranges().  Configuration errors end the
    program through fail().
    """

    created_ports_for_ip = {}
    created_networks_for_ip = {}

    if 'system_profile_pairs' not in user_profiles_for_ip:
        fail("Profiles_for_ip does not start with system_profile_pairs")

    for one_pair in user_profiles_for_ip['system_profile_pairs']:
        #one_pair looks like:	{
        #				"systems":	["10.0.0.41", "10.10.10.10"],
        #				"profiles":	["mac", "traceroute", "general_traffic", "local_lan"]
        #			},

        system_list = one_pair.get("systems")
        if not isinstance(system_list, list):
            fail("One of the profiles_for_ip ip lists is not a list")
        profile_list = one_pair.get("profiles")
        if not isinstance(profile_list, list):
            fail("One of the profiles_for_ip profile lists is not a list")
        for one_profile in profile_list:
            if one_profile not in user_ports_for_profile:
                fail("No profile named " + str(one_profile) + " in ports_for_profile")

        for one_ip in system_list:
            if '/' in one_ip:
                #This is a subnet, so we make an ipaddress object for it
                try:
                    one_net = ipaddress.ip_network(one_ip, strict=False)
                except ValueError:
                    fail("Unable to parse " + str(one_ip) + " as a network")
                ports_for_this_system = created_networks_for_ip.setdefault(one_net, {})
            else:
                #This is an IP address, mac address, or hostname
                ports_for_this_system = created_ports_for_ip.setdefault(one_ip, {})

            for one_profile in profile_list:
                for one_port, one_range in user_ports_for_profile[one_profile].items():
                    new_range = [normalize_bytes(one_range[0]), normalize_bytes(one_range[1])]
                    #If we already had min/max for this port, find the most restrictive intersection with old and new min/max, otherwise just use the new min/max
                    ports_for_this_system[one_port] = merge_two_ranges(new_range, ports_for_this_system.get(one_port, [None, None]))

    return created_ports_for_ip, created_networks_for_ip



def ports_for_identifier(one_id):
    """Returns the port specifications ({portspec: [min, max]}) for a given identifier.

    An exact entry for one_id wins.  Otherwise, if one_id parses as an ipv4 or
    ipv6 address, the profile of the most specific (longest prefix) configured
    network containing it is used.  An empty dictionary is returned when
    nothing matches or when one_id is not an IP address.

    State is kept in function attributes: ports_for_ip and networks_for_ip are
    built on first use from the module-level profiles_for_ip and
    ports_for_profile, and pfi_cache remembers every answer so each identifier
    is only looked up once.
    """

    if "pfi_cache" not in ports_for_identifier.__dict__:
        ports_for_identifier.pfi_cache = {}

    if "ports_for_ip" not in ports_for_identifier.__dict__:
        #ports_for_ip: Key: IP, value: dictionary with portspec as key, 2 element list ([min, max]) as value
        #networks_for_ip: Key: ipaddress network object, value: dictionary with portspec as key, 2 element list ([min, max]) as value
        ports_for_identifier.ports_for_ip, ports_for_identifier.networks_for_ip = create_ports_for_ip(profiles_for_ip, ports_for_profile)

    lookup_cache = ports_for_identifier.pfi_cache
    if one_id in lookup_cache:						#If in cache already, use that.
        return lookup_cache[one_id]

    ip_profile = {}

    if one_id in ports_for_identifier.ports_for_ip:			#If the id exactly matches an IP address, use that.
        ip_profile = ports_for_identifier.ports_for_ip[one_id]
    else:
        try:
            ip_obj = ipaddress.ip_address(one_id)			#Make an ip address object to check against supplied networks
        except ValueError:
            ip_obj = None						#Not an IP address, so no network can contain it.

        if ip_obj is not None:
            #"Have we matched yet" is tracked with the prefix length, not the truthiness of the profile:
            #a matching network whose profile is empty must still beat a less specific network.
            best_prefix_len = None
            for one_net, net_profile in ports_for_identifier.networks_for_ip.items():
                if ip_obj in one_net and (best_prefix_len is None or one_net.prefixlen > best_prefix_len):
                    ip_profile = net_profile
                    best_prefix_len = one_net.prefixlen

    lookup_cache[one_id] = ip_profile					#Misses are cached too, so the network scan runs once per identifier.
    return ip_profile
339 | ip_profile = ports_for_identifier.networks_for_ip[one_net] #Use it. 340 | ports_for_identifier.pfi_cache[one_id] = ip_profile 341 | old_prefix_len = one_net.prefixlen 342 | else: 343 | #We have _not_ already matched a previous network, so just use this one. 344 | ip_profile = ports_for_identifier.networks_for_ip[one_net] 345 | ports_for_identifier.pfi_cache[one_id] = ip_profile 346 | old_prefix_len = one_net.prefixlen 347 | 348 | return ip_profile 349 | 350 | 351 | 352 | def manuf_label(mac_addr, ManufTable): 353 | """Returns the correct Manufacturer name for a given mac address.""" 354 | 355 | if mac_addr[:8] == '-': 356 | ret_manuf_label = "" 357 | elif mac_addr[:14].startswith(('00:00:5E:00:01')): #https://www.iana.org/assignments/ethernet-numbers/ethernet-numbers.xhtml#ethernet-numbers-1 358 | ret_manuf_label = "VRRP Router" 359 | elif mac_addr[:14].startswith(('00:00:5E:00:02')): 360 | ret_manuf_label = "IPv6 VRRP Router" 361 | elif mac_addr[:8].startswith(('01:00:5E', '33:33:', 'FF:FF:FF')): 362 | ret_manuf_label = "Ethernet broadcast/multicast" 363 | elif mac_addr[:8] in ManufTable: 364 | ret_manuf_label = ManufTable[mac_addr[:8]] 365 | else: 366 | ret_manuf_label = 'Unrecognized mac prefix' 367 | 368 | return ret_manuf_label 369 | 370 | 371 | def output_results(output_lists, ips_of_mac_dict, user_args, EtherManufDict): 372 | """Print the output tables.""" 373 | 374 | if user_args['web'] and user_args['header']: 375 | print("\n\ndevice_profile stats\n\n") 376 | 377 | if output_lists: 378 | if user_args['web']: 379 | print("") 380 | print("") 381 | current_header = '' 382 | for value_list in sorted(output_lists): 383 | if value_list[0] != current_header: 384 | if user_args['web']: 385 | print('") 386 | else: 387 | print("======== " + value_list[0]) 388 | current_header = value_list[0] 389 | if user_args['web']: 390 | print("") 391 | else: 392 | print('{1:<40s} {2:>20s} {3:>18,}'.format(*value_list)) 393 | if user_args['web']: 394 | print("
IPProtocolBytes
' + str(value_list[0]) + "
" + str(value_list[1]) + "" + str(value_list[2]) + "" + str("{:,}".format(value_list[3])) + "
") 395 | 396 | if ips_of_mac_dict: 397 | if user_args['web']: 398 | print("
\n") 399 | print('') 400 | else: 401 | print('') 402 | print("======== Mac addresses") 403 | 404 | 405 | for one_mac in sorted(ips_of_mac_dict.keys()): 406 | my_manuf = manuf_label(one_mac, EtherManufDict) 407 | 408 | if len(ips_of_mac_dict[one_mac]) < 20: 409 | ip_list = ', '.join(sorted(ips_of_mac_dict[one_mac])) 410 | else: 411 | ip_list = str(len(ips_of_mac_dict[one_mac])) + " ips" 412 | 413 | if user_args['web']: 414 | print("") 415 | else: 416 | print('{0:<18s} {1:<35s} {2:") 421 | 422 | if user_args['web'] and user_args['header']: 423 | print("\n") 424 | 425 | 426 | 427 | #======== Global variables ======== 428 | devprof_version = '2.2' 429 | 430 | EtherManuf = {} #String dictionary: for a given key of the first three uppercase octets of a mac address ("00:01:0F"), who made this card? 431 | originator_stats = {} #Dict; key is orig_ip/port/ip_proto/app_proto , value is total payload bytes (orig+responder) 432 | ips_of_mac = {} #Dict; key is the mac address, value is a list of IPs associated with that mac. one IP means that's almost certainly the mac of that system, more than one means its the mac of a router leading to more than one system. 
files_successfully_loaded = 0

Devel = False
#expanduser() uses $HOME when it is set and otherwise falls back to the password database,
#where a direct os.environ["HOME"] lookup raised KeyError as soon as the module was loaded.
default_config_dir = os.path.expanduser('~') + '/.config/devprof/'
pfi_filename = 'profiles_for_ip.json'
pfp_filename = 'ports_for_profile.json'


def daily_multiplier(hours_covered, files_loaded):
    """Return the factor that scales the loaded payload totals to a 24 hour period.

    hours_covered is the user supplied number of hours covered by the logs, or
    None when it was not given; in that case the number of successfully loaded
    files is used instead.  A value that is zero, negative, or within 1e-09 of
    zero ends the program through fail().
    """

    if hours_covered is None:
        return float(24) / files_loaded					#...go by the number of logs successfully loaded.

    #abs_tol is required when comparing against 0; a purely relative tolerance only ever matches exactly 0.
    if hours_covered <= 0 or math.isclose(hours_covered, 0.0, abs_tol=1e-09):
        fail("specified time must be greater than 0")

    return float(24) / hours_covered					#Use user-supplied value if there is one,


#======== Code ========
if __name__ == "__main__":

    #==== Configuration ====
    import argparse

    parser = argparse.ArgumentParser(description='devprof version ' + str(devprof_version))
    parser.add_argument('-c', '--config', help='Directory that holds configuration files (Default: ' + str(default_config_dir) + ')', default=default_config_dir, required=False)
    parser.add_argument('-t', '--time', help='Time (in hours) covered by logs (default: number of logs loaded).', required=False, type=float)
    parser.add_argument('-d', '--directory', help='Directory that holds Bro/Zeek log files.', required=True)
    parser.add_argument('-w', '--web', help='Show in web (HTML) format (default: text)', required=False, default=False, action='store_true')
    parser.add_argument('--header', help='Add HTML header and footer', required=False, default=False, action='store_true')
    parser.add_argument('--debug', help='Show additional debugging information on stderr', required=False, default=False, action='store_true')
    cl_args = vars(parser.parse_args())

    Devel = cl_args['debug']

    config_dir = cl_args['config']
    if not os.path.isdir(config_dir):
        fail("No configuration directory " + config_dir + " : please create it and rerun this program")

    profiles_for_ip = load_json(os.path.join(config_dir, pfi_filename), {"system_profile_pairs": [{"systems": ["0.0.0.0/0", "::/0"], "profiles": []}]})
    ports_for_profile = load_json(os.path.join(config_dir, pfp_filename), {})

    #==== Support data ====
    for oneMacFile in ('/usr/share/ettercap/etter.finger.mac', '/opt/local/share/ettercap/etter.finger.mac', '/usr/share/nmap/nmap-mac-prefixes', '/opt/local/share/nmap/nmap-mac-prefixes', '/usr/share/wireshark/manuf', '/opt/local/share/wireshark/manuf', '/usr/share/ethereal/manuf', '/usr/share/arp-scan/ieee-oui.txt', '/opt/local/share/arp-scan/ieee-oui.txt'):
        if os.path.isfile(oneMacFile):
            LoadMacData(oneMacFile)
    if not EtherManuf:
        Debug("None of the default mac address listings found.  Please install ettercap, nmap, wireshark, and/or arp-scan.")
    else:
        Debug(str(len(EtherManuf)) + " mac prefixes loaded.")

    #==== Load Bro/Zeek logs ====
    bro_log_dir = cl_args['directory']
    if bro_log_dir[-1:] != '/':
        bro_log_dir = bro_log_dir + '/'					#Make sure it ends with a slash.

    #NOTE(review): the prefix test only accepts paths that begin with "<directory>/conn", so conn logs
    #inside date-named subdirectories are not loaded even though the listing is recursive - confirm intent.
    for one_log in tree_file_listing(bro_log_dir):
        if one_log.startswith(bro_log_dir + 'conn') and os.path.isfile(one_log):
            Debug(one_log)
            load_file_into_originator_stats(one_log)

    if files_successfully_loaded == 0:
        fail("Unable to successfully load any files")

    #If we have anything other than 24 hours of logs, this adjusts the total payload to be payload over 24 hours (which all profiles assume).
    #The option is stored under 'time' (from --time); it is None when the user did not supply it.
    multiplier = daily_multiplier(cl_args['time'], files_successfully_loaded)
496 | 497 | #==== Compare loaded traffic stats to the user-defined limits ==== 498 | out_lists = [] 499 | 500 | for one_src_ip in originator_stats.keys(): 501 | my_ip_profile = ports_for_identifier(one_src_ip) 502 | 503 | for one_proto in originator_stats[one_src_ip]: 504 | this_proto_category = 'unknown' 505 | for one_prof in my_ip_profile.keys(): 506 | if one_proto == one_prof: 507 | if this_proto_category not in ('too_little', 'too_much'): 508 | this_proto_category = 'in_range' 509 | 510 | if my_ip_profile[one_prof][0] is not None and (originator_stats[one_src_ip][one_proto] * multiplier) < my_ip_profile[one_prof][0]: #Traffic for this port is lower than profile minimum for this port (if there is one) 511 | this_proto_category = 'too_little' 512 | elif my_ip_profile[one_prof][1] is not None and (originator_stats[one_src_ip][one_proto] * multiplier) >= my_ip_profile[one_prof][1]: #Traffic for this port is greater than or equal to profile maximum for this port (if there is one) 513 | this_proto_category = 'too_much' 514 | if this_proto_category != 'in_range': 515 | out_lists.append([this_proto_category, one_src_ip, one_proto, originator_stats[one_src_ip][one_proto]]) 516 | #else: 517 | # print(this_proto_category) 518 | 519 | 520 | 521 | #==== Display results ==== 522 | output_results(out_lists, ips_of_mac, cl_args, EtherManuf) 523 | -------------------------------------------------------------------------------- /devprof.txt: -------------------------------------------------------------------------------- 1 | 2 | 30 Second intro 3 | devprof reads the amount of payload exchanged between hosts (from 4 | Bro or Zeek logs). Based on limits set by the user, it reports on ports 5 | that are sharing too little or too much data in a 24 hour period. 6 | 7 | Primary features: 8 | - If mac_logging is enabled in Bro/Zeek, it reports on all IPs associated 9 | with a mac address and the manufacturer of the card.
10 | - A given IP address can be associated with one or more profiles (groups 11 | of ports along with their minimum and maximum limits). This allows you 12 | to build the port requirements for a system in blocks, such as "linux", 13 | "ubuntu", "apache", and "production", or "devel_team", "macosx", and 14 | "system_scanner". 15 | 16 | 17 | Configuration files 18 | 19 | devprof uses a "profile" as a description of the allowed traffic 20 | for a system of a given type. Here are some examples of profiles: 21 | 22 | "windows10": a Windows 10 system. This profile would know about the 23 | types of traffic likely to be created by a windows 10 machine. 24 | 25 | "yyz_iot_camera": a particular model of IP camera on the network. 26 | 27 | "developer": Your development team members might have different traffic 28 | patterns than your other users; they may be allowed to make ssh 29 | connections, for example, when this is not allowed for everyone else. 30 | 31 | "bob": This particular user runs security checks against development 32 | machines, and as such is allowed to connect to more ports than others. 33 | 34 | You're obviously welcome to use any organization that works for 35 | you, but you may want to consider some combination of operating system, 36 | hardware, user group, and/or specific user profiles like the above. 37 | 38 | 39 | devprof is controlled by two configuration files, both stored in 40 | $HOME/.config/devprof/ (you can change this directory with the "-c" 41 | option). 42 | 43 | profiles_for_ip.json 44 | 45 | This file connects systems to profiles. Here's an example: 46 | 47 | {"systems": ["172.16.12.1", "fe80::2/128"], "profiles": ["windows10", "background_noise"]}, 48 | 49 | Both 172.16.12.1 and fe80::2 are windows 10 systems, and the 50 | traffic we'd normally see from a windows 10 system will be accepted.
51 | 52 | 53 | The above examples of profiles can be added together, so it's not 54 | unreasonable to have: 55 | 56 | {"systems": ["10.2.7.1", "10.0.0.0/16", "10.0.0.0/24", "c999:abd:0:12::/64"], "profiles": ["windows10", "developer", "background_noise"]}, 57 | 58 | , which says that the above IP address and subnets are all 59 | allowed to have both windows 10 and developer traffic, along with 60 | effectively ignoring reasonable amounts of background noise traffic. 61 | 62 | We include the file sample_profiles_for_ip.json so you can see 63 | the format. 64 | 65 | 66 | ports_for_profile.json 67 | 68 | So what kinds of things might you find in a profile? Here are 69 | some examples: 70 | 71 | "background_noise": { 72 | "9:icmp:-": [null, "100KB"], 73 | "67:udp:dhcp": [null, null], 74 | "133:icmp:-": [null, "100KB"], 75 | "135:icmp:-": [null, "1MB"], 76 | "136:icmp:-": [null, "100KB"], 77 | "5355:udp:dns": [null, "100KB"] 78 | }, 79 | 80 | 81 | "developer": { 82 | "21:tcp:ftp": [null, "100KB"], 83 | "53:tcp:-": [null, "100KB"], 84 | "53:tcp:dns": [null, "100MB"], 85 | "53:udp:dns": [null, "1GB"], 86 | "80:tcp:-": [null, "1MB"], 87 | "80:tcp:http": [null, "10GB"], 88 | "123:udp:-": ["100", "1MB"], 89 | "443:udp:-": [null, "1GB"], 90 | "443:tcp:-": [null, "1GB"], 91 | "443:tcp:ssl": [null, "1GB"], 92 | ":tcp:ftp-data": [null, "10MB"] 93 | }, 94 | 95 | See below for more details about the port specifications and the 96 | associated size range. 97 | 98 | These two files together allow you to 1) define your own profiles 99 | along with what you consider reasonable amounts of traffic, and 2) 100 | associate one or more profiles with specific IPv4 and/or IPv6 IP 101 | addresses and subnets. 102 | 103 | 104 | 105 | 106 | Port specification details 107 | When specifying the port, match what Bro/Zeek is providing as a 108 | port specification.
If Bro lists "53:tcp:dns", use that, but be aware 109 | that sometimes Bro is not able to verify that the traffic on that port is 110 | actually DNS traffic. You may also need to list "53:tcp:-" (possibly 111 | with a lower number of bytes in the hope that Bro recognizes most of it 112 | as DNS traffic). 113 | 114 | Most protocols stay on a single port (such as DNS on port 53). 115 | There are a few that run on almost any port; the worst offender is 116 | ftp-data which can show up on any tcp port. In cases like this, you may 117 | want to switch from Bro's spec using a specific port: 118 | "1484:tcp:ftp-data" 119 | to the form that no longer specifies a port: 120 | ":tcp:ftp-data" 121 | Note, you can only do this wildcarding with the port number, not 122 | any other field, and it needs to start with a colon like above. 123 | 124 | If you have multiple forms, such as 125 | "22:tcp:ssh": [null, "10MB"], 126 | "22:tcp:-": [null, "1MB"], 127 | ":tcp:ssh": [null, 0], 128 | we first try to match incoming traffic against the port-specific 129 | rules (lines 1 and 2), and if there isn't any for this system, we see if 130 | there's a wildcarded form (line 3). Effectively this allows 10MB of ssh 131 | payload that Bro recognizes as ssh traffic, another 1MB of port 22 132 | traffic that Bro couldn't recognize as ssh, and we accept ssh running on 133 | any other port _and alert on it_ as this will exceed the "Maximum 0 bytes 134 | of payload" requirement. It's a handy way to identify that someone has 135 | ssh running on a different port (though you'll have to go back to the Bro 136 | logs to find out which port that is). 137 | This approach works fine when Bro can figure out the protocol 138 | name ("ssh", above), but we discourage using this approach when it can't 139 | (like ":tcp:-").
140 | 141 | 142 | Size range details 143 | Next to each port key is a range of values for that type of 144 | traffic, such as: 145 | "123:udp:-": ["100", "1MB"], 146 | 147 | This is read as "The combined amount of payload for port 123 over 148 | 24 hours should be >= 100 bytes and <1,000,000 bytes.". If 0-99 bytes or 149 | 1000000 or more bytes of payload are found in udp port 123 packets, this 150 | will be listed as "out_of_range" for this particular IP address. 151 | 152 | Either of the values can be "null". If the left value (minimum) 153 | is null, there is no minimum requirement; there can be a total of 0 bytes 154 | of payload, or no udp port 123 traffic at all. If the right value 155 | (maximum) is null we won't alert on "too much" payload. 156 | Here are some practical examples of how to use these: 157 | 158 | This device should send at least 400 but no more than 3000 bytes of payload on UDP port 999: 159 | "999:udp:-": ["400", "3001"], 160 | 161 | This device should have some traffic on that port, but no requirement on how much: 162 | "999:udp:-": ["1", null], 163 | 164 | There should be less than 1 megabyte of traffic on that port: 165 | "999:udp:-": [null, "1MB"], 166 | 167 | There should be absolutely no payload traffic sent on that port: 168 | "999:udp:-": [null, 0], 169 | 170 | This port should be totally ignored (don't care if there's any traffic at all or not): 171 | "999:udp:-": [null, null], 172 | 173 | If the same port specification shows up in 2 separate profiles, such as 174 | ... 175 | "devel_system": { 176 | "22:tcp:ssh": [null, "100MB"], 177 | ... 178 | }, 179 | "server_backup": { 180 | "22:tcp:ssh": ["1KB", null], 181 | ... 182 | }, 183 | ... 184 | and at least one IP has both of these profiles assigned to it, 185 | that IP will grab the more restrictive values for both minimum and 186 | maximum (in this case, ["1KB", "100MB"]). 187 | 188 | We use "xMB" as x * 1000000, and "xMiB" as x * 1048576.
See 189 | https://en.wikipedia.org/wiki/Binary_prefix for more details. 190 | 191 | All of the numbers placed as minimum or maximum are for a 24 hour 192 | period. If less than 24 hours of logs are fed in, the number of bytes 193 | seen on a port is scaled up to a 24 hour period (4 hours of logs would 194 | have their byte counts multiplied by 6). If more than 24 hours of logs 195 | are seen, the byte counts are scaled down to match 24 hours (if you feed 196 | in 60 hours of logs, each byte count will be divided by 2.5). This means 197 | that all comparisons are "bytes seen in the equivalent of a day" to 198 | minimum and maximum values for a day of traffic. 199 | 200 | This scaling happens automatically; the byte counts are 201 | multiplied by (24/number_of_logs). If you know that these logs are not 202 | complete hours and wish to manually set the time covered, use the "-t" 203 | command line parameter to set the number of hours. For example, if you 204 | know the first log of 4 only covered 15 minutes you can use "-t 3.25" to 205 | fix the time covered. 206 | 207 | If you'd like to add a comment about what a port is, you can add 208 | a third column to the min/max list, changing: 209 | "135:icmp:-": [null, "1MB"], 210 | to 211 | "135:icmp:-": [null, "1MB", "Neighbor solicitation"], 212 | If you do this you'll want to keep an eye out as a future release 213 | might start using that third field. 
214 | 215 | -------------------------------------------------------------------------------- /sample_ports_for_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "sample_local_lan": { 3 | "9:icmp:-": [null, "100KB"], 4 | "133:icmp:-": [null, "100KB"], 5 | "135:icmp:-": [null, "1MB"], 6 | "136:icmp:-": [null, "100KB"], 7 | "5355:udp:dns": [null, "100KB"] 8 | }, 9 | "sample_general_traffic": { 10 | "1:icmp:-": [null, "1MB"], 11 | "3:icmp:-": [null, "1MB"], 12 | "11:icmp:-": [null, "1MB"], 13 | "21:tcp:ftp": [null, "100KB"], 14 | "53:tcp:-": [null, "100KB"], 15 | "53:tcp:dns": [null, "100MB"], 16 | "53:udp:dns": [null, "1GB"], 17 | "67:udp:dhcp": [null, null], 18 | "80:tcp:-": [null, "1MB"], 19 | "80:tcp:http": [null, "10GB"], 20 | "123:udp:-": ["100", "1MB"], 21 | "443:udp:-": [null, "1GB"], 22 | "443:tcp:-": [null, "1GB"], 23 | "443:tcp:ssl": [null, "1GB"], 24 | "547:udp:-": [null, "1MB"], 25 | "5223:tcp:-": [null, "100KB"], 26 | "5223:tcp:ssl": [null, "100KB"], 27 | "5353:udp:dns": [null, "10MB"], 28 | ":tcp:ftp-data": [null, "10MB"] 29 | }, 30 | "sample_router": { 31 | "67:udp:dhcp": [null, null], 32 | "546:udp:-": [null, "10KB"], 33 | "1900:udp:-": [null, "10MB"] 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /sample_profiles_for_ip.json: -------------------------------------------------------------------------------- 1 | { 2 | "system_profile_pairs": [ 3 | {"systems": ["10.0.0.1", "fe80::f60e:dead:beef:cafe", "192.168.0.0/24"], "profiles": ["sample_router", "sample_local_lan"]}, 4 | {"systems": ["10.0.0.64"], "profiles": ["sample_general_traffic", "sample_local_lan"]}, 5 | {"systems": ["10.2.7.1", "10.0.0.0/16", "10.0.0.0/24", "fe80::/16"], "profiles": ["sample_general_traffic", "sample_local_lan"]}, 6 | {"systems": ["172.16.12.1"], "profiles": ["sample_local_lan"]}, 7 | {"systems": ["0.0.0.0/0", "::/0"], "profiles": ["sample_general_traffic"]} 8 | ] 9 | } 
10 | --------------------------------------------------------------------------------
Mac addresses
" + str(one_mac) + "" + str(my_manuf) + "" + str(ip_list) + "