├── .gitignore
├── LICENSE
├── README.md
├── devprof.py
├── devprof.txt
├── sample_ports_for_profile.json
└── sample_profiles_for_ip.json
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dprof
2 | Device profile: Define acceptable amounts of traffic for your devices and see a report of outliers.
3 |
--------------------------------------------------------------------------------
/devprof.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """This program imports bro/zeek logs that watch a given network and reports on systems whose volume of traffic (in payload bytes) exceeds a profile assigned to that system."""
3 |
4 |
5 | import os #For directory listing and others
6 | import sys
7 | import gzip #To read gzip compressed files
8 | import re #For regular expression parsing
9 | import json #To load user configuration
10 | import math #For float comparison
11 | try:
12 | import ipaddress #IP address/network objects and functions
13 | except ImportError:
14 | print("Missing ipaddress module; perhaps 'sudo port install py-ipaddress', 'sudo yum install python-ipaddress' or 'sudo -H pip install ipaddress' ? Exiting.")
15 | raise
16 |
17 |
18 | #======== Functions ========
def fail(fail_message):
    """Report a fatal problem on stderr and stop the program with exit status 1.

    fail_message: any object; its str() form is printed, followed by ", exiting.".
    """

    print(str(fail_message) + ', exiting.', file=sys.stderr, flush=True)
    sys.exit(1)
25 |
26 |
def Debug(DebugStr):
    """Write a diagnostic note (plus a newline) to stderr, but only while the module-level Devel flag is switched on."""

    if Devel == False:
        return

    note = DebugStr + '\n'
    sys.stderr.write(note)
    sys.stderr.flush()
32 |
33 |
def write_object(filename, generic_object):
    """Write out a text object to a file, encoded as UTF-8.

    filename: path of the file to create (an existing file is overwritten).
    generic_object: string to store; it is encoded as UTF-8 before being written.

    Any failure is noted on stderr and then re-raised to the caller.
    """

    try:
        with open(filename, "wb") as write_h:
            write_h.write(generic_object.encode('utf-8'))
    except Exception:
        #The error is re-raised (nothing is skipped), so just name the file involved. str() keeps this handler from failing on non-string paths.
        sys.stderr.write("Problem writing " + str(filename) + ".\n")
        sys.stderr.flush()
        raise
45 |
46 |
def load_json(json_filename, default_content):
    """Load user configuration from a json file, creating the file from a default if it does not exist.

    json_filename: path of the json file to read.
    default_content: json-serializable object that is written to json_filename (via write_object) and returned when that file does not exist.

    Returns the parsed content of the file, or default_content if the file had to be created.
    Raises json.JSONDecodeError (after a note on stderr) if the file exists but does not hold valid json.
    """

    json_to_return = None

    if os.path.exists(json_filename):
        #write_object stores these files as UTF-8, so read them back the same way instead of relying on the locale's default encoding.
        with open(json_filename, encoding='utf-8') as json_h:
            try:
                json_to_return = json.loads(json_h.read())
            except json.JSONDecodeError:
                sys.stderr.write("Unable to load " + json_filename + " . Please check that it contains valid json.\n")
                sys.stderr.flush()
                raise
    else:
        json_to_return = default_content
        write_object(json_filename, json.dumps(json_to_return))
        sys.stderr.write("No configuration file " + json_filename + ' . Using empty configuration.\n')
        sys.stderr.flush()

    return json_to_return
66 |
67 |
def LoadMacData(MacFile):
    """Load Ethernet Mac address prefixes from standard locations (from ettercap, nmap, wireshark, and/or arp-scan).

    MacFile: path of a mac prefix listing. Two line layouts are understood:
        "00:01:0F" followed by the manufacturer (colon separated prefix), and
        "00010F", a space or tab, then the manufacturer (six uppercase hex digits).

    New prefixes are added to the module-level EtherManuf dictionary, keyed by the uppercase "XX:XX:XX" prefix; prefixes that are already present keep their existing manufacturer.

    Returns True if the file was read to the end, False if it does not exist or could not be read.
    """

    if not os.path.isfile(MacFile):
        Debug("Unable to load " + str(MacFile))
        return False

    More = ''
    if len(EtherManuf) > 0:
        More = ' more'

    LoadCount = 0
    hex_prefix = re.compile(r'^[0-9A-F]{6}[ \t]')

    try:
        with open(MacFile, 'r') as MacHandle:            #"with" closes the file even if reading fails part way through
            for line in MacHandle:
                if (len(line) >= 8) and (line[2] == ':') and (line[5] == ':'):
                    #uppercase incoming strings just in case one of the files uses lowercase
                    MacHeader = line[:8].upper()
                    Manuf = line[8:].strip()
                elif (len(line) >= 7) and (hex_prefix.search(line) is not None):
                    MacHeader = (line[0:2] + ':' + line[2:4] + ':' + line[4:6]).upper()
                    Manuf = line[7:].strip()
                else:
                    continue

                if MacHeader not in EtherManuf:
                    EtherManuf[MacHeader] = Manuf
                    LoadCount += 1
    except (OSError, ValueError):                        #ValueError includes UnicodeDecodeError from files in an unexpected encoding
        Debug("Unable to load " + str(MacFile))
        return False

    if '00:00:00' in EtherManuf:
        del EtherManuf['00:00:00']                       #Not really Xerox
        LoadCount -= 1
    Debug(str(LoadCount) + More + " mac prefixes loaded from " + str(MacFile))
    return True
109 |
110 |
def tree_file_listing(top_level_dir):
    """Returns a set of files in a directory tree (recursively).

    top_level_dir: directory to search.

    Returns a set holding the path of every file found below top_level_dir; each path starts with top_level_dir as given. The set is empty if top_level_dir is not a directory.
    """

    ret_file_list = set()

    if os.path.isdir(top_level_dir):
        #os.walk already visits every subdirectory, so no explicit recursion is needed (recursing as well re-walked each subtree once per ancestor).
        #followlinks=True keeps files under symlinked directories in the result, which the earlier explicit recursion also did.
        for top_level, _dirs, files in os.walk(top_level_dir, followlinks=True):
            ret_file_list.update(os.path.join(top_level, one_file) for one_file in files)

    return ret_file_list
125 |
126 |
127 |
def load_file_into_originator_stats(incoming_log):
    """Loads in a single file of Bro/Zeek logs into originator_stats and ips_of_mac.

    incoming_log: path of a gzip-compressed, tab-separated log whose "#fields" header line names the columns.

    For every data line, orig_bytes + resp_bytes is added to originator_stats[source ip][connection key].
    The connection key is "port:proto:service" (for icmp, id.orig_p is used in place of id.resp_p).
    If the profile for the source IP lacks that exact key but does list ":proto:service", the shortened key is used instead.
    When the log has orig_l2_addr/resp_l2_addr columns, the IPs seen with each mac address are collected in ips_of_mac.

    files_successfully_loaded is incremented unless the header lacks a required column or data shows up before any header.
    Blank lines and lines too short to hold the needed columns are skipped.
    """

    global files_successfully_loaded                    #originator_stats and ips_of_mac are only modified in place, so they need no global statement

    required_fields = ('id.orig_h', 'id.orig_p', 'id.resp_p', 'proto', 'service', 'orig_bytes', 'resp_bytes')
    optional_fields = ('orig_l2_addr', 'resp_l2_addr', 'id.resp_h')

    #We unconditionally load these for each new file, as the format may change between files.
    field_pos = {}            #Key: name of field.  Value: position where that field can be found.
    min_fields = 0            #Number of columns a data line needs; 0 until a usable "#fields" header has been read.

    load_completed = True

    with gzip.open(incoming_log, 'rt') as bro_h:
        for raw_line in bro_h:
            line = raw_line.strip()
            if not line:
                continue                                #Blank lines carry no data

            if line.startswith('#fields'):
                #Load in the file's header so we know which column holds which field.  [1:] drops the "#fields" label at the far left.
                #Rebuilt from scratch so columns from an earlier header in the same file cannot linger.
                field_pos = {one_name: position for position, one_name in enumerate(line.split('\t')[1:])}
                #Sample field_pos loaded
                #{'ts': 0, 'uid': 1, 'id.orig_h': 2, 'id.orig_p': 3, 'id.resp_h': 4, 'id.resp_p': 5, 'proto': 6, 'service': 7, 'duration': 8, 'orig_bytes': 9, 'resp_bytes': 10, ...}
                if any(one_name not in field_pos for one_name in required_fields):        #We require at least these fields, give up on this file if not present
                    Debug(str(incoming_log) + " is missing crucial field.")
                    load_completed = False
                    break
                min_fields = 1 + max(field_pos[one_name] for one_name in required_fields + optional_fields if one_name in field_pos)
            elif not line.startswith('#'):
                if min_fields == 0:
                    #Data ahead of the header: we cannot tell which column is which, so give up on this file.
                    Debug(str(incoming_log) + " has data before its #fields header.")
                    load_completed = False
                    break

                fields = line.split('\t')
                if len(fields) < min_fields:
                    Debug(str(incoming_log) + " has a truncated line, skipping that line.")
                    continue

                src_ip = fields[field_pos['id.orig_h']]
                proto = fields[field_pos['proto']]
                service = fields[field_pos['service']]

                if proto == "icmp":                        #With icmp, the "type" is stored in id.orig_p.
                    conn_key = fields[field_pos['id.orig_p']] + ':' + proto + ':' + service
                else:
                    conn_key = fields[field_pos['id.resp_p']] + ':' + proto + ':' + service
                #At this point we're left with a conn_key like "1484:tcp:ftp-data"
                #If the profile for this IP lists this specific string, great.  If not, let's strip off the port and see if
                #":tcp:ftp-data" is in the profile for this IP, and use that if so.
                stripped_conn_key = ':' + proto + ':' + service
                profile_for_src_ip = ports_for_identifier(src_ip)
                if conn_key not in profile_for_src_ip and stripped_conn_key in profile_for_src_ip:
                    #OK, we do have the shortened key, so we use that from here on:
                    conn_key = stripped_conn_key

                stats_for_src_ip = originator_stats.setdefault(src_ip, {})
                stats_for_src_ip.setdefault(conn_key, 0)        #The key is created even when both byte counts are unset
                for byte_field in ('orig_bytes', 'resp_bytes'):
                    byte_count = fields[field_pos[byte_field]]
                    if byte_count != '-':                        #'-' marks an unset value
                        stats_for_src_ip[conn_key] += int(byte_count)

                if 'orig_l2_addr' in field_pos:
                    src_mac = fields[field_pos['orig_l2_addr']].upper()        #This is the mac address from which the packet came, which might be the mac address of the source IP or the mac address of a router in between.
                    if not src_mac.startswith('33:33'):                        #ipv6 multicast
                        ips_of_mac.setdefault(src_mac, set()).add(str(src_ip))

                if 'resp_l2_addr' in field_pos and 'id.resp_h' in field_pos:
                    dst_mac = fields[field_pos['resp_l2_addr']].upper()        #This is the mac address to which the packet is going, which might be the mac address of the dest IP or the mac address of a router in between.
                    if not dst_mac.startswith('33:33'):
                        ips_of_mac.setdefault(dst_mac, set()).add(str(fields[field_pos['id.resp_h']]))

    if load_completed:
        files_successfully_loaded = files_successfully_loaded + 1
201 |
202 |
def merge_two_ranges(first_range, second_range):
    """Combine two [min, max] limits for the same port into the most restrictive pair.

    first_range, second_range: 2 element lists whose values are a number or None (which is "null" in a json file); None means "no limit on this side".

    Returns [lower_limit, upper_limit]: the larger of the two minimums and the smaller of the two maximums, ignoring any None. A side stays None only when both inputs have None there.
    Example: [None, 1000000] and [1000, 100000000] merge into [1000, 1000000].
    """

    known_minimums = [one_range[0] for one_range in (first_range, second_range) if one_range[0] is not None]
    known_maximums = [one_range[1] for one_range in (first_range, second_range) if one_range[1] is not None]

    lower_limit = max(known_minimums) if known_minimums else None
    upper_limit = min(known_maximums) if known_maximums else None

    return [lower_limit, upper_limit]
223 |
224 |
def normalize_bytes(byte_limit):
    """Turn a byte limit such as "5MB" or "2GiB" into a plain number of bytes.

    byte_limit: None, a number, or a string holding an integer with an optional unit suffix.
        KB, MB, GB, TB and PB are powers of 1000; KiB, MiB, GiB, TiB and PiB are powers of 1024.
        See https://en.wikipedia.org/wiki/Binary_prefix

    Returns None for None, numbers unchanged, and an int for strings. Any other type ends the program through fail().
    """

    if byte_limit is None:
        return None

    if isinstance(byte_limit, str):
        unit_multipliers = (
            ("KB", 1000), ("MB", 1000 ** 2), ("GB", 1000 ** 3), ("TB", 1000 ** 4), ("PB", 1000 ** 5),
            ("KiB", 1024), ("MiB", 1024 ** 2), ("GiB", 1024 ** 3), ("TiB", 1024 ** 4), ("PiB", 1024 ** 5),
        )
        for unit, multiplier in unit_multipliers:
            if byte_limit.endswith(unit):
                return int(byte_limit[:-len(unit)]) * multiplier
        return int(byte_limit)                #No recognized suffix, so the string must be a bare integer

    if isinstance(byte_limit, (int, float)):
        return byte_limit

    fail("Unrecognized value: " + str(byte_limit))
    return None
262 |
263 |
def create_ports_for_ip(user_profiles_for_ip, user_ports_for_profile):
    """Build the per-system port limits from profiles_for_ip and ports_for_profile.

    user_profiles_for_ip: dictionary with a 'system_profile_pairs' list; each entry looks like
        {"systems": ["10.0.0.41", "10.10.10.0/24"], "profiles": ["mac", "traceroute", "general_traffic", "local_lan"]}
    user_ports_for_profile: dictionary; key is a profile name, value is a dictionary with portspec as key and a 2 element list ([min, max]) as value.

    Returns (created_ports_for_ip, created_networks_for_ip):
        created_ports_for_ip: key is the system exactly as written (IP address, mac address, or hostname).
        created_networks_for_ip: key is an ipaddress network object, made for every system containing a '/'.
        In both, the value is a dictionary with portspec as key and a normalized [min, max] list as value.

    Ends the program through fail() if 'system_profile_pairs' is missing, a systems entry is not a list, or a profile name is unknown.
    """

    created_ports_for_ip = {}
    created_networks_for_ip = {}

    if 'system_profile_pairs' not in user_profiles_for_ip:
        fail("Profiles_for_ip does not start with system_profile_pairs")

    for one_pair in user_profiles_for_ip['system_profile_pairs']:
        system_list = one_pair["systems"]
        if not isinstance(system_list, list):
            fail("One of the profiles_for_ip ip lists is not a list")

        profile_list = one_pair["profiles"]
        for one_profile in profile_list:
            if one_profile not in user_ports_for_profile:
                fail("No profile named " + str(one_profile) + " in ports_for_profile")

        for one_ip in system_list:
            if '/' in one_ip:
                #This is a subnet, so we make an ipaddress object for it
                system_key = ipaddress.ip_network(one_ip, strict=False)
                destination = created_networks_for_ip
            else:
                #This is an IP address, mac address, or hostname
                system_key = one_ip
                destination = created_ports_for_ip

            limits_for_system = destination.setdefault(system_key, {})
            for one_profile in profile_list:
                for one_port, raw_limits in user_ports_for_profile[one_profile].items():
                    new_range = [normalize_bytes(raw_limits[0]), normalize_bytes(raw_limits[1])]
                    #If we already had min/max for this port, find the most restrictive intersection with old and new min/max, otherwise just use the new min/max
                    limits_for_system[one_port] = merge_two_ranges(new_range, limits_for_system.get(one_port, [None, None]))

    return created_ports_for_ip, created_networks_for_ip
310 |
311 |
312 |
def ports_for_identifier(one_id):
    """Returns the port specifications for a given identifier (ipv4 address, ipv6 address, (or, future enhancement, mac address)).

    one_id: identifier as a string. Anything that is not listed verbatim in the profiles must be a valid IP address, as ipaddress.ip_address() raises ValueError otherwise.

    Returns a dictionary with portspec as key and a 2 element list ([min, max]) as value.
    An exact match on the identifier wins; otherwise the most specific (longest prefix) configured network containing the address is used, even if that network's profile is empty. If nothing matches, an empty dictionary is returned.

    State is kept in function attributes: pfi_cache remembers the answer for every identifier seen, while ports_for_ip and networks_for_ip are built on first use from the module-level profiles_for_ip and ports_for_profile.
    """

    #Remembers what profile we've found for a given IP address so we only have to look it up once.
    if "pfi_cache" not in ports_for_identifier.__dict__:
        ports_for_identifier.pfi_cache = {}

    if "ports_for_ip" not in ports_for_identifier.__dict__:
        #ports_for_ip: Key: IP, value: dictionary with portspec as key, 2 element list ([min, max]) as value
        #networks_for_ip: Key: ipaddress network object, value: dictionary with portspec as key, 2 element list ([min, max]) as value
        ports_for_identifier.ports_for_ip, ports_for_identifier.networks_for_ip = create_ports_for_ip(profiles_for_ip, ports_for_profile)

    if one_id in ports_for_identifier.pfi_cache:                #If in cache already, use that.
        return ports_for_identifier.pfi_cache[one_id]

    if one_id in ports_for_identifier.ports_for_ip:             #If the id exactly matches an IP address, use that.
        ip_profile = ports_for_identifier.ports_for_ip[one_id]
    else:
        ip_obj = ipaddress.ip_address(one_id)                   #Make an ip address object to check against supplied networks
        matching_nets = [one_net for one_net in ports_for_identifier.networks_for_ip if ip_obj in one_net]
        if matching_nets:
            #If we match more than one network, the smallest subnet (greatest /N) wins.
            #The choice is made on prefix length alone, so an empty profile on a specific subnet is not overridden by a broader network.
            best_net = max(matching_nets, key=lambda one_net: one_net.prefixlen)
            ip_profile = ports_for_identifier.networks_for_ip[best_net]
        else:
            ip_profile = {}

    #Misses are cached too, so systems without a profile do not trigger a network scan on every lookup.
    ports_for_identifier.pfi_cache[one_id] = ip_profile

    return ip_profile
349 |
350 |
351 |
def manuf_label(mac_addr, ManufTable):
    """Look up the label (usually the manufacturer name) to show for a mac address.

    mac_addr: mac address string; the prefixes it is compared with are uppercase and colon separated. '-' stands for "no mac address".
    ManufTable: dictionary whose key is the first three octets of a mac address ("00:01:0F") and whose value is the manufacturer.

    Returns "" for '-', a fixed label for VRRP and broadcast/multicast addresses, the ManufTable entry when the prefix is known, and 'Unrecognized mac prefix' otherwise.
    """

    if mac_addr == '-':
        return ""

    #https://www.iana.org/assignments/ethernet-numbers/ethernet-numbers.xhtml#ethernet-numbers-1
    if mac_addr.startswith('00:00:5E:00:01'):
        return "VRRP Router"
    if mac_addr.startswith('00:00:5E:00:02'):
        return "IPv6 VRRP Router"
    if mac_addr.startswith(('01:00:5E', '33:33:', 'FF:FF:FF')):
        return "Ethernet broadcast/multicast"

    mac_prefix = mac_addr[:8]
    if mac_prefix in ManufTable:
        return ManufTable[mac_prefix]

    return 'Unrecognized mac prefix'
369 |
370 |
371 | def output_results(output_lists, ips_of_mac_dict, user_args, EtherManufDict):
372 | """Print the output tables."""
373 |
374 | if user_args['web'] and user_args['header']:
375 | print("\n
\ndevice_profile stats\n\n")
376 |
377 | if output_lists:
378 | if user_args['web']:
379 | print("")
380 | print("IP | Protocol | Bytes |
")
381 | current_header = ''
382 | for value_list in sorted(output_lists):
383 | if value_list[0] != current_header:
384 | if user_args['web']:
385 | print('' + str(value_list[0]) + " |
")
386 | else:
387 | print("======== " + value_list[0])
388 | current_header = value_list[0]
389 | if user_args['web']:
390 | print("" + str(value_list[1]) + " | " + str(value_list[2]) + " | " + str("{:,}".format(value_list[3])) + " |
")
391 | else:
392 | print('{1:<40s} {2:>20s} {3:>18,}'.format(*value_list))
393 | if user_args['web']:
394 | print("
")
395 |
396 | if ips_of_mac_dict:
397 | if user_args['web']:
398 | print("
\n")
399 | print('Mac addresses |
')
400 | else:
401 | print('')
402 | print("======== Mac addresses")
403 |
404 |
405 | for one_mac in sorted(ips_of_mac_dict.keys()):
406 | my_manuf = manuf_label(one_mac, EtherManufDict)
407 |
408 | if len(ips_of_mac_dict[one_mac]) < 20:
409 | ip_list = ', '.join(sorted(ips_of_mac_dict[one_mac]))
410 | else:
411 | ip_list = str(len(ips_of_mac_dict[one_mac])) + " ips"
412 |
413 | if user_args['web']:
414 | print("" + str(one_mac) + " | " + str(my_manuf) + " | " + str(ip_list) + " |
")
415 | else:
416 | print('{0:<18s} {1:<35s} {2:")
421 |
422 | if user_args['web'] and user_args['header']:
423 | print("\n")
424 |
425 |
426 |
#======== Global variables ========
devprof_version = '2.2'

EtherManuf = {}                        #String dictionary: for a given key of the first three uppercase octets of a mac address ("00:01:0F"), who made this card?
originator_stats = {}                #Dict of dicts; outer key is the originating IP, inner key is "port:ip_proto:app_proto" (or ":ip_proto:app_proto"), value is total payload bytes (orig+responder)
ips_of_mac = {}                        #Dict; key is the mac address, value is a set of IPs associated with that mac.  One IP means that's almost certainly the mac of that system, more than one means it's the mac of a router leading to more than one system.
files_successfully_loaded = 0        #Number of log files that were loaded without being rejected

Devel = False                        #When True, Debug() writes its notes to stderr
#expanduser falls back to other sources for the home directory, so a missing HOME variable does not raise KeyError at import time.
default_config_dir = os.path.expanduser('~') + '/.config/devprof/'
pfi_filename = 'profiles_for_ip.json'
pfp_filename = 'ports_for_profile.json'
439 |
440 |
441 |
442 |
443 | #======== Code ========
if __name__ == "__main__":
    # Script entry point: parse the command line, load the two JSON config
    # files and the mac-prefix listings, load the conn logs, compare each
    # originator's per-port byte count (scaled to 24 hours) against its
    # profile limits, and hand everything that is not in range to
    # output_results().

    #==== Configuration ====
    import argparse

    parser = argparse.ArgumentParser(description='devprof version ' + str(devprof_version))
    parser.add_argument('-c', '--config', help='Directory that holds configuration files (Default: ' + str(default_config_dir) + ')', default=default_config_dir, required=False)
    parser.add_argument('-t', '--time', help='Time (in hours) covered by logs (default: number of logs loaded).', required=False, type=float)
    parser.add_argument('-d', '--directory', help='Directory that holds Bro/Zeek log files.', required=True)
    parser.add_argument('-w', '--web', help='Show in web (HTML) format (default: text)', required=False, default=False, action='store_true')
    parser.add_argument('--header', help='Add HTML header and footer', required=False, default=False, action='store_true')
    parser.add_argument('--debug', help='Show additional debugging information on stderr', required=False, default=False, action='store_true')
    cl_args = vars(parser.parse_args())

    Devel = cl_args['debug']

    config_dir = cl_args['config']
    if not os.path.isdir(config_dir):
        fail("No configuration directory " + config_dir + " : please create it and rerun this program")

    # The second argument to load_json is presumably the fallback used when
    # the file cannot be loaded -- confirm against load_json.
    profiles_for_ip = load_json(config_dir + '/' + pfi_filename, {"system_profile_pairs": [{"systems": ["0.0.0.0/0", "::/0"], "profiles": []}]})
    ports_for_profile = load_json(config_dir + '/' + pfp_filename, {})

    #==== Support data ====
    for oneMacFile in ('/usr/share/ettercap/etter.finger.mac', '/opt/local/share/ettercap/etter.finger.mac', '/usr/share/nmap/nmap-mac-prefixes', '/opt/local/share/nmap/nmap-mac-prefixes', '/usr/share/wireshark/manuf', '/opt/local/share/wireshark/manuf', '/usr/share/ethereal/manuf', '/usr/share/arp-scan/ieee-oui.txt', '/opt/local/share/arp-scan/ieee-oui.txt'):
        if os.path.isfile(oneMacFile):
            LoadMacData(oneMacFile)
    if not EtherManuf:
        Debug("None of the default mac address listings found.  Please install ettercap, nmap, wireshark, and/or arp-scan.")
    else:
        Debug(str(len(EtherManuf)) + " mac prefixes loaded.")

    #==== Load Bro/Zeek logs ====
    bro_log_dir = cl_args['directory']
    if not bro_log_dir.endswith('/'):
        bro_log_dir = bro_log_dir + '/'		#Make sure it ends with a slash.

    # Only files whose full path starts with "<directory>/conn" are loaded.
    for one_log in tree_file_listing(bro_log_dir):
        if one_log.startswith(bro_log_dir + 'conn') and os.path.isfile(one_log):
            Debug(one_log)
            load_file_into_originator_stats(one_log)

    if files_successfully_loaded == 0:
        fail("Unable to successfully load any files")

    # If we have anything other than 24 hours of logs, this adjusts the total
    # payload to be payload over 24 hours (which all profiles assume).
    # argparse stores "-t/--time" under the key 'time'; the key is always
    # present and is None when the option was not given.
    hours_covered = cl_args['time']
    if hours_covered is not None:
        # "not > 0" also rejects NaN; abs_tol is required because a purely
        # relative tolerance never matches when comparing against 0.
        if not hours_covered > 0 or math.isclose(hours_covered, 0.0, abs_tol=1e-09):
            fail("specified time must be greater than 0")
        else:
            multiplier = float(24) / hours_covered		#Use user-supplied value if there is one,
    else:
        multiplier = float(24) / files_successfully_loaded	#...otherwise go by the number of logs successfully loaded.

    #==== Compare loaded traffic stats to the user-defined limits ====
    out_lists = []

    for one_src_ip in originator_stats:
        my_ip_profile = ports_for_identifier(one_src_ip)
        src_stats = originator_stats[one_src_ip]

        for one_proto in src_stats:
            # A port with no entry in this IP's merged profile is 'unknown'.
            this_proto_category = 'unknown'
            if one_proto in my_ip_profile:
                limits = my_ip_profile[one_proto]
                scaled_bytes = src_stats[one_proto] * multiplier
                if limits[0] is not None and scaled_bytes < limits[0]:		#Traffic for this port is lower than profile minimum for this port (if there is one)
                    this_proto_category = 'too_little'
                elif limits[1] is not None and scaled_bytes >= limits[1]:	#Traffic for this port is greater than or equal to profile maximum for this port (if there is one)
                    this_proto_category = 'too_much'
                else:
                    this_proto_category = 'in_range'
            if this_proto_category != 'in_range':
                # The byte count reported is the raw (unscaled) total.
                out_lists.append([this_proto_category, one_src_ip, one_proto, src_stats[one_proto]])

    #==== Display results ====
    output_results(out_lists, ips_of_mac, cl_args, EtherManuf)
523 |
--------------------------------------------------------------------------------
/devprof.txt:
--------------------------------------------------------------------------------
1 |
2 | 30 Second intro
3 | devprof reads the amount of payload exchanged between hosts (from
4 | Bro or Zeek logs). Based on limits set by the user, it reports on ports
5 | that are sharing too little or too much data in a 24 hour period.
6 |
7 | Primary features:
8 | - If mac_logging is enabled in Bro/Zeek, it reports on all IPs associated
9 | with a mac address and the manufacturer of the card.
10 | - A given IP address can be associated with one or more profiles (groups
11 | of ports along with their minimum and maximum limits). This allows you
12 | to build the port requirements for a system in blocks, such as "linux",
13 | "ubuntu", "apache", and "production", or "devel_team", "macosx", and
14 | "system_scanner".
15 |
16 |
17 | Configuration files
18 |
19 | devprof uses a "profile" as a description of the allowed traffic
20 | for a system of a given type. Here are some examples of profiles:
21 |
22 | "windows10": a Windows 10 system. This profile would know about the
23 | types of traffic likely to be created by a windows 10 machine.
24 |
25 | "yyz_iot_camera": a particular model of IP camera on the network.
26 |
27 | "developer": Your development team members might have different traffic
28 | patterns than your other users; they may be allowed to make ssh
29 | connections, for example, when this is not allowed for everyone else.
30 |
31 | "bob": This particular user runs security checks against development
32 | machines, and as such is allowed to connect to more ports than others.
33 |
34 | You're obviously welcome to use any organization that works for
35 | you, but you may want to consider some combination of operating system,
36 | hardware, user group, and/or specific user profiles like the above.
37 |
38 |
39 | devprof is controlled by two configuration files, both stored in
40 | $HOME/.config/devprof/ (you can change this directory with the "-c"
41 | option).
42 |
43 | profiles_for_ip.json
44 |
45 | This file connects systems to profiles. Here's an example:
46 |
47 | {"systems": ["172.16.12.1", "fe80::2/128"], "profiles": ["windows10", "background_noise"]},
48 |
49 | Both 172.16.12.1 and fe80::2 are windows 10 systems, and the
50 | traffic we'd normally see from a windows 10 system will be accepted.
51 |
52 |
53 | The above examples of profiles can be added together, so it's not
54 | unreasonable to have:
55 |
56 | {"systems": ["10.2.7.1", "10.0.0.0/16", "10.0.0.0/24", "c999:abd:0:12::/64"], "profiles": ["windows10", "developer", "background_noise"]},
57 |
58 | This says that the above IP address and subnets are all
59 | allowed to have both windows 10 and developer traffic, along with
60 | effectively ignoring reasonable amounts of background noise traffic.
61 |
62 | We include the file sample_profiles_for_ip.json so you can see
63 | the format.
64 |
65 |
66 | ports_for_profile.json
67 |
68 | So what kinds of things might you find in a profile? Here are
69 | some examples:
70 |
71 | "background_noise": {
72 | "9:icmp:-": [null, "100KB"],
73 | "67:udp:dhcp": [null, null],
74 | "133:icmp:-": [null, "100KB"],
75 | "135:icmp:-": [null, "1MB"],
76 | "136:icmp:-": [null, "100KB"],
77 | "5355:udp:dns": [null, "100KB"]
78 | },
79 |
80 |
81 | "developer": {
82 | "21:tcp:ftp": [null, "100KB"],
83 | "53:tcp:-": [null, "100KB"],
84 | "53:tcp:dns": [null, "100MB"],
85 | "53:udp:dns": [null, "1GB"],
86 | "80:tcp:-": [null, "1MB"],
87 | "80:tcp:http": [null, "10GB"],
88 | "123:udp:-": ["100", "1MB"],
89 | "443:udp:-": [null, "1GB"],
90 | "443:tcp:-": [null, "1GB"],
91 | "443:tcp:ssl": [null, "1GB"],
92 | ":tcp:ftp-data": [null, "10MB"]
93 | },
94 |
95 | See below for more details about the port specifications and the
96 | associated size range.
97 |
98 | These two files together allow you to 1) define your own profiles
99 | along with what you consider reasonable amounts of traffic, and 2)
100 | associate one or more profiles with specific IPv4 and/or IPv6 IP
101 | addresses and subnets.
102 |
103 |
104 |
105 |
106 | Port specification details
107 | When specifying the port, match what Bro/Zeek is providing as a
108 | port specification. If Bro lists "53:tcp:dns", use that, but be aware
109 | that sometimes Bro is not able to verify that the traffic on that port is
110 | actually DNS traffic. You may also need to list "53:tcp:-" (possibly
111 | with a lower number of bytes in the hope that Bro recognizes most of it
112 | as DNS traffic).
113 |
114 | Most protocols stay on a single port (such as DNS on port 53).
115 | There are a few that run on almost any port; the worst offender is
116 | ftp-data which can show up on any tcp port. In cases like this, you may
117 | want to switch from Bro's spec using a specific port:
118 | "1484:tcp:ftp-data"
119 | to the form that no longer specifies a port:
120 | ":tcp:ftp-data"
121 | Note, you can only do this wildcarding with the port number, not
122 | any other field, and it needs to start with a colon like above.
123 |
124 | If you have multiple forms, such as
125 | "22:tcp:ssh": [null, "10MB"],
126 | "22:tcp:-": [null, "1MB"],
127 | ":tcp:ssh": [null, 0],
128 | we first try to match incoming traffic against the port-specific
129 | rules (lines 1 and 2), and if there isn't any for this system, we see if
130 | there's a wildcarded form (line 3). Effectively this allows 10MB of ssh
131 | payload that Bro recognizes as ssh traffic, another 1MB of port 22
132 | traffic that Bro couldn't recognize as ssh, and we accept ssh running on
133 | any other port _and alert on it_ as this will exceed the "Maximum 0 bytes
134 | of payload" requirement. It's a handy way to identify that someone has
135 | ssh running on a different port (though you'll have to go back to the Bro
136 | logs to find out which port that is).
137 | This approach works fine when Bro can figure out the protocol
138 | name ("ssh", above), but we discourage using this approach when it can't
139 | (like ":tcp:-").
140 |
141 |
142 | Size range details
143 | Next to each port key is a range of values for that type of
144 | traffic, such as:
145 | "123:udp:-": ["100", "1MB"],
146 |
147 | This is read as "The combined amount of payload for port 123 over
148 | 24 hours should be >= 100 bytes and < 1,000,000 bytes.". If 0-99 bytes or
149 | 1000000 or more bytes of payload are found in udp port 123 packets, this
150 | will be listed as "too_little" or "too_much" (respectively) for this particular IP address.
151 |
152 | Either of the values can be "null". If the left value (minimum)
153 | is null, there is no minimum requirement; there can be a total of 0 bytes
154 | of payload, or no udp port 123 traffic at all. If the right value
155 | (maximum) is null we won't alert on "too much" payload.
156 | Here are some practical examples of how to use these:
157 |
158 | This device should send at least 400 but no more than 3000 bytes of payload on UDP port 999:
159 | "999:udp:-": ["400", "3001"],
160 |
161 | This device should have some traffic on that port, but no requirement on how much:
162 | "999:udp:-": ["1", null],
163 |
164 | There should be no more than 1 megabyte of traffic on that port:
165 | "999:udp:-": [null, "1MB"],
166 |
167 | There should be absolutely no payload traffic sent on that port:
168 | "999:udp:-": [null, 0],
169 |
170 | This port should be totally ignored (don't care if there's any traffic at all or not):
171 | "999:udp:-": [null, null],
172 |
173 | If the same port specification shows up in 2 separate profiles, such as
174 | ...
175 | "devel_system": {
176 | "22:tcp:ssh": [null, "100MB"],
177 | ...
178 | },
179 | "server_backup": {
180 | "22:tcp:ssh": ["1KB", null],
181 | ...
182 | },
183 | ...
184 | and at least one IP has both of these profiles assigned to it,
185 | that IP will grab the more restrictive values for both minimum and
186 | maximum (in this case, ["1KB", "100MB"]).
187 |
188 | We use "xMB" as x * 1000000, and "xMiB" as x * 1048576. See
189 | https://en.wikipedia.org/wiki/Binary_prefix for more details.
190 |
191 | All of the numbers placed as minimum or maximum are for a 24 hour
192 | period. If less than 24 hours of logs are fed in, the number of bytes
193 | seen on a port is scaled up to a 24 hour period (4 hours of logs would
194 | have their byte counts multiplied by 6). If more than 24 hours of logs
195 | are seen, the byte counts are scaled down to match 24 hours (if you feed
196 | in 60 hours of logs, each byte count will be divided by 2.5). This means
197 | that all comparisons are "bytes seen in the equivalent of a day" to
198 | minimum and maximum values for a day of traffic.
199 |
200 | This scaling happens automatically; the byte counts are
201 | multiplied by (24/number_of_logs). If you know that these logs are not
202 | complete hours and wish to manually set the time covered, use the "-t"
203 | command line parameter to set the number of hours. For example, if you
204 | know the first log of 4 only covered 15 minutes you can use "-t 3.25" to
205 | fix the time covered.
206 |
207 | If you'd like to add a comment about what a port is, you can add
208 | a third column to the min/max list, changing:
209 | "135:icmp:-": [null, "1MB"],
210 | to
211 | "135:icmp:-": [null, "1MB", "Neighbor solicitation"],
212 | If you do this you'll want to keep an eye out as a future release
213 | might start using that third field.
214 |
215 |
--------------------------------------------------------------------------------
/sample_ports_for_profile.json:
--------------------------------------------------------------------------------
1 | {
2 | "sample_local_lan": {
3 | "9:icmp:-": [null, "100KB"],
4 | "133:icmp:-": [null, "100KB"],
5 | "135:icmp:-": [null, "1MB"],
6 | "136:icmp:-": [null, "100KB"],
7 | "5355:udp:dns": [null, "100KB"]
8 | },
9 | "sample_general_traffic": {
10 | "1:icmp:-": [null, "1MB"],
11 | "3:icmp:-": [null, "1MB"],
12 | "11:icmp:-": [null, "1MB"],
13 | "21:tcp:ftp": [null, "100KB"],
14 | "53:tcp:-": [null, "100KB"],
15 | "53:tcp:dns": [null, "100MB"],
16 | "53:udp:dns": [null, "1GB"],
17 | "67:udp:dhcp": [null, null],
18 | "80:tcp:-": [null, "1MB"],
19 | "80:tcp:http": [null, "10GB"],
20 | "123:udp:-": ["100", "1MB"],
21 | "443:udp:-": [null, "1GB"],
22 | "443:tcp:-": [null, "1GB"],
23 | "443:tcp:ssl": [null, "1GB"],
24 | "547:udp:-": [null, "1MB"],
25 | "5223:tcp:-": [null, "100KB"],
26 | "5223:tcp:ssl": [null, "100KB"],
27 | "5353:udp:dns": [null, "10MB"],
28 | ":tcp:ftp-data": [null, "10MB"]
29 | },
30 | "sample_router": {
31 | "67:udp:dhcp": [null, null],
32 | "546:udp:-": [null, "10KB"],
33 | "1900:udp:-": [null, "10MB"]
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/sample_profiles_for_ip.json:
--------------------------------------------------------------------------------
1 | {
2 | "system_profile_pairs": [
3 | {"systems": ["10.0.0.1", "fe80::f60e:dead:beef:cafe", "192.168.0.0/24"], "profiles": ["sample_router", "sample_local_lan"]},
4 | {"systems": ["10.0.0.64"], "profiles": ["sample_general_traffic", "sample_local_lan"]},
5 | {"systems": ["10.2.7.1", "10.0.0.0/16", "10.0.0.0/24", "fe80::/16"], "profiles": ["sample_general_traffic", "sample_local_lan"]},
6 | {"systems": ["172.16.12.1"], "profiles": ["sample_local_lan"]},
7 | {"systems": ["0.0.0.0/0", "::/0"], "profiles": ["sample_general_traffic"]}
8 | ]
9 | }
10 |
--------------------------------------------------------------------------------