├── LICENSE.txt ├── README.md ├── examples ├── conn.log ├── load_zeek_log_csv.py ├── load_zeek_log_into_elk.py ├── load_zeek_log_json.py ├── zeek_to_csv.py └── zeek_to_json.py ├── parsezeeklogs └── __init__.py ├── setup.cfg └── setup.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Dan Gunter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParseZeekLogs 2 | A lightweight utility for programmatically reading and manipulating Zeek (Bro) NSM log files and outputting into JSON or CSV format. This library works on both Python 2 and Python 3. 3 | 4 | ## Examples 5 | The following example first loads records from the Zeek connection log named conn.log. The data is the written out to a file named out.json. The name of the log file to read must be provided when creating the ParseBroLog class. You can use the safe_headers=True option in the to_json method to replace all instances of a dot with an underscore. 6 | ```python 7 | from parsezeeklogs import ParseZeekLogs 8 | 9 | with open('out.json',"w") as outfile: 10 | for log_record in ParseZeekLogs("conn.log", output_format="json", safe_headers=False): 11 | if log_record is not None: 12 | outfile.write(log_record + "\n") 13 | ``` 14 | 15 | This is another example that instead uses the csv output method to write the data out to a file named out.csv. This example shows filtering on specific fields within the log file. Field names should be provided as list elements. 16 | 17 | ```python 18 | from parsezeeklogs import ParseZeekLogs 19 | 20 | with open('out.csv',"w") as outfile: 21 | for log_record in ParseZeekLogs("conn.log", output_format="csv", safe_headers=False, fields=["ts","id.orig_h","id.orig_p","id.resp_h","id.resp_p"]): 22 | if log_record is not None: 23 | outfile.write(log_record + "\n") 24 | ``` 25 | 26 | ## Special Thanks 27 | * [@geekscrapy](https://github.com/geekscrapy): For bug fixes and the safe header feature addition 28 | -------------------------------------------------------------------------------- /examples/load_zeek_log_csv.py: -------------------------------------------------------------------------------- 1 | from parsezeeklogs import ParseZeekLogs 2 | 3 | log_iterator = ParseZeekLogs("conn.log", output_format="csv", safe_headers=False) 4 | # Print the field line out 5 | print(log_iterator.get_fields()) 6 | for log_record in log_iterator: 7 | if log_record is not None: 8 | print(str(log_record)) -------------------------------------------------------------------------------- /examples/load_zeek_log_into_elk.py: -------------------------------------------------------------------------------- 1 | from parsezeeklogs import ParseZeekLogs 2 | 3 | ParseZeekLogs.batch_to_elk("http.log", meta={"source": "http"}) 4 | -------------------------------------------------------------------------------- /examples/load_zeek_log_json.py: -------------------------------------------------------------------------------- 1 | from parsezeeklogs import ParseZeekLogs 2 | 3 | # Print the field line out 4 | for log_record in ParseZeekLogs("conn.log", output_format="json", safe_headers=False): 5 | if log_record is not None: 6 | print(str(log_record)) 7 | -------------------------------------------------------------------------------- /examples/zeek_to_csv.py: -------------------------------------------------------------------------------- 1 | from parsezeeklogs import ParseZeekLogs 2 | 3 | with open('out.csv',"w") as outfile: 4 | for log_record in ParseZeekLogs("conn.log", output_format="csv", safe_headers=False, fields=["ts","id.orig_h","id.orig_p","id.resp_h","id.resp_p"]): 5 | if log_record is not None: 6 | outfile.write(log_record + "\n") 7 | -------------------------------------------------------------------------------- /examples/zeek_to_json.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # File: zeek_to_json.py 4 | # Description: Convert multiple bro files to json 5 | # 6 | # Usage: 7 | # ./zeek_to_json.py *.log 8 | # ^^ will convert all zeek logs in this directory to same filename + .json (example: conn.log -> conn.json) 9 | 10 | from parsezeeklogs import ParseZeekLogs 11 | import argparse 12 | import os 13 | 14 | if __name__ == "__main__": 15 | # Create argument parser 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('zeekfile', nargs='+', help="Zeek file file to parse") 18 | 19 | # Parse arguments 20 | args = parser.parse_args() 21 | 22 | # Loop through each Bro file and convert to json 23 | for zeek_file in args.zeekfile: 24 | outfilename = os.path.splitext(zeek_file)[0] + '.json' 25 | 26 | #log_data = ParseBroLogs(bro_file) 27 | with open(outfilename, "w") as outfile: 28 | for log_data in ParseZeekLogs(zeek_file, output_format="json"): 29 | outfile.write(log_data) 30 | -------------------------------------------------------------------------------- /parsezeeklogs/__init__.py: -------------------------------------------------------------------------------- 1 | from json import loads, dumps 2 | from collections import OrderedDict 3 | from elasticsearch import Elasticsearch, helpers 4 | from datetime import datetime 5 | from traceback import print_exc 6 | 7 | 8 | class ParseZeekLogs(object): 9 | """Class that parses Zeek logs and allows log data to be output in CSV or json format. 10 | 11 | Attributes: 12 | filepath: Path of Zeek log file to read 13 | 14 | """ 15 | 16 | def __init__(self, filepath, batchsize=500, fields=None, output_format=None, ignore_keys=[], meta={}, safe_headers=False): 17 | self.fd = open(filepath,"r") 18 | self.options = OrderedDict() 19 | self.firstRun = True 20 | self.filtered_fields = fields 21 | self.batchsize = batchsize 22 | self.output_format = output_format 23 | self.ignore_keys = ignore_keys 24 | self.meta = meta 25 | self.safe_headers = safe_headers 26 | 27 | # Convert ' to " in meta string 28 | meta = loads(dumps(meta).replace("'", '"')) 29 | 30 | # Read the header option lines 31 | l = self.fd.readline().strip("\n") 32 | while l.strip().startswith("#"): 33 | # Parse the options out 34 | if l.startswith("#separator"): 35 | key = str(l[1:].split(" ")[0]) 36 | value = str.encode(l[1:].split(" ")[1].strip()).decode('unicode_escape') 37 | self.options[key] = value 38 | elif l.startswith("#"): 39 | key = str(l[1:].split(self.options.get('separator'))[0]) 40 | value = l[1:].split(self.options.get('separator'))[1:] 41 | self.options[key] = value 42 | 43 | # Read the next line 44 | l = self.fd.readline().strip("\n") 45 | 46 | self.firstLine = l 47 | 48 | # Save mapping of fields to values: 49 | self.fields = self.options.get('fields') 50 | self.types = self.options.get('types') 51 | 52 | # Convert field names if safe_headers is enabled 53 | #if self.safe_headers is True: 54 | # for i, val in enumerate(self.fields): 55 | # self.fields[i] = self.fields[i].replace(".", "_") 56 | 57 | self.data_types = {} 58 | for i, val in enumerate(self.fields): 59 | # Convert field names if safe_headers is enabled 60 | if self.safe_headers is True: 61 | self.fields[i] = self.fields[i].replace(".", "_") 62 | 63 | # Match types with each other 64 | self.data_types[self.fields[i]] = self.types[i] 65 | 66 | def __del__(self): 67 | self.fd.close() 68 | 69 | def __iter__(self): 70 | return self 71 | 72 | def __next__(self): 73 | retVal = "" 74 | if self.firstRun is True: 75 | retVal = self.firstLine 76 | self.firstRun = False 77 | else: 78 | retVal = self.fd.readline().strip("\n") 79 | 80 | # If an empty string is returned, readline is done reading 81 | if retVal == "" or retVal is None: 82 | raise StopIteration 83 | 84 | # Split out the data we are going to return 85 | retVal = retVal.split(self.options.get('separator')) 86 | 87 | record = None 88 | # Make sure we aren't dealing with a comment line 89 | if len(retVal) > 0 and not str(retVal[0]).strip().startswith("#") \ 90 | and len(retVal) is len(self.options.get("fields")): 91 | record = OrderedDict() 92 | # Prepare fields for conversion 93 | for x in range(0, len(retVal)): 94 | if self.safe_headers is True: 95 | converted_field_name = self.options.get("fields")[x].replace(".", "_") 96 | else: 97 | converted_field_name = self.options.get("fields")[x] 98 | if self.filtered_fields is None or converted_field_name in self.filtered_fields: 99 | # Translate - to "" to fix a conversation error 100 | if retVal[x] == "-": 101 | retVal[x] = "" 102 | # Save the record field if the field isn't filtered out 103 | record[converted_field_name] = retVal[x] 104 | 105 | # Convert values to the appropriate record type 106 | record = self.convert_values(record, self.ignore_keys, self.data_types) 107 | 108 | if record is not None and self.output_format == "json": 109 | # Output will be json 110 | 111 | # Add metadata to json 112 | for k, v in self.meta.items(): 113 | record[k] = v 114 | 115 | retVal = dumps(record) 116 | elif record is not None and self.output_format == "csv": 117 | retVal = "" 118 | # Add escaping to csv format 119 | for k, v in record.items(): 120 | # Add escaping to string values 121 | if isinstance(v, str): 122 | retVal += str("\"" + str(v).strip() + "\"" + ",") 123 | else: 124 | retVal += str(str(v).strip() + ",") 125 | # Remove the trailing comma 126 | retVal = retVal[:-1] 127 | else: 128 | retVal = None 129 | 130 | return retVal 131 | 132 | def convert_values(self, data, ignore_keys=[], data_types={}): 133 | keys_to_delete = [] 134 | for k, v in data.items(): 135 | # print("evaluating k: " + str(k) + " v: " + str(v)) 136 | 137 | if isinstance(v, dict): 138 | data[k] = self.convert_values(v) 139 | else: 140 | if data_types.get(k) is not None: 141 | if (data_types.get(k) == "port" or data_types.get(k) == "count"): 142 | if v != "": 143 | data[k] = int(v) 144 | else: 145 | keys_to_delete.append(k) 146 | elif (data_types.get(k) == "double" or data_types.get(k) == "interval"): 147 | if v != "": 148 | data[k] = float(v) 149 | else: 150 | keys_to_delete.append(k) 151 | elif data_types.get(k) == "bool": 152 | data[k] = bool(v) 153 | else: 154 | data[k] = v 155 | 156 | for k in keys_to_delete: 157 | del data[k] 158 | 159 | return data 160 | 161 | def get_fields(self): 162 | """Returns all fields present in the log file 163 | 164 | Returns: 165 | A python list containing all field names in the log file 166 | """ 167 | field_names = "" 168 | if self.output_format == "csv": 169 | for i, v in enumerate(self.fields): 170 | if self.filtered_fields is None or v in self.filtered_fields: 171 | field_names += str(v) + "," 172 | # Remove the trailing comma 173 | field_names = field_names[:-1].strip() 174 | else: 175 | field_names = [] 176 | for i, v in enumerate(self.fields): 177 | if self.filtered_fields is None or v in self.filtered_fields: 178 | field_names.append(v) 179 | return field_names 180 | 181 | @staticmethod 182 | def bulk_to_elasticsearch(es, bulk_queue): 183 | try: 184 | helpers.bulk(es, bulk_queue) 185 | return True 186 | except: 187 | print(print_exc()) 188 | return False 189 | 190 | @staticmethod 191 | def batch_to_elk(filepath=None, batch_size=500, fields=None, elk_ip="127.0.0.1", index="zeeklogs", meta={}, 192 | ignore_keys=[]): 193 | # Create handle to ELK 194 | es = Elasticsearch([elk_ip]) 195 | 196 | # Create a handle to the log data 197 | dataHandle = ParseZeekLogs(filepath, fields=fields, output_format="json", meta=meta) 198 | 199 | # Begin to process and output data 200 | dataBatch = [] 201 | for record in dataHandle: 202 | try: 203 | record = loads(record) 204 | 205 | if isinstance(record, dict): 206 | record["_index"] = index 207 | record["_type"] = index 208 | try: 209 | record['timestamp'] = datetime.utcfromtimestamp(float(record['ts'])).isoformat() 210 | except: 211 | pass 212 | 213 | dataBatch.append(record) 214 | 215 | if len(dataBatch) >= batch_size: 216 | # Batch the queue to ELK 217 | # print("Batching to elk: " + str(len(dataBatch))) 218 | dataHandle.bulk_to_elasticsearch(es, dataBatch) 219 | # Clear the data queue 220 | dataBatch = [] 221 | except: 222 | pass 223 | 224 | # Batch the final data to ELK 225 | # print("Batching final data to elk: " + str(len(dataBatch))) 226 | dataHandle.bulk_to_elasticsearch(es, dataBatch) 227 | # Clear the data queue 228 | dataBatch = [] 229 | 230 | def __str__(self): 231 | return dumps(self.data) 232 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | setup( 3 | name='parsezeeklogs', 4 | version='2.0.1', 5 | description='A lightweight utility for programmatically reading and manipulating Zeek IDS (Bro IDS) log files and outputting into JSON or CSV format.', 6 | author='Dan Gunter', 7 | author_email='dangunter@gmail.com', 8 | url='https://github.com/dgunter/parsezeeklogs', 9 | packages=find_packages(include=['parsezeeklogs', 'parsezeeklogs.*']), 10 | install_requires=[ 11 | 'elasticsearch==7.16.1' 12 | ], 13 | ) 14 | 15 | --------------------------------------------------------------------------------