├── LICENSE.txt
├── README.rst
├── apply_mapping.sh
├── clear_index.sh
├── evtxtoelk.py
└── requirements.txt

/LICENSE.txt:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Dan Gunter

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
EvtxtoElk
============

A lightweight tool to load Windows Event Log (evtx) files into Elasticsearch.

Examples
--------

We wrote a blog post on basic usage here: https://dragos.com/blog/20180717EvtxToElk.html

More details will be posted here shortly.
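
Usage
-----

A minimal sketch of an invocation (the input file name is hypothetical, and a
local Elasticsearch reachable at ``localhost:9200`` is assumed)::

    python evtxtoelk.py security.evtx localhost

Optional flags: ``-i`` picks the target index (default ``hostlogs``), ``-s``
sets the bulk queue size (default 500), and ``-meta`` attaches a JSON string
of metadata to every record.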
--------------------------------------------------------------------------------
/apply_mapping.sh:
--------------------------------------------------------------------------------
curl -X PUT http://localhost:9200/hostlogs -H 'Content-Type: application/json' -d '{
  "mappings": {
    "hostlogs": {
      "properties": {
        "Event.System.TimeCreated.@SystemTime": {
          "type": "date"
        }
      }
    }
  }
}'
--------------------------------------------------------------------------------
/clear_index.sh:
--------------------------------------------------------------------------------
curl -X DELETE http://localhost:9200/hostlogs
--------------------------------------------------------------------------------
/evtxtoelk.py:
--------------------------------------------------------------------------------
import contextlib
import mmap
import traceback
import json
import argparse
import sys
from collections import OrderedDict
from datetime import datetime

from Evtx.Evtx import FileHeader
from Evtx.Views import evtx_file_xml_view
from elasticsearch import Elasticsearch, helpers
import xmltodict


class EvtxToElk:
    @staticmethod
    def bulk_to_elasticsearch(es, bulk_queue):
        try:
            helpers.bulk(es, bulk_queue)
            return True
        except Exception:
            traceback.print_exc()
            return False

    @staticmethod
    def evtx_to_elk(filename, elk_ip, elk_index="hostlogs", bulk_queue_len_threshold=500, metadata=None):
        # Avoid the mutable-default-argument pitfall for metadata
        if metadata is None:
            metadata = {}
        bulk_queue = []
        es = Elasticsearch([elk_ip])
        # evtx files are binary, so open in binary mode before memory-mapping
        with open(filename, 'rb') as infile:
            with contextlib.closing(mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
                fh = FileHeader(buf, 0x0)
                for xml, record in evtx_file_xml_view(fh):
                    try:
                        contains_event_data = False
                        log_line = xmltodict.parse(xml)

                        # Format the date field
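                        # (@SystemTime may or may not carry fractional seconds;
                        # the "." check below picks the matching strptime format)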
                        date = log_line.get("Event").get("System").get("TimeCreated").get("@SystemTime")
                        if "." not in str(date):
                            date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
                        else:
                            date = datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f")
                        log_line['@timestamp'] = date.isoformat()
                        log_line["Event"]["System"]["TimeCreated"]["@SystemTime"] = date.isoformat()

                        # Process the data field to be searchable
                        data = ""
                        if log_line.get("Event") is not None:
                            data = log_line.get("Event")
                            if log_line.get("Event").get("EventData") is not None:
                                data = log_line.get("Event").get("EventData")
                                if log_line.get("Event").get("EventData").get("Data") is not None:
                                    data = log_line.get("Event").get("EventData").get("Data")
                                    if isinstance(data, list):
                                        # Flatten the list of Data elements into a
                                        # name -> value dict so fields are searchable
                                        contains_event_data = True
                                        data_vals = {}
                                        for dataitem in data:
                                            try:
                                                if dataitem.get("@Name") is not None:
                                                    data_vals[str(dataitem.get("@Name"))] = str(dataitem.get("#text"))
                                            except Exception:
                                                pass
                                        log_line["Event"]["EventData"]["Data"] = data_vals
                                    else:
                                        # Single Data element: store it under RawData
                                        if isinstance(data, OrderedDict):
                                            log_line["Event"]["EventData"]["RawData"] = json.dumps(data)
                                        else:
                                            log_line["Event"]["EventData"]["RawData"] = str(data)
                                        del log_line["Event"]["EventData"]["Data"]
                                else:
                                    # EventData without a Data child
                                    if isinstance(data, OrderedDict):
                                        log_line["Event"]["RawData"] = json.dumps(data)
                                    else:
                                        log_line["Event"]["RawData"] = str(data)
                                    del log_line["Event"]["EventData"]
                            else:
                                # Event without EventData
                                if isinstance(data, OrderedDict):
                                    log_line = dict(data)
                                else:
                                    log_line["RawData"] = str(data)
                                    del log_line["Event"]

                        # Deep-copy via a JSON round-trip, then tag the record with its
                        # destination index (_type mirrors the index name, matching the
                        # pre-7.x mapping in apply_mapping.sh) and the caller's metadata
                        event_data = json.loads(json.dumps(log_line))
                        event_data["_index"] = elk_index
                        event_data["_type"] = elk_index
                        event_data["meta"] = metadata
                        bulk_queue.append(event_data)

                        if len(bulk_queue) == bulk_queue_len_threshold:
                            print('Bulking records to ES: ' + str(len(bulk_queue)))
                            # Start bulk indexing to Elasticsearch (default 500-record chunks)
                            if EvtxToElk.bulk_to_elasticsearch(es, bulk_queue):
                                bulk_queue = []
                            else:
                                print('Failed to bulk data to Elasticsearch')
                                sys.exit(1)

                    except Exception:
                        print("***********")
                        print("Parsing Exception")
                        traceback.print_exc()
                        print(json.dumps(log_line, indent=2))
                        print("***********")

                # Flush any records remaining in the bulk queue
                if len(bulk_queue) > 0:
                    print('Bulking final set of records to ES: ' + str(len(bulk_queue)))
                    if EvtxToElk.bulk_to_elasticsearch(es, bulk_queue):
                        bulk_queue = []
                    else:
                        print('Failed to bulk data to Elasticsearch')
                        sys.exit(1)


if __name__ == "__main__":
    # Create argument parser
    parser = argparse.ArgumentParser()
    # Add arguments
    parser.add_argument('evtxfile', help="Evtx file to parse")
    parser.add_argument('elk_ip', nargs='?', default="localhost", help="IP (and port) of ELK instance")
    parser.add_argument('-i', default="hostlogs", help="ELK index to load data into")
    parser.add_argument('-s', default=500, help="Size of the bulk queue")
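    # -meta takes a JSON string, e.g. (hypothetical): -meta '{"case_id": "1234"}'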
    parser.add_argument('-meta', default={}, type=json.loads, help="Metadata to add to records")
    # Parse arguments and call the EvtxToElk converter
    args = parser.parse_args()
    EvtxToElk.evtx_to_elk(args.evtxfile, args.elk_ip, elk_index=args.i, bulk_queue_len_threshold=int(args.s), metadata=args.meta)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
python-evtx
elasticsearch
xmltodict
--------------------------------------------------------------------------------