"""Log Extractor.

A tool to extract Windows Event Logs into a reasonably usable JSON format for
use with Elasticsearch, jq, grep, whatever.

Tested on Windows 10 and Windows 7.

usage: log_extract.exe [-h] [-g] -o OUTPUT [-v] [-e]

optional arguments:
  -h, --help            show this help message and exit
  -g, --gzip            Compress with GZIP
  -o OUTPUT, --output OUTPUT
                        Output Directory or ES server path
                        (http://username:password@host:port)
  -v, --version         show program's version number and exit
  -e, --elastic         output to elasticsearch

Without -e one file per event channel is written to the output directory,
one JSON document per line.
"""

import argparse
import ctypes
import gzip
import json
import os
import subprocess
import sys
import xml.etree as etree  # noqa: F401  (kept: pre-existing file-level import)

# The third-party dependencies are imported defensively so that the pure
# helpers (e.g. log_normalise) stay importable where a dependency is missing;
# the functions that really need one call _require() and raise ImportError.
try:
    import pywintypes  # noqa: F401
    import win32con  # noqa: F401
    import win32evtlog
    import win32evtlogutil  # noqa: F401
    import winerror  # noqa: F401
except ImportError:
    pywintypes = win32con = win32evtlog = win32evtlogutil = winerror = None

try:
    import xmltodict
except ImportError:
    xmltodict = None

try:
    from elasticsearch import AuthenticationException, Elasticsearch
    from elasticsearch.helpers import bulk
except ImportError:
    AuthenticationException = Elasticsearch = bulk = None

version = '0.3'
apptitle = 'Log Collector'
date = '29/03/2022'
author = 'Chris Basnett (chris.basnett@mdsec.co.uk)'

SYSMON_CHANNEL = "Microsoft-Windows-Sysmon/Operational"
SECURITY_CHANNEL = 'Security'
PROCESS_CREATION_EVENT_ID = 4688
DEFAULT_MESSAGE = "The Description for this event could not be found"


def _require(dependency, package):
    """Raise ImportError when an optional dependency failed to import."""
    if dependency is None:
        raise ImportError(
            "{0} is required for this operation (pip install {0})".format(package)
        )


def _to_int(value, default=0):
    """Convert *value* to int, returning *default* for None or junk."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def is_admin():
    '''Check if we're being run as an admin'''
    try:
        return bool(ctypes.windll.shell32.IsUserAnAdmin())
    except Exception:
        # ctypes.windll is missing off Windows; treat that as "not admin".
        return False


def _drain(next_item, handle):
    """Call next_item(handle) until it returns None; return the items."""
    items = []
    item = next_item(handle)
    while item is not None:
        items.append(item)
        item = next_item(handle)
    return items


def get_all_channels():
    """Return the paths of every event log channel registered on the host."""
    _require(win32evtlog, 'pywin32')
    return _drain(win32evtlog.EvtNextChannelPath,
                  win32evtlog.EvtOpenChannelEnum())


def get_all_publishers():
    """Return the ids of every event publisher registered on the host."""
    _require(win32evtlog, 'pywin32')
    return _drain(win32evtlog.EvtNextPublisherId,
                  win32evtlog.EvtOpenPublisherEnum())


def _parse_sysmon_message(message):
    """Turn the 'Key: value' lines of a Sysmon message into a dict."""
    fields = {}
    for item in message.split('\r\n'):
        # Split on the first separator only: values such as command lines
        # may themselves contain ': ' and must not be thrown away.
        key, sep, value = item.partition(': ')
        if sep:
            fields[key] = value
    return fields


def _parse_process_audit(message):
    """Extract the 'Process Information' fields of a 4688 audit message."""
    marker = 'Process Information:'
    fields = {}
    for section in message.split('\r\n\r\n'):
        if marker not in section:
            continue
        for field in section.split(marker)[1].split('\r\n\t'):
            key, sep, value = field.partition(':\t')
            if sep:
                fields[key.replace(' ', '')] = value.strip('\t')
    return fields


def log_normalise(line):
    """Map a parse_event() result onto the flat schema sent to Elasticsearch.

    line: dict with an 'Event' dict (the event's system fields) and a
        'Message' string.

    Returns a dict with 'Provider', 'Event' and 'Meta' sections, or None when
    the event carries neither a provider nor a creation time.
    """
    record = {
        "Provider": {
            "Name": "",
            "GUID": ""
        },
        "Event": {
            "ID": 0,
            "Version": 0,
            "Level": 0,
            "Created": "",
            "Channel": "",
            "Message": "",
            "Computer": "",
            "UserID": ""
        },
        "Meta": {}
    }

    event = line.get('Event') or {}
    # Basic sanity check: nothing useful can be said about such an event.
    if 'Provider' not in event and 'TimeCreated' not in event:
        return None

    provider = event.get('Provider')
    if isinstance(provider, str):
        record['Provider']['Name'] = provider
    elif isinstance(provider, dict):
        record['Provider']['Name'] = provider.get('Name', '')
        record['Provider']['GUID'] = provider.get('Guid', '')

    event_id = event.get('EventID')
    if isinstance(event_id, dict):
        # <EventID Qualifiers="..">1234</EventID>: the id is the element
        # text; the Qualifiers attribute is not the event id.
        event_id = event_id.get('#text')
    record['Event']['ID'] = _to_int(event_id)
    record['Event']['Version'] = _to_int(event.get('Version'))
    record['Event']['Level'] = _to_int(event.get('Level'))

    time_created = event.get('TimeCreated')
    if isinstance(time_created, dict):
        record['Event']['Created'] = time_created.get('SystemTime', '')

    channel = event.get('Channel') or ''
    record['Event']['Channel'] = channel
    if 'Computer' in event:
        record['Event']['Computer'] = event['Computer']

    security = event.get('Security')
    if isinstance(security, dict):
        record['Event']['UserID'] = security.get('UserID') or ''

    message = line.get('Message') or ''
    record['Event']['Message'] = message

    execution = event.get('Execution')
    if isinstance(execution, dict):
        for key in ('ProcessID', 'ThreadID'):
            if key in execution:
                record['Meta'][key] = _to_int(execution[key])

    # Sysmon specific formatting
    if channel == SYSMON_CHANNEL:
        record['Meta']['Sysmon'] = _parse_sysmon_message(message)

    # Security process audit specific formatting (process creation only)
    if (channel == SECURITY_CHANNEL
            and record['Event']['ID'] == PROCESS_CREATION_EVENT_ID):
        record['Meta']['Audit'] = _parse_process_audit(message)

    return record


def get_logs(channel=None, normalise=False):
    """Yield every event of *channel* as a dict.

    channel: channel path to read; the 'Security' channel when None.
    normalise: when true each event is passed through log_normalise() and
        events it rejects (None) are skipped.

    Errors raised by the event log API while opening or reading the channel
    propagate to the caller on iteration.
    """
    _require(win32evtlog, 'pywin32')
    query = win32evtlog.EvtQuery(
        SECURITY_CHANNEL if channel is None else channel,
        win32evtlog.EvtQueryChannelPath, '*', None)
    # The render context does not depend on the event: build it once.
    context = win32evtlog.EvtCreateRenderContext(
        win32evtlog.EvtRenderContextSystem)
    # In case we want to save off our location to avoid grabbing all logs
    # every time.
    bookmark = win32evtlog.EvtCreateBookmark()
    publishers = {}  # provider name -> metadata handle (None if unavailable)

    while True:
        events = win32evtlog.EvtNext(query, 100, -1, 0)
        if not events:
            break
        for event in events:
            values = win32evtlog.EvtRender(
                event, win32evtlog.EvtRenderEventValues, Context=context)
            provider_name = values[win32evtlog.EvtSystemProviderName][0]
            if provider_name not in publishers:
                try:
                    publishers[provider_name] = (
                        win32evtlog.EvtOpenPublisherMetadata(provider_name))
                except Exception:
                    # Best effort: the message falls back to the default.
                    publishers[provider_name] = None

            try:
                message = win32evtlog.EvtFormatMessage(
                    publishers[provider_name], event,
                    win32evtlog.EvtFormatMessageEvent)
            except Exception:
                message = DEFAULT_MESSAGE

            event_xml = win32evtlog.EvtRender(
                event, win32evtlog.EvtRenderEventXml)
            win32evtlog.EvtUpdateBookmark(bookmark, event)

            parsed_event = parse_event([event_xml, message])
            if normalise:
                parsed_event = log_normalise(parsed_event)
                if parsed_event is None:
                    continue
            yield parsed_event


def _flatten_section(section):
    """Strip xmltodict's '@' attribute prefix from a section's child dicts."""
    flat = {}
    for key, value in section.items():
        if key == '':
            continue
        if not isinstance(value, dict):
            flat[key] = value
            continue
        children = {}
        for child, child_value in value.items():
            if child == '':
                continue
            # '@Name' -> 'Name'; other keys (notably '#text', the element
            # text of an attributed element) are kept under their own name.
            name = child.split('@')[1] if '@' in child else child
            children[name] = child_value
        flat[key] = children
    return flat


def parse_event(event):
    """Convert a rendered event into a plain dict.

    event: a two item sequence ``[event_xml, message]``.

    Returns a dict with 'Event' (the flattened non-data sections, normally
    just <System>), 'EventData' and/or 'UserData' as produced by xmltodict
    when present, and 'Message'.
    """
    _require(xmltodict, 'xmltodict')
    event_xml, message = event
    # The JSON round trip turns whatever mapping xmltodict returns into
    # plain dicts.
    data = json.loads(json.dumps(xmltodict.parse(event_xml)))['Event']
    data.pop('@xmlns', None)  # Get rid of the xml schema attribute

    evt = {}
    for section, content in data.items():
        if section in ('EventData', 'UserData'):
            evt[section] = content
        elif isinstance(content, dict):
            fields = evt.setdefault('Event', {})
            for name, value in _flatten_section(content).items():
                # First section wins so <System> fields are never clobbered.
                fields.setdefault(name, value)
    evt['Message'] = message
    return evt


def parse(args):
    """Export every channel to Elasticsearch or to one file per channel.

    args: parsed command line namespace providing ``output``, ``elastic``
        and ``gzip``.

    A channel that cannot be read is reported and skipped. An Elasticsearch
    authentication failure terminates the program.
    """
    _require(xmltodict, 'xmltodict')
    client = None
    if args.elastic:  # If we're telling it the output should be elastic
        _require(Elasticsearch, 'elasticsearch')
        client = Elasticsearch(args.output)
    else:
        os.makedirs(args.output, exist_ok=True)

    for channel in get_all_channels():
        print("Processing: {}".format(channel))

        if args.elastic:
            try:
                bulk(client, get_logs(channel, normalise=True),
                     index='log_extract')
            except AuthenticationException:
                print("Problem with Authentication, are you using the correct credentials?")
                sys.exit(1)
            except Exception as exc:
                print(exc)
            continue

        path = os.path.join(args.output, channel.replace('/', '_'))
        if args.gzip:
            opener, suffix = gzip.open, '.gz'
        else:
            opener, suffix = open, '.log'
        try:
            with opener(path + suffix, 'wb') as handle:
                for record in get_logs(channel):
                    # One JSON document per line (usable with jq and friends).
                    handle.write(json.dumps(record).encode('utf-8'))
                    handle.write(b'\n')
        except Exception as exc:
            # Same policy as the Elasticsearch branch: report and carry on.
            print(exc)


def _relaunch_elevated():
    """Restart the program through the UAC 'runas' verb."""
    if getattr(sys, 'frozen', False):
        # A frozen executable is its own interpreter.
        argv = sys.argv[1:]
    else:
        # Run through python.exe: the script path has to be passed along.
        argv = [os.path.abspath(sys.argv[0])] + sys.argv[1:]
    ctypes.windll.shell32.ShellExecuteW(
        None, u"runas", sys.executable, subprocess.list2cmdline(argv), None, 1)


def main():
    """Command line entry point."""
    parser = argparse.ArgumentParser(add_help=True, description=apptitle)
    parser.version = version
    parser.add_argument('-g', '--gzip', action="store_true", help='Compress with GZIP')
    parser.add_argument('-o', '--output', action='store', type=str, help='Output Directory or ES server path (http://username:password@host:port)', required=True)
    parser.add_argument('-v', '--version', action='version')
    parser.add_argument('-e', '--elastic', action='store_true', help='output to elasticsearch')

    args = parser.parse_args()
    os.system('cls')
    print(apptitle)
    print("Version {}\n".format(version))
    print(author)
    print("\n")

    if not args.output:
        parser.print_help()
    elif is_admin():
        parse(args)
    else:
        _relaunch_elevated()


if __name__ == '__main__':
    main()