├── LICENSE ├── README.md └── dnsplice_v1.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 nerdiosity 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # DNSplice was created by Shelly Giesbrecht (nerdiosity) to assist incident responders 3 | # to quickly and easily parse client query events from ugly DNS logs for 4 | # Microsoft Windows 2003/2008R2 (DNS debug log) to Windows 2012R2/2016 (DNS Analytical 5 | # log) into a format (CSV) suitable for additional analysis or insertion into a larger 6 | # timeline. 
7 | # version: DNSplice v1.0 8 | # date of release: June 8, 2018 9 | # 10 | # This project was created in answer to a problem encountered by me over years of doing 11 | # IR, and as a way of learning to code. Comments or suggestions are greatly appreciated. 12 | # email: info@nerdiosity.com twitter: @nerdiosity 13 | # github: https://github.com/nerdiosity/DNSplice 14 | ######################################################################################## 15 | 16 | Requirements: 17 | DNSplice uses the requests module for python. This will need to be installed to run. 18 | command: pip install requests 19 | 20 | To run: 21 | At command prompt: python dnsplice_v1.py -i -v -t 22 | 23 | Options: 24 | -i, --input : DNS log filename (REQUIRED) 25 | -v, --vtkey : VirusTotal API key (OPTIONAL) 26 | -t, --tgkey : Cisco ThreatGrid API key (OPTIONAL) 27 | 28 | Output: 29 | DNS logs are parsed to include timedatestamp, client IP, uri requested, and domain, and are outputted automagically to output.csv 30 | in the directory DNSplice is run from. 31 | VirusTotal Domain Report lookups are performed every 20 seconds (3/min) and are outputted to vt_output.csv. For large files, this 32 | may take some time. 33 | ThreatGrid Lookups are limited to 50 lookups per day. Top ten most and least requested domains are requested from ThreatGrid, and 34 | are outputted to tg_output.csv. Requests are made every 20 seconds. 
#!/usr/bin/env python
#######################################################################################
# DNSplice was created by Shelly Giesbrecht (nerdiosity) to assist incident responders
# to quickly and easily parse client query events from ugly DNS logs for
# Microsoft Windows 2003/2008R2 (DNS debug log) to Windows 2012R2/2016 (DNS Analytical
# log) into a format (CSV) suitable for additional analysis or insertion into a larger
# timeline.
# version: DNSplice v1.0
# date of release: June 8, 2018
#
# This project was created in answer to a problem encountered by me over years of doing
# IR, and as a way of learning to code. Comments or suggestions are greatly appreciated.
# email: info@nerdiosity.com twitter: @nerdiosity
# github: https://github.com/nerdiosity/DNSplice
########################################################################################
import argparse
import csv
import datetime
import re
import sys
import time
from collections import Counter

# "requests" is only needed for the OPTIONAL VirusTotal / ThreatGrid lookups, so a
# missing install no longer aborts plain log parsing ("pip install requests").
try:
    import requests
except ImportError:
    requests = None  # main() refuses to run lookups without it

# Column headers shared by every output.csv row regardless of source log format.
OUTPUT_FIELDS = ['DateTime', 'ClientIP', 'URIQuery', 'Domain']

# Lines of interest in Windows 2003-2008R2 DNS debug logs contain "PACKET".
PACKET_RE = re.compile(r'(.*) PACKET (.*?) .*', re.M | re.I)
# Windows 2003 debug lines start with a YYYYMMDD date ...
WIN2003_DATE_RE = re.compile(r'^\d\d\d\d\d\d\d\d')
# ... while Windows 2008R2 debug lines start with MM/DD/YYYY.
WIN2008_DATE_RE = re.compile(r'^\d{1,2}/\d{1,2}/\d{4}')
# RFC1918 (plus loopback) prefixes: only internal query clients are reported.
RFC1918_RE = re.compile(
    r'^(10\.\d{1,3}|192\.168|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|127\.0\.0\.1)')

# Windows FILETIME is 100ns ticks since 1601-01-01; these convert it to Unix time.
FILETIME_EPOCH_OFFSET = 116444736000000000
FILETIME_TICKS_PER_SECOND = 10000000

TG_SEARCH_URL = 'https://panacea.threatgrid.com/api/v2/search/submissions?api_key='
VT_DOMAIN_REPORT_URL = 'https://www.virustotal.com/vtapi/v2/domain/report'


def counts_to_dots(name):
    """Replace DNS wire-format label counts such as "(3)" with dots.

    Example: "(3)www(6)google(3)com(0)" -> ".www.google.com."
    """
    return re.sub(r"\(\d+\)", ".", name)


def last_two_labels(domain):
    """Return the last two dot-separated labels of *domain* (a registered-domain guess).

    A single-label name keeps v1.0's quirk of a leading dot (e.g. "localhost"
    -> ".localhost") so downstream output is byte-identical to the original.
    """
    labels = domain.split('.')
    second_level = labels[-2] if len(labels) >= 2 else ''
    return second_level + '.' + labels[-1]


def parse_win2003(line):
    """Parse one Windows 2003 DNS debug-log line.

    Returns (timestamp, client_ip, query_name, domain, is_query); is_query is
    False for responses (the 'R' direction flag in field 9).
    """
    fields = line.split()
    stamp = datetime.datetime.strptime(fields[0] + ' ' + fields[1], '%Y%m%d %H:%M:%S')
    client = fields[7].strip('[]')
    # Last field is the queried name in wire format; trim the leading/trailing dot.
    name = counts_to_dots(fields[-1].strip('[]'))[1:-1]
    return stamp, client, name, last_two_labels(name), fields[9] != 'R'


def parse_win2008(line):
    """Parse one Windows 2008R2 DNS debug-log line (same tuple as parse_win2003).

    The timestamp spans three fields (date, time, AM/PM) and the client IP and
    direction flag sit one field further right than in the 2003 format.
    """
    fields = line.split()
    stamp = datetime.datetime.strptime(' '.join(fields[:3]), '%m/%d/%Y %I:%M:%S %p')
    client = fields[8].strip('[]')
    name = counts_to_dots(fields[-1].strip('[]'))[1:-1]
    return stamp, client, name, last_two_labels(name), fields[10] != 'R'


def parse_analytical(line):
    """Parse one Windows 2012R2/2016 DNS Analytical log line.

    is_query is True only for event ID 256 (the client-query event).  The
    timestamp field is a Windows FILETIME integer; it is converted via the
    local timezone, matching v1.0's datetime.fromtimestamp behaviour.
    """
    fields = line.split()
    event_id = fields[3].strip(',')
    filetime = int(fields[17].strip(','))
    stamp = datetime.datetime.fromtimestamp(
        (filetime - FILETIME_EPOCH_OFFSET) // FILETIME_TICKS_PER_SECOND)
    client = fields[22].replace('"', '').strip(',')
    name = re.sub(r'\.\"\,', '', fields[24]).strip('"')
    return stamp, client, name, last_two_labels(name), event_id.startswith('256')


def parse_information(line):
    """Parse one "Information"-prefixed (CSV-exported Analytical) log line.

    Fields are separated by ';' or ','; Source=/Destination= and Zone=/QNAME=
    prefixes are stripped from the client and query-name fields.
    """
    fields = re.split(';|,', line)
    event_id = fields[3]
    stamp = datetime.datetime.strptime(fields[1], '%m/%d/%Y %I:%M:%S %p')
    client = re.sub(r'Source=|Destination=', '', fields[7])
    name = re.sub(r'Zone=|QNAME=', '', fields[9].strip('.'))
    return stamp, client, name, last_two_labels(name), event_id.startswith('256')


def parse_log(log_path, csv_path='output.csv'):
    """Parse every supported DNS log line in *log_path* into *csv_path*.

    Only query events (not responses) from RFC1918/loopback clients are
    written.  The CSV is opened once for the whole run — v1.0 re-opened it in
    append mode for every single row.  Returns (client_list, domain_list)
    for the statistics/lookup stages.
    """
    clients = []
    domains = []
    with open(csv_path, 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=OUTPUT_FIELDS)
        writer.writeheader()
        with open(log_path, 'r') as dns_file:
            for line in dns_file:
                record = None
                if PACKET_RE.search(line):
                    if WIN2003_DATE_RE.match(line):
                        record = parse_win2003(line)
                    elif WIN2008_DATE_RE.match(line):
                        record = parse_win2008(line)
                elif line.startswith('Microsoft-Windows-DNS-Server'):
                    record = parse_analytical(line)
                elif line.startswith('Information'):
                    record = parse_information(line)
                if record is None:
                    continue  # not a recognized DNS log line
                stamp, client, name, domain, is_query = record
                if is_query and RFC1918_RE.match(client):
                    writer.writerow({'DateTime': stamp, 'ClientIP': client,
                                     'URIQuery': name, 'Domain': domain})
                    clients.append(client)
                    domains.append(domain)
    return clients, domains


def report_stats(clients, domains):
    """Print summary statistics and return the domains for ThreatGrid lookups.

    Reports the ten clients making the most requests plus the ten most- and
    ten least-requested domains.  Returns the combined top/least domain names
    directly from the Counter tuples (replaces v1.0's multi-step regex
    string-mangling of str(list)).
    """
    print("#############################################################")
    print('DNSplice Statistics')
    print("#############################################################")
    print('# The top 10 requesting client IPs are:')
    print(str(Counter(clients).most_common(10)).strip("['']"))
    print("-------------------------------------------------------------")

    domain_counts = Counter(domains)
    top_ten = domain_counts.most_common(10)
    print('# top 10 requested domains are:')
    print(str(top_ten).strip("['']"))
    print("-------------------------------------------------------------")

    least_ten = domain_counts.most_common()[:-11:-1]
    print('# The top 10 least domains are:')
    print(str(least_ten).strip("['']"))
    print("#############################################################")
    print("#")
    print("#")

    return [name for name, _ in top_ten] + [name for name, _ in least_ten]


def run_virustotal(domains, api_key):
    """Write a VirusTotal domain report for each domain to vt_output.csv.

    VT public-API rate limits apply, so one lookup is made every 20 seconds
    (3/min); large domain lists take a while.  The raw JSON blob is stored
    as-is for later offline analysis.
    """
    print("#############################################################")
    print('# DNSplice - VirusTotal Domain Report')
    print("#############################################################")
    print('# Output is printed to vt_output.csv. One lookup is performed every 20 sec')
    print("#############################################################")
    print("#")
    print("#")

    with open('vt_output.csv', 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=['Domain', 'Domain Report'])
        writer.writeheader()
        for item in domains:
            response = requests.get(VT_DOMAIN_REPORT_URL,
                                    params={'apikey': api_key, 'domain': item})
            writer.writerow({'Domain': item, 'Domain Report': response.json()})
            print(item + " complete")
            print("-------------------------------------------------------------")
            time.sleep(20)


def run_threatgrid(domains, api_key):
    """Write a Cisco ThreatGrid submission search for each domain to tg_output.csv.

    ThreatGrid allows only 50 lookups per day, so callers pass just the ten
    most- and ten least-requested domains.
    """
    print("#############################################################")
    print('# DNSplice - Cisco ThreatGrid Domain Report')
    print("#############################################################")
    print('# Output is printed to tg_output.csv. One lookup is performed every 20 sec')
    print("#############################################################")
    print("#")
    print("#")

    with open('tg_output.csv', 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=['Domain', 'Domain Report'])
        writer.writeheader()
        for item in domains:
            response = requests.get(TG_SEARCH_URL + api_key + '&q=' + item)
            writer.writerow({'Domain': item, 'Domain Report': response.json()})
            print(item + " complete")
            print("-------------------------------------------------------------")
            # NOTE(review): v1.0 slept 15s here although the README and the banner
            # above both say 20s; the original interval is kept unchanged.
            time.sleep(15)


def main():
    """Command-line entry point: parse the log, print stats, run optional lookups."""
    parser = argparse.ArgumentParser(description='Add a filename')
    parser.add_argument('-i', '--input', help='Input a filename', required=True)  # DNS log to parse
    parser.add_argument('-v', '--vtkey', help='Input your VT API key', required=False)  # optional
    parser.add_argument('-t', '--tgkey', help='Input your TG API key', required=False)  # optional
    args = parser.parse_args()

    # Lookups need the third-party requests module; plain parsing does not.
    if requests is None and (args.vtkey is not None or args.tgkey is not None):
        sys.exit('The requests module is required for VT/TG lookups: pip install requests')

    print("#############################################################")
    print("# DNSplice v1 by nerdiosity")
    print("# Parse your ugly DNS logs!")
    print("#############################################################")
    print("#")
    print("#")

    clients, domains = parse_log(args.input)
    tg_domains = report_stats(clients, domains)

    if args.vtkey is not None:
        run_virustotal(list(set(domains)), args.vtkey)  # dedupe to save API calls
    else:
        print("#############################################################")
        print('DNSplice - VirusTotal Domain Report')
        print("#############################################################")
        print("No api key was given. VT domain reports will not be run")
        print("#############################################################")
        print("#")
        print("#")

    if args.tgkey is not None:
        run_threatgrid(tg_domains, args.tgkey)
    else:
        print("#############################################################")
        print('# DNSplice - Cisco ThreatGrid Domain Report')
        print("#############################################################")
        print("# No api key was given. TG domain reports will not be run")
        print("#############################################################")


if __name__ == '__main__':
    main()