├── LICENSE ├── README.md └── dnsplice_v1.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 nerdiosity 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # DNSplice was created by Shelly Giesbrecht (nerdiosity) to assist incident responders 3 | # to quickly and easily parse client query events from ugly DNS logs for 4 | # Microsoft Windows 2003/2008R2 (DNS debug log) to Windows 2012R2/2016 (DNS Analytical 5 | # log) into a format (CSV) suitable for additional analysis or insertion into a larger 6 | # timeline. 
7 | # version: DNSplice v1.0 8 | # date of release: June 8, 2018 9 | # 10 | # This project was created in answer to a problem encountered by me over years of doing 11 | # IR, and as a way of learning to code. Comments or suggestions are greatly appreciated. 12 | # email: info@nerdiosity.com twitter: @nerdiosity 13 | # github: https://github.com/nerdiosity/DNSplice 14 | ######################################################################################## 15 | 16 | Requirements: 17 | DNSplice uses the requests module for python. This will need to be installed to run. 18 | command: pip install requests 19 | 20 | To run: 21 | At command prompt: python dnsplice_v1.py -i -v -t 22 | 23 | Options: 24 | -i, --input : DNS log filename (REQUIRED) 25 | -v, --vtkey : VirusTotal API key (OPTIONAL) 26 | -t, --tgkey : Cisco ThreatGrid API key (OPTIONAL) 27 | 28 | Output: 29 | DNS logs are parsed to include timedatestamp, client IP, uri requested, and domain, and are outputted automagically to output.csv 30 | in the directory DNSplice is run from. 31 | VirusTotal Domain Report lookups are performed every 20 seconds (3/min) and are outputted to vt_output.csv. For large files, this 32 | may take some time. 33 | ThreatGrid Lookups are limited to 50 lookups per day. Top ten most and least requested domains are requested from ThreatGrid, and 34 | are outputted to tg_output.csv. Requests are made every 20 seconds. 
#!/usr/bin/env python
#######################################################################################
# DNSplice was created by Shelly Giesbrecht (nerdiosity) to assist incident responders
# to quickly and easily parse client query events from ugly DNS logs for
# Microsoft Windows 2003/2008R2 (DNS debug log) to Windows 2012R2/2016 (DNS Analytical
# log) into a format (CSV) suitable for additional analysis or insertion into a larger
# timeline.
# version: DNSplice v1.0
# date of release: June 8, 2018
#
# This project was created in answer to a problem encountered by me over years of doing
# IR, and as a way of learning to code. Comments or suggestions are greatly appreciated.
# email: info@nerdiosity.com twitter: @nerdiosity
# github: https://github.com/nerdiosity/DNSplice
########################################################################################
import argparse
import csv
import datetime
import re
import sys
import time
from collections import Counter

# "requests" is only needed for the OPTIONAL VirusTotal / ThreatGrid lookups, so a
# missing install no longer aborts plain log parsing ("pip install requests").
try:
    import requests
except ImportError:
    requests = None  # main() refuses to run lookups without it

# Column headers shared by every output.csv row regardless of source log format.
OUTPUT_FIELDS = ['DateTime', 'ClientIP', 'URIQuery', 'Domain']

# Lines of interest in Windows 2003-2008R2 DNS debug logs contain "PACKET".
PACKET_RE = re.compile(r'(.*) PACKET (.*?) .*', re.M | re.I)
# Windows 2003 debug lines start with a YYYYMMDD date ...
WIN2003_DATE_RE = re.compile(r'^\d\d\d\d\d\d\d\d')
# ... while Windows 2008R2 debug lines start with MM/DD/YYYY.
WIN2008_DATE_RE = re.compile(r'^\d{1,2}/\d{1,2}/\d{4}')
# RFC1918 (plus loopback) prefixes: only internal query clients are reported.
RFC1918_RE = re.compile(
    r'^(10\.\d{1,3}|192\.168|172\.1[6-9]|172\.2[0-9]|172\.3[0-1]|127\.0\.0\.1)')

# Windows FILETIME is 100ns ticks since 1601-01-01; these convert it to Unix time.
FILETIME_EPOCH_OFFSET = 116444736000000000
FILETIME_TICKS_PER_SECOND = 10000000

TG_SEARCH_URL = 'https://panacea.threatgrid.com/api/v2/search/submissions?api_key='
VT_DOMAIN_REPORT_URL = 'https://www.virustotal.com/vtapi/v2/domain/report'


def counts_to_dots(name):
    """Replace DNS wire-format label counts such as "(3)" with dots.

    Example: "(3)www(6)google(3)com(0)" -> ".www.google.com."
    """
    return re.sub(r"\(\d+\)", ".", name)


def last_two_labels(domain):
    """Return the last two dot-separated labels of *domain* (a registered-domain guess).

    A single-label name keeps v1.0's quirk of a leading dot (e.g. "localhost"
    -> ".localhost") so downstream output is byte-identical to the original.
    """
    labels = domain.split('.')
    second_level = labels[-2] if len(labels) >= 2 else ''
    return second_level + '.' + labels[-1]


def parse_win2003(line):
    """Parse one Windows 2003 DNS debug-log line.

    Returns (timestamp, client_ip, query_name, domain, is_query); is_query is
    False for responses (the 'R' direction flag in field 9).
    """
    fields = line.split()
    stamp = datetime.datetime.strptime(fields[0] + ' ' + fields[1], '%Y%m%d %H:%M:%S')
    client = fields[7].strip('[]')
    # Last field is the queried name in wire format; trim the leading/trailing dot.
    name = counts_to_dots(fields[-1].strip('[]'))[1:-1]
    return stamp, client, name, last_two_labels(name), fields[9] != 'R'


def parse_win2008(line):
    """Parse one Windows 2008R2 DNS debug-log line (same tuple as parse_win2003).

    The timestamp spans three fields (date, time, AM/PM) and the client IP and
    direction flag sit one field further right than in the 2003 format.
    """
    fields = line.split()
    stamp = datetime.datetime.strptime(' '.join(fields[:3]), '%m/%d/%Y %I:%M:%S %p')
    client = fields[8].strip('[]')
    name = counts_to_dots(fields[-1].strip('[]'))[1:-1]
    return stamp, client, name, last_two_labels(name), fields[10] != 'R'


def parse_analytical(line):
    """Parse one Windows 2012R2/2016 DNS Analytical log line.

    is_query is True only for event ID 256 (the client-query event).  The
    timestamp field is a Windows FILETIME integer; it is converted via the
    local timezone, matching v1.0's datetime.fromtimestamp behaviour.
    """
    fields = line.split()
    event_id = fields[3].strip(',')
    filetime = int(fields[17].strip(','))
    stamp = datetime.datetime.fromtimestamp(
        (filetime - FILETIME_EPOCH_OFFSET) // FILETIME_TICKS_PER_SECOND)
    client = fields[22].replace('"', '').strip(',')
    name = re.sub(r'\.\"\,', '', fields[24]).strip('"')
    return stamp, client, name, last_two_labels(name), event_id.startswith('256')


def parse_information(line):
    """Parse one "Information"-prefixed (CSV-exported Analytical) log line.

    Fields are separated by ';' or ','; Source=/Destination= and Zone=/QNAME=
    prefixes are stripped from the client and query-name fields.
    """
    fields = re.split(';|,', line)
    event_id = fields[3]
    stamp = datetime.datetime.strptime(fields[1], '%m/%d/%Y %I:%M:%S %p')
    client = re.sub(r'Source=|Destination=', '', fields[7])
    name = re.sub(r'Zone=|QNAME=', '', fields[9].strip('.'))
    return stamp, client, name, last_two_labels(name), event_id.startswith('256')


def parse_log(log_path, csv_path='output.csv'):
    """Parse every supported DNS log line in *log_path* into *csv_path*.

    Only query events (not responses) from RFC1918/loopback clients are
    written.  The CSV is opened once for the whole run — v1.0 re-opened it in
    append mode for every single row.  Returns (client_list, domain_list)
    for the statistics/lookup stages.
    """
    clients = []
    domains = []
    with open(csv_path, 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=OUTPUT_FIELDS)
        writer.writeheader()
        with open(log_path, 'r') as dns_file:
            for line in dns_file:
                record = None
                if PACKET_RE.search(line):
                    if WIN2003_DATE_RE.match(line):
                        record = parse_win2003(line)
                    elif WIN2008_DATE_RE.match(line):
                        record = parse_win2008(line)
                elif line.startswith('Microsoft-Windows-DNS-Server'):
                    record = parse_analytical(line)
                elif line.startswith('Information'):
                    record = parse_information(line)
                if record is None:
                    continue  # not a recognized DNS log line
                stamp, client, name, domain, is_query = record
                if is_query and RFC1918_RE.match(client):
                    writer.writerow({'DateTime': stamp, 'ClientIP': client,
                                     'URIQuery': name, 'Domain': domain})
                    clients.append(client)
                    domains.append(domain)
    return clients, domains


def report_stats(clients, domains):
    """Print summary statistics and return the domains for ThreatGrid lookups.

    Reports the ten clients making the most requests plus the ten most- and
    ten least-requested domains.  Returns the combined top/least domain names
    directly from the Counter tuples (replaces v1.0's multi-step regex
    string-mangling of str(list)).
    """
    print("#############################################################")
    print('DNSplice Statistics')
    print("#############################################################")
    print('# The top 10 requesting client IPs are:')
    print(str(Counter(clients).most_common(10)).strip("['']"))
    print("-------------------------------------------------------------")

    domain_counts = Counter(domains)
    top_ten = domain_counts.most_common(10)
    print('# top 10 requested domains are:')
    print(str(top_ten).strip("['']"))
    print("-------------------------------------------------------------")

    least_ten = domain_counts.most_common()[:-11:-1]
    print('# The top 10 least domains are:')
    print(str(least_ten).strip("['']"))
    print("#############################################################")
    print("#")
    print("#")

    return [name for name, _ in top_ten] + [name for name, _ in least_ten]


def run_virustotal(domains, api_key):
    """Write a VirusTotal domain report for each domain to vt_output.csv.

    VT public-API rate limits apply, so one lookup is made every 20 seconds
    (3/min); large domain lists take a while.  The raw JSON blob is stored
    as-is for later offline analysis.
    """
    print("#############################################################")
    print('# DNSplice - VirusTotal Domain Report')
    print("#############################################################")
    print('# Output is printed to vt_output.csv. One lookup is performed every 20 sec')
    print("#############################################################")
    print("#")
    print("#")

    with open('vt_output.csv', 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=['Domain', 'Domain Report'])
        writer.writeheader()
        for item in domains:
            response = requests.get(VT_DOMAIN_REPORT_URL,
                                    params={'apikey': api_key, 'domain': item})
            writer.writerow({'Domain': item, 'Domain Report': response.json()})
            print(item + " complete")
            print("-------------------------------------------------------------")
            time.sleep(20)


def run_threatgrid(domains, api_key):
    """Write a Cisco ThreatGrid submission search for each domain to tg_output.csv.

    ThreatGrid allows only 50 lookups per day, so callers pass just the ten
    most- and ten least-requested domains.
    """
    print("#############################################################")
    print('# DNSplice - Cisco ThreatGrid Domain Report')
    print("#############################################################")
    print('# Output is printed to tg_output.csv. One lookup is performed every 20 sec')
    print("#############################################################")
    print("#")
    print("#")

    with open('tg_output.csv', 'w') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=['Domain', 'Domain Report'])
        writer.writeheader()
        for item in domains:
            response = requests.get(TG_SEARCH_URL + api_key + '&q=' + item)
            writer.writerow({'Domain': item, 'Domain Report': response.json()})
            print(item + " complete")
            print("-------------------------------------------------------------")
            # NOTE(review): v1.0 slept 15s here although the README and the banner
            # above both say 20s; the original interval is kept unchanged.
            time.sleep(15)


def main():
    """Command-line entry point: parse the log, print stats, run optional lookups."""
    parser = argparse.ArgumentParser(description='Add a filename')
    parser.add_argument('-i', '--input', help='Input a filename', required=True)  # DNS log to parse
    parser.add_argument('-v', '--vtkey', help='Input your VT API key', required=False)  # optional
    parser.add_argument('-t', '--tgkey', help='Input your TG API key', required=False)  # optional
    args = parser.parse_args()

    # Lookups need the third-party requests module; plain parsing does not.
    if requests is None and (args.vtkey is not None or args.tgkey is not None):
        sys.exit('The requests module is required for VT/TG lookups: pip install requests')

    print("#############################################################")
    print("# DNSplice v1 by nerdiosity")
    print("# Parse your ugly DNS logs!")
    print("#############################################################")
    print("#")
    print("#")

    clients, domains = parse_log(args.input)
    tg_domains = report_stats(clients, domains)

    if args.vtkey is not None:
        run_virustotal(list(set(domains)), args.vtkey)  # dedupe to save API calls
    else:
        print("#############################################################")
        print('DNSplice - VirusTotal Domain Report')
        print("#############################################################")
        print("No api key was given. VT domain reports will not be run")
        print("#############################################################")
        print("#")
        print("#")

    if args.tgkey is not None:
        run_threatgrid(tg_domains, args.tgkey)
    else:
        print("#############################################################")
        print('# DNSplice - Cisco ThreatGrid Domain Report')
        print("#############################################################")
        print("# No api key was given. TG domain reports will not be run")
        print("#############################################################")


if __name__ == '__main__':
    main()