├── .gitignore
├── README.md
└── vtscan
    ├── README.md
    └── vtscan.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .pyc
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Various scripts helpful in sorting collections of malware samples.
2 | 


--------------------------------------------------------------------------------
/vtscan/README.md:
--------------------------------------------------------------------------------
1 | VT-scan
2 | --
3 | Checks a list of hashes for malware (using VirusTotal).
4 | 


--------------------------------------------------------------------------------
/vtscan/vtscan.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2.7
  2 | "Checks list of hashes for malware names (using Virus Total)"
  3 | 
  4 | __author__ = 'hasherezade (hasherezade.net)'
  5 | __license__ = "GPL"
  6 | __VERSION__ = "1.0"
  7 | 
  8 | import sys,os
  9 | import re
 10 | import time
 11 | import zlib
 12 | import argparse
 13 | import urllib,urllib2
 14 | import hashlib
 15 | 
 16 | DEFAULT_MALNAMES = 'cryptowall,crypwall,bunitu,proxy,zeus,zbot,ramnit'
 17 | 
 18 | host = "www.virustotal.com"
 19 | url2 = "https://" + host + "/en/search/?query="
 20 | method = 'GET'
 21 | 
 22 | agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Iceweasel/38.2.1'
 23 | accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
 24 | language = 'en-US,en;q=0.5'
 25 | encoding = 'gzip, deflate'
 26 | content_type = "application/x-www-form-urlencoded"
 27 | 
 28 | g_DisableColors = False
 29 | 
 30 | #---
 31 | #terminal colors:
 32 | #
 33 | GREY = '\033[90m'
 34 | RED = '\033[91m'
 35 | GREEN = '\033[92m'
 36 | YELLOW = '\033[93m'
 37 | BLUE = '\033[94m'
 38 | PURPLE = '\033[95m'
 39 | LIGHTBLUE = '\033[96m'
 40 | BG_RED = '\033[6;30;41m'
 41 | BG_GREY = '\033[6;37;40m'
 42 | 
 43 | COLOR_END = '\033[0m'
 44 | BOLD = "\033[1m"
 45 | 
 46 | def color_signed_msg(color, sign, msg):
 47 |     if not color or not sign:
 48 |         print msg
 49 |         return
 50 |     if not is_linux() or g_DisableColors is True:
 51 |         print '[' + sign + '] ' + msg
 52 |         return
 53 |     print BOLD + color +'[' + sign + '] ' + COLOR_END + msg
 54 | 
 55 | def color_msg(color,msg):
 56 |     if not color or not is_linux() or g_DisableColors is True:
 57 |         print msg
 58 |         return
 59 |     print color + msg + COLOR_END
 60 | 
 61 | def color_bold_msg(color, msg):
 62 |     if not color or not is_linux() or g_DisableColors is True:
 63 |         print msg
 64 |         return
 65 |     print BOLD + color + msg + COLOR_END
 66 | 
 67 | def info(msg):
 68 |     color_signed_msg(BLUE, '*', msg)
 69 | 
 70 | def good(msg):
 71 |     color_signed_msg(GREEN, '+', msg)
 72 | 
 73 | def warn(msg):
 74 |     color_signed_msg(YELLOW, '!', msg)
 75 | 
 76 | def err( msg):
 77 |     color_signed_msg(RED, '-', msg)
 78 | 
 79 | def is_linux():
 80 |     from sys import platform as _platform
 81 |     if "linux" in _platform :
 82 |         return True
 83 |     return False
 84 | #---
 85 | 
 86 | class TimeoutException(Exception):
 87 |     pass
 88 | 
 89 | def decompress_data(data):
 90 |     data=zlib.decompress(data, 16+zlib.MAX_WBITS)
 91 |     return data
 92 | 
 93 | def make_req(host, url, mhash):
 94 |     data=''
 95 |     url += mhash
 96 |     print "\n---\n"+ url
 97 |     request = urllib2.Request(url, data, {'Host': host, 
 98 |         'Content-Type': content_type, 
 99 |         'User-Agent' : agent, 
100 |         'Accept' : accept,
101 |         'Accept-Language' : language,
102 |         'Accept-Encoding' : encoding
103 |     })
104 |     request.get_method = lambda: method
105 |     try:
106 |         resp = urllib2.urlopen(request)
107 |     except urllib2.HTTPError as e1:
108 |         print "Error"
109 |         raise e1
110 |     except urllib2.URLError, e:
111 |         print "Error"
112 |         if 'timeout' in e.reason:
113 |             raise TimeoutException()
114 | 
115 |     rcode = resp.getcode()
116 |     if rcode == 200:
117 |         resp_content = resp.read()  
118 |         if resp.info().getheader('Content-Encoding') == 'gzip':
119 |             resp_content = decompress_data(resp_content)
120 |         return resp_content
121 |     print "Response code: %d" % rcode
122 |     return None
123 | 
def fetch_md5s(line):
    """Return all MD5-shaped tokens (32 hex digits between word boundaries) in line."""
    return re.findall(r'\b[0-9a-fA-F]{32}\b', line)
128 | 
def fetch_sha1(line):
    """Return all SHA-1-shaped tokens (40 hex digits between word boundaries) in line."""
    sha1_token = re.compile(r'\b[0-9a-fA-F]{40}\b')
    return [found.group(0) for found in sha1_token.finditer(line)]
133 | 
def fetch_sha256(line):
    """Return all SHA-256-shaped tokens (64 hex digits between word boundaries) in line.

    Both lowercase and uppercase hex digits are accepted, matching
    fetch_md5s and fetch_sha1; the previous pattern silently skipped
    hashes written in uppercase.
    """
    pattern = re.compile(r'\b[0-9a-fA-F]{64}\b')
    return re.findall(pattern, line)
138 | 
def get_hashes(fname):
    """Collect every MD5, SHA-1 and SHA-256 token found in a text file.

    Each line of fname is scanned with fetch_md5s, fetch_sha1 and
    fetch_sha256; the matches are returned as a set.
    """
    extractors = (fetch_md5s, fetch_sha1, fetch_sha256)
    collected = set()
    with open(fname, 'r') as source:
        for row in source:
            for extract in extractors:
                collected.update(extract(row))
    return collected
153 | 
def calc_hashes(dir_name):
    """Compute the SHA-256 of every regular file directly inside dir_name.

    Sub-directories and other non-file entries are skipped. Each
    "<hash> : <file name>" pair is printed as it is computed.

    Args:
        dir_name: path of the directory to scan (not recursive).

    Returns:
        A dict mapping the hex SHA-256 digest to the file name. Files with
        identical content share one key, so the last one processed wins.
    """
    hash_to_name = dict()
    for fname in os.listdir(dir_name):
        fullname = os.path.join(dir_name, fname)
        if not os.path.isfile(fullname):
            continue
        digest = hashlib.sha256()
        # Hash in chunks inside a with-block: the handle is always closed
        # and large samples are never loaded into memory at once.
        with open(fullname, 'rb') as sample:
            for chunk in iter(lambda: sample.read(65536), b''):
                digest.update(chunk)
        filehash = digest.hexdigest()
        print(filehash + " : " + fname)
        hash_to_name[filehash] = fname
    return hash_to_name
166 | 
def get_between_patterns(data, pattern1, pattern2):
    """Return the text between the first pattern1 and the next pattern2.

    Matching is case-insensitive because data and both patterns are
    lowercased first; the returned text is therefore lowercase too, with
    surrounding whitespace stripped. Returns None when pattern1 is absent,
    or when pattern2 does not occur after it.
    """
    haystack = data.lower()
    opening = pattern1.lower()
    closing = pattern2.lower()

    start = haystack.find(opening)
    if start < 0:
        return None
    remainder = haystack[start + len(opening):]

    end = remainder.find(closing)
    if end < 0:
        return None
    return remainder[:end].strip()
181 | 
def check_keywords(data, keywords, mhash):
    """Return the first keyword that occurs in data, or None.

    The comparison is case-insensitive; keywords are lowercased and stripped
    before the substring test, and that normalized form is what is returned.
    mhash is accepted but not used.
    """
    haystack = data.lower()
    normalized = (word.lower().strip() for word in keywords)
    return next((word for word in normalized if word in haystack), None)
189 | 
def check_all_keywords(data, keywords, mhash):
    """Return every keyword that occurs in data, or None when none does.

    The comparison is case-insensitive; keywords are lowercased and stripped
    and are returned in that normalized form, in input order. mhash is
    accepted but not used.
    """
    haystack = data.lower()
    normalized = [word.lower().strip() for word in keywords]
    hits = [word for word in normalized if word in haystack]
    return hits or None
200 | 
def check_id(data, vendor):
    """Extract the detection name reported by one vendor from the report HTML.

    The vendor's table row is the text between the vendor name and the next
    '</tr>'. A non-empty green cell in that row is reported as "NOT DETECTED"
    and yields None; otherwise the content of the red cell is logged (when
    non-empty) and returned. Returns None when the vendor row is missing.
    """
    vendor_row = get_between_patterns(data, vendor, '</tr>')
    if not vendor_row:
        return None

    clean_cell = get_between_patterns(vendor_row, '<td class=\"ltr text-green\">', '</td>')
    if clean_cell:
        warn(vendor +": NOT DETECTED")
        return None

    detection = get_between_patterns(vendor_row, '<td class=\"ltr text-red\">', '</td>')
    if detection:
        info(vendor + " : " + detection)
    return detection
215 | 
def check_any(data):
    """Return the content of the first red (detected) cell in data.

    Returns None when data is empty or when no such cell is found.
    """
    if data:
        return get_between_patterns(data, '<td class=\"ltr text-red\">', '</td>')
    return None
222 | 
def get_names_table(data):
    """Return the antivirus-results table portion of the report HTML.

    This is the text between the opening tag of the "antivirus-results"
    table and the start of the "item-detail" tab pane. Returns None when
    data is empty or when either marker is missing.
    """
    if data:
        table_start = '<table class=\"table table-striped\" id=\"antivirus-results\">'
        table_end = '<div class=\"tab-pane extra-info\" id=\"item-detail\">'
        return get_between_patterns(data, table_start, table_end)
    return None
229 | 
def vt_check(mhash, keywords, vendor, other_keywords=None):
    """Look up one hash and report what the scan results say about it.

    Args:
        mhash: hash to query.
        keywords: malware names searched for in the antivirus results table.
        vendor: vendor whose detection name is preferred.
        other_keywords: optional extra keywords searched for in the whole
            report; any matches are only logged.

    Returns:
        The matched malware name when one of ``keywords`` occurs in the
        results table; otherwise the detection name of ``vendor`` (or of the
        first detecting vendor). None when there is no response, the file is
        unknown, or nothing detected it. False when the request or the
        parsing failed with an exception. Callers only rely on truthiness.
    """
    not_found = ["File not found"]

    try:
        resp_content = make_req(host, url2, mhash)
        if not resp_content:
            err("NO RESPONSE " + mhash)
            return None

        if check_keywords(resp_content, not_found, mhash):
            err("Not found: " + mhash)
            return None

        if other_keywords is not None:
            found_keywords = check_all_keywords(resp_content, other_keywords, mhash)
            if found_keywords is not None:
                info("KEYWORDS: " + ", ".join(found_keywords))

        vendor_id = check_id(resp_content, vendor)
        if vendor_id is None:
            vendor_id = check_any(resp_content)
            if not vendor_id:
                err("NO VENDOR DETECTED : " + mhash)
                return None
            info("Other id : " + vendor_id)

        # The results table can be missing even when a detection was found;
        # in that case still report the vendor id instead of failing.
        names_table = get_names_table(resp_content)
        if names_table is not None:
            malwarename = check_keywords(names_table, keywords, mhash)
            if malwarename:
                good(malwarename + " : " + mhash)
                return malwarename
        return vendor_id

    except TimeoutException:
        # Rebuild the URL that make_req requested; no ``url`` name exists here.
        print("Timeout: " + url2 + mhash)
    except urllib2.HTTPError as e:
        # A 404 is treated as a plain miss and is not reported.
        if e.code != 404:
            print("\tError : " + str(e.reason))
    except Exception as e:
        # Stay best-effort (one bad hash must not stop the run), but say
        # what went wrong instead of hiding it.
        err("Check failed for " + mhash + " : " + str(e))
    return False
274 | 
def make_outfile_name(filename, prefix):
    """Return filename with prefix prepended to its last path component.

    The directory part of filename is kept unchanged.
    """
    directory, leaf = os.path.split(filename)
    return os.path.join(directory, prefix + leaf)
282 | 
def make_outfile(out_file_name):
    """Open out_file_name for appending and announce it.

    Args:
        out_file_name: path of the result file; it is created if missing.

    Returns:
        The open file object, or None when the file cannot be opened. In
        that case an error line is printed.
    """
    # open() signals failure by raising, never by returning a falsy value,
    # so the error has to be caught for the None result to be reachable.
    try:
        out_file = open(out_file_name, 'a+')
    except (IOError, OSError):
        err("Cannot open file: " + out_file_name)
        return None
    info("File: " + out_file_name)
    return out_file
291 | 
def main():
    """Parse the command line, check every hash and write the result files.

    Hashes come either from a text file (--hashes) or from the files of a
    directory (--dir); hashes listed in --whitelist are removed. Each
    remaining hash is checked with vt_check and appended to a FOUND_ or
    NOTFOUND_ file named after the input, with --sleeptime seconds between
    queries.

    Returns:
        -1 on invalid parameters, empty input or unopenable result files,
        0 when the whitelist removes every hash, and 1 after a completed run.
    """
    global g_DisableColors

    parser = argparse.ArgumentParser(description="VirusTotal checker "+ __VERSION__)
    parser.add_argument('--hashes', dest="hashes", default=None, help="Input file with list of hashes (alternative to dir)")
    parser.add_argument('--whitelist', dest="whitelist", default=None, help="Input file with list of whitelisted hashes")
    parser.add_argument('--dir', dest="dir", default=None, help="Input directory with files to scan")
    parser.add_argument('--names', dest="names", default=DEFAULT_MALNAMES, help="Searched malware names, ie. " + DEFAULT_MALNAMES)
    parser.add_argument('--keywords', dest="keywords", default=None, help="Other keywords searched in the report")
    parser.add_argument('--vendor', dest="vendor", default="Malwarebytes", help="Searched vendor, default='Malwarebytes'")
    parser.add_argument('--sleeptime', dest="sleeptime", default=3, help="Sleep time between queries, default=3", type=int)
    # The default must be the boolean False; the string "False" is truthy.
    parser.add_argument('--nocolors', dest="nocolors", default=False, action='store_true', help="Disable colors?")
    args = parser.parse_args()

    g_DisableColors = args.nocolors

    found_list = list()
    not_found_list = list()

    # Exactly one of --hashes / --dir has to be supplied.
    if (args.hashes is None) == (args.dir is None):
        print("[ERROR] Invalid parameters: supply dir or hashes!")
        return (-1)

    hash_to_name = None
    if args.hashes is not None:
        hashes = get_hashes(args.hashes)
        input_name = args.hashes
    else:
        dirstr = os.path.expanduser(args.dir)
        dirstr = os.path.expandvars(dirstr)

        hash_to_name = calc_hashes(dirstr)
        # A set (not the key list) so the whitelist can be subtracted below.
        hashes = set(hash_to_name)
        # Drop trailing separators so "samples/" gives "samples.txt" next to
        # the directory rather than ".txt" inside the scanned directory.
        input_name = (dirstr.rstrip(os.sep) or dirstr) + ".txt"

    if not hashes:
        print("[ERROR] No hashes found in given file!")
        return (-1)
    good("{} hashes loaded.".format(len(hashes)))

    if args.whitelist:
        hashes = hashes - get_hashes(args.whitelist)
        if not hashes:
            err("No hashes remaining after whitelist elimination.")
            return 0
        good("{} hashes remain after whitelist elimination.".format(len(hashes)))

    malnames = args.names.split(',')
    keywords = args.keywords.split(',') if args.keywords else None

    print("Results will be appended to files:")
    found_file_name = make_outfile_name(input_name, 'FOUND_')
    found_file = make_outfile(found_file_name)
    if found_file is None:
        return (-1)

    nfound_file_name = make_outfile_name(input_name, 'NOTFOUND_')
    nfound_file = make_outfile(nfound_file_name)
    if nfound_file is None:
        found_file.close()
        return (-1)

    try:
        for mhash in hashes:
            found = vt_check(mhash, malnames, args.vendor, keywords)
            if found:
                found_list.append(mhash)
                entry = mhash
                if hash_to_name is not None:
                    name = hash_to_name[mhash]
                    if name is not None:
                        print(name)
                        entry = mhash + " : " + name
                found_file.write("%s : %s\n" % (entry, found))
                found_file.flush()
            else:
                not_found_list.append(mhash)
                nfound_file.write("%s\n" % mhash)
                nfound_file.flush()
            time.sleep(args.sleeptime)
    finally:
        # Close both result files even when the loop is interrupted.
        found_file.close()
        nfound_file.close()

    print("----")
    print("Summary:")

    good("Found: " + str(len(found_list)))
    info("File: " + found_file_name)
    err("Not Found: " + str(len(not_found_list)))
    info("File: " + nfound_file_name)
    print("----")
    # NOTE(review): this value becomes the process exit status, so a
    # successful run exits with 1 - kept unchanged; confirm whether intended.
    return 1
392 | 
if __name__ == "__main__":
    # Run the command-line tool; main()'s return value is the exit status.
    raise SystemExit(main())
395 | 


--------------------------------------------------------------------------------