├── .gitignore ├── README.md └── vtscan ├── README.md └── vtscan.py /.gitignore: -------------------------------------------------------------------------------- 1 | .pyc 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Various scripts helpful in sorting collections of malware samples. 2 | -------------------------------------------------------------------------------- /vtscan/README.md: -------------------------------------------------------------------------------- 1 | VT-scan 2 | -- 3 | Checks list of hashes for malware (using Virus Total). 4 | -------------------------------------------------------------------------------- /vtscan/vtscan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2.7 2 | "Checks list of hashes for malware names (using Virus Total)" 3 | 4 | __author__ = 'hasherezade (hasherezade.net)' 5 | __license__ = "GPL" 6 | __VERSION__ = "1.0" 7 | 8 | import sys,os 9 | import re 10 | import time 11 | import zlib 12 | import argparse 13 | import urllib,urllib2 14 | import hashlib 15 | 16 | DEFAULT_MALNAMES = 'cryptowall,crypwall,bunitu,proxy,zeus,zbot,ramnit' 17 | 18 | host = "www.virustotal.com" 19 | url2 = "https://" + host + "/en/search/?query=" 20 | method = 'GET' 21 | 22 | agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Iceweasel/38.2.1' 23 | accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' 24 | language = 'en-US,en;q=0.5' 25 | encoding = 'gzip, deflate' 26 | content_type = "application/x-www-form-urlencoded" 27 | 28 | g_DisableColors = False 29 | 30 | #--- 31 | #terminal colors: 32 | # 33 | GREY = '\033[90m' 34 | RED = '\033[91m' 35 | GREEN = '\033[92m' 36 | YELLOW = '\033[93m' 37 | BLUE = '\033[94m' 38 | PURPLE = '\033[95m' 39 | LIGHTBLUE = '\033[96m' 40 | BG_RED = '\033[6;30;41m' 41 | BG_GREY = '\033[6;37;40m' 42 | 43 | COLOR_END = '\033[0m' 44 | BOLD = "\033[1m" 45 | 46 | def color_signed_msg(color, sign, msg): 47 | if not color or not sign: 48 | print msg 49 | return 50 | if not is_linux() or g_DisableColors is True: 51 | print '[' + sign + '] ' + msg 52 | return 53 | print BOLD + color +'[' + sign + '] ' + COLOR_END + msg 54 | 55 | def color_msg(color,msg): 56 | if not color or not is_linux() or g_DisableColors is True: 57 | print msg 58 | return 59 | print color + msg + COLOR_END 60 | 61 | def color_bold_msg(color, msg): 62 | if not color or not is_linux() or g_DisableColors is True: 63 | print msg 64 | return 65 | print BOLD + color + msg + COLOR_END 66 | 67 | def info(msg): 68 | color_signed_msg(BLUE, '*', msg) 69 | 70 | def good(msg): 71 | color_signed_msg(GREEN, '+', msg) 72 | 73 | def warn(msg): 74 | color_signed_msg(YELLOW, '!', msg) 75 | 76 | def err( msg): 77 | color_signed_msg(RED, '-', msg) 78 | 79 | def is_linux(): 80 | from sys import platform as _platform 81 | if "linux" in _platform : 82 | return True 83 | return False 84 | #--- 85 | 86 | class TimeoutException(Exception): 87 | pass 88 | 89 | def decompress_data(data): 90 | data=zlib.decompress(data, 16+zlib.MAX_WBITS) 91 | return data 92 | 93 | def make_req(host, url, mhash): 94 | data='' 95 | url += mhash 96 | print "\n---\n"+ url 97 | request = urllib2.Request(url, data, {'Host': host, 98 | 'Content-Type': content_type, 99 | 'User-Agent' : agent, 100 | 'Accept' : accept, 101 | 'Accept-Language' : language, 102 | 'Accept-Encoding' : encoding 103 | }) 104 | request.get_method = lambda: method 105 | try: 106 | resp = urllib2.urlopen(request) 107 | except urllib2.HTTPError as e1: 108 | print "Error" 109 | raise e1 110 | except urllib2.URLError, e: 111 | print "Error" 112 | if 'timeout' in e.reason: 113 | raise TimeoutException() 114 | 115 | rcode = resp.getcode() 116 | if rcode == 200: 117 | resp_content = resp.read() 118 | if resp.info().getheader('Content-Encoding') == 'gzip': 119 | resp_content = decompress_data(resp_content) 120 | return resp_content 121 | print "Response code: %d" % rcode 122 | return None 123 | 124 | def fetch_md5s(line): 125 | pattern = re.compile(r'\b[0-9a-fA-F]{32}\b') 126 | fhash = re.findall(pattern, line) 127 | return fhash 128 | 129 | def fetch_sha1(line): 130 | pattern = re.compile(r'\b[0-9a-fA-F]{40}\b') 131 | fhash = re.findall(pattern, line) 132 | return fhash 133 | 134 | def fetch_sha256(line): 135 | pattern = re.compile(r'\b[0-9a-f]{64}\b') 136 | fhash = re.findall(pattern, line) 137 | return fhash 138 | 139 | def get_hashes(fname): 140 | hashes = set() 141 | with open(fname, 'r') as f: 142 | for line in f.readlines(): 143 | md5s = fetch_md5s(line) 144 | for h in md5s: 145 | hashes.add(h) 146 | sha1s = fetch_sha1(line) 147 | for h in sha1s: 148 | hashes.add(h) 149 | sha256s = fetch_sha256(line) 150 | for h in sha256s: 151 | hashes.add(h) 152 | return hashes 153 | 154 | def calc_hashes(dir_name): 155 | dir_content = set(os.listdir(dir_name)) 156 | hash_to_name = dict() 157 | for fname in dir_content: 158 | fullname = os.path.join(dir_name, fname) 159 | if not os.path.isfile(fullname): 160 | continue 161 | data = open(fullname, 'rb').read() 162 | filehash = hashlib.sha256(data).hexdigest() 163 | print filehash + " : " + fname 164 | hash_to_name[filehash] = fname 165 | return hash_to_name 166 | 167 | def get_between_patterns(data, pattern1, pattern2): 168 | pattern1 = pattern1.lower() 169 | pattern2 = pattern2.lower() 170 | data = data.lower() 171 | 172 | if not pattern1 in data: 173 | return None 174 | indx1 = data.index(pattern1) + len(pattern1) 175 | data = data[indx1:] 176 | if not pattern2 in data: 177 | return None 178 | indx2 = data.index(pattern2) 179 | data = data[:indx2].strip() 180 | return data 181 | 182 | def check_keywords(data, keywords, mhash): 183 | data = data.lower() 184 | for keyword in keywords: 185 | keyword = keyword.lower().strip() 186 | if keyword in data: 187 | return keyword 188 | return None 189 | 190 | def check_all_keywords(data, keywords, mhash): 191 | found_keywords = list() 192 | data = data.lower() 193 | for keyword in keywords: 194 | keyword = keyword.lower().strip() 195 | if keyword in data: 196 | found_keywords.append(keyword) 197 | if len(found_keywords) == 0: 198 | return None 199 | return found_keywords 200 | 201 | def check_id(data, vendor): 202 | data = get_between_patterns(data, vendor, '') 203 | if not data: 204 | return None 205 | detectedp = '' 206 | not_detectedp = '' 207 | if get_between_patterns(data, not_detectedp, ''): 208 | warn(vendor +": NOT DETECTED") 209 | return None 210 | 211 | fetched = get_between_patterns(data, detectedp, '') 212 | if fetched: 213 | info(vendor + " : " + fetched) 214 | return fetched 215 | 216 | def check_any(data): 217 | if not data: 218 | return None 219 | detectedp = '' 220 | fetched = get_between_patterns(data, detectedp, '') 221 | return fetched 222 | 223 | def get_names_table(data): 224 | if not data: 225 | return None 226 | detectedp = '' 227 | fetched = get_between_patterns(data, detectedp, '
') 228 | return fetched 229 | 230 | def vt_check(mhash, keywords, vendor, other_keywords=None): 231 | not_found = ["File not found"] 232 | 233 | try: 234 | resp_content = make_req(host, url2, mhash) 235 | if not resp_content: 236 | err("NO RESPONSE " + mhash) 237 | return None 238 | 239 | if check_keywords(resp_content, not_found, mhash): 240 | err("Not found: " + mhash) 241 | return None 242 | 243 | if other_keywords is not None: 244 | found_keywords = check_all_keywords(resp_content, other_keywords, mhash) 245 | if found_keywords is not None : 246 | found_str = ", ". join(found_keywords) 247 | info("KEYWORDS: " + found_str) 248 | 249 | vendor_id = check_id(resp_content, vendor) 250 | if vendor_id is None : 251 | vendor_id = check_any(resp_content) 252 | if not vendor_id: 253 | err("NO VENDOR DETECTED : " + mhash) 254 | return None 255 | info("Other id : " + vendor_id) 256 | 257 | names_table = get_names_table(resp_content) 258 | malwarename = check_keywords(names_table, keywords, mhash) 259 | if malwarename : 260 | good(malwarename + " : " + mhash) 261 | return malwarename 262 | return vendor_id 263 | 264 | except TimeoutException: 265 | print "Timeout: " + url 266 | except urllib2.HTTPError as e: 267 | if e.code == 404: 268 | pass 269 | else: 270 | print "\tError : " + e.reason 271 | except Exception: 272 | pass 273 | return False 274 | 275 | def make_outfile_name(filename, prefix): 276 | basename = os.path.basename(filename) 277 | dirname = os.path.dirname(filename) 278 | 279 | basename = prefix + basename 280 | out_name = os.path.join(dirname, basename) 281 | return out_name 282 | 283 | def make_outfile(out_file_name): 284 | out_file = open(out_file_name, 'a+') 285 | if out_file: 286 | info("File: " + out_file_name) 287 | else: 288 | err("Cannot open file: " + out_file_name) 289 | return None 290 | return out_file 291 | 292 | def main(): 293 | parser = argparse.ArgumentParser(description="VirusTotal checker "+ __VERSION__) 294 | parser.add_argument('--hashes', dest="hashes", default=None, help="Input file with list of hashes (alternative to dir)") 295 | parser.add_argument('--whitelist', dest="whitelist", default=None, help="Input file with list of whitelisted hashes") 296 | parser.add_argument('--dir', dest="dir", default=None, help="Input directory with files to scan") 297 | parser.add_argument('--names', dest="names", default=DEFAULT_MALNAMES, help="Searched malware names, ie. " + DEFAULT_MALNAMES) 298 | parser.add_argument('--keywords', dest="keywords", default=None, help="Other keywords searched in the report") 299 | parser.add_argument('--vendor', dest="vendor", default="Malwarebytes", help="Searched vendor, default='Malwarebytes'") 300 | parser.add_argument('--sleeptime', dest="sleeptime", default=3, help="Sleep time between queries, default=3", type=int) 301 | parser.add_argument('--nocolors', dest="nocolors", default="False", action='store_true', help="Disable colors?") 302 | args = parser.parse_args() 303 | 304 | global g_DisableColors 305 | g_DisableColors = args.nocolors 306 | 307 | found_list = list() 308 | not_found_list = list() 309 | 310 | if args.hashes is None and args.dir is None: 311 | print "[ERROR] Invalid parameters: supply dir or hashes!" 312 | return (-1) 313 | 314 | if args.hashes is not None and args.dir is not None: 315 | print "[ERROR] Invalid parameters: supply dir or hashes!" 316 | return (-1) 317 | 318 | if args.hashes is not None: 319 | hashes = get_hashes(args.hashes) 320 | input_name = args.hashes 321 | 322 | hash_to_name = None 323 | if args.dir is not None: 324 | dirstr = os.path.expanduser(args.dir) 325 | dirstr = os.path.expandvars(dirstr) 326 | 327 | hash_to_name = calc_hashes(dirstr) 328 | hashes = hash_to_name.keys() 329 | input_name = dirstr + ".txt" 330 | 331 | if len(hashes): 332 | good("{} hashes loaded.".format(len(hashes))) 333 | else: 334 | print "[ERROR] No hashes found in given file!" 335 | return (-1) 336 | 337 | if args.whitelist: 338 | whitelist = get_hashes(args.whitelist) 339 | 340 | if args.whitelist: 341 | hashes = hashes - whitelist 342 | if len(hashes): 343 | good("{} hashes remain after whitelist elimination.".format(len(hashes))) 344 | else: 345 | err("No hashes remaining after whitelist elimination.") 346 | return 0 347 | 348 | malnames = args.names.split(',') 349 | if args.keywords : 350 | keywords = args.keywords.split(',') 351 | else: 352 | keywords = None 353 | 354 | print "Results will be appended to files:" 355 | found_file_name = make_outfile_name( input_name, 'FOUND_') 356 | found_file = make_outfile(found_file_name) 357 | if found_file is None: 358 | return (-1) 359 | 360 | nfound_file_name = make_outfile_name( input_name, 'NOTFOUND_') 361 | nfound_file = make_outfile(nfound_file_name) 362 | if found_file is None: 363 | return (-1) 364 | 365 | for mhash in hashes: 366 | found = vt_check(mhash, malnames, args.vendor, keywords) 367 | if found: 368 | found_list.append(mhash) 369 | if hash_to_name is not None: 370 | name = hash_to_name[mhash] 371 | if name is not None: 372 | print name 373 | mhash = mhash + " : " + name 374 | found_file.write("%s : %s\n" % (mhash, found)) 375 | found_file.flush() 376 | else: 377 | not_found_list.append(mhash) 378 | nfound_file.write("%s\n" % mhash) 379 | nfound_file.flush() 380 | time.sleep(args.sleeptime) 381 | print "----" 382 | print "Summary:" 383 | 384 | good("Found: " + str(len(found_list))) 385 | info("File: " + found_file_name) 386 | err("Not Found: " + str(len(not_found_list))) 387 | info("File: " + nfound_file_name) 388 | found_file.close() 389 | nfound_file.close() 390 | print "----" 391 | return 1 392 | 393 | if __name__ == "__main__": 394 | sys.exit(main()) 395 | --------------------------------------------------------------------------------