├── .gitignore ├── mime2vt.conf.default ├── procmailrc.sample ├── procmailrc.sample2 ├── README.md └── mime2vt.py /.gitignore: -------------------------------------------------------------------------------- 1 | mime2vt.conf 2 | mime2vt.db 3 | -------------------------------------------------------------------------------- /mime2vt.conf.default: -------------------------------------------------------------------------------- 1 | [virustotal] 2 | apikey: 3 | exclude: image/png,image/gif,image/jpeg,text/plain,text/html 4 | 5 | [elasticsearch] 6 | server: 127.0.0.1:9200 7 | index: virustotal 8 | 9 | [database] 10 | dbpath: /var/tmp/mime2vt.db 11 | -------------------------------------------------------------------------------- /procmailrc.sample: -------------------------------------------------------------------------------- 1 | DELIVER=/usr/lib/dovecot/deliver 2 | 3 | :0 4 | * ^X-Spam-Status: Yes,.* 5 | { 6 | :0c 7 | | /usr/local/bin/mime2vt.py -d /tmp/mime -c /home/xavier/mime2vt.conf 8 | 9 | :0c 10 | /tmp/spam.debug 11 | 12 | :0 13 | | $DELIVER -m spam 14 | } 15 | 16 | :0 17 | * ^From.* 18 | | $DELIVER -m incoming 19 | -------------------------------------------------------------------------------- /procmailrc.sample2: -------------------------------------------------------------------------------- 1 | SHELL="/bin/bash" 2 | DELIVER=/usr/lib/dovecot/deliver 3 | LOGFILE="/var/log/procmail.log" 4 | 5 | # Parse every e-mail, not only spam etc. 
6 | 7 | :0 8 | { 9 | :0c 10 | | /usr/local/bin/mime2vt.py -d /tmp/mime -c /home/xavier/mime2vt.conf 11 | 12 | :0 13 | | $DELIVER -d $LOGNAME 14 | } 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mime2vt.py 2 | ========== 3 | Unpack MIME attachments from STDIN and check them against virustotal.com 4 | Use it independently: 5 | 6 | cat /tmp/mail.dump | mime2vt -c /etc/mime2vt.conf 7 | 8 | Or via tools like Procmail: 9 | 10 |
11 | :0
12 | * ^X-Spam-Flag: YES
13 | {
14 |         :0c
15 |         | /usr/local/bin/mime2vt.py -d /tmp/mime -c /home/xavier/.mime2vt.conf
16 | 	:0
17 | 	spam
18 | }
19 | 
20 | 21 | Usage 22 | ----- 23 |
24 | mime2vt.py [-h] [-d DIRECTORY] [-v] [-c CONFIG]
25 | 
26 | Unpack MIME attachments from a file and check them against virustotal.com
27 | 
28 | optional arguments:
29 | -h, --help            show this help message and exit
30 | -d DIRECTORY, --directory DIRECTORY
31 |                       directory where files will be extracted (default: /tmp)
32 | -v, --verbose         verbose output
33 | -c CONFIG, --config CONFIG
34 |                       configuration file (default: /etc/mime2vt.conf)
35 | 
36 | 37 | Results 38 | ------- 39 | Information is sent via Syslog: 40 | 41 | Dec 12 18:41:20 marge mime2vt.py[1104]: Processing zip archive: 4359ae6078390f417ab0d4411527a5c2.zip 42 | Dec 12 18:41:21 marge mime2vt.py[1104]: File: VOICE748-348736.scr (acb05e95d713b1772fb96a5e607d539f) Score: 38/53 Scanned: 2014-11-13 15:45:04 (29 days, 2:56:17) 43 | 44 | A SQLite database is created to store useful information about the malicious files: 45 | 46 |
47 | CREATE TABLE files(md5 TEXT PRIMARY KEY,
48 |                    filename TEXT,
49 |                    first_vt_score TEXT,
50 |                    last_vt_score TEXT,
51 |                    first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
52 |                    last_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
53 |                    occurrences INTEGER
54 | )
55 | 
56 | The database is created automatically if not present. 57 | 58 | Requirements 59 | ---- 60 |
61 | sudo pip install python-dateutil
62 | sudo pip install elasticsearch
63 | sudo pip install virustotal-api    
64 | 
65 | 66 | Todo 67 | ---- 68 | * 69 | -------------------------------------------------------------------------------- /mime2vt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # mime2vt.py - Submit MIME attachments to VirusTotal 4 | # 5 | # Author: Xavier Mertens 6 | # Copyright: GPLv3 (http://gplv3.fsf.org/) 7 | # Feel free to use the code, but please share the changes you've made 8 | # 9 | 10 | import argparse 11 | import ConfigParser 12 | import email 13 | import errno 14 | import hashlib 15 | import json 16 | import logging 17 | import mimetypes 18 | import os 19 | import re 20 | import sys 21 | import time 22 | import zipfile 23 | import sqlite3 24 | import syslog 25 | from elasticsearch import Elasticsearch 26 | from virus_total_apis import PublicApi as VirusTotalPublicApi 27 | from optparse import OptionParser 28 | from datetime import datetime 29 | from dateutil import parser 30 | 31 | # Try to use oletools 32 | try: 33 | from oletools.olevba import VBA_Parser, TYPE_OLE, TYPE_OpenXML, TYPE_Word2003_XML, TYPE_MHTML 34 | useOLETools = 1 35 | except: 36 | useOLETools = 0 37 | 38 | # Try to use pyzmail 39 | try: 40 | import pyzmail 41 | usePyzMail = 1 42 | except: 43 | usePyzMail = 0 44 | 45 | args = '' 46 | 47 | # Default configuration 48 | config = { 49 | 'apiKey': '', 50 | 'esServer': '', 51 | 'esIndex': 'virustotal', 52 | 'dbPath': '/var/tmp/mime2vt.db' 53 | } 54 | 55 | # Return code 56 | rcode = 0 57 | 58 | def timeDiff(t): 59 | 60 | """Compute the delta between two timestamps""" 61 | 62 | fmt = '%Y-%m-%d %H:%M:%S' 63 | now = time.strftime(fmt) 64 | return datetime.strptime(now, fmt) - datetime.strptime(t, fmt) 65 | 66 | def writeLog(msg): 67 | syslog.openlog(logoption=syslog.LOG_PID,facility=syslog.LOG_MAIL) 68 | syslog.syslog(msg) 69 | return 70 | 71 | def dbCreate(): 72 | 73 | """Create the SQLite DB at first run""" 74 | 75 | if (not os.path.isfile(config['dbPath'])): 76 | db = 
sqlite3.connect(config['dbPath']) 77 | cursor = db.cursor() 78 | cursor.execute(''' 79 | CREATE TABLE files(md5 TEXT PRIMARY KEY, 80 | filename TEXT, 81 | first_vt_score TEXT, 82 | last_vt_score TEXT, 83 | first_seen DATETIME DEFAULT CURRENT_TIMESTAMP, 84 | last_seen DATETIME DEFAULT CURRENT_TIMESTAMP, 85 | occurrences INTEGER 86 | ) 87 | ''') 88 | cursor.execute(''' 89 | CREATE TABLE urls(url TEXT) 90 | ''') 91 | db.commit() 92 | db.close() 93 | return 94 | 95 | def dbMD5Exists(md5): 96 | """ Search for a MD5 hash in the database""" 97 | """ (Return "1" if found) """ 98 | if not md5: 99 | return 1 100 | 101 | try: 102 | db = sqlite3.connect(config['dbPath']) 103 | except: 104 | writeLog("Cannot open the database file (locked?)") 105 | return 0 106 | cursor = db.cursor() 107 | cursor.execute('''SELECT md5 FROM files WHERE md5=?''', (md5,)) 108 | if cursor.fetchone(): 109 | db.close() 110 | return 1 111 | db.close() 112 | return 0 113 | 114 | def dbAddMD5(md5, filename, vt): 115 | """ Store a new MD5 hash in the database """ 116 | if not md5 or not filename: 117 | return 0 118 | try: 119 | db = sqlite3.connect(config['dbPath']) 120 | except: 121 | writeLog("Cannot open the database file (locked?)") 122 | return 0 123 | 124 | writeLog("DEBUG: dbAddMD5: Checking if MD5 exists") 125 | cursor = db.cursor() 126 | cursor.execute('''SELECT md5,occurrences FROM files WHERE md5=?''', (md5,)) 127 | row = cursor.fetchone() 128 | if row: 129 | occ = int(row[1]) + 1 130 | # Update existing record 131 | cursor.execute('''UPDATE files SET last_seen = DATETIME("now"), 132 | occurrences = ?, 133 | last_vt_score = ? 
134 | WHERE md5=?''', (occ, vt, md5,)) 135 | writeLog("DEBUG: db record updated") 136 | else: 137 | # Insert new record 138 | cursor.execute('''INSERT INTO files(md5,filename,first_vt_score, occurrences) VALUES(?,?,?,1)''', (md5, filename, vt)) 139 | writeLog("DEBUG: db record created") 140 | db.commit() 141 | db.close() 142 | writeLog("DEBUG: dbAddMD5: %s" % md5) 143 | return 0 144 | 145 | def submit2vt(filename): 146 | 147 | """Submit a new file to VT for scanning""" 148 | 149 | # Check VT score 150 | vt = VirusTotalPublicApi(config['apiKey']) 151 | response = vt.scan_file(filename) 152 | 153 | # DEBUG 154 | fp = open('/tmp/vt.debug', 'a') 155 | fp.write(json.dumps(response, sort_keys=False, indent=4)) 156 | fp.close() 157 | 158 | if response['response_code'] == 200: 159 | writeLog("VT Reply: %s" % response['results']['verbose_msg']) 160 | else: 161 | writeLog('VT Error: %s' % response['error']) 162 | 163 | if config['esServer']: 164 | # Save results to Elasticsearch 165 | try: 166 | response['@timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S+01:00") 167 | res = es.index(index=config['esIndex'], doc_type="VTresult", body=json.dumps(response)) 168 | except: 169 | writeLog("Cannot index to Elasticsearch") 170 | return 171 | 172 | def generateDumpDirectory(path): 173 | 174 | """Generate the destination directory to dump files""" 175 | 176 | # Prepare the output directory: 177 | # %m -> month 178 | # %d -> day 179 | # %y -> year 180 | t_day = time.strftime("%d") 181 | t_month = time.strftime("%m") 182 | t_year = time.strftime("%Y") 183 | path = path.replace('%d', t_day) 184 | path = path.replace('%m', t_month) 185 | path = path.replace('%y', t_year) 186 | try: 187 | os.makedirs(path) 188 | writeLog("DEBUG: Generated directory: %s" % path) 189 | except OSError as e: 190 | # Ignore directory exists error 191 | if e.errno != errno.EEXIST: 192 | raise 193 | else: 194 | return(path) 195 | 196 | # Fix corrext access rights on the direcrity (just for me) 197 | try: 198 | 
writeLog("DEBUG: chmod() on %s" % path) 199 | os.chmod(path, 0775) 200 | except IOError as e: 201 | writeLog("DEBUG: chmod() failed on %s: %s" % (path,e.strerror)) 202 | raise 203 | 204 | return(path) 205 | 206 | def parseOLEDocument(f): 207 | 208 | """Parse an OLE document for VBA macros""" 209 | 210 | if not f or not useOLETools: 211 | return 212 | 213 | writeLog('DEBUG: Analyzing with oletools') 214 | try: 215 | v = VBA_Parser(f) 216 | except: 217 | writeLog("Not a supported file format: %s" % f) 218 | return 219 | writeLog('DEBUG: Detected file type: %s' % v.type) 220 | 221 | # Hack: Search for a .js extension 222 | fname, fextension = os.path.splitext(f) 223 | 224 | if v.detect_vba_macros() or fextension == ".js": 225 | writeLog('DEBUG: VBA Macros/JScript found') 226 | try: 227 | t = open("%s.analysis" % f, 'w') 228 | except IOError as e: 229 | writeLog("Cannot create analysis file %s.analysis: %s" % (f,e.strerror)) 230 | return 231 | for kw_type, keyword, description in v.analyze_macros(): 232 | t.write("%-12s | %-25s | %s\n" % (kw_type, keyword, description)) 233 | t.close() 234 | writeLog("DEBUG: Analysis dumped to %s.analysis" % f) 235 | else: 236 | writeLog('DEBUG: No VBA Macros found') 237 | return 238 | 239 | def processZipFile(filename): 240 | 241 | """Extract files from a ZIP archive and test them against VT""" 242 | 243 | global rcode 244 | 245 | zf = zipfile.ZipFile(filename) 246 | for f in zf.namelist(): 247 | try: 248 | data = zf.read(f) 249 | except KeyError: 250 | writeLog("Cannot extract %s from zip file %s" % (f, filename)) 251 | return 252 | fp = open(os.path.join(generateDumpDirectory(args.directory), f), 'wb') 253 | fp.write(data) 254 | fp.close() 255 | md5 = hashlib.md5(data).hexdigest() 256 | writeLog("Unzipped %s (%s)" % (f, md5)) 257 | 258 | #if dbMD5Exists(md5): 259 | # writeLog("Skipped %s (known MD5)" % f) 260 | # continue 261 | 262 | vt = VirusTotalPublicApi(config['apiKey']) 263 | response = vt.get_file_report(md5) 264 | # 
writeLog("DEBUG: VT Response received") 265 | 266 | if config['esServer']: 267 | # Save results to Elasticsearch 268 | try: 269 | response['@timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S+01:00") 270 | res = es.index(index=config['esIndex'], doc_type="VTresult", body=json.dumps(response)) 271 | except: 272 | writeLog("Cannot index to Elasticsearch") 273 | # writeLog("DEBUG: Step1") 274 | 275 | # DEBUG 276 | fp = open('/tmp/vt.debug', 'a') 277 | fp.write(json.dumps(response, sort_keys=False, indent=4)) 278 | fp.close() 279 | # writeLog("DEBUG: Step1: %s" % response['results']['response_code']) 280 | 281 | vtScore = "0/0" 282 | if response['response_code'] == 200: 283 | if response['results']['response_code']: 284 | positives = response['results']['positives'] 285 | total = response['results']['total'] 286 | scan_date = response['results']['scan_date'] 287 | vtScore = str(positives) + "/" + str(total) 288 | if positives > 0: 289 | # File is malicious 290 | rcode = 1 291 | 292 | writeLog('File: %s (%s) Score: %s Scanned: %s (%s)' % 293 | (f, md5, vtScore, scan_date, timeDiff(scan_date))) 294 | else: 295 | # Do not resubmit existing MD5 296 | if !dbMD5Exists(md5): 297 | writeLog('File: %s (%s) not found, submited for scanning' % (f, md5)) 298 | submit2vt(os.path.join(generateDumpDirectory(args.directory), f)) 299 | dbAddMD5(md5, f, vtScore) 300 | else: 301 | writeLog('VT Error: %s' % response['error']) 302 | 303 | # Analyze OLE documents if API is available 304 | parseOLEDocument(os.path.join(generateDumpDirectory(args.directory), f)) 305 | return 306 | 307 | def parseMailheaders(data): 308 | 309 | """Extract useful e-mail headers""" 310 | 311 | if data: 312 | msg=pyzmail.PyzMessage.factory(data) 313 | 314 | mailheaders = { "subject": msg.get_subject(), 315 | "from": msg.get_address('from'), 316 | "to": msg.get_addresses('to'), 317 | "cc": msg.get_addresses('cc'), 318 | "x-mailer": msg.get('x-mailer', ''), 319 | "date": msg.get('date', ''), 320 | "message-id": 
msg.get('message-id', ''), 321 | "user-agent": msg.get('user-agent',''), 322 | "x-virus-scanned": msg.get('x-virus-scanned',''), 323 | "return-path": msg.get('return-path','') 324 | } 325 | 326 | received = msg.get('received','') 327 | if received: 328 | ip = re.findall( r'[0-9]+(?:\.[0-9]+){3}', received ) 329 | if ip: 330 | mailheaders["ip"] = ip 331 | return mailheaders 332 | else: 333 | return None 334 | 335 | def main(): 336 | global args 337 | global config 338 | global es 339 | global verbose 340 | global rcode 341 | 342 | parser = argparse.ArgumentParser( 343 | description = 'Unpack MIME attachments from a file and check them against virustotal.com') 344 | parser.add_argument('-d', '--directory', 345 | dest = 'directory', 346 | help = 'directory where files will be extracted (default: /tmp) %%d,%%m,%%y can use used for dynamic names', 347 | metavar = 'DIRECTORY') 348 | parser.add_argument('-v', '--verbose', 349 | action = 'store_false', 350 | dest = 'verbose', 351 | help = 'verbose output', 352 | default = False) 353 | parser.add_argument('-c', '--config', 354 | dest = 'config_file', 355 | help = 'configuration file (default: /etc/mime2vt.conf)', 356 | metavar = 'CONFIG') 357 | parser.add_argument('-l', '--log', 358 | dest = 'dump_file', 359 | help = 'mail dump file (default /tmp/message.dump)', 360 | metavar = 'DUMPFILE') 361 | args = parser.parse_args() 362 | 363 | # Default values 364 | if not args.directory: 365 | args.directory = '/tmp' 366 | if not args.config_file: 367 | args.config_file = '/etc/mime2vt.conf' 368 | 369 | #writeLog('DEBUG: config_file = %s' % args.config_file) 370 | 371 | try: 372 | c = ConfigParser.ConfigParser() 373 | c.read(args.config_file) 374 | config['apiKey'] = c.get('virustotal', 'apikey') 375 | excludetypes = c.get('virustotal', 'exclude').split(',') 376 | # Elasticsearch config 377 | config['esServer'] = c.get('elasticsearch', 'server') 378 | config['esIndex'] = c.get('elasticsearch', 'index') 379 | config['dbPath'] = 
c.get('database', 'dbpath') 380 | except OSError as e: 381 | writeLog('Cannot read config file %s: %s' % (args.config_file, e.errno)) 382 | exit 383 | 384 | if config['esServer']: 385 | logging.basicConfig() 386 | es = Elasticsearch([config['esServer']]) 387 | 388 | # Create the SQLite DB 389 | dbCreate() 390 | 391 | # Read the mail flow from STDIN 392 | data = "" . join(sys.stdin) 393 | msg = email.message_from_string(data) 394 | 395 | if usePyzMail: 396 | mailheaders = parseMailheaders(data) 397 | 398 | if args.dump_file: 399 | try: 400 | fp = open(args.dump_file, 'a') 401 | except OSError as e: 402 | writeLog('Cannot dump message to %s: %s' % (args.dump_file, e.errno)) 403 | fp.write(data) 404 | fp.close() 405 | 406 | # Process MIME parts 407 | for part in msg.walk(): 408 | contenttype = part.get_content_type() 409 | filename = part.get_param('name') 410 | 411 | # Hack: Search for a .js extension 412 | try: 413 | fname, fextension = os.path.splitext(filename) 414 | except: 415 | fextension = "none" 416 | 417 | data = part.get_payload(None, True) 418 | if data: 419 | md5 = hashlib.md5(data).hexdigest() 420 | #if dbMD5Exists(md5): 421 | # writeLog("Skipping existing MD5 %s" % md5) 422 | # continue 423 | 424 | # New: Extract URLS 425 | if contenttype in [ 'text/html', 'text/plain' ]: 426 | urls = [] 427 | # Source: https://gist.github.com/uogbuji/705383 428 | GRUBER_URLINTEXT_PAT = re.compile(ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019]))') 429 | lines = data.split('\n') 430 | for line in lines: 431 | try: 432 | #urls.append(re.search("(?Phttps?://[^\s]+)", word).group("url")) 433 | for url in GRUBER_URLINTEXT_PAT.findall(line): 434 | if url[0]: 435 | urls.append(url[0]) 436 | except: 437 | pass 438 | fp = open('/var/tmp/urls.log', 'a') 439 | for url in urls: 440 | fp.write("%s\n" % url) 441 | 
fp.close() 442 | 443 | # Process only interesting files 444 | # if contenttype not in ('text/plain', 'text/html', 'image/jpeg', 'image/gif', 'image/png'): 445 | if contenttype not in excludetypes or fextension == '.js': 446 | if not filename: 447 | filename = md5 448 | mime_ext = mimetypes.guess_extension(contenttype) 449 | if not mime_ext: 450 | # Use a generic bag-of-bits extension 451 | mime_ext = '.bin' 452 | f_name, f_ext = os.path.splitext(filename) 453 | if not f_ext: 454 | filename += mime_ext 455 | 456 | writeLog('Found interesting file: %s (%s)' % (filename, contenttype)) 457 | 458 | fp = open(os.path.join(generateDumpDirectory(args.directory), filename), 'wb') 459 | fp.write(data) 460 | fp.close() 461 | 462 | if contenttype in ['application/zip', 'application/x-zip-compressed']: 463 | # Process ZIP archive 464 | writeLog('Processing zip archive: %s' % filename) 465 | processZipFile(os.path.join(generateDumpDirectory(args.directory), filename)) 466 | else: 467 | # Check VT score 468 | vt = VirusTotalPublicApi(config['apiKey']) 469 | response = vt.get_file_report(md5) 470 | 471 | # Save results to Elasticsearch 472 | if config['esServer']: 473 | try: 474 | response['@timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%S+01:00") 475 | response['filename'] = filename 476 | if usePyzMail: 477 | response['mail'] = mailheaders 478 | res = es.index(index=config['esIndex'], doc_type="VTresult", body=json.dumps(response)) 479 | except: 480 | writeLog("Cannot index to Elasticsearch") 481 | 482 | # DEBUG 483 | fp = open('/tmp/vt.debug', 'a') 484 | fp.write(json.dumps(response, sort_keys=False, indent=4)) 485 | fp.close() 486 | 487 | vtScore = "0/0" 488 | if response['response_code'] == 200: 489 | if response['results']['response_code']: 490 | positives = response['results']['positives'] 491 | total = response['results']['total'] 492 | scan_date = response['results']['scan_date'] 493 | vtScore = str(positives) + "/" + str(total) 494 | if positives > 0: 495 | rcode = 1 496 | 
497 | writeLog('File: %s (%s) Score: %s Scanned: %s (%s)' % 498 | (filename, md5, vtScore, scan_date, timeDiff(scan_date))) 499 | else: 500 | # Do not resubmit existing MD5 501 | if !dbMD5Exists(md5): 502 | writeLog('File: %s (%s) not found, submited for scanning' % 503 | (filename, md5)) 504 | submit2vt(os.path.join(generateDumpDirectory(args.directory), filename)) 505 | dbAddMD5(md5, filename, vtScore) 506 | else: 507 | writeLog('VT Error: %s' % response['error']) 508 | 509 | # Analyze OLE documents if API is available 510 | parseOLEDocument(os.path.join(generateDumpDirectory(args.directory), filename)) 511 | 512 | if __name__ == '__main__': 513 | main() 514 | if rcode: 515 | writeLog("Mail contains malicious content!") 516 | sys.exit(rcode) 517 | --------------------------------------------------------------------------------