├── DridexUrlDecoder.py ├── README.md ├── olevba-0.8b.py ├── plugin_base64.py └── plugin_dridex.py /DridexUrlDecoder.py: -------------------------------------------------------------------------------- 1 | # Written by @JamesHabben 2 | # https://github.com/JamesHabben/MalwareStuff 3 | 4 | import sys 5 | 6 | def DridexUrlDecode (inputText) : 7 | work = inputText[4:-4] 8 | strKeyEnc = StripCharsWithZero(work[(len(work) / 2) - 2: (len(work) / 2)]) 9 | strKeySize = StripCharsWithZero(work[(len(work) / 2): (len(work) / 2) + 2]) 10 | nCharSize = strKeySize - strKeyEnc 11 | work = work[:(len(work) / 2) - 2] + work[(len(work) / 2) + 2:] 12 | strKeyEnc2 = StripChars(work[(len(work) / 2) - (nCharSize/2): (len(work) / 2) + (nCharSize/2)]) 13 | work = work[:(len(work) / 2) - (nCharSize/2)] + work[(len(work) / 2) + (nCharSize/2):] 14 | work_split = [work[i:i+nCharSize] for i in range(0, len(work), nCharSize)] 15 | for group in work_split: 16 | sys.stdout.write(chr(StripChars(group)/strKeyEnc2)) 17 | 18 | def StripChars (input) : 19 | result = '' 20 | for c in input : 21 | if c.isdigit() : 22 | result += c 23 | return int(result) 24 | 25 | def StripCharsWithZero (input) : 26 | result = '' 27 | for c in input : 28 | if c.isdigit() : 29 | result += c 30 | else: 31 | result += '0' 32 | return int(result) 33 | 34 | 35 | DridexUrlDecode("C3iY1epSRGe6q8g15xStVesdG717MAlg2H4hmV1vkL6Glnf0cknj") 36 | DridexUrlDecode("HLIY3Nf3z2k8jD37h1n2OM3N712DGQ3c5M841RZ8C5e6P1C50C4ym1oF504WyV182p4mJ16cK9Z61l47h2dU1rVB5V681sFY728i16H3E2Qm1fn47y2cgAo156j8T1s600hukKO1568X1xE4Z7d2q17jvcwgk816Yz32o9Q216Mpr0B01vcwg856a17b9j2zAmWf1536B1t7d92rI1FZ5E36Pu1jl504Z34tm2R43i55Lg2F3eLE3T28lLX1D504348Goe8Gbdp37w443ADy36X0h14g7Wb2G3u584kEG332Ut8ws3wO584pzSTf") 37 | DridexUrlDecode("YNPH1W47E211z3P6142cM4115K2J1696CURf1712N1OCJwc0w6Z16840Z1r600W16Z3273k6SR16Bf161Q92a016Vr16V1pc") 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MalwareStuff 2 | 3 | # Dridex URL Decoder 4 | -------------------------------------------------------------------------------- /olevba-0.8b.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | olevba.py 4 | 5 | olevba is a script to parse OLE and OpenXML files such as MS Office documents 6 | (e.g. Word, Excel), to extract VBA Macro code in clear text. 7 | 8 | Supported formats: 9 | - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) 10 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) 11 | - PowerPoint 2007+ (.pptm, .ppsm) 12 | 13 | Author: Philippe Lagadec - http://www.decalage.info 14 | License: BSD, see source code or documentation 15 | 16 | olevba is part of the python-oletools package: 17 | http://www.decalage.info/python/oletools 18 | 19 | olevba is based on source code from officeparser by John William Davison 20 | https://github.com/unixfreak0037/officeparser 21 | """ 22 | 23 | #=== LICENSE ================================================================== 24 | 25 | # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info) 26 | # All rights reserved. 27 | # 28 | # Redistribution and use in source and binary forms, with or without modification, 29 | # are permitted provided that the following conditions are met: 30 | # 31 | # * Redistributions of source code must retain the above copyright notice, this 32 | # list of conditions and the following disclaimer. 33 | # * Redistributions in binary form must reproduce the above copyright notice, 34 | # this list of conditions and the following disclaimer in the documentation 35 | # and/or other materials provided with the distribution. 36 | # 37 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 38 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 39 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 40 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 41 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 42 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 43 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 44 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 45 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | 48 | 49 | # olevba contains modified source code from the officeparser project, published 50 | # under the following MIT License (MIT): 51 | # 52 | # officeparser is copyright (c) 2014 John William Davison 53 | # 54 | # Permission is hereby granted, free of charge, to any person obtaining a copy 55 | # of this software and associated documentation files (the "Software"), to deal 56 | # in the Software without restriction, including without limitation the rights 57 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 58 | # copies of the Software, and to permit persons to whom the Software is 59 | # furnished to do so, subject to the following conditions: 60 | # 61 | # The above copyright notice and this permission notice shall be included in all 62 | # copies or substantial portions of the Software. 63 | # 64 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 65 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 66 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 67 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 68 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 69 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 70 | # SOFTWARE. 71 | 72 | #------------------------------------------------------------------------------ 73 | # CHANGELOG: 74 | # 2014-08-05 v0.01 PL: - first version based on officeparser code 75 | # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser 76 | # 2014-08-15 PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record 77 | # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats 78 | # and to find the VBA project root anywhere in the file 79 | # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL 80 | # 2014-12-05 v0.05 PL: - refactored most functions into a class, new API 81 | # - added detect_vba_macros 82 | # 2014-12-10 v0.06 PL: - hide first lines with VB attributes 83 | # - detect auto-executable macros 84 | # - ignore empty macros 85 | # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive 86 | # 2014-12-15 v0.08 PL: - improved display for empty macros 87 | # - added pattern extraction 88 | # 2014-12-25 v0.09 PL: - added suspicious keywords detection 89 | # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file 90 | # - uses xglob to scan several files with wildcards 91 | # - option -r to recurse subdirectories 92 | # - option -z to scan files in password-protected zips 93 | # 2015-01-02 v0.11 PL: - improved filter_vba to detect colons 94 | # 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns 95 | # - process_file: improved display, shows container file 96 | # - improved list of executable file extensions 97 | # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display 98 | # 2015-01-08 v0.14 PL: - added hex strings detection and decoding 99 | # - fixed issue #2, decoding VBA stream names using 100 | # specified codepage and unicode stream names 101 | # 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d 102 | # 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text") 103 | # - added several suspicious keywords 104 | # - added option -i to analyze VBA source code directly 105 | # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions 106 | # - added scan_vba to run all detection algorithms 107 | # - decoded hex strings are now also scanned + reversed 108 | # 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules 109 | # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex 110 | # strings and StrReverse 111 | 112 | ####################### 113 | # Base64 detection and decode added by James Habben 114 | ####################### 115 | 116 | __version__ = '0.19' 117 | 118 | #------------------------------------------------------------------------------ 119 | # TODO: 120 | # + do not use logging, but a provided logger (null logger by default) 121 | # + setup logging (common with other oletools) 122 | 123 | # TODO later: 124 | # + do not show hex strings by default (add option --hex) 125 | # + performance improvement: instead of searching each keyword separately, 126 | # first split vba code into a list of words (per line), then check each 127 | # word against a dict. (or put vba words into a set/dict?) 128 | # + for regex, maybe combine them into a single re with named groups? 129 | # + add Yara support, include sample rules? plugins like balbuzard? 130 | # + add balbuzard support 131 | # + output to file (replace print by file.write, sys.stdout by default) 132 | # + look for VBA in embedded documents (e.g. Excel in Word) 133 | # + support SRP streams (see Lenny's article + links and sample) 134 | # - python 3.x support 135 | # - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic? 136 | # - check VBA macros in Visio, Access, Project, etc 137 | # - extract_macros: convert to a class, split long function into smaller methods 138 | # - extract_macros: read bytes from stream file objects instead of strings 139 | # - extract_macros: use combined struct.unpack instead of many calls 140 | 141 | #------------------------------------------------------------------------------ 142 | # REFERENCES: 143 | # - [MS-OVBA]: Microsoft Office VBA File Format Structure 144 | # http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx 145 | # - officeparser: https://github.com/unixfreak0037/officeparser 146 | 147 | 148 | #--- IMPORTS ------------------------------------------------------------------ 149 | 150 | import sys, logging 151 | import struct 152 | import cStringIO 153 | import math 154 | import zipfile 155 | import re 156 | import optparse 157 | import os.path 158 | import binascii 159 | import base64 160 | 161 | import thirdparty.olefile as olefile 162 | from thirdparty.prettytable import prettytable 163 | from thirdparty.xglob import xglob 164 | 165 | #--- CONSTANTS ---------------------------------------------------------------- 166 | 167 | TYPE_OLE = 'OLE' 168 | TYPE_OpenXML = 'OpenXML' 169 | 170 | MODULE_EXTENSION = "bas" 171 | CLASS_EXTENSION = "cls" 172 | FORM_EXTENSION = "frm" 173 | 174 | # Keywords to detect auto-executable macros 175 | AUTOEXEC_KEYWORDS = { 176 | # MS Word: 177 | 'Runs when the Word document is opened': 178 | ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'), 179 | 'Runs when the Word document is closed': 180 | ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'), 181 | 'Runs when the Word document is modified': 182 | ('DocumentChange',), 183 | 'Runs when a new Word document is created': 184 | ('AutoNew', 'Document_New', 'NewDocument'), 185 | 186 | # MS Excel: 187 | 'Runs when the Excel Workbook is opened': 188 | ('Auto_Open', 'Workbook_Open'), 189 | 'Runs when the Excel Workbook is closed': 190 | ('Auto_Close', 'Workbook_Close'), 191 | 192 | #TODO: full list in MS specs?? 193 | } 194 | 195 | # Suspicious Keywords that may be used by malware 196 | # See VBA language reference: http://msdn.microsoft.com/en-us/library/office/jj692818%28v=office.15%29.aspx 197 | SUSPICIOUS_KEYWORDS = { 198 | #TODO: use regex to support variable whitespaces 199 | 'May read system environment variables': 200 | ('Environ',), 201 | 'May open a file': 202 | ('Open',), 203 | 'May write to a file (if combined with Open)': 204 | #TODO: regex to find Open+Write on same line 205 | ('Write', 'Put', 'Output', 'Print #'), 206 | 'May read or write a binary file (if combined with Open)': 207 | #TODO: regex to find Open+Binary on same line 208 | ('Binary',), 209 | 'May copy a file': 210 | ('FileCopy', 'CopyFile'), 211 | #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx 212 | #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx 213 | 'May create a text file': 214 | ('CreateTextFile','ADODB.Stream', 'WriteText', 'SaveToFile'), 215 | #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx 216 | #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6 217 | 'May run an executable file or a system command': 218 | ('Shell', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus', 219 | 'vbMinimizedNoFocus', 'WScript.Shell', 'Run'), 220 | #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx 221 | #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6 222 | 'May hide the application': 223 | ('Application.Visible', 'ShowWindow', 'SW_HIDE'), 224 | 'May create a directory': 225 | ('MkDir',), 226 | 'May save the current workbook': 227 | ('ActiveWorkbook.SaveAs',), 228 | 'May change which directory contains files to open at startup': 229 | #TODO: confirm the actual effect 230 | ('Application.AltStartupPath',), 231 | 'May create an OLE object': 232 | ('CreateObject',), 233 | 'May run an application (if combined with CreateObject)': 234 | ('Shell.Application',), 235 | 'May enumerate application windows (if combined with Shell.Application object)': 236 | ('Windows', 'FindWindow'), 237 | 'May run code from a DLL': 238 | #TODO: regex to find declare+lib on same line 239 | ('Lib',), 240 | 'May download files from the Internet': 241 | #TODO: regex to find urlmon+URLDownloadToFileA on same line 242 | ('URLDownloadToFileA',), 243 | 'May control another application by simulating user keystrokes': 244 | ('SendKeys', 'AppActivate'), 245 | #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx 246 | 'May attempt to obfuscate malicious function calls': 247 | ('CallByName',), 248 | #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx 249 | 'May attempt to obfuscate specific strings': 250 | ('Chr', 'ChrB', 'ChrW', 'StrReverse'), 251 | #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx 252 | } 253 | 254 | # Patterns to be extracted (IP addresses, URLs, etc) 255 | # From patterns.py in balbuzard 256 | RE_PATTERNS = ( 257 | #TODO: check if this regex matches URLs with an IP address (various forms) 258 | ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&%\$#\=~])*[^\.\,\)\(\s]')), 259 | ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")), 260 | ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')), 261 | # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?> bit_count 290 | offset_mask = ~length_mask 291 | maximum_length = (0xFFFF >> bit_count) + 3 292 | return length_mask, offset_mask, bit_count, maximum_length 293 | 294 | 295 | def decompress_stream (compressed_container): 296 | """ 297 | Decompress a stream according to MS-OVBA section 2.4.1 298 | 299 | compressed_container: string compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm 300 | return the decompressed container as a string (bytes) 301 | """ 302 | # 2.4.1.2 State Variables 303 | 304 | # The following state is maintained for the CompressedContainer (section 2.4.1.1.1): 305 | # CompressedRecordEnd: The location of the byte after the last byte in the CompressedContainer (section 2.4.1.1.1). 306 | # CompressedCurrent: The location of the next byte in the CompressedContainer (section 2.4.1.1.1) to be read by 307 | # decompression or to be written by compression. 308 | 309 | # The following state is maintained for the current CompressedChunk (section 2.4.1.1.4): 310 | # CompressedChunkStart: The location of the first byte of the CompressedChunk (section 2.4.1.1.4) within the 311 | # CompressedContainer (section 2.4.1.1.1). 312 | 313 | # The following state is maintained for a DecompressedBuffer (section 2.4.1.1.2): 314 | # DecompressedCurrent: The location of the next byte in the DecompressedBuffer (section 2.4.1.1.2) to be written by 315 | # decompression or to be read by compression. 316 | # DecompressedBufferEnd: The location of the byte after the last byte in the DecompressedBuffer (section 2.4.1.1.2). 317 | 318 | # The following state is maintained for the current DecompressedChunk (section 2.4.1.1.3): 319 | # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the 320 | # DecompressedBuffer (section 2.4.1.1.2). 321 | 322 | decompressed_container = '' # result 323 | compressed_current = 0 324 | 325 | sig_byte = ord(compressed_container[compressed_current]) 326 | if sig_byte != 0x01: 327 | raise ValueError('invalid signature byte {0:02X}'.format(sig_byte)) 328 | 329 | compressed_current += 1 330 | 331 | #NOTE: the definition of CompressedRecordEnd is ambiguous. Here we assume that 332 | # CompressedRecordEnd = len(compressed_container) 333 | while compressed_current < len(compressed_container): 334 | # 2.4.1.1.5 335 | compressed_chunk_start = compressed_current 336 | # chunk header = first 16 bits 337 | compressed_chunk_header = struct.unpack("> 12) & 0x07 342 | if chunk_signature != 0b011: 343 | raise ValueError('Invalid CompressedChunkSignature in VBA compressed stream') 344 | # chunk flag = next bit - 1 == compressed, 0 == uncompressed 345 | chunk_flag = (compressed_chunk_header >> 15) & 0x01 346 | logging.debug("chunk size = {0}, compressed flag = {1}".format(chunk_size, chunk_flag)) 347 | 348 | #MS-OVBA 2.4.1.3.12: the maximum size of a chunk including its header is 4098 bytes (header 2 + data 4096) 349 | # The minimum size is 3 bytes 350 | # NOTE: there seems to be a typo in MS-OVBA, the check should be with 4098, not 4095 (which is the max value 351 | # in chunk header before adding 3. 352 | # Also the first test is not useful since a 12 bits value cannot be larger than 4095. 353 | if chunk_flag == 1 and chunk_size > 4098: 354 | raise ValueError('CompressedChunkSize > 4098 but CompressedChunkFlag == 1') 355 | if chunk_flag == 0 and chunk_size != 4098: 356 | raise ValueError('CompressedChunkSize != 4098 but CompressedChunkFlag == 0') 357 | 358 | # check if chunk_size goes beyond the compressed data, instead of silently cutting it: 359 | #TODO: raise an exception? 360 | if compressed_chunk_start + chunk_size > len(compressed_container): 361 | logging.warning('Chunk size is larger than remaining compressed data') 362 | compressed_end = min([len(compressed_container), compressed_chunk_start + chunk_size]) 363 | # read after chunk header: 364 | compressed_current = compressed_chunk_start + 2 365 | 366 | if chunk_flag == 0: 367 | # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk 368 | # uncompressed chunk: read the next 4096 bytes as-is 369 | #TODO: check if there are at least 4096 bytes left 370 | decompressed_container += compressed_container[compressed_current:compressed_current + 4096] 371 | compressed_current += 4096 372 | else: 373 | # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk 374 | # compressed chunk 375 | decompressed_chunk_start = len(decompressed_container) 376 | while compressed_current < compressed_end: 377 | # MS-OVBA 2.4.1.3.4 Decompressing a TokenSequence 378 | # logging.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end)) 379 | # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or 380 | # copy tokens (reference to a previous literal token) 381 | flag_byte = ord(compressed_container[compressed_current]) 382 | compressed_current += 1 383 | for bit_index in xrange(0, 8): 384 | # logging.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end)) 385 | if compressed_current >= compressed_end: 386 | break 387 | # MS-OVBA 2.4.1.3.5 Decompressing a Token 388 | # MS-OVBA 2.4.1.3.17 Extract FlagBit 389 | flag_bit = (flag_byte >> bit_index) & 1 390 | #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit)) 391 | if flag_bit == 0: # LiteralToken 392 | # copy one byte directly to output 393 | decompressed_container += compressed_container[compressed_current] 394 | compressed_current += 1 395 | else: # CopyToken 396 | # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken 397 | copy_token = struct.unpack("> temp2) + 1 405 | #logging.debug('offset=%d length=%d' % (offset, length)) 406 | copy_source = len(decompressed_container) - offset 407 | for index in xrange(copy_source, copy_source + length): 408 | decompressed_container += decompressed_container[index] 409 | compressed_current += 2 410 | return decompressed_container 411 | 412 | 413 | def _extract_vba (ole, vba_root, project_path, dir_path): 414 | """ 415 | Extract VBA macros from an OleFileIO object. 416 | Internal function, do not call directly. 417 | 418 | vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream 419 | vba_project: path to the PROJECT stream 420 | This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream 421 | """ 422 | # Open the PROJECT stream: 423 | project = ole.openstream(project_path) 424 | 425 | # sample content of the PROJECT stream: 426 | 427 | ## ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}" 428 | ## Document=ThisDocument/&H00000000 429 | ## Module=NewMacros 430 | ## Name="Project" 431 | ## HelpContextID="0" 432 | ## VersionCompatible32="393222000" 433 | ## CMG="F1F301E705E705E705E705" 434 | ## DPB="8F8D7FE3831F2020202020" 435 | ## GC="2D2FDD81E51EE61EE6E1" 436 | ## 437 | ## [Host Extender Info] 438 | ## &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000 439 | ## &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000 440 | ## 441 | ## [Workspace] 442 | ## ThisDocument=22, 29, 339, 477, Z 443 | ## NewMacros=-4, 42, 832, 510, C 444 | 445 | code_modules = {} 446 | 447 | for line in project: 448 | line = line.strip() 449 | if '=' in line: 450 | # split line at the 1st equal sign: 451 | name, value = line.split('=', 1) 452 | # looking for code modules 453 | # add the code module as a key in the dictionary 454 | # the value will be the extension needed later 455 | # The value is converted to lowercase, to allow case-insensitive matching (issue #3) 456 | value = value.lower() 457 | if name == 'Document': 458 | # split value at the 1st slash, keep 1st part: 459 | value = value.split('/', 1)[0] 460 | code_modules[value] = CLASS_EXTENSION 461 | elif name == 'Module': 462 | code_modules[value] = MODULE_EXTENSION 463 | elif name == 'Class': 464 | code_modules[value] = CLASS_EXTENSION 465 | elif name == 'BaseClass': 466 | code_modules[value] = FORM_EXTENSION 467 | 468 | # read data from dir stream (compressed) 469 | dir_compressed = ole.openstream(dir_path).read() 470 | 471 | def check_value(name, expected, value): 472 | if expected != value: 473 | logging.error("invalid value for {0} expected {1:04X} got {2:04X}".format(name, expected, value)) 474 | 475 | dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed)) 476 | 477 | # PROJECTSYSKIND Record 478 | PROJECTSYSKIND_Id = struct.unpack(" 128: 522 | logging.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName)) 523 | PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName) 524 | 525 | # PROJECTDOCSTRING Record 526 | PROJECTDOCSTRING_Id = struct.unpack(" 2000: 530 | logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) 531 | PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) 532 | PROJECTDOCSTRING_Reserved = struct.unpack(" 260: 544 | logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) 545 | PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) 546 | PROJECTHELPFILEPATH_Reserved = struct.unpack(" 1015: 583 | logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) 584 | PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) 585 | PROJECTCONSTANTS_Reserved = struct.unpack(" 0: 783 | code_data = decompress_stream(code_data) 784 | # case-insensitive search in the code_modules dict to find the file extension: 785 | filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin') 786 | filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext) 787 | #TODO: also yield the codepage so that callers can decode it properly 788 | yield (code_path, filename, code_data) 789 | # print '-'*79 790 | # print filename 791 | # print '' 792 | # print code_data 793 | # print '' 794 | logging.debug('extracted file {0}'.format(filename)) 795 | else: 796 | logging.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName)) 797 | return 798 | 799 | 800 | def filter_vba(vba_code): 801 | """ 802 | Filter VBA source code to remove the first lines starting with "Attribute VB_", 803 | which are automatically added by MS Office and not displayed in the VBA Editor. 804 | This should only be used when displaying source code for human analysis. 805 | 806 | Note: lines are not filtered if they contain a colon, because it could be 807 | used to hide malicious instructions. 808 | 809 | :param vba_code: str, VBA source code 810 | :return: str, filtered VBA source code 811 | """ 812 | vba_lines = vba_code.splitlines() 813 | start = 0 814 | for line in vba_lines: 815 | if line.startswith("Attribute VB_") and not ':' in line: 816 | start += 1 817 | else: 818 | break 819 | #TODO: also remove empty lines? 820 | vba = '\n'.join(vba_lines[start:]) 821 | return vba 822 | 823 | 824 | def detect_autoexec(vba_code): 825 | """ 826 | Detect if the VBA code contains keywords corresponding to macros running 827 | automatically when triggered by specific actions (e.g. when a document is 828 | opened or closed). 829 | 830 | :param vba_code: str, VBA source code 831 | :return: list of str tuples (keyword, description) 832 | """ 833 | #TODO: merge code with detect_suspicious 834 | # case-insensitive search 835 | #vba_code = vba_code.lower() 836 | results = [] 837 | for description, keywords in AUTOEXEC_KEYWORDS.items(): 838 | for keyword in keywords: 839 | #TODO: if keyword is already a compiled regex, use it as-is 840 | # search using regex to detect word boundaries: 841 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): 842 | #if keyword.lower() in vba_code: 843 | results.append((keyword, description)) 844 | return results 845 | 846 | 847 | def detect_suspicious(vba_code): 848 | """ 849 | Detect if the VBA code contains suspicious keywords corresponding to 850 | potential malware behaviour. 851 | 852 | :param vba_code: str, VBA source code 853 | :return: list of str tuples (keyword, description) 854 | """ 855 | # case-insensitive search 856 | #vba_code = vba_code.lower() 857 | results = [] 858 | for description, keywords in SUSPICIOUS_KEYWORDS.items(): 859 | for keyword in keywords: 860 | # search using regex to detect word boundaries: 861 | if re.search(r'(?i)\b'+keyword+r'\b', vba_code): 862 | #if keyword.lower() in vba_code: 863 | results.append((keyword, description)) 864 | return results 865 | 866 | 867 | def detect_patterns(vba_code): 868 | """ 869 | Detect if the VBA code contains specific patterns such as IP addresses, 870 | URLs, e-mail addresses, executable file names, etc. 871 | 872 | :param vba_code: str, VBA source code 873 | :return: list of str tuples (pattern type, value) 874 | """ 875 | results = [] 876 | found = set() 877 | for pattern_type, pattern_re in RE_PATTERNS: 878 | for match in pattern_re.finditer(vba_code): 879 | value = match.group() 880 | if value not in found: 881 | results.append((pattern_type, value)) 882 | found.add(value) 883 | return results 884 | 885 | 886 | def detect_hex_strings(vba_code): 887 | """ 888 | Detect if the VBA code contains strings encoded in hexadecimal. 889 | 890 | :param vba_code: str, VBA source code 891 | :return: list of str tuples (encoded string, decoded string) 892 | """ 893 | results = [] 894 | found = set() 895 | for match in re_hex_string.finditer(vba_code): 896 | value = match.group() 897 | if value not in found: 898 | decoded = binascii.unhexlify(value) 899 | results.append((value, decoded)) 900 | found.add(value) 901 | return results 902 | 903 | def detect_base64_strings(vba_code): 904 | """ 905 | Detect if the VBA code contains strings encoded in base64. 906 | 907 | :param vba_code: str, VBA source code 908 | :return: list of str tuples (encoded string, decoded string) 909 | """ 910 | results = [] 911 | found = set() 912 | for match in re_base64_string.finditer(vba_code): 913 | value = match.group() 914 | if value not in found: 915 | decoded = base64.b64decode(value) 916 | results.append((value, decoded)) 917 | found.add(value) 918 | return results 919 | 920 | def scan_vba(vba_code): 921 | """ 922 | Analyze the provided VBA code to detect suspicious keywords, 923 | auto-executable macros, IOC patterns, obfuscation patterns 924 | such as hex-encoded strings. 925 | 926 | :param vba_code: str, VBA source code to be analyzed 927 | :return: list of tuples (type, keyword, description) 928 | (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String') 929 | """ 930 | # First, detect and extract hex-encoded strings: 931 | hex_strings = detect_hex_strings(vba_code) 932 | base64_strings = detect_base64_strings(vba_code) 933 | # detect if the code contains StrReverse: 934 | if 'strreverse' in vba_code.lower(): strreverse = True 935 | else: strreverse = False 936 | # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords: 937 | for encoded, decoded in hex_strings: 938 | vba_code += '\n'+decoded 939 | # if the code contains "StrReverse", also append the hex strings in reverse order: 940 | if strreverse: 941 | # StrReverse after hex decoding: 942 | vba_code += '\n'+decoded[::-1] 943 | # StrReverse before hex decoding: 944 | vba_code += '\n'+binascii.unhexlify(encoded[::-1]) 945 | #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/ 946 | #TODO: also append the full code reversed if StrReverse? (risk of false positives?) 947 | autoexec_keywords = detect_autoexec(vba_code) 948 | suspicious_keywords = detect_suspicious(vba_code) 949 | # If hex-encoded strings were discovered, add an item to suspicious keywords: 950 | if hex_strings: 951 | suspicious_keywords.append(('Hex Strings', 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)')) 952 | if base64_strings: 953 | suspicious_keywords.append(('Base64 Strings', 'Base64-encoded strings were detected, may be used to obfuscate strings')) 954 | patterns = detect_patterns(vba_code) 955 | results = [] 956 | for keyword, description in autoexec_keywords: 957 | results.append(('AutoExec', keyword, description)) 958 | for keyword, description in suspicious_keywords: 959 | results.append(('Suspicious', keyword, description)) 960 | for pattern_type, value in patterns: 961 | results.append(('IOC', value, pattern_type)) 962 | # Only if option --hex: 963 | # for encoded, decoded in hex_strings: 964 | # results.append(('Hex String', repr(decoded), encoded)) 965 | for encoded, decoded in base64_strings: 966 | results.append(('Base64 String', repr(decoded), encoded)) 967 | return results 968 | 969 | 970 | #=== CLASSES ================================================================= 971 | 972 | class VBA_Parser(object): 973 | """ 974 | Class to parse MS Office files, to detect VBA macros and extract VBA source code 975 | Supported file formats: 976 | - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm) 977 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb) 978 | - PowerPoint 2007+ (.pptm, .ppsm) 979 | """ 980 | 981 | def __init__(self, filename, data=None): 982 | """ 983 | Constructor for VBA_Parser 984 | 985 | :param _file: path of file to parse, file-like object or file content 986 | :param filename: actual filename if _file is a file-like object or file content 987 | in a bytes string 988 | """ 989 | #TODO: filename should be mandatory, optional data is a string or file-like object 990 | #TODO: also support olefile and zipfile as input 991 | if data is None: 992 | # open file from disk: 993 | _file = filename 994 | else: 995 | # file already read in memory, make it a file-like object for zipfile: 996 | _file = cStringIO.StringIO(data) 997 | #self.file = _file 998 | self.ole_file = None 999 | self.ole_subfiles = [] 1000 | self.filename = filename 1001 | self.type = None 1002 | self.vba_projects = None 1003 | # if filename is None: 1004 | # if isinstance(_file, basestring): 1005 | # if len(_file) < olefile.MINIMAL_OLEFILE_SIZE: 1006 | # self.filename = _file 1007 | # else: 1008 | # self.filename = '' 1009 | # else: 1010 | # self.filename = '' 1011 | if olefile.isOleFile(_file): 1012 | # This looks like an OLE file 1013 | logging.info('Parsing OLE file %s' % self.filename) 1014 | # Open and parse the OLE file, using unicode for path names: 1015 | self.ole_file = olefile.OleFileIO(_file, path_encoding=None) 1016 | self.type = TYPE_OLE 1017 | #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet 1018 | elif zipfile.is_zipfile(_file): 1019 | # This looks like a zip file, need to look for vbaProject.bin inside 1020 | # It can be any OLE file inside the archive 1021 | #...because vbaProject.bin can be renamed: 1022 | # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18 1023 | logging.info('Opening ZIP/OpenXML file %s' % self.filename) 1024 | self.type = TYPE_OpenXML 1025 | z = zipfile.ZipFile(_file) 1026 | #TODO: check if this is actually an OpenXML file 1027 | # check each file within the zip if it is an OLE file, by reading its magic: 1028 | for subfile in z.namelist(): 1029 | magic = z.open(subfile).read(len(olefile.MAGIC)) 1030 | if magic == olefile.MAGIC: 1031 | logging.debug('Opening OLE file %s within zip' % subfile) 1032 | ole_data = z.open(subfile).read() 1033 | try: 1034 | self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data)) 1035 | except: 1036 | logging.debug('%s is not a valid OLE file' % subfile) 1037 | continue 1038 | z.close() 1039 | else: 1040 | msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename 1041 | logging.error(msg) 1042 | raise TypeError(msg) 1043 | 1044 | def find_vba_projects (self): 1045 | """ 1046 | Finds all the VBA projects stored in an OLE file. 1047 | 1048 | Return None if the file is not OLE but OpenXML. 1049 | Return a list of tuples (vba_root, project_path, dir_path) for each VBA project. 1050 | vba_root is the path of the root OLE storage containing the VBA project, 1051 | including a trailing slash unless it is the root of the OLE file. 1052 | project_path is the path of the OLE stream named "PROJECT" within the VBA project. 1053 | dir_path is the path of the OLE stream named "VBA/dir" within the VBA project. 1054 | 1055 | If this function returns an empty list for one of the supported formats 1056 | (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the 1057 | file does not contain VBA macros. 1058 | 1059 | :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path) 1060 | for each VBA project found if OLE file 1061 | """ 1062 | # if the file is not OLE but OpenXML, return None: 1063 | if self.ole_file is None: 1064 | return None 1065 | 1066 | # if this method has already been called, return previous result: 1067 | if self.vba_projects is not None: 1068 | return self.vba_projects 1069 | 1070 | # Find the VBA project root (different in MS Word, Excel, etc): 1071 | # - Word 97-2003: Macros 1072 | # - Excel 97-2003: _VBA_PROJECT_CUR 1073 | # - PowerPoint 97-2003: not supported yet (different file structure) 1074 | # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin. 1075 | # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word 1076 | # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word 1077 | # - Visio 2007: not supported yet (different file structure) 1078 | 1079 | # According to MS-OVBA section 2.2.1: 1080 | # - the VBA project root storage MUST contain a VBA storage and a PROJECT stream 1081 | # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream 1082 | # - all names are case-insensitive 1083 | 1084 | # start with an empty list: 1085 | self.vba_projects = [] 1086 | # Look for any storage containing those storage/streams: 1087 | ole = self.ole_file 1088 | for storage in ole.listdir(streams=False, storages=True): 1089 | # Look for a storage ending with "VBA": 1090 | if storage[-1].upper() == 'VBA': 1091 | logging.debug('Found VBA storage: %s' % ('/'.join(storage))) 1092 | vba_root = '/'.join(storage[:-1]) 1093 | # Add a trailing slash to vba_root, unless it is the root of the OLE file: 1094 | # (used later to append all the child streams/storages) 1095 | if vba_root != '': 1096 | vba_root += '/' 1097 | logging.debug('Checking vba_root="%s"' % vba_root) 1098 | 1099 | def check_vba_stream(ole, vba_root, stream_path): 1100 | full_path = vba_root + stream_path 1101 | if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM: 1102 | logging.debug('Found %s stream: %s' % (stream_path, full_path)) 1103 | return full_path 1104 | else: 1105 | logging.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path) 1106 | return False 1107 | 1108 | # Check if the VBA root storage also contains a PROJECT stream: 1109 | project_path = check_vba_stream(ole, vba_root, 'PROJECT') 1110 | if not project_path: continue 1111 | # Check if the VBA root storage also contains a VBA/_VBA_PROJECT stream: 1112 | vba_project_path = check_vba_stream(ole, vba_root, 'VBA/_VBA_PROJECT') 1113 | if not vba_project_path: continue 1114 | # Check if the VBA root storage also contains a VBA/dir stream: 1115 | dir_path = check_vba_stream(ole, vba_root, 'VBA/dir') 1116 | if not dir_path: continue 1117 | # Now we are pretty sure it is a VBA project structure 1118 | logging.debug('VBA root storage: "%s"' % vba_root) 1119 | # append the results to the list as a tuple for later use: 1120 | self.vba_projects.append((vba_root, project_path, dir_path)) 1121 | return self.vba_projects 1122 | 1123 | def detect_vba_macros(self): 1124 | """ 1125 | Detect the potential presence of VBA macros in the file, by checking 1126 | if it contains VBA projects. Both OLE and OpenXML files are supported. 1127 | 1128 | Important: for now, results are accurate only for Word, Excel and PowerPoint 1129 | EXCEPT Powerpoint 97-2003, which has a different structure for VBA. 1130 | 1131 | Note: this method does NOT attempt to check the actual presence or validity 1132 | of VBA macro source code, so there might be false positives. 1133 | It may also detect VBA macros in files embedded within the main file, 1134 | for example an Excel workbook with macros embedded into a Word 1135 | document without macros may be detected, without distinction. 1136 | 1137 | :return: bool, True if at least one VBA project has been found, False otherwise 1138 | """ 1139 | #TODO: return None or raise exception if format not supported like PPT 97-2003 1140 | #TODO: return the number of VBA projects found instead of True/False? 1141 | # if OpenXML, check all the OLE subfiles: 1142 | if self.ole_file is None: 1143 | for ole_subfile in self.ole_subfiles: 1144 | if ole_subfile.detect_vba_macros(): 1145 | return True 1146 | return False 1147 | # otherwise it's an OLE file, find VBA projects: 1148 | vba_projects = self.find_vba_projects() 1149 | if len(vba_projects) == 0: 1150 | return False 1151 | else: 1152 | return True 1153 | 1154 | 1155 | def extract_macros (self): 1156 | """ 1157 | Extract and decompress source code for each VBA macro found in the file 1158 | 1159 | Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found 1160 | If the file is OLE, filename is the path of the file. 1161 | If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros 1162 | within the zip archive, e.g. word/vbaProject.bin. 1163 | """ 1164 | if self.ole_file is None: 1165 | for ole_subfile in self.ole_subfiles: 1166 | for results in ole_subfile.extract_macros(): 1167 | yield results 1168 | else: 1169 | self.find_vba_projects() 1170 | for vba_root, project_path, dir_path in self.vba_projects: 1171 | # extract all VBA macros from that VBA root storage: 1172 | for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, dir_path): 1173 | yield (self.filename, stream_path, vba_filename, vba_code) 1174 | 1175 | 1176 | def close(self): 1177 | """ 1178 | Close all the open files. This method must be called after usage, if 1179 | the application is opening many files. 1180 | """ 1181 | if self.ole_file is None: 1182 | for ole_subfile in self.ole_subfiles: 1183 | ole_subfile.close() 1184 | else: 1185 | self.ole_file.close() 1186 | 1187 | 1188 | def print_analysis(vba_code): 1189 | """ 1190 | Analyze the provided VBA code, and print the results in a table 1191 | 1192 | :param vba_code: str, VBA source code to be analyzed 1193 | :return: None 1194 | """ 1195 | results = scan_vba(vba_code) 1196 | if results: 1197 | t = prettytable.PrettyTable(('Type', 'Keyword', 'Description')) 1198 | t.align = 'l' 1199 | t.max_width['Type'] = 10 1200 | t.max_width['Keyword'] = 20 1201 | t.max_width['Description'] = 39 1202 | for kw_type, keyword, description in results: 1203 | t.add_row((kw_type, keyword, description)) 1204 | print t 1205 | else: 1206 | print 'No suspicious keyword or IOC found.' 1207 | 1208 | 1209 | 1210 | def process_file (container, filename, data): 1211 | """ 1212 | Process a single file 1213 | 1214 | :param container: str, path and filename of container if the file is within 1215 | a zip archive, None otherwise. 1216 | :param filename: str, path and filename of file on disk, or within the container. 1217 | :param data: bytes, content of the file if it is in a container, None if it is a file on disk. 1218 | """ 1219 | #TODO: replace print by writing to a provided output file (sys.stdout by default) 1220 | if container: 1221 | display_filename = '%s in %s' % (filename, container) 1222 | else: 1223 | display_filename = filename 1224 | print '='*79 1225 | print 'FILE:', display_filename 1226 | try: 1227 | #TODO: handle olefile errors, when an OLE file is malformed 1228 | vba = VBA_Parser(filename, data) 1229 | print 'Type:', vba.type 1230 | if vba.detect_vba_macros(): 1231 | #print 'Contains VBA Macros:' 1232 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): 1233 | # hide attribute lines: 1234 | #TODO: option to disable attribute filtering 1235 | vba_code = filter_vba(vba_code) 1236 | print '-'*79 1237 | print 'VBA MACRO %s ' % vba_filename 1238 | print 'in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)) 1239 | print '- '*39 1240 | # detect empty macros: 1241 | if vba_code.strip() == '': 1242 | print '(empty macro)' 1243 | else: 1244 | print vba_code 1245 | print '- '*39 1246 | print 'ANALYSIS:' 1247 | print_analysis(vba_code) 1248 | else: 1249 | print 'No VBA macros found.' 1250 | except: #TypeError: 1251 | #raise 1252 | #TODO: print more info if debug mode 1253 | print sys.exc_value 1254 | print '' 1255 | 1256 | 1257 | def process_file_triage (container, filename, data): 1258 | """ 1259 | Process a single file 1260 | 1261 | :param container: str, path and filename of container if the file is within 1262 | a zip archive, None otherwise. 1263 | :param filename: str, path and filename of file on disk, or within the container. 1264 | :param data: bytes, content of the file if it is in a container, None if it is a file on disk. 1265 | """ 1266 | #TODO: replace print by writing to a provided output file (sys.stdout by default) 1267 | nb_macros = 0 1268 | nb_autoexec = 0 1269 | nb_suspicious = 0 1270 | nb_iocs = 0 1271 | nb_hexstrings = 0 1272 | # ftype = 'Other' 1273 | message = '' 1274 | try: 1275 | #TODO: handle olefile errors, when an OLE file is malformed 1276 | vba = VBA_Parser(filename, data) 1277 | if vba.detect_vba_macros(): 1278 | for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros(): 1279 | nb_macros += 1 1280 | if vba_code.strip() != '': 1281 | #TODO: same changes as scan_vba, or modify scan_vba to return these counts 1282 | nb_autoexec += len(detect_autoexec(vba_code)) 1283 | nb_suspicious += len(detect_suspicious(vba_code)) 1284 | nb_iocs += len(detect_patterns(vba_code)) 1285 | nb_hexstrings += len(detect_hex_strings(vba_code)) 1286 | if vba.type == TYPE_OLE: 1287 | flags = 'O' 1288 | else: 1289 | flags = 'X' 1290 | macros = autoexec = suspicious = iocs = hexstrings = '-' 1291 | if nb_macros: macros = 'M' 1292 | if nb_autoexec: autoexec = 'A' 1293 | if nb_suspicious: suspicious = 'S' 1294 | if nb_iocs: iocs = 'I' 1295 | if nb_hexstrings: hexstrings = 'H' 1296 | flags += '%s%s%s%s%s' % (macros, autoexec, suspicious, iocs, hexstrings) 1297 | 1298 | # macros = autoexec = suspicious = iocs = hexstrings = 'no' 1299 | # if nb_macros: macros = 'YES:%d' % nb_macros 1300 | # if nb_autoexec: autoexec = 'YES:%d' % nb_autoexec 1301 | # if nb_suspicious: suspicious = 'YES:%d' % nb_suspicious 1302 | # if nb_iocs: iocs = 'YES:%d' % nb_iocs 1303 | # if nb_hexstrings: hexstrings = 'YES:%d' % nb_hexstrings 1304 | # # 2nd line = info 1305 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % (vba.type, macros, autoexec, suspicious, iocs, hexstrings) 1306 | except TypeError: 1307 | # file type not OLE nor OpenXML 1308 | flags = '?' 1309 | message = 'File format not supported' 1310 | except: 1311 | # another error occurred 1312 | #raise 1313 | #TODO: print more info if debug mode 1314 | #TODO: distinguish real errors from incorrect file types 1315 | flags = '!ERROR' 1316 | message = sys.exc_value 1317 | line = '%-6s %s' % (flags, filename) 1318 | if message: 1319 | line += ' - %s' % message 1320 | print line 1321 | 1322 | # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'), 1323 | # header=False, border=False) 1324 | # t.align = 'l' 1325 | # t.max_width['filename'] = 30 1326 | # t.max_width['type'] = 10 1327 | # t.max_width['macros'] = 6 1328 | # t.max_width['autoexec'] = 6 1329 | # t.max_width['suspicious'] = 6 1330 | # t.max_width['ioc'] = 6 1331 | # t.max_width['hexstrings'] = 6 1332 | # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings)) 1333 | # print t 1334 | 1335 | def main_triage_quick(): 1336 | pass 1337 | 1338 | #=== MAIN ===================================================================== 1339 | 1340 | def main(): 1341 | """ 1342 | Main function, called when olevba is run from the command line 1343 | """ 1344 | usage = 'usage: %prog [options] [filename2 ...]' 1345 | parser = optparse.OptionParser(usage=usage) 1346 | # parser.add_option('-o', '--outfile', dest='outfile', 1347 | # help='output file') 1348 | # parser.add_option('-c', '--csv', dest='csv', 1349 | # help='export results to a CSV file') 1350 | parser.add_option("-r", action="store_true", dest="recursive", 1351 | help='find files recursively in subdirectories.') 1352 | parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None, 1353 | help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)') 1354 | parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*', 1355 | help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)') 1356 | parser.add_option("-t", action="store_true", dest="triage_mode", 1357 | help='triage mode, display results as a summary table (default for multiple files)') 1358 | parser.add_option("-d", action="store_true", dest="detailed_mode", 1359 | help='detailed mode, display full results (default for single file)') 1360 | parser.add_option("-i", "--input", dest='input', type='str', default=None, 1361 | help='input file containing VBA source code to be analyzed (no parsing)') 1362 | 1363 | (options, args) = parser.parse_args() 1364 | 1365 | # Print help if no arguments are passed 1366 | if len(args) == 0 and not options.input: 1367 | print __doc__ 1368 | parser.print_help() 1369 | sys.exit() 1370 | 1371 | logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO) 1372 | # For now, all logging is disabled: 1373 | logging.disable(logging.CRITICAL) 1374 | 1375 | if options.input: 1376 | # input file provided with VBA source code to be analyzed directly: 1377 | print 'Analysis of VBA source code from %s:' % options.input 1378 | vba_code = open(options.input).read() 1379 | print_analysis(vba_code) 1380 | sys.exit() 1381 | 1382 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('Type', 'Macros', 'AutoEx', 'Susp.', 'IOCs', 'HexStr') 1383 | # print '%-8s %-7s %-7s %-7s %-7s %-7s' % ('-'*8, '-'*7, '-'*7, '-'*7, '-'*7, '-'*7) 1384 | if not options.detailed_mode or options.triage_mode: 1385 | print '%-6s %-72s' % ('Flags', 'Filename') 1386 | print '%-6s %-72s' % ('-'*6, '-'*72) 1387 | previous_container = None 1388 | count = 0 1389 | container = filename = data = None 1390 | for container, filename, data in xglob.iter_files(args, recursive=options.recursive, 1391 | zip_password=options.zip_password, zip_fname=options.zip_fname): 1392 | # ignore directory names stored in zip files: 1393 | if container and filename.endswith('/'): 1394 | continue 1395 | if options.detailed_mode and not options.triage_mode: 1396 | # fully detailed output 1397 | process_file(container, filename, data) 1398 | else: 1399 | # print container name when it changes: 1400 | if container != previous_container: 1401 | if container is not None: 1402 | print '\nFiles in %s:' % container 1403 | previous_container = container 1404 | # summarized output for triage: 1405 | process_file_triage(container, filename, data) 1406 | count += 1 1407 | if not options.detailed_mode or options.triage_mode: 1408 | print '\n(Flags: O=OLE, X=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex-encoded strings, ?=Unknown)\n' 1409 | 1410 | if count == 1 and not options.triage_mode and not options.detailed_mode: 1411 | # if options -t and -d were not specified and it's a single file, print details: 1412 | #TODO: avoid doing the analysis twice by storing results 1413 | process_file(container, filename, data) 1414 | 1415 | if __name__ == '__main__': 1416 | main() 1417 | 1418 | # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness 1419 | -------------------------------------------------------------------------------- /plugin_base64.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __description__ = 'Base64 string decoder for oledump.py' 4 | __author__ = 'James Habben' 5 | __version__ = '0.0.1' 6 | __date__ = '2015/01/30' 7 | 8 | import re 9 | import base64 10 | 11 | def Decode (input) : 12 | return base64.b64decode(input) 13 | 14 | class cBase64Decoder(cPluginParent): 15 | macroOnly = True 16 | name = 'Base64 decoder' 17 | 18 | def __init__(self, name, stream, options): 19 | self.streamname = name 20 | self.stream = stream 21 | self.options = options 22 | self.ran = False 23 | 24 | def Analyze(self): 25 | self.ran = True 26 | 27 | result = [] 28 | 29 | oREString = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"') 30 | for foundString in oREString.findall(self.stream): 31 | try: 32 | result.append(Decode(foundString)) 33 | except: 34 | pass 35 | 36 | return result 37 | 38 | AddPlugin(cBase64Decoder) 39 | -------------------------------------------------------------------------------- /plugin_dridex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __description__ = 'Dridex string decoder for oledump.py' 4 | __author__ = 'James Habben' 5 | __version__ = '0.0.1' 6 | __date__ = '2015/01/29' 7 | 8 | import re 9 | 10 | def Decode (input) : 11 | work = input[4:-4] 12 | strKeyEnc = StripCharsWithZero(work[(len(work) / 2) - 2: (len(work) / 2)]) 13 | strKeySize = StripCharsWithZero(work[(len(work) / 2): (len(work) / 2) + 2]) 14 | nCharSize = strKeySize - strKeyEnc 15 | work = work[:(len(work) / 2) - 2] + work[(len(work) / 2) + 2:] 16 | strKeyEnc2 = StripChars(work[(len(work) / 2) - (nCharSize/2): (len(work) / 2) + (nCharSize/2)]) 17 | work = work[:(len(work) / 2) - (nCharSize/2)] + work[(len(work) / 2) + (nCharSize/2):] 18 | work_split = [work[i:i+nCharSize] for i in range(0, len(work), nCharSize)] 19 | decoded = '' 20 | for group in work_split: 21 | decoded += chr(StripChars(group)/strKeyEnc2) 22 | return decoded 23 | 24 | def StripChars (input) : 25 | result = '' 26 | for c in input : 27 | if c.isdigit() : 28 | result += c 29 | return int(result) 30 | 31 | def StripCharsWithZero (input) : 32 | result = '' 33 | for c in input : 34 | if c.isdigit() : 35 | result += c 36 | else: 37 | result += '0' 38 | return int(result) 39 | 40 | class cDridexDecoder(cPluginParent): 41 | macroOnly = True 42 | name = 'Dridex decoder' 43 | 44 | def __init__(self, name, stream, options): 45 | self.streamname = name 46 | self.stream = stream 47 | self.options = options 48 | self.ran = False 49 | 50 | def Analyze(self): 51 | self.ran = True 52 | 53 | result = [] 54 | 55 | oREString = re.compile(r'"([^"\n]+)"') 56 | for foundString in oREString.findall(self.stream): 57 | try: 58 | result.append(Decode(foundString)) 59 | except: 60 | pass 61 | 62 | return result 63 | 64 | AddPlugin(cDridexDecoder) 65 | --------------------------------------------------------------------------------