├── DridexUrlDecoder.py
├── README.md
├── olevba-0.8b.py
├── plugin_base64.py
└── plugin_dridex.py


/DridexUrlDecoder.py:
--------------------------------------------------------------------------------
 1 | # Written by @JamesHabben
 2 | # https://github.com/JamesHabben/MalwareStuff
 3 | 
 4 | import sys
 5 | 
 6 | def DridexUrlDecode (inputText) :
 7 |     work = inputText[4:-4]
 8 |     strKeyEnc = StripCharsWithZero(work[(len(work) / 2) - 2: (len(work) / 2)])
 9 |     strKeySize = StripCharsWithZero(work[(len(work) / 2): (len(work) / 2) + 2])
10 |     nCharSize = strKeySize - strKeyEnc
11 |     work = work[:(len(work) / 2) - 2] + work[(len(work) / 2) + 2:]
12 |     strKeyEnc2 = StripChars(work[(len(work) / 2) - (nCharSize/2): (len(work) / 2) + (nCharSize/2)])
13 |     work = work[:(len(work) / 2) - (nCharSize/2)] + work[(len(work) / 2) + (nCharSize/2):]
14 |     work_split = [work[i:i+nCharSize] for i in range(0, len(work), nCharSize)]
15 |     for group in work_split:
16 |         sys.stdout.write(chr(StripChars(group)/strKeyEnc2))
17 | 
def StripChars (input) :
    """
    Return the integer formed by the digits found in a string.

    Every non-digit character is dropped; the remaining digits are read, in
    their original order, as one base-10 number (e.g. "a1b6c8" -> 168).

    input: string to filter (the name shadows the builtin, but it is kept so
           that callers passing it by keyword keep working)
    return: int
    raise ValueError if the string contains no digit at all
    """
    digits = ''.join(c for c in input if c.isdigit())
    if not digits:
        # int('') would raise ValueError as well, but with a message that does
        # not say which input was at fault
        raise ValueError('no digit found in %r' % (input,))
    return int(digits)
24 | 
def StripCharsWithZero (input) :
    """
    Return the integer obtained after replacing every non-digit by '0'.

    Unlike StripChars, the position of each digit is preserved: a non-digit
    character becomes a zero instead of being removed
    (e.g. "G7" -> 7, "a1b" -> 10).

    input: string to convert (the name shadows the builtin, but it is kept so
           that callers passing it by keyword keep working)
    return: int
    raise ValueError if the string is empty
    """
    digits = ''.join(c if c.isdigit() else '0' for c in input)
    if not digits:
        # int('') would raise ValueError as well, with a less helpful message
        raise ValueError('cannot convert an empty string to a number')
    return int(digits)
33 | 
34 | 
35 | DridexUrlDecode("C3iY1epSRGe6q8g15xStVesdG717MAlg2H4hmV1vkL6Glnf0cknj")
36 | DridexUrlDecode("HLIY3Nf3z2k8jD37h1n2OM3N712DGQ3c5M841RZ8C5e6P1C50C4ym1oF504WyV182p4mJ16cK9Z61l47h2dU1rVB5V681sFY728i16H3E2Qm1fn47y2cgAo156j8T1s600hukKO1568X1xE4Z7d2q17jvcwgk816Yz32o9Q216Mpr0B01vcwg856a17b9j2zAmWf1536B1t7d92rI1FZ5E36Pu1jl504Z34tm2R43i55Lg2F3eLE3T28lLX1D504348Goe8Gbdp37w443ADy36X0h14g7Wb2G3u584kEG332Ut8ws3wO584pzSTf")
37 | DridexUrlDecode("YNPH1W47E211z3P6142cM4115K2J1696CURf1712N1OCJwc0w6Z16840Z1r600W16Z3273k6SR16Bf161Q92a016Vr16V1pc")
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MalwareStuff
2 | 
3 | # Dridex URL Decoder
4 | 


--------------------------------------------------------------------------------
/olevba-0.8b.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python
   2 | """
   3 | olevba.py
   4 | 
   5 | olevba is a script to parse OLE and OpenXML files such as MS Office documents
   6 | (e.g. Word, Excel), to extract VBA Macro code in clear text.
   7 | 
   8 | Supported formats:
   9 | - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
  10 | - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
  11 | - PowerPoint 2007+ (.pptm, .ppsm)
  12 | 
  13 | Author: Philippe Lagadec - http://www.decalage.info
  14 | License: BSD, see source code or documentation
  15 | 
  16 | olevba is part of the python-oletools package:
  17 | http://www.decalage.info/python/oletools
  18 | 
  19 | olevba is based on source code from officeparser by John William Davison
  20 | https://github.com/unixfreak0037/officeparser
  21 | """
  22 | 
  23 | #=== LICENSE ==================================================================
  24 | 
  25 | # olevba is copyright (c) 2014-2015 Philippe Lagadec (http://www.decalage.info)
  26 | # All rights reserved.
  27 | #
  28 | # Redistribution and use in source and binary forms, with or without modification,
  29 | # are permitted provided that the following conditions are met:
  30 | #
  31 | #  * Redistributions of source code must retain the above copyright notice, this
  32 | #    list of conditions and the following disclaimer.
  33 | #  * Redistributions in binary form must reproduce the above copyright notice,
  34 | #    this list of conditions and the following disclaimer in the documentation
  35 | #    and/or other materials provided with the distribution.
  36 | #
  37 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  38 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  39 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  40 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  41 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  42 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  43 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  44 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  46 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  47 | 
  48 | 
  49 | # olevba contains modified source code from the officeparser project, published
  50 | # under the following MIT License (MIT):
  51 | #
  52 | # officeparser is copyright (c) 2014 John William Davison
  53 | #
  54 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  55 | # of this software and associated documentation files (the "Software"), to deal
  56 | # in the Software without restriction, including without limitation the rights
  57 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  58 | # copies of the Software, and to permit persons to whom the Software is
  59 | # furnished to do so, subject to the following conditions:
  60 | #
  61 | # The above copyright notice and this permission notice shall be included in all
  62 | # copies or substantial portions of the Software.
  63 | #
  64 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  65 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  66 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  67 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  68 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  69 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  70 | # SOFTWARE.
  71 | 
  72 | #------------------------------------------------------------------------------
  73 | # CHANGELOG:
  74 | # 2014-08-05 v0.01 PL: - first version based on officeparser code
  75 | # 2014-08-14 v0.02 PL: - fixed bugs in code, added license from officeparser
  76 | # 2014-08-15       PL: - fixed incorrect value check in PROJECTHELPFILEPATH Record
  77 | # 2014-08-15 v0.03 PL: - refactored extract_macros to support OpenXML formats
  78 | #                        and to find the VBA project root anywhere in the file
  79 | # 2014-11-29 v0.04 PL: - use olefile instead of OleFileIO_PL
  80 | # 2014-12-05 v0.05 PL: - refactored most functions into a class, new API
  81 | #                      - added detect_vba_macros
  82 | # 2014-12-10 v0.06 PL: - hide first lines with VB attributes
  83 | #                      - detect auto-executable macros
  84 | #                      - ignore empty macros
  85 | # 2014-12-14 v0.07 PL: - detect_autoexec() is now case-insensitive
  86 | # 2014-12-15 v0.08 PL: - improved display for empty macros
  87 | #                      - added pattern extraction
  88 | # 2014-12-25 v0.09 PL: - added suspicious keywords detection
  89 | # 2014-12-27 v0.10 PL: - added OptionParser, main and process_file
  90 | #                      - uses xglob to scan several files with wildcards
  91 | #                      - option -r to recurse subdirectories
  92 | #                      - option -z to scan files in password-protected zips
  93 | # 2015-01-02 v0.11 PL: - improved filter_vba to detect colons
  94 | # 2015-01-03 v0.12 PL: - fixed detect_patterns to detect all patterns
  95 | #                      - process_file: improved display, shows container file
  96 | #                      - improved list of executable file extensions
  97 | # 2015-01-04 v0.13 PL: - added several suspicious keywords, improved display
  98 | # 2015-01-08 v0.14 PL: - added hex strings detection and decoding
  99 | #                      - fixed issue #2, decoding VBA stream names using
 100 | #                        specified codepage and unicode stream names
 101 | # 2015-01-11 v0.15 PL: - added new triage mode, options -t and -d
 102 | # 2015-01-16 v0.16 PL: - fix for issue #3 (exception when module name="text")
 103 | #                      - added several suspicious keywords
 104 | #                      - added option -i to analyze VBA source code directly
 105 | # 2015-01-17 v0.17 PL: - removed .com from the list of executable extensions
 106 | #                      - added scan_vba to run all detection algorithms
 107 | #                      - decoded hex strings are now also scanned + reversed
 108 | # 2015-01-23 v0.18 PL: - fixed issue #3, case-insensitive search in code_modules
 109 | # 2015-01-24 v0.19 PL: - improved the detection of IOCs obfuscated with hex
 110 | #                        strings and StrReverse
 111 | 
 112 | #######################
 113 | # Base64 detection and decode added by James Habben
 114 | #######################
 115 | 
 116 | __version__ = '0.19'
 117 | 
 118 | #------------------------------------------------------------------------------
 119 | # TODO:
 120 | # + do not use logging, but a provided logger (null logger by default)
 121 | # + setup logging (common with other oletools)
 122 | 
 123 | # TODO later:
 124 | # + do not show hex strings by default (add option --hex)
 125 | # + performance improvement: instead of searching each keyword separately,
 126 | #   first split vba code into a list of words (per line), then check each
 127 | #   word against a dict. (or put vba words into a set/dict?)
 128 | # + for regex, maybe combine them into a single re with named groups?
 129 | # + add Yara support, include sample rules? plugins like balbuzard?
 130 | # + add balbuzard support
 131 | # + output to file (replace print by file.write, sys.stdout by default)
 132 | # + look for VBA in embedded documents (e.g. Excel in Word)
 133 | # + support SRP streams (see Lenny's article + links and sample)
 134 | # - python 3.x support
 135 | # - add support for PowerPoint macros (see libclamav, libgsf), use oledump heuristic?
 136 | # - check VBA macros in Visio, Access, Project, etc
 137 | # - extract_macros: convert to a class, split long function into smaller methods
 138 | # - extract_macros: read bytes from stream file objects instead of strings
 139 | # - extract_macros: use combined struct.unpack instead of many calls
 140 | 
 141 | #------------------------------------------------------------------------------
 142 | # REFERENCES:
 143 | # - [MS-OVBA]: Microsoft Office VBA File Format Structure
 144 | #   http://msdn.microsoft.com/en-us/library/office/cc313094%28v=office.12%29.aspx
 145 | # - officeparser: https://github.com/unixfreak0037/officeparser
 146 | 
 147 | 
 148 | #--- IMPORTS ------------------------------------------------------------------
 149 | 
 150 | import sys, logging
 151 | import struct
 152 | import cStringIO
 153 | import math
 154 | import zipfile
 155 | import re
 156 | import optparse
 157 | import os.path
 158 | import binascii
 159 | import base64
 160 | 
 161 | import thirdparty.olefile as olefile
 162 | from thirdparty.prettytable import prettytable
 163 | from thirdparty.xglob import xglob
 164 | 
 165 | #--- CONSTANTS ----------------------------------------------------------------
 166 | 
# Labels for the two families of files named in the module docstring
# NOTE(review): presumably used to tag the container type of a parsed file -
# confirm against the code that references them.
TYPE_OLE     = 'OLE'
TYPE_OpenXML = 'OpenXML'

# Extensions associated by _extract_vba with the entries of the PROJECT stream:
MODULE_EXTENSION = "bas"  # "Module=" entries
CLASS_EXTENSION = "cls"  # "Document=" and "Class=" entries
FORM_EXTENSION = "frm"  # "BaseClass=" entries
 173 | 
# Keywords to detect auto-executable macros
# Layout: {description of the trigger: tuple of VBA macro/event names}.
# Every value is a tuple, even when it holds a single name (note the trailing
# comma in ('DocumentChange',)).
AUTOEXEC_KEYWORDS = {
    # MS Word:
    'Runs when the Word document is opened':
        ('AutoExec', 'AutoOpen', 'Document_Open', 'DocumentOpen'),
    'Runs when the Word document is closed':
        ('AutoExit', 'AutoClose', 'Document_Close', 'DocumentBeforeClose'),
    'Runs when the Word document is modified':
        ('DocumentChange',),
    'Runs when a new Word document is created':
        ('AutoNew', 'Document_New', 'NewDocument'),

    # MS Excel:
    'Runs when the Excel Workbook is opened':
        ('Auto_Open', 'Workbook_Open'),
    'Runs when the Excel Workbook is closed':
        ('Auto_Close', 'Workbook_Close'),

    #TODO: full list in MS specs??
}
 194 | 
# Suspicious Keywords that may be used by malware
# See VBA language reference: http://msdn.microsoft.com/en-us/library/office/jj692818%28v=office.15%29.aspx
# Layout: {description of the risk: tuple of VBA keywords}, i.e. the same
# layout as AUTOEXEC_KEYWORDS. Every value is a tuple, even for one keyword.
SUSPICIOUS_KEYWORDS = {
    #TODO: use regex to support variable whitespaces
    'May read system environment variables':
        ('Environ',),
    'May open a file':
        ('Open',),
    'May write to a file (if combined with Open)':
        #TODO: regex to find Open+Write on same line
        ('Write', 'Put', 'Output', 'Print #'),
    'May read or write a binary file (if combined with Open)':
        #TODO: regex to find Open+Binary on same line
        ('Binary',),
    'May copy a file':
        ('FileCopy', 'CopyFile'),
        #FileCopy: http://msdn.microsoft.com/en-us/library/office/gg264390%28v=office.15%29.aspx
        #CopyFile: http://msdn.microsoft.com/en-us/library/office/gg264089%28v=office.15%29.aspx
    'May create a text file':
        ('CreateTextFile','ADODB.Stream', 'WriteText', 'SaveToFile'),
        #CreateTextFile: http://msdn.microsoft.com/en-us/library/office/gg264617%28v=office.15%29.aspx
        #ADODB.Stream sample: http://pastebin.com/Z4TMyuq6
    'May run an executable file or a system command':
        ('Shell', 'vbNormalFocus', 'vbHide', 'vbMinimizedFocus', 'vbMaximizedFocus', 'vbNormalNoFocus',
         'vbMinimizedNoFocus', 'WScript.Shell', 'Run'),
        #Shell: http://msdn.microsoft.com/en-us/library/office/gg278437%28v=office.15%29.aspx
        #WScript.Shell+Run sample: http://pastebin.com/Z4TMyuq6
    'May hide the application':
        ('Application.Visible', 'ShowWindow', 'SW_HIDE'),
    'May create a directory':
        ('MkDir',),
    'May save the current workbook':
        ('ActiveWorkbook.SaveAs',),
    'May change which directory contains files to open at startup':
        #TODO: confirm the actual effect
        ('Application.AltStartupPath',),
    'May create an OLE object':
        ('CreateObject',),
    'May run an application (if combined with CreateObject)':
        ('Shell.Application',),
    'May enumerate application windows (if combined with Shell.Application object)':
        ('Windows', 'FindWindow'),
    'May run code from a DLL':
        #TODO: regex to find declare+lib on same line
        ('Lib',),
    'May download files from the Internet':
        #TODO: regex to find urlmon+URLDownloadToFileA on same line
        ('URLDownloadToFileA',),
    'May control another application by simulating user keystrokes':
        ('SendKeys', 'AppActivate'),
        #SendKeys: http://msdn.microsoft.com/en-us/library/office/gg278655%28v=office.15%29.aspx
    'May attempt to obfuscate malicious function calls':
        ('CallByName',),
        #CallByName: http://msdn.microsoft.com/en-us/library/office/gg278760%28v=office.15%29.aspx
    'May attempt to obfuscate specific strings':
        ('Chr', 'ChrB', 'ChrW', 'StrReverse'),
        #Chr: http://msdn.microsoft.com/en-us/library/office/gg264465%28v=office.15%29.aspx
}
 253 | 
# Patterns to be extracted (IP addresses, URLs, etc)
# From patterns.py in balbuzard
# Layout: tuple of (pattern name, compiled regular expression) pairs.
RE_PATTERNS = (
    #TODO: check if this regex matches URLs with an IP address (various forms)
    # NOTE(review): "&amp;" inside the character class below looks like an
    # HTML-escaping artifact of "&"; since a, m and p are already covered by
    # a-z, its only effect is to also accept ';' - confirm before changing it.
    ('URL', re.compile(r'(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+&amp;%\$#\=~])*[^\.\,\)\(\s]')),
    # four dot-separated numbers, each between 0 and 255:
    ('IPv4 address', re.compile(r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b")),
    # case-insensitive, thanks to the (?i) flag:
    ('E-mail address', re.compile(r'(?i)\b[A-Z0-9._%+-]+@(?:[A-Z0-9-]+\.)+(?:[A-Z]{2,12}|XN--[A-Z0-9]{4,18})\b')),
    # ('Domain name', re.compile(r'(?=^.{1,254}$)(^(?:(?!\d+\.|-)[a-zA-Z0-9_\-]{1,63}(?<!-)\.?)+(?:[a-zA-Z]{2,})$)')),
    # Executable file name with known extensions (except .com which is present in many URLs, and .application):
    ("Executable file name", re.compile(r"(?i)\b\w+\.(EXE|PIF|GADGET|MSI|MSP|MSC|VB|VBS|JS|VBE|JSE|WS|WSF|WSC|WSH|BAT|CMD|DLL|SCR|HTA|CPL|CLASS|JAR|PS1|PS1XML|PS2|PS2XML|PSC1|PSC2|SCF|LNK|INF|REG)\b")),
    # Sources: http://www.howtogeek.com/137270/50-file-extensions-that-are-potentially-dangerous-on-windows/
    #TODO: https://support.office.com/en-us/article/Blocked-attachments-in-Outlook-3811cddc-17c3-4279-a30c-060ba0207372#__attachment_file_types
    #('Hex string', re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')),
    )
 268 | 
# regex to detect strings encoded in hexadecimal:
# at least 4 pairs of hex digits, i.e. an even number (8 or more) of characters
re_hex_string = re.compile(r'(?:[0-9A-Fa-f]{2}){4,}')

# regex to detect strings encoded in base64:
# the double quotes are part of the pattern, so only a complete quoted string
# is matched: groups of 4 base64 characters, optionally followed by one padded
# group ("xx==" or "xxx="). The empty literal "" matches as well.
re_base64_string = re.compile(r'"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?"')
 274 | 
 275 | 
 276 | #--- FUNCTIONS ----------------------------------------------------------------
 277 | 
 278 | def copytoken_help(decompressed_current, decompressed_chunk_start):
 279 |     """
 280 |     compute bit masks to decode a CopyToken according to MS-OVBA 2.4.1.3.19.1 CopyToken Help
 281 | 
 282 |     decompressed_current: number of decompressed bytes so far, i.e. len(decompressed_container)
 283 |     decompressed_chunk_start: offset of the current chunk in the decompressed container
 284 |     return length_mask, offset_mask, bit_count, maximum_length
 285 |     """
 286 |     difference = decompressed_current - decompressed_chunk_start
 287 |     bit_count = int(math.ceil(math.log(difference, 2)))
 288 |     bit_count = max([bit_count, 4])
 289 |     length_mask = 0xFFFF >> bit_count
 290 |     offset_mask = ~length_mask
 291 |     maximum_length = (0xFFFF >> bit_count) + 3
 292 |     return length_mask, offset_mask, bit_count, maximum_length
 293 | 
 294 | 
 295 | def decompress_stream (compressed_container):
 296 |     """
 297 |     Decompress a stream according to MS-OVBA section 2.4.1
 298 | 
 299 |     compressed_container: string compressed according to the MS-OVBA 2.4.1.3.6 Compression algorithm
 300 |     return the decompressed container as a string (bytes)
 301 |     """
 302 |     # 2.4.1.2 State Variables
 303 | 
 304 |     # The following state is maintained for the CompressedContainer (section 2.4.1.1.1):
 305 |     # CompressedRecordEnd: The location of the byte after the last byte in the CompressedContainer (section 2.4.1.1.1).
 306 |     # CompressedCurrent: The location of the next byte in the CompressedContainer (section 2.4.1.1.1) to be read by
 307 |     #                    decompression or to be written by compression.
 308 | 
 309 |     # The following state is maintained for the current CompressedChunk (section 2.4.1.1.4):
 310 |     # CompressedChunkStart: The location of the first byte of the CompressedChunk (section 2.4.1.1.4) within the
 311 |     #                       CompressedContainer (section 2.4.1.1.1).
 312 | 
 313 |     # The following state is maintained for a DecompressedBuffer (section 2.4.1.1.2):
 314 |     # DecompressedCurrent: The location of the next byte in the DecompressedBuffer (section 2.4.1.1.2) to be written by
 315 |     #                      decompression or to be read by compression.
 316 |     # DecompressedBufferEnd: The location of the byte after the last byte in the DecompressedBuffer (section 2.4.1.1.2).
 317 | 
 318 |     # The following state is maintained for the current DecompressedChunk (section 2.4.1.1.3):
 319 |     # DecompressedChunkStart: The location of the first byte of the DecompressedChunk (section 2.4.1.1.3) within the
 320 |     #                         DecompressedBuffer (section 2.4.1.1.2).
 321 | 
 322 |     decompressed_container = ''  # result
 323 |     compressed_current = 0
 324 | 
 325 |     sig_byte = ord(compressed_container[compressed_current])
 326 |     if sig_byte != 0x01:
 327 |         raise ValueError('invalid signature byte {0:02X}'.format(sig_byte))
 328 | 
 329 |     compressed_current += 1
 330 | 
 331 |     #NOTE: the definition of CompressedRecordEnd is ambiguous. Here we assume that
 332 |     # CompressedRecordEnd = len(compressed_container)
 333 |     while compressed_current < len(compressed_container):
 334 |         # 2.4.1.1.5
 335 |         compressed_chunk_start = compressed_current
 336 |         # chunk header = first 16 bits
 337 |         compressed_chunk_header = struct.unpack("<H", compressed_container[compressed_chunk_start:compressed_chunk_start + 2])[0]
 338 |         # chunk size = 12 first bits of header + 3
 339 |         chunk_size = (compressed_chunk_header & 0x0FFF) + 3
 340 |         # chunk signature = 3 next bits - should always be 0b011
 341 |         chunk_signature = (compressed_chunk_header >> 12) & 0x07
 342 |         if chunk_signature != 0b011:
 343 |             raise ValueError('Invalid CompressedChunkSignature in VBA compressed stream')
 344 |         # chunk flag = next bit - 1 == compressed, 0 == uncompressed
 345 |         chunk_flag = (compressed_chunk_header >> 15) & 0x01
 346 |         logging.debug("chunk size = {0}, compressed flag = {1}".format(chunk_size, chunk_flag))
 347 | 
 348 |         #MS-OVBA 2.4.1.3.12: the maximum size of a chunk including its header is 4098 bytes (header 2 + data 4096)
 349 |         # The minimum size is 3 bytes
 350 |         # NOTE: there seems to be a typo in MS-OVBA, the check should be with 4098, not 4095 (which is the max value
 351 |         # in chunk header before adding 3.
 352 |         # Also the first test is not useful since a 12 bits value cannot be larger than 4095.
 353 |         if chunk_flag == 1 and chunk_size > 4098:
 354 |             raise ValueError('CompressedChunkSize > 4098 but CompressedChunkFlag == 1')
 355 |         if chunk_flag == 0 and chunk_size != 4098:
 356 |             raise ValueError('CompressedChunkSize != 4098 but CompressedChunkFlag == 0')
 357 | 
 358 |         # check if chunk_size goes beyond the compressed data, instead of silently cutting it:
 359 |         #TODO: raise an exception?
 360 |         if compressed_chunk_start + chunk_size > len(compressed_container):
 361 |             logging.warning('Chunk size is larger than remaining compressed data')
 362 |         compressed_end = min([len(compressed_container), compressed_chunk_start + chunk_size])
 363 |         # read after chunk header:
 364 |         compressed_current = compressed_chunk_start + 2
 365 | 
 366 |         if chunk_flag == 0:
 367 |             # MS-OVBA 2.4.1.3.3 Decompressing a RawChunk
 368 |             # uncompressed chunk: read the next 4096 bytes as-is
 369 |             #TODO: check if there are at least 4096 bytes left
 370 |             decompressed_container += compressed_container[compressed_current:compressed_current + 4096]
 371 |             compressed_current += 4096
 372 |         else:
 373 |             # MS-OVBA 2.4.1.3.2 Decompressing a CompressedChunk
 374 |             # compressed chunk
 375 |             decompressed_chunk_start = len(decompressed_container)
 376 |             while compressed_current < compressed_end:
 377 |                 # MS-OVBA 2.4.1.3.4 Decompressing a TokenSequence
 378 |                 # logging.debug('compressed_current = %d / compressed_end = %d' % (compressed_current, compressed_end))
 379 |                 # FlagByte: 8 bits indicating if the following 8 tokens are either literal (1 byte of plain text) or
 380 |                 # copy tokens (reference to a previous literal token)
 381 |                 flag_byte = ord(compressed_container[compressed_current])
 382 |                 compressed_current += 1
 383 |                 for bit_index in xrange(0, 8):
 384 |                     # logging.debug('bit_index=%d / compressed_current=%d / compressed_end=%d' % (bit_index, compressed_current, compressed_end))
 385 |                     if compressed_current >= compressed_end:
 386 |                         break
 387 |                     # MS-OVBA 2.4.1.3.5 Decompressing a Token
 388 |                     # MS-OVBA 2.4.1.3.17 Extract FlagBit
 389 |                     flag_bit = (flag_byte >> bit_index) & 1
 390 |                     #logging.debug('bit_index=%d: flag_bit=%d' % (bit_index, flag_bit))
 391 |                     if flag_bit == 0: # LiteralToken
 392 |                         # copy one byte directly to output
 393 |                         decompressed_container += compressed_container[compressed_current]
 394 |                         compressed_current += 1
 395 |                     else: # CopyToken
 396 |                         # MS-OVBA 2.4.1.3.19.2 Unpack CopyToken
 397 |                         copy_token = struct.unpack("<H", compressed_container[compressed_current:compressed_current + 2])[0]
 398 |                         #TODO: check this
 399 |                         length_mask, offset_mask, bit_count, maximum_length = copytoken_help(
 400 |                             len(decompressed_container), decompressed_chunk_start)
 401 |                         length = (copy_token & length_mask) + 3
 402 |                         temp1 = copy_token & offset_mask
 403 |                         temp2 = 16 - bit_count
 404 |                         offset = (temp1 >> temp2) + 1
 405 |                         #logging.debug('offset=%d length=%d' % (offset, length))
 406 |                         copy_source = len(decompressed_container) - offset
 407 |                         for index in xrange(copy_source, copy_source + length):
 408 |                             decompressed_container += decompressed_container[index]
 409 |                         compressed_current += 2
 410 |     return decompressed_container
 411 | 
 412 | 
 413 | def _extract_vba (ole, vba_root, project_path, dir_path):
 414 |     """
 415 |     Extract VBA macros from an OleFileIO object.
 416 |     Internal function, do not call directly.
 417 | 
 418 |     vba_root: path to the VBA root storage, containing the VBA storage and the PROJECT stream
 419 |     vba_project: path to the PROJECT stream
 420 |     This is a generator, yielding (stream path, VBA filename, VBA source code) for each VBA code stream
 421 |     """
 422 |     # Open the PROJECT stream:
 423 |     project = ole.openstream(project_path)
 424 | 
 425 |     # sample content of the PROJECT stream:
 426 | 
 427 |     ##    ID="{5312AC8A-349D-4950-BDD0-49BE3C4DD0F0}"
 428 |     ##    Document=ThisDocument/&H00000000
 429 |     ##    Module=NewMacros
 430 |     ##    Name="Project"
 431 |     ##    HelpContextID="0"
 432 |     ##    VersionCompatible32="393222000"
 433 |     ##    CMG="F1F301E705E705E705E705"
 434 |     ##    DPB="8F8D7FE3831F2020202020"
 435 |     ##    GC="2D2FDD81E51EE61EE6E1"
 436 |     ##
 437 |     ##    [Host Extender Info]
 438 |     ##    &H00000001={3832D640-CF90-11CF-8E43-00A0C911005A};VBE;&H00000000
 439 |     ##    &H00000002={000209F2-0000-0000-C000-000000000046};Word8.0;&H00000000
 440 |     ##
 441 |     ##    [Workspace]
 442 |     ##    ThisDocument=22, 29, 339, 477, Z
 443 |     ##    NewMacros=-4, 42, 832, 510, C
 444 | 
 445 |     code_modules = {}
 446 | 
 447 |     for line in project:
 448 |         line = line.strip()
 449 |         if '=' in line:
 450 |             # split line at the 1st equal sign:
 451 |             name, value = line.split('=', 1)
 452 |             # looking for code modules
 453 |             # add the code module as a key in the dictionary
 454 |             # the value will be the extension needed later
 455 |             # The value is converted to lowercase, to allow case-insensitive matching (issue #3)
 456 |             value = value.lower()
 457 |             if name == 'Document':
 458 |                 # split value at the 1st slash, keep 1st part:
 459 |                 value = value.split('/', 1)[0]
 460 |                 code_modules[value] = CLASS_EXTENSION
 461 |             elif name == 'Module':
 462 |                 code_modules[value] = MODULE_EXTENSION
 463 |             elif name == 'Class':
 464 |                 code_modules[value] = CLASS_EXTENSION
 465 |             elif name == 'BaseClass':
 466 |                 code_modules[value] = FORM_EXTENSION
 467 | 
 468 |     # read data from dir stream (compressed)
 469 |     dir_compressed = ole.openstream(dir_path).read()
 470 | 
 471 |     def check_value(name, expected, value):
 472 |         if expected != value:
 473 |             logging.error("invalid value for {0} expected {1:04X} got {2:04X}".format(name, expected, value))
 474 | 
 475 |     dir_stream = cStringIO.StringIO(decompress_stream(dir_compressed))
 476 | 
 477 |     # PROJECTSYSKIND Record
 478 |     PROJECTSYSKIND_Id = struct.unpack("<H", dir_stream.read(2))[0]
 479 |     check_value('PROJECTSYSKIND_Id', 0x0001, PROJECTSYSKIND_Id)
 480 |     PROJECTSYSKIND_Size = struct.unpack("<L", dir_stream.read(4))[0]
 481 |     check_value('PROJECTSYSKIND_Size', 0x0004, PROJECTSYSKIND_Size)
 482 |     PROJECTSYSKIND_SysKind = struct.unpack("<L", dir_stream.read(4))[0]
 483 |     if PROJECTSYSKIND_SysKind == 0x00:
 484 |         logging.debug("16-bit Windows")
 485 |     elif PROJECTSYSKIND_SysKind == 0x01:
 486 |         logging.debug("32-bit Windows")
 487 |     elif PROJECTSYSKIND_SysKind == 0x02:
 488 |         logging.debug("Macintosh")
 489 |     elif PROJECTSYSKIND_SysKind == 0x03:
 490 |         logging.debug("64-bit Windows")
 491 |     else:
 492 |         logging.error("invalid PROJECTSYSKIND_SysKind {0:04X}".format(PROJECTSYSKIND_SysKind))
 493 | 
 494 |     # PROJECTLCID Record
 495 |     PROJECTLCID_Id = struct.unpack("<H", dir_stream.read(2))[0]
 496 |     check_value('PROJECTLCID_Id', 0x0002, PROJECTLCID_Id)
 497 |     PROJECTLCID_Size = struct.unpack("<L", dir_stream.read(4))[0]
 498 |     check_value('PROJECTLCID_Size', 0x0004, PROJECTLCID_Size)
 499 |     PROJECTLCID_Lcid = struct.unpack("<L", dir_stream.read(4))[0]
 500 |     check_value('PROJECTLCID_Lcid', 0x409, PROJECTLCID_Lcid)
 501 | 
 502 |     # PROJECTLCIDINVOKE Record
 503 |     PROJECTLCIDINVOKE_Id = struct.unpack("<H", dir_stream.read(2))[0]
 504 |     check_value('PROJECTLCIDINVOKE_Id', 0x0014, PROJECTLCIDINVOKE_Id)
 505 |     PROJECTLCIDINVOKE_Size = struct.unpack("<L", dir_stream.read(4))[0]
 506 |     check_value('PROJECTLCIDINVOKE_Size', 0x0004, PROJECTLCIDINVOKE_Size)
 507 |     PROJECTLCIDINVOKE_LcidInvoke = struct.unpack("<L", dir_stream.read(4))[0]
 508 |     check_value('PROJECTLCIDINVOKE_LcidInvoke', 0x409, PROJECTLCIDINVOKE_LcidInvoke)
 509 | 
 510 |     # PROJECTCODEPAGE Record
 511 |     PROJECTCODEPAGE_Id = struct.unpack("<H", dir_stream.read(2))[0]
 512 |     check_value('PROJECTCODEPAGE_Id', 0x0003, PROJECTCODEPAGE_Id)
 513 |     PROJECTCODEPAGE_Size = struct.unpack("<L", dir_stream.read(4))[0]
 514 |     check_value('PROJECTCODEPAGE_Size', 0x0002, PROJECTCODEPAGE_Size)
 515 |     PROJECTCODEPAGE_CodePage = struct.unpack("<H", dir_stream.read(2))[0]
 516 | 
 517 |     # PROJECTNAME Record
 518 |     PROJECTNAME_Id = struct.unpack("<H", dir_stream.read(2))[0]
 519 |     check_value('PROJECTNAME_Id', 0x0004, PROJECTNAME_Id)
 520 |     PROJECTNAME_SizeOfProjectName = struct.unpack("<L", dir_stream.read(4))[0]
 521 |     if PROJECTNAME_SizeOfProjectName < 1 or PROJECTNAME_SizeOfProjectName > 128:
 522 |         logging.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName))
 523 |     PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName)
 524 | 
 525 |     # PROJECTDOCSTRING Record
 526 |     PROJECTDOCSTRING_Id = struct.unpack("<H", dir_stream.read(2))[0]
 527 |     check_value('PROJECTDOCSTRING_Id', 0x0005, PROJECTDOCSTRING_Id)
 528 |     PROJECTDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]
 529 |     if PROJECTNAME_SizeOfProjectName > 2000:
 530 |         logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString))
 531 |     PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString)
 532 |     PROJECTDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 533 |     check_value('PROJECTDOCSTRING_Reserved', 0x0040, PROJECTDOCSTRING_Reserved)
 534 |     PROJECTDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 535 |     if PROJECTDOCSTRING_SizeOfDocStringUnicode % 2 != 0:
 536 |         logging.error("PROJECTDOCSTRING_SizeOfDocStringUnicode is not even")
 537 |     PROJECTDOCSTRING_DocStringUnicode = dir_stream.read(PROJECTDOCSTRING_SizeOfDocStringUnicode)
 538 | 
 539 |     # PROJECTHELPFILEPATH Record - MS-OVBA 2.3.4.2.1.7
 540 |     PROJECTHELPFILEPATH_Id = struct.unpack("<H", dir_stream.read(2))[0]
 541 |     check_value('PROJECTHELPFILEPATH_Id', 0x0006, PROJECTHELPFILEPATH_Id)
 542 |     PROJECTHELPFILEPATH_SizeOfHelpFile1 = struct.unpack("<L", dir_stream.read(4))[0]
 543 |     if PROJECTHELPFILEPATH_SizeOfHelpFile1 > 260:
 544 |         logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1))
 545 |     PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1)
 546 |     PROJECTHELPFILEPATH_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 547 |     check_value('PROJECTHELPFILEPATH_Reserved', 0x003D, PROJECTHELPFILEPATH_Reserved)
 548 |     PROJECTHELPFILEPATH_SizeOfHelpFile2 = struct.unpack("<L", dir_stream.read(4))[0]
 549 |     if PROJECTHELPFILEPATH_SizeOfHelpFile2 != PROJECTHELPFILEPATH_SizeOfHelpFile1:
 550 |         logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 does not equal PROJECTHELPFILEPATH_SizeOfHelpFile2")
 551 |     PROJECTHELPFILEPATH_HelpFile2 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile2)
 552 |     if PROJECTHELPFILEPATH_HelpFile2 != PROJECTHELPFILEPATH_HelpFile1:
 553 |         logging.error("PROJECTHELPFILEPATH_HelpFile1 does not equal PROJECTHELPFILEPATH_HelpFile2")
 554 | 
 555 |     # PROJECTHELPCONTEXT Record
 556 |     PROJECTHELPCONTEXT_Id = struct.unpack("<H", dir_stream.read(2))[0]
 557 |     check_value('PROJECTHELPCONTEXT_Id', 0x0007, PROJECTHELPCONTEXT_Id)
 558 |     PROJECTHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]
 559 |     check_value('PROJECTHELPCONTEXT_Size', 0x0004, PROJECTHELPCONTEXT_Size)
 560 |     PROJECTHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0]
 561 | 
 562 |     # PROJECTLIBFLAGS Record
 563 |     PROJECTLIBFLAGS_Id = struct.unpack("<H", dir_stream.read(2))[0]
 564 |     check_value('PROJECTLIBFLAGS_Id', 0x0008, PROJECTLIBFLAGS_Id)
 565 |     PROJECTLIBFLAGS_Size = struct.unpack("<L", dir_stream.read(4))[0]
 566 |     check_value('PROJECTLIBFLAGS_Size', 0x0004, PROJECTLIBFLAGS_Size)
 567 |     PROJECTLIBFLAGS_ProjectLibFlags = struct.unpack("<L", dir_stream.read(4))[0]
 568 |     check_value('PROJECTLIBFLAGS_ProjectLibFlags', 0x0000, PROJECTLIBFLAGS_ProjectLibFlags)
 569 | 
 570 |     # PROJECTVERSION Record
 571 |     PROJECTVERSION_Id = struct.unpack("<H", dir_stream.read(2))[0]
 572 |     check_value('PROJECTVERSION_Id', 0x0009, PROJECTVERSION_Id)
 573 |     PROJECTVERSION_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
 574 |     check_value('PROJECTVERSION_Reserved', 0x0004, PROJECTVERSION_Reserved)
 575 |     PROJECTVERSION_VersionMajor = struct.unpack("<L", dir_stream.read(4))[0]
 576 |     PROJECTVERSION_VersionMinor = struct.unpack("<H", dir_stream.read(2))[0]
 577 | 
 578 |     # PROJECTCONSTANTS Record
 579 |     PROJECTCONSTANTS_Id = struct.unpack("<H", dir_stream.read(2))[0]
 580 |     check_value('PROJECTCONSTANTS_Id', 0x000C, PROJECTCONSTANTS_Id)
 581 |     PROJECTCONSTANTS_SizeOfConstants = struct.unpack("<L", dir_stream.read(4))[0]
 582 |     if PROJECTCONSTANTS_SizeOfConstants > 1015:
 583 |         logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants))
 584 |     PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants)
 585 |     PROJECTCONSTANTS_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 586 |     check_value('PROJECTCONSTANTS_Reserved', 0x003C, PROJECTCONSTANTS_Reserved)
 587 |     PROJECTCONSTANTS_SizeOfConstantsUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 588 |     if PROJECTCONSTANTS_SizeOfConstantsUnicode % 2 != 0:
 589 |         logging.error("PROJECTCONSTANTS_SizeOfConstantsUnicode is not even")
 590 |     PROJECTCONSTANTS_ConstantsUnicode = dir_stream.read(PROJECTCONSTANTS_SizeOfConstantsUnicode)
 591 | 
 592 |     # array of REFERENCE records
 593 |     check = None
 594 |     while True:
 595 |         check = struct.unpack("<H", dir_stream.read(2))[0]
 596 |         logging.debug("reference type = {0:04X}".format(check))
 597 |         if check == 0x000F:
 598 |             break
 599 | 
 600 |         if check == 0x0016:
 601 |             # REFERENCENAME
 602 |             REFERENCE_Id = check
 603 |             REFERENCE_SizeOfName = struct.unpack("<L", dir_stream.read(4))[0]
 604 |             REFERENCE_Name = dir_stream.read(REFERENCE_SizeOfName)
 605 |             REFERENCE_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 606 |             check_value('REFERENCE_Reserved', 0x003E, REFERENCE_Reserved)
 607 |             REFERENCE_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 608 |             REFERENCE_NameUnicode = dir_stream.read(REFERENCE_SizeOfNameUnicode)
 609 |             continue
 610 | 
 611 |         if check == 0x0033:
 612 |             # REFERENCEORIGINAL (followed by REFERENCECONTROL)
 613 |             REFERENCEORIGINAL_Id = check
 614 |             REFERENCEORIGINAL_SizeOfLibidOriginal = struct.unpack("<L", dir_stream.read(4))[0]
 615 |             REFERENCEORIGINAL_LibidOriginal = dir_stream.read(REFERENCEORIGINAL_SizeOfLibidOriginal)
 616 |             continue
 617 | 
 618 |         if check == 0x002F:
 619 |             # REFERENCECONTROL
 620 |             REFERENCECONTROL_Id = check
 621 |             REFERENCECONTROL_SizeTwiddled = struct.unpack("<L", dir_stream.read(4))[0] # ignore
 622 |             REFERENCECONTROL_SizeOfLibidTwiddled = struct.unpack("<L", dir_stream.read(4))[0]
 623 |             REFERENCECONTROL_LibidTwiddled = dir_stream.read(REFERENCECONTROL_SizeOfLibidTwiddled)
 624 |             REFERENCECONTROL_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0] # ignore
 625 |             check_value('REFERENCECONTROL_Reserved1', 0x0000, REFERENCECONTROL_Reserved1)
 626 |             REFERENCECONTROL_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0] # ignore
 627 |             check_value('REFERENCECONTROL_Reserved2', 0x0000, REFERENCECONTROL_Reserved2)
 628 |             # optional field
 629 |             check2 = struct.unpack("<H", dir_stream.read(2))[0]
 630 |             if check2 == 0x0016:
 631 |                 REFERENCECONTROL_NameRecordExtended_Id = check
 632 |                 REFERENCECONTROL_NameRecordExtended_SizeofName = struct.unpack("<L", dir_stream.read(4))[0]
 633 |                 REFERENCECONTROL_NameRecordExtended_Name = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeofName)
 634 |                 REFERENCECONTROL_NameRecordExtended_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 635 |                 check_value('REFERENCECONTROL_NameRecordExtended_Reserved', 0x003E, REFERENCECONTROL_NameRecordExtended_Reserved)
 636 |                 REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 637 |                 REFERENCECONTROL_NameRecordExtended_NameUnicode = dir_stream.read(REFERENCECONTROL_NameRecordExtended_SizeOfNameUnicode)
 638 |                 REFERENCECONTROL_Reserved3 = struct.unpack("<H", dir_stream.read(2))[0]
 639 |             else:
 640 |                 REFERENCECONTROL_Reserved3 = check2
 641 | 
 642 |             check_value('REFERENCECONTROL_Reserved3', 0x0030, REFERENCECONTROL_Reserved3)
 643 |             REFERENCECONTROL_SizeExtended = struct.unpack("<L", dir_stream.read(4))[0]
 644 |             REFERENCECONTROL_SizeOfLibidExtended = struct.unpack("<L", dir_stream.read(4))[0]
 645 |             REFERENCECONTROL_LibidExtended = dir_stream.read(REFERENCECONTROL_SizeOfLibidExtended)
 646 |             REFERENCECONTROL_Reserved4 = struct.unpack("<L", dir_stream.read(4))[0]
 647 |             REFERENCECONTROL_Reserved5 = struct.unpack("<H", dir_stream.read(2))[0]
 648 |             REFERENCECONTROL_OriginalTypeLib = dir_stream.read(16)
 649 |             REFERENCECONTROL_Cookie = struct.unpack("<L", dir_stream.read(4))[0]
 650 |             continue
 651 | 
 652 |         if check == 0x000D:
 653 |             # REFERENCEREGISTERED
 654 |             REFERENCEREGISTERED_Id = check
 655 |             REFERENCEREGISTERED_Size = struct.unpack("<L", dir_stream.read(4))[0]
 656 |             REFERENCEREGISTERED_SizeOfLibid = struct.unpack("<L", dir_stream.read(4))[0]
 657 |             REFERENCEREGISTERED_Libid = dir_stream.read(REFERENCEREGISTERED_SizeOfLibid)
 658 |             REFERENCEREGISTERED_Reserved1 = struct.unpack("<L", dir_stream.read(4))[0]
 659 |             check_value('REFERENCEREGISTERED_Reserved1', 0x0000, REFERENCEREGISTERED_Reserved1)
 660 |             REFERENCEREGISTERED_Reserved2 = struct.unpack("<H", dir_stream.read(2))[0]
 661 |             check_value('REFERENCEREGISTERED_Reserved2', 0x0000, REFERENCEREGISTERED_Reserved2)
 662 |             continue
 663 | 
 664 |         if check == 0x000E:
 665 |             # REFERENCEPROJECT
 666 |             REFERENCEPROJECT_Id = check
 667 |             REFERENCEPROJECT_Size = struct.unpack("<L", dir_stream.read(4))[0]
 668 |             REFERENCEPROJECT_SizeOfLibidAbsolute = struct.unpack("<L", dir_stream.read(4))[0]
 669 |             REFERENCEPROJECT_LibidAbsolute = dir_stream.read(REFERENCEPROJECT_SizeOfLibidAbsolute)
 670 |             REFERENCEPROJECT_SizeOfLibidRelative = struct.unpack("<L", dir_stream.read(4))[0]
 671 |             REFERENCEPROJECT_LibidRelative = dir_stream.read(REFERENCEPROJECT_SizeOfLibidRelative)
 672 |             REFERENCEPROJECT_MajorVersion = struct.unpack("<L", dir_stream.read(4))[0]
 673 |             REFERENCEPROJECT_MinorVersion = struct.unpack("<H", dir_stream.read(2))[0]
 674 |             continue
 675 | 
 676 |         logging.error('invalid or unknown check Id {0:04X}'.format(check))
 677 |         sys.exit(0)
 678 | 
 679 |     PROJECTMODULES_Id = check #struct.unpack("<H", dir_stream.read(2))[0]
 680 |     check_value('PROJECTMODULES_Id', 0x000F, PROJECTMODULES_Id)
 681 |     PROJECTMODULES_Size = struct.unpack("<L", dir_stream.read(4))[0]
 682 |     check_value('PROJECTMODULES_Size', 0x0002, PROJECTMODULES_Size)
 683 |     PROJECTMODULES_Count = struct.unpack("<H", dir_stream.read(2))[0]
 684 |     PROJECTMODULES_ProjectCookieRecord_Id = struct.unpack("<H", dir_stream.read(2))[0]
 685 |     check_value('PROJECTMODULES_ProjectCookieRecord_Id', 0x0013, PROJECTMODULES_ProjectCookieRecord_Id)
 686 |     PROJECTMODULES_ProjectCookieRecord_Size = struct.unpack("<L", dir_stream.read(4))[0]
 687 |     check_value('PROJECTMODULES_ProjectCookieRecord_Size', 0x0002, PROJECTMODULES_ProjectCookieRecord_Size)
 688 |     PROJECTMODULES_ProjectCookieRecord_Cookie = struct.unpack("<H", dir_stream.read(2))[0]
 689 | 
 690 |     logging.debug("parsing {0} modules".format(PROJECTMODULES_Count))
 691 |     for x in xrange(0, PROJECTMODULES_Count):
 692 |         MODULENAME_Id = struct.unpack("<H", dir_stream.read(2))[0]
 693 |         check_value('MODULENAME_Id', 0x0019, MODULENAME_Id)
 694 |         MODULENAME_SizeOfModuleName = struct.unpack("<L", dir_stream.read(4))[0]
 695 |         MODULENAME_ModuleName = dir_stream.read(MODULENAME_SizeOfModuleName)
 696 |         # account for optional sections
 697 |         section_id = struct.unpack("<H", dir_stream.read(2))[0]
 698 |         if section_id == 0x0047:
 699 |             MODULENAMEUNICODE_Id = section_id
 700 |             MODULENAMEUNICODE_SizeOfModuleNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 701 |             MODULENAMEUNICODE_ModuleNameUnicode = dir_stream.read(MODULENAMEUNICODE_SizeOfModuleNameUnicode)
 702 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 703 |         if section_id == 0x001A:
 704 |             MODULESTREAMNAME_id = section_id
 705 |             MODULESTREAMNAME_SizeOfStreamName = struct.unpack("<L", dir_stream.read(4))[0]
 706 |             MODULESTREAMNAME_StreamName = dir_stream.read(MODULESTREAMNAME_SizeOfStreamName)
 707 |             MODULESTREAMNAME_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 708 |             check_value('MODULESTREAMNAME_Reserved', 0x0032, MODULESTREAMNAME_Reserved)
 709 |             MODULESTREAMNAME_SizeOfStreamNameUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 710 |             MODULESTREAMNAME_StreamNameUnicode = dir_stream.read(MODULESTREAMNAME_SizeOfStreamNameUnicode)
 711 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 712 |         if section_id == 0x001C:
 713 |             MODULEDOCSTRING_Id = section_id
 714 |             check_value('MODULEDOCSTRING_Id', 0x001C, MODULEDOCSTRING_Id)
 715 |             MODULEDOCSTRING_SizeOfDocString = struct.unpack("<L", dir_stream.read(4))[0]
 716 |             MODULEDOCSTRING_DocString = dir_stream.read(MODULEDOCSTRING_SizeOfDocString)
 717 |             MODULEDOCSTRING_Reserved = struct.unpack("<H", dir_stream.read(2))[0]
 718 |             check_value('MODULEDOCSTRING_Reserved', 0x0048, MODULEDOCSTRING_Reserved)
 719 |             MODULEDOCSTRING_SizeOfDocStringUnicode = struct.unpack("<L", dir_stream.read(4))[0]
 720 |             MODULEDOCSTRING_DocStringUnicode = dir_stream.read(MODULEDOCSTRING_SizeOfDocStringUnicode)
 721 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 722 |         if section_id == 0x0031:
 723 |             MODULEOFFSET_Id = section_id
 724 |             check_value('MODULEOFFSET_Id', 0x0031, MODULEOFFSET_Id)
 725 |             MODULEOFFSET_Size = struct.unpack("<L", dir_stream.read(4))[0]
 726 |             check_value('MODULEOFFSET_Size', 0x0004, MODULEOFFSET_Size)
 727 |             MODULEOFFSET_TextOffset = struct.unpack("<L", dir_stream.read(4))[0]
 728 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 729 |         if section_id == 0x001E:
 730 |             MODULEHELPCONTEXT_Id = section_id
 731 |             check_value('MODULEHELPCONTEXT_Id', 0x001E, MODULEHELPCONTEXT_Id)
 732 |             MODULEHELPCONTEXT_Size = struct.unpack("<L", dir_stream.read(4))[0]
 733 |             check_value('MODULEHELPCONTEXT_Size', 0x0004, MODULEHELPCONTEXT_Size)
 734 |             MODULEHELPCONTEXT_HelpContext = struct.unpack("<L", dir_stream.read(4))[0]
 735 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 736 |         if section_id == 0x002C:
 737 |             MODULECOOKIE_Id = section_id
 738 |             check_value('MODULECOOKIE_Id', 0x002C, MODULECOOKIE_Id)
 739 |             MODULECOOKIE_Size = struct.unpack("<L", dir_stream.read(4))[0]
 740 |             check_value('MODULECOOKIE_Size', 0x0002, MODULECOOKIE_Size)
 741 |             MODULECOOKIE_Cookie = struct.unpack("<H", dir_stream.read(2))[0]
 742 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 743 |         if section_id == 0x0021 or section_id == 0x0022:
 744 |             MODULETYPE_Id = section_id
 745 |             MODULETYPE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
 746 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 747 |         if section_id == 0x0025:
 748 |             MODULEREADONLY_Id = section_id
 749 |             check_value('MODULEREADONLY_Id', 0x0025, MODULEREADONLY_Id)
 750 |             MODULEREADONLY_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
 751 |             check_value('MODULEREADONLY_Reserved', 0x0000, MODULEREADONLY_Reserved)
 752 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 753 |         if section_id == 0x0028:
 754 |             MODULEPRIVATE_Id = section_id
 755 |             check_value('MODULEPRIVATE_Id', 0x0028, MODULEPRIVATE_Id)
 756 |             MODULEPRIVATE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
 757 |             check_value('MODULEPRIVATE_Reserved', 0x0000, MODULEPRIVATE_Reserved)
 758 |             section_id = struct.unpack("<H", dir_stream.read(2))[0]
 759 |         if section_id == 0x002B: # TERMINATOR
 760 |             MODULE_Reserved = struct.unpack("<L", dir_stream.read(4))[0]
 761 |             check_value('MODULE_Reserved', 0x0000, MODULE_Reserved)
 762 |             section_id = None
 763 |         if section_id != None:
 764 |             logging.warning('unknown or invalid module section id {0:04X}'.format(section_id))
 765 | 
 766 |         logging.debug('Project CodePage = %d' % PROJECTCODEPAGE_CodePage)
 767 |         vba_codec = 'cp%d' % PROJECTCODEPAGE_CodePage
 768 |         logging.debug("ModuleName = {0}".format(MODULENAME_ModuleName))
 769 |         logging.debug("StreamName = {0}".format(repr(MODULESTREAMNAME_StreamName)))
 770 |         streamname_unicode = MODULESTREAMNAME_StreamName.decode(vba_codec)
 771 |         logging.debug("StreamName.decode('%s') = %s" % (vba_codec, repr(streamname_unicode)))
 772 |         logging.debug("StreamNameUnicode = {0}".format(repr(MODULESTREAMNAME_StreamNameUnicode)))
 773 |         logging.debug("TextOffset = {0}".format(MODULEOFFSET_TextOffset))
 774 | 
 775 |         code_path = vba_root + u'VBA/' + streamname_unicode
 776 |         #TODO: test if stream exists
 777 |         logging.debug('opening VBA code stream %s' % repr(code_path))
 778 |         code_data = ole.openstream(code_path).read()
 779 |         logging.debug("length of code_data = {0}".format(len(code_data)))
 780 |         logging.debug("offset of code_data = {0}".format(MODULEOFFSET_TextOffset))
 781 |         code_data = code_data[MODULEOFFSET_TextOffset:]
 782 |         if len(code_data) > 0:
 783 |             code_data = decompress_stream(code_data)
 784 |             # case-insensitive search in the code_modules dict to find the file extension:
 785 |             filext = code_modules.get(MODULENAME_ModuleName.lower(), 'bin')
 786 |             filename = '{0}.{1}'.format(MODULENAME_ModuleName, filext)
 787 |             #TODO: also yield the codepage so that callers can decode it properly
 788 |             yield (code_path, filename, code_data)
 789 |             # print '-'*79
 790 |             # print filename
 791 |             # print ''
 792 |             # print code_data
 793 |             # print ''
 794 |             logging.debug('extracted file {0}'.format(filename))
 795 |         else:
 796 |             logging.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName))
 797 |     return
 798 | 
 799 | 
 800 | def filter_vba(vba_code):
 801 |     """
 802 |     Filter VBA source code to remove the first lines starting with "Attribute VB_",
 803 |     which are automatically added by MS Office and not displayed in the VBA Editor.
 804 |     This should only be used when displaying source code for human analysis.
 805 | 
 806 |     Note: lines are not filtered if they contain a colon, because it could be
 807 |     used to hide malicious instructions.
 808 | 
 809 |     :param vba_code: str, VBA source code
 810 |     :return: str, filtered VBA source code
 811 |     """
 812 |     vba_lines = vba_code.splitlines()
 813 |     start = 0
 814 |     for line in vba_lines:
 815 |         if line.startswith("Attribute VB_") and not ':' in line:
 816 |             start += 1
 817 |         else:
 818 |             break
 819 |     #TODO: also remove empty lines?
 820 |     vba = '\n'.join(vba_lines[start:])
 821 |     return vba
 822 | 
 823 | 
 824 | def detect_autoexec(vba_code):
 825 |     """
 826 |     Detect if the VBA code contains keywords corresponding to macros running
 827 |     automatically when triggered by specific actions (e.g. when a document is
 828 |     opened or closed).
 829 | 
 830 |     :param vba_code: str, VBA source code
 831 |     :return: list of str tuples (keyword, description)
 832 |     """
 833 |     #TODO: merge code with detect_suspicious
 834 |     # case-insensitive search
 835 |     #vba_code = vba_code.lower()
 836 |     results = []
 837 |     for description, keywords in AUTOEXEC_KEYWORDS.items():
 838 |         for keyword in keywords:
 839 |             #TODO: if keyword is already a compiled regex, use it as-is
 840 |             # search using regex to detect word boundaries:
 841 |             if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
 842 |             #if keyword.lower() in vba_code:
 843 |                 results.append((keyword, description))
 844 |     return results
 845 | 
 846 | 
 847 | def detect_suspicious(vba_code):
 848 |     """
 849 |     Detect if the VBA code contains suspicious keywords corresponding to
 850 |     potential malware behaviour.
 851 | 
 852 |     :param vba_code: str, VBA source code
 853 |     :return: list of str tuples (keyword, description)
 854 |     """
 855 |     # case-insensitive search
 856 |     #vba_code = vba_code.lower()
 857 |     results = []
 858 |     for description, keywords in SUSPICIOUS_KEYWORDS.items():
 859 |         for keyword in keywords:
 860 |             # search using regex to detect word boundaries:
 861 |             if re.search(r'(?i)\b'+keyword+r'\b', vba_code):
 862 |             #if keyword.lower() in vba_code:
 863 |                 results.append((keyword, description))
 864 |     return results
 865 | 
 866 | 
 867 | def detect_patterns(vba_code):
 868 |     """
 869 |     Detect if the VBA code contains specific patterns such as IP addresses,
 870 |     URLs, e-mail addresses, executable file names, etc.
 871 | 
 872 |     :param vba_code: str, VBA source code
 873 |     :return: list of str tuples (pattern type, value)
 874 |     """
 875 |     results = []
 876 |     found = set()
 877 |     for pattern_type, pattern_re in RE_PATTERNS:
 878 |         for match in pattern_re.finditer(vba_code):
 879 |             value = match.group()
 880 |             if value not in found:
 881 |                 results.append((pattern_type, value))
 882 |                 found.add(value)
 883 |     return results
 884 | 
 885 | 
 886 | def detect_hex_strings(vba_code):
 887 |     """
 888 |     Detect if the VBA code contains strings encoded in hexadecimal.
 889 | 
 890 |     :param vba_code: str, VBA source code
 891 |     :return: list of str tuples (encoded string, decoded string)
 892 |     """
 893 |     results = []
 894 |     found = set()
 895 |     for match in re_hex_string.finditer(vba_code):
 896 |         value = match.group()
 897 |         if value not in found:
 898 |             decoded = binascii.unhexlify(value)
 899 |             results.append((value, decoded))
 900 |             found.add(value)
 901 |     return results
 902 | 
 903 | def detect_base64_strings(vba_code):
 904 |     """
 905 |     Detect if the VBA code contains strings encoded in base64.
 906 | 
 907 |     :param vba_code: str, VBA source code
 908 |     :return: list of str tuples (encoded string, decoded string)
 909 |     """
 910 |     results = []
 911 |     found = set()
 912 |     for match in re_base64_string.finditer(vba_code):
 913 |         value = match.group()
 914 |         if value not in found:
 915 |             decoded = base64.b64decode(value)
 916 |             results.append((value, decoded))
 917 |             found.add(value)
 918 |     return results
 919 | 
 920 | def scan_vba(vba_code):
 921 |     """
 922 |     Analyze the provided VBA code to detect suspicious keywords,
 923 |     auto-executable macros, IOC patterns, obfuscation patterns
 924 |     such as hex-encoded strings.
 925 | 
 926 |     :param vba_code: str, VBA source code to be analyzed
 927 |     :return: list of tuples (type, keyword, description)
 928 |     (type = 'AutoExec', 'Suspicious', 'IOC' or 'Hex String')
 929 |     """
 930 |     # First, detect and extract hex-encoded strings:
 931 |     hex_strings = detect_hex_strings(vba_code)
 932 |     base64_strings = detect_base64_strings(vba_code)
 933 |     # detect if the code contains StrReverse:
 934 |     if 'strreverse' in vba_code.lower(): strreverse = True
 935 |     else: strreverse = False
 936 |     # Then append the decoded strings to the VBA code, to detect obfuscated IOCs and keywords:
 937 |     for encoded, decoded in hex_strings:
 938 |         vba_code += '\n'+decoded
 939 |         # if the code contains "StrReverse", also append the hex strings in reverse order:
 940 |         if strreverse:
 941 |             # StrReverse after hex decoding:
 942 |             vba_code += '\n'+decoded[::-1]
 943 |             # StrReverse before hex decoding:
 944 |             vba_code += '\n'+binascii.unhexlify(encoded[::-1])
 945 |             #example: https://malwr.com/analysis/NmFlMGI4YTY1YzYyNDkwNTg1ZTBiZmY5OGI3YjlhYzU/
 946 |     #TODO: also append the full code reversed if StrReverse? (risk of false positives?)
 947 |     autoexec_keywords = detect_autoexec(vba_code)
 948 |     suspicious_keywords = detect_suspicious(vba_code)
 949 |     # If hex-encoded strings were discovered, add an item to suspicious keywords:
 950 |     if hex_strings:
 951 |         suspicious_keywords.append(('Hex Strings', 'Hex-encoded strings were detected, may be used to obfuscate strings (option --hex to see all)'))
 952 |     if base64_strings:
 953 |         suspicious_keywords.append(('Base64 Strings', 'Base64-encoded strings were detected, may be used to obfuscate strings'))
 954 |     patterns = detect_patterns(vba_code)
 955 |     results = []
 956 |     for keyword, description in autoexec_keywords:
 957 |         results.append(('AutoExec', keyword, description))
 958 |     for keyword, description in suspicious_keywords:
 959 |         results.append(('Suspicious', keyword, description))
 960 |     for pattern_type, value in patterns:
 961 |         results.append(('IOC', value, pattern_type))
 962 |     # Only if option --hex:
 963 |     # for encoded, decoded in hex_strings:
 964 |     #     results.append(('Hex String', repr(decoded), encoded))
 965 |     for encoded, decoded in base64_strings:
 966 |          results.append(('Base64 String', repr(decoded), encoded))
 967 |     return results
 968 | 
 969 | 
 970 | #=== CLASSES =================================================================
 971 | 
 972 | class VBA_Parser(object):
 973 |     """
 974 |     Class to parse MS Office files, to detect VBA macros and extract VBA source code
 975 |     Supported file formats:
 976 |     - Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
 977 |     - Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
 978 |     - PowerPoint 2007+ (.pptm, .ppsm)
 979 |     """
 980 | 
 981 |     def __init__(self, filename, data=None):
 982 |         """
 983 |         Constructor for VBA_Parser
 984 | 
 985 |         :param _file: path of file to parse, file-like object or file content
 986 |         :param filename: actual filename if _file is a  file-like object or file content
 987 |         in a bytes string
 988 |         """
 989 |         #TODO: filename should be mandatory, optional data is a string or file-like object
 990 |         #TODO: also support olefile and zipfile as input
 991 |         if data is None:
 992 |             # open file from disk:
 993 |             _file = filename
 994 |         else:
 995 |             # file already read in memory, make it a file-like object for zipfile:
 996 |             _file = cStringIO.StringIO(data)
 997 |         #self.file = _file
 998 |         self.ole_file = None
 999 |         self.ole_subfiles = []
1000 |         self.filename = filename
1001 |         self.type = None
1002 |         self.vba_projects = None
1003 |         # if filename is None:
1004 |         #     if isinstance(_file, basestring):
1005 |         #         if len(_file) < olefile.MINIMAL_OLEFILE_SIZE:
1006 |         #             self.filename = _file
1007 |         #         else:
1008 |         #             self.filename = '<file in bytes string>'
1009 |         #     else:
1010 |         #         self.filename = '<file-like object>'
1011 |         if olefile.isOleFile(_file):
1012 |             # This looks like an OLE file
1013 |             logging.info('Parsing OLE file %s' % self.filename)
1014 |             # Open and parse the OLE file, using unicode for path names:
1015 |             self.ole_file = olefile.OleFileIO(_file, path_encoding=None)
1016 |             self.type = TYPE_OLE
1017 |             #TODO: raise TypeError if this is a Powerpoint 97 file, since VBA macros cannot be detected yet
1018 |         elif zipfile.is_zipfile(_file):
1019 |             # This looks like a zip file, need to look for vbaProject.bin inside
1020 |             # It can be any OLE file inside the archive
1021 |             #...because vbaProject.bin can be renamed:
1022 |             # see http://www.decalage.info/files/JCV07_Lagadec_OpenDocument_OpenXML_v4_decalage.pdf#page=18
1023 |             logging.info('Opening ZIP/OpenXML file %s' % self.filename)
1024 |             self.type = TYPE_OpenXML
1025 |             z = zipfile.ZipFile(_file)
1026 |             #TODO: check if this is actually an OpenXML file
1027 |             # check each file within the zip if it is an OLE file, by reading its magic:
1028 |             for subfile in z.namelist():
1029 |                 magic = z.open(subfile).read(len(olefile.MAGIC))
1030 |                 if magic == olefile.MAGIC:
1031 |                     logging.debug('Opening OLE file %s within zip' % subfile)
1032 |                     ole_data = z.open(subfile).read()
1033 |                     try:
1034 |                         self.ole_subfiles.append(VBA_Parser(filename=subfile, data=ole_data))
1035 |                     except:
1036 |                         logging.debug('%s is not a valid OLE file' % subfile)
1037 |                         continue
1038 |             z.close()
1039 |         else:
1040 |             msg = '%s is not an OLE nor an OpenXML file, cannot extract VBA Macros.' % self.filename
1041 |             logging.error(msg)
1042 |             raise TypeError(msg)
1043 | 
1044 |     def find_vba_projects (self):
1045 |         """
1046 |         Finds all the VBA projects stored in an OLE file.
1047 | 
1048 |         Return None if the file is not OLE but OpenXML.
1049 |         Return a list of tuples (vba_root, project_path, dir_path) for each VBA project.
1050 |         vba_root is the path of the root OLE storage containing the VBA project,
1051 |         including a trailing slash unless it is the root of the OLE file.
1052 |         project_path is the path of the OLE stream named "PROJECT" within the VBA project.
1053 |         dir_path is the path of the OLE stream named "VBA/dir" within the VBA project.
1054 | 
1055 |         If this function returns an empty list for one of the supported formats
1056 |         (i.e. Word, Excel, Powerpoint except Powerpoint 97-2003), then the
1057 |         file does not contain VBA macros.
1058 | 
1059 |         :return: None if OpenXML file, list of tuples (vba_root, project_path, dir_path)
1060 |         for each VBA project found if OLE file
1061 |         """
1062 |         # if the file is not OLE but OpenXML, return None:
1063 |         if self.ole_file is None:
1064 |             return None
1065 | 
1066 |         # if this method has already been called, return previous result:
1067 |         if self.vba_projects is not None:
1068 |             return self.vba_projects
1069 | 
1070 |         # Find the VBA project root (different in MS Word, Excel, etc):
1071 |         # - Word 97-2003: Macros
1072 |         # - Excel 97-2003: _VBA_PROJECT_CUR
1073 |         # - PowerPoint 97-2003: not supported yet (different file structure)
1074 |         # - Word 2007+: word/vbaProject.bin in zip archive, then the VBA project is the root of vbaProject.bin.
1075 |         # - Excel 2007+: xl/vbaProject.bin in zip archive, then same as Word
1076 |         # - PowerPoint 2007+: ppt/vbaProject.bin in zip archive, then same as Word
1077 |         # - Visio 2007: not supported yet (different file structure)
1078 | 
1079 |         # According to MS-OVBA section 2.2.1:
1080 |         # - the VBA project root storage MUST contain a VBA storage and a PROJECT stream
1081 |         # - The root/VBA storage MUST contain a _VBA_PROJECT stream and a dir stream
1082 |         # - all names are case-insensitive
1083 | 
1084 |         # start with an empty list:
1085 |         self.vba_projects = []
1086 |         # Look for any storage containing those storage/streams:
1087 |         ole = self.ole_file
1088 |         for storage in ole.listdir(streams=False, storages=True):
1089 |             # Look for a storage ending with "VBA":
1090 |             if storage[-1].upper() == 'VBA':
1091 |                 logging.debug('Found VBA storage: %s' % ('/'.join(storage)))
1092 |                 vba_root = '/'.join(storage[:-1])
1093 |                 # Add a trailing slash to vba_root, unless it is the root of the OLE file:
1094 |                 # (used later to append all the child streams/storages)
1095 |                 if vba_root != '':
1096 |                     vba_root += '/'
1097 |                 logging.debug('Checking vba_root="%s"' % vba_root)
1098 | 
1099 |                 def check_vba_stream(ole, vba_root, stream_path):
1100 |                     full_path = vba_root + stream_path
1101 |                     if ole.exists(full_path) and ole.get_type(full_path) == olefile.STGTY_STREAM:
1102 |                         logging.debug('Found %s stream: %s' % (stream_path, full_path))
1103 |                         return full_path
1104 |                     else:
1105 |                         logging.debug('Missing %s stream, this is not a valid VBA project structure' % stream_path)
1106 |                         return False
1107 | 
1108 |                 # Check if the VBA root storage also contains a PROJECT stream:
1109 |                 project_path = check_vba_stream(ole, vba_root, 'PROJECT')
1110 |                 if not project_path: continue
1111 |                 # Check if the VBA root storage also contains a VBA/_VBA_PROJECT stream:
1112 |                 vba_project_path = check_vba_stream(ole, vba_root, 'VBA/_VBA_PROJECT')
1113 |                 if not vba_project_path: continue
1114 |                 # Check if the VBA root storage also contains a VBA/dir stream:
1115 |                 dir_path = check_vba_stream(ole, vba_root, 'VBA/dir')
1116 |                 if not dir_path: continue
1117 |                 # Now we are pretty sure it is a VBA project structure
1118 |                 logging.debug('VBA root storage: "%s"' % vba_root)
1119 |                 # append the results to the list as a tuple for later use:
1120 |                 self.vba_projects.append((vba_root, project_path, dir_path))
1121 |         return self.vba_projects
1122 | 
1123 |     def detect_vba_macros(self):
1124 |         """
1125 |         Detect the potential presence of VBA macros in the file, by checking
1126 |         if it contains VBA projects. Both OLE and OpenXML files are supported.
1127 | 
1128 |         Important: for now, results are accurate only for Word, Excel and PowerPoint
1129 |         EXCEPT Powerpoint 97-2003, which has a different structure for VBA.
1130 | 
1131 |         Note: this method does NOT attempt to check the actual presence or validity
1132 |         of VBA macro source code, so there might be false positives.
1133 |         It may also detect VBA macros in files embedded within the main file,
1134 |         for example an Excel workbook with macros embedded into a Word
1135 |         document without macros may be detected, without distinction.
1136 | 
1137 |         :return: bool, True if at least one VBA project has been found, False otherwise
1138 |         """
1139 |         #TODO: return None or raise exception if format not supported like PPT 97-2003
1140 |         #TODO: return the number of VBA projects found instead of True/False?
1141 |         # if OpenXML, check all the OLE subfiles:
1142 |         if self.ole_file is None:
1143 |             for ole_subfile in self.ole_subfiles:
1144 |                 if ole_subfile.detect_vba_macros():
1145 |                     return True
1146 |             return False
1147 |         # otherwise it's an OLE file, find VBA projects:
1148 |         vba_projects = self.find_vba_projects()
1149 |         if len(vba_projects) == 0:
1150 |             return False
1151 |         else:
1152 |             return True
1153 | 
1154 | 
1155 |     def extract_macros (self):
1156 |         """
1157 |         Extract and decompress source code for each VBA macro found in the file
1158 | 
1159 |         Iterator: yields (filename, stream_path, vba_filename, vba_code) for each VBA macro found
1160 |         If the file is OLE, filename is the path of the file.
1161 |         If the file is OpenXML, filename is the path of the OLE subfile containing VBA macros
1162 |         within the zip archive, e.g. word/vbaProject.bin.
1163 |         """
1164 |         if self.ole_file is None:
1165 |             for ole_subfile in self.ole_subfiles:
1166 |                 for results in ole_subfile.extract_macros():
1167 |                     yield results
1168 |         else:
1169 |             self.find_vba_projects()
1170 |             for vba_root, project_path, dir_path in self.vba_projects:
1171 |                 # extract all VBA macros from that VBA root storage:
1172 |                 for stream_path, vba_filename, vba_code in _extract_vba(self.ole_file, vba_root, project_path, dir_path):
1173 |                     yield (self.filename, stream_path, vba_filename, vba_code)
1174 | 
1175 | 
1176 |     def close(self):
1177 |         """
1178 |         Close all the open files. This method must be called after usage, if
1179 |         the application is opening many files.
1180 |         """
1181 |         if self.ole_file is None:
1182 |             for ole_subfile in self.ole_subfiles:
1183 |                 ole_subfile.close()
1184 |         else:
1185 |             self.ole_file.close()
1186 | 
1187 | 
def print_analysis(vba_code):
    """
    Run scan_vba on the provided VBA code and print what it found as a
    three-column table (Type, Keyword, Description), or a one-line notice
    when it found nothing.

    :param vba_code: str, VBA source code to be analyzed
    :return: None
    """
    findings = scan_vba(vba_code)
    if not findings:
        print('No suspicious keyword or IOC found.')
        return
    table = prettytable.PrettyTable(('Type', 'Keyword', 'Description'))
    table.align = 'l'
    # maximum width of each column, in characters:
    for column, width in (('Type', 10), ('Keyword', 20), ('Description', 39)):
        table.max_width[column] = width
    for kw_type, keyword, description in findings:
        table.add_row((kw_type, keyword, description))
    print(table)
1207 | 
1208 | 
1209 | 
def process_file (container, filename, data):
    """
    Process a single file in detailed mode: print its name and type, then the
    filtered source code of every VBA macro found, each non-empty macro being
    followed by its analysis.

    Errors raised while parsing or analyzing the file are reported by printing
    the exception instead of propagating it. KeyboardInterrupt and SystemExit
    are not intercepted, so the program can still be interrupted.
    The parser is always closed before returning.

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise.
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    :return: None
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    # NOTE: every print below takes a single parenthesized argument, which is
    # valid and equivalent with both the print statement and the print function.
    if container:
        display_filename = '%s in %s' % (filename, container)
    else:
        display_filename = filename
    print('='*79)
    print('FILE: %s' % display_filename)
    try:
        #TODO: handle olefile errors, when an OLE file is malformed
        vba = VBA_Parser(filename, data)
        try:
            print('Type: %s' % vba.type)
            if vba.detect_vba_macros():
                for (subfilename, stream_path, vba_filename, vba_code) in vba.extract_macros():
                    # hide attribute lines:
                    #TODO: option to disable attribute filtering
                    vba_code = filter_vba(vba_code)
                    print('-'*79)
                    print('VBA MACRO %s ' % vba_filename)
                    print('in file: %s - OLE stream: %s' % (subfilename, repr(stream_path)))
                    print('- '*39)
                    # detect empty macros:
                    if vba_code.strip() == '':
                        print('(empty macro)')
                    else:
                        print(vba_code)
                        print('- '*39)
                        print('ANALYSIS:')
                        print_analysis(vba_code)
            else:
                print('No VBA macros found.')
        finally:
            # release the open files even if the analysis failed half-way:
            vba.close()
    except Exception:
        # "Exception" rather than a bare except, which would also swallow
        # KeyboardInterrupt and SystemExit.
        #TODO: print more info if debug mode
        # sys.exc_info() replaces the deprecated, non thread-safe sys.exc_value:
        print(sys.exc_info()[1])
    print('')
1255 | 
1256 | 
def _triage_flags(vba):
    """
    Build the flags field of a triage line for an opened VBA_Parser.

    :param vba: VBA_Parser object of the file to summarize
    :return: str of 6 characters: 'O' if vba.type is TYPE_OLE or 'X' otherwise,
    followed by M (macros), A (auto-executable), S (suspicious keywords),
    I (IOCs) and H (hex-encoded strings), each replaced by '-' when nothing
    of that kind was found.
    """
    nb_macros = nb_autoexec = nb_suspicious = nb_iocs = nb_hexstrings = 0
    if vba.detect_vba_macros():
        for (_subfilename, _stream_path, _vba_filename, vba_code) in vba.extract_macros():
            nb_macros += 1
            # empty macros are counted, but there is nothing to scan in them:
            if vba_code.strip() != '':
                #TODO: same changes as scan_vba, or modify scan_vba to return these counts
                nb_autoexec += len(detect_autoexec(vba_code))
                nb_suspicious += len(detect_suspicious(vba_code))
                nb_iocs += len(detect_patterns(vba_code))
                nb_hexstrings += len(detect_hex_strings(vba_code))
    if vba.type == TYPE_OLE:
        flags = 'O'
    else:
        flags = 'X'
    indicators = (('M', nb_macros), ('A', nb_autoexec), ('S', nb_suspicious),
                  ('I', nb_iocs), ('H', nb_hexstrings))
    for letter, count in indicators:
        if count:
            flags += letter
        else:
            flags += '-'
    return flags


def process_file_triage (container, filename, data):
    """
    Process a single file in triage mode: print one summary line made of the
    flags of the file followed by its name, and by a message if it could not
    be analyzed.

    Flags are 'O' or 'X' followed by five indicators (see _triage_flags),
    '?' when VBA_Parser rejects the file with a TypeError (neither OLE nor
    OpenXML), or '!ERROR' when any other error occurs. A TypeError raised
    later, while scanning the macros, is reported as '!ERROR' and not as an
    unsupported format. KeyboardInterrupt and SystemExit are not intercepted.
    The parser is always closed before returning.

    :param container: str, path and filename of container if the file is within
    a zip archive, None otherwise. (not used in the summary line)
    :param filename: str, path and filename of file on disk, or within the container.
    :param data: bytes, content of the file if it is in a container, None if it is a file on disk.
    :return: None
    """
    #TODO: replace print by writing to a provided output file (sys.stdout by default)
    message = ''
    try:
        try:
            #TODO: handle olefile errors, when an OLE file is malformed
            vba = VBA_Parser(filename, data)
        except TypeError:
            # file type not OLE nor OpenXML
            flags = '?'
            message = 'File format not supported'
        else:
            try:
                flags = _triage_flags(vba)
            finally:
                # release the open files even if the scan failed half-way:
                vba.close()
    except Exception:
        # another error occurred ("Exception" rather than a bare except, which
        # would also swallow KeyboardInterrupt and SystemExit)
        #TODO: print more info if debug mode
        #TODO: distinguish real errors from incorrect file types
        flags = '!ERROR'
        # sys.exc_info() replaces the deprecated, non thread-safe sys.exc_value:
        message = sys.exc_info()[1]
    line = '%-6s %s' % (flags, filename)
    if message:
        line += ' - %s' % message
    print(line)
1321 | 
1322 |     # t = prettytable.PrettyTable(('filename', 'type', 'macros', 'autoexec', 'suspicious', 'ioc', 'hexstrings'),
1323 |     #     header=False, border=False)
1324 |     # t.align = 'l'
1325 |     # t.max_width['filename'] = 30
1326 |     # t.max_width['type'] = 10
1327 |     # t.max_width['macros'] = 6
1328 |     # t.max_width['autoexec'] = 6
1329 |     # t.max_width['suspicious'] = 6
1330 |     # t.max_width['ioc'] = 6
1331 |     # t.max_width['hexstrings'] = 6
1332 |     # t.add_row((filename, ftype, macros, autoexec, suspicious, iocs, hexstrings))
1333 |     # print t
1334 | 
def main_triage_quick():
    """
    Placeholder, not implemented yet: does nothing.

    :return: None
    """
    return None
1337 | 
1338 | #=== MAIN =====================================================================
1339 | 
def main():
    """
    Main function, called when olevba is run from the command line

    Without any filename nor -i option, the module documentation and the help
    are printed. With -i, the given file is read as VBA source code and only
    analyzed. Otherwise every file matching the arguments is processed:
    - in detailed mode if -d is given without -t,
    - in triage mode (one summary line per file) in all other cases.
    When neither -d nor -t is given and exactly one file was processed, its
    detailed results are printed after the triage summary.
    """
    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)
    #TODO: add options -o/--outfile (output file) and -c/--csv (export results to a CSV file)
    parser.add_option("-r", action="store_true", dest="recursive",
        help='find files recursively in subdirectories.')
    parser.add_option("-z", "--zip", dest='zip_password', type='str', default=None,
        help='if the file is a zip archive, open first file from it, using the provided password (requires Python 2.6+)')
    parser.add_option("-f", "--zipfname", dest='zip_fname', type='str', default='*',
        help='if the file is a zip archive, file(s) to be opened within the zip. Wildcards * and ? are supported. (default:*)')
    parser.add_option("-t", action="store_true", dest="triage_mode",
        help='triage mode, display results as a summary table (default for multiple files)')
    parser.add_option("-d", action="store_true", dest="detailed_mode",
        help='detailed mode, display full results (default for single file)')
    parser.add_option("-i", "--input", dest='input', type='str', default=None,
        help='input file containing VBA source code to be analyzed (no parsing)')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(args) == 0 and not options.input:
        print(__doc__)
        parser.print_help()
        sys.exit()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING) #INFO)
    # For now, all logging is disabled:
    logging.disable(logging.CRITICAL)

    if options.input:
        # input file provided with VBA source code to be analyzed directly:
        print('Analysis of VBA source code from %s:' % options.input)
        # try/finally (rather than "with") closes the file on any Python 2.x:
        source_file = open(options.input)
        try:
            vba_code = source_file.read()
        finally:
            source_file.close()
        print_analysis(vba_code)
        sys.exit()

    # Detailed output only if -d is given without -t, triage output otherwise:
    detailed = options.detailed_mode and not options.triage_mode
    if not detailed:
        print('%-6s %-72s' % ('Flags', 'Filename'))
        print('%-6s %-72s' % ('-'*6, '-'*72))
    previous_container = None
    count = 0
    # Arguments of the last file actually processed. The loop variables cannot
    # be reused after the loop, because its last item may be a skipped entry.
    last_processed = None
    for container, filename, data in xglob.iter_files(args, recursive=options.recursive,
        zip_password=options.zip_password, zip_fname=options.zip_fname):
        # ignore directory names stored in zip files:
        if container and filename.endswith('/'):
            continue
        if detailed:
            # fully detailed output
            process_file(container, filename, data)
        else:
            # print container name when it changes:
            if container != previous_container:
                if container is not None:
                    print('\nFiles in %s:' % container)
                previous_container = container
            # summarized output for triage:
            process_file_triage(container, filename, data)
        count += 1
        last_processed = (container, filename, data)
    if not detailed:
        print('\n(Flags: O=OLE, X=OpenXML, M=Macros, A=Auto-executable, S=Suspicious keywords, I=IOCs, H=Hex-encoded strings, ?=Unknown)\n')

    if count == 1 and not options.triage_mode and not options.detailed_mode:
        # if options -t and -d were not specified and it's a single file, print details:
        #TODO: avoid doing the analysis twice by storing results
        process_file(*last_processed)

if __name__ == '__main__':
    main()
1417 | 
1418 | # This was coded while listening to "Dust" from I Love You But I've Chosen Darkness
1419 | 


--------------------------------------------------------------------------------
/plugin_base64.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | __description__ = 'Base64 string decoder for oledump.py'
 4 | __author__ = 'James Habben'
 5 | __version__ = '0.0.1'
 6 | __date__ = '2015/01/30'
 7 | 
 8 | import re
 9 | import base64
10 | 
def Decode (input) :
    """
    Decode a Base64 string.

    :param input: Base64-encoded data
    :return: the decoded data, as returned by base64.b64decode
    """
    decoded = base64.b64decode(input)
    return decoded
13 | 
# NOTE(review): cPluginParent and AddPlugin are not defined in this file;
# presumably oledump.py provides them when it loads the plugin - confirm.
class cBase64Decoder(cPluginParent):
    """
    Plugin decoding the Base64 strings found in a stream: every double-quoted
    literal made only of well-formed Base64 groups is decoded.
    """
    macroOnly = True
    name = 'Base64 decoder'

    def __init__(self, name, stream, options):
        self.streamname = name
        self.stream = stream
        self.options = options
        self.ran = False

    def Analyze(self):
        """
        Decode every Base64 literal of the stream.

        :return: list of the decoded values, in order of appearance in the
        stream; empty literals and strings that cannot be decoded are left out.
        """
        self.ran = True

        result = []

        # The capture group keeps the surrounding quotes out of the text handed to Decode:
        oREString = re.compile(r'"((?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?)"')
        for foundString in oREString.findall(self.stream):
            # The empty literal "" matches the pattern too, but there is nothing to decode in it:
            if not foundString:
                continue
            try:
                result.append(Decode(foundString))
            except (TypeError, ValueError):
                # Not decodable after all: base64.b64decode raises TypeError on
                # Python 2 and binascii.Error (a ValueError) on Python 3.
                continue

        return result

AddPlugin(cBase64Decoder)
39 | 


--------------------------------------------------------------------------------
/plugin_dridex.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | __description__ = 'Dridex string decoder for oledump.py'
 4 | __author__ = 'James Habben'
 5 | __version__ = '0.0.1'
 6 | __date__ = '2015/01/29'
 7 | 
 8 | import re
 9 | 
def Decode (input) :
    """
    Decode a string obfuscated with the Dridex scheme implemented below:

    1. The first 4 and last 4 characters are dropped.
    2. The 4 characters in the middle of what remains hold two 2-character
       numbers (non-digits read as 0). The second minus the first is the
       width, in characters, of each encoded group. These 4 characters are
       then removed.
    3. The "width" characters in the middle of what remains (width // 2 on
       each side of the midpoint) hold the divisor: its digits are kept, the
       other characters ignored. These characters are then removed.
    4. What remains is split into groups of "width" characters. The digits of
       each group form a number which, divided by the divisor, is the code of
       one decoded character.

    All divisions are floor divisions (//), so the result is the same whether
    or not "/" means true division (Python 3, "from __future__ import division").

    :param input: str, obfuscated string
    :return: str, decoded string
    :raises ValueError: if one of the numeric fields contains no digit, or if
    a character code is out of the range accepted by chr()
    :raises ZeroDivisionError: if the divisor is 0
    """
    work = input[4:-4]
    middle = len(work) // 2
    strKeyEnc = StripCharsWithZero(work[middle - 2: middle])
    strKeySize = StripCharsWithZero(work[middle: middle + 2])
    nCharSize = strKeySize - strKeyEnc
    work = work[:middle - 2] + work[middle + 2:]
    # the midpoint moved, since 4 characters have just been removed:
    middle = len(work) // 2
    half_key = nCharSize // 2
    strKeyEnc2 = StripChars(work[middle - half_key: middle + half_key])
    work = work[:middle - half_key] + work[middle + half_key:]
    return ''.join(chr(StripChars(work[i:i + nCharSize]) // strKeyEnc2)
                   for i in range(0, len(work), nCharSize))

def StripChars (input) :
    """
    Return as an integer the digits of input, all other characters being ignored.

    :param input: str
    :return: int
    :raises ValueError: if input does not contain any digit
    """
    return int(''.join(c for c in input if c.isdigit()))

def StripCharsWithZero (input) :
    """
    Return input as an integer, each non-digit character being read as 0.

    :param input: str
    :return: int
    :raises ValueError: if input is empty
    """
    digits = []
    for c in input :
        if c.isdigit() :
            digits.append(c)
        else:
            digits.append('0')
    return int(''.join(digits))
39 | 
# NOTE(review): cPluginParent and AddPlugin are not defined in this file;
# presumably oledump.py provides them when it loads the plugin - confirm.
class cDridexDecoder(cPluginParent):
    """
    Plugin trying the Dridex decoder on every double-quoted string of a stream.
    """
    macroOnly = True
    name = 'Dridex decoder'

    def __init__(self, name, stream, options):
        self.streamname = name
        self.stream = stream
        self.options = options
        self.ran = False

    def Analyze(self):
        """
        Try to decode every non-empty double-quoted string of the stream.

        :return: list of the decoded strings, in order of appearance in the
        stream; strings that cannot be decoded or that decode to nothing are
        left out.
        """
        self.ran = True

        result = []

        oREString = re.compile(r'"([^"\n]+)"')
        for foundString in oREString.findall(self.stream):
            try:
                decoded = Decode(foundString)
            except Exception:
                # Best effort: strings that are not Dridex-encoded are expected
                # to make the decoder fail. "Exception" rather than a bare
                # except, so that KeyboardInterrupt and SystemExit are not
                # swallowed.
                continue
            if decoded:
                result.append(decoded)

        return result

AddPlugin(cDridexDecoder)
65 | 


--------------------------------------------------------------------------------