├── README.md ├── plugin_officecrackros.py └── oledump.py /README.md: -------------------------------------------------------------------------------- 1 | # OfficeCrackros 2 | ##### Crack your macros like the math pros. 3 | This is a substitution cipher detector & decoder plugin for Microsoft Office documents. Essentially, this is Sigpedia for Macros. What I'm trying to say is I think you'll find this helpful if you can navigate all the trolling. **Feb 2017 Update:** This now supports PointsToInches character encoding (new FIN8 technique)! 4 | 5 | ## How To Use It 6 | ### 1. download teh scripts 7 | ### 2. run against suspect documents 8 | **Usage:** ```python oledump.py -p plugin_officecrackros ``` 9 | ### 3. let me know what you think 10 | * Please understand that, like all good hacked together tools, I stopped as soon as it worked - with much room for improvement 11 | * If you found the tool helpful, let me know [@itsreallynick](https://twitter.com/itsreallynick) 12 | 13 | ## Requirements 14 | * oledump 15 | * Didier Stevens, who is awesome, created this tool 16 | * oledump has been included in this repository 17 | * https://github.com/DidierStevens/DidierStevensSuite/blob/master/oledump.py 18 | * oledump requires olefile python library: ```easy_install olefile``` 19 | * Malicious Microsoft Office Document using encoded macros 20 | * Specifically: macros substitution noise used by FIN8; also seen for Nymaim ransomware delivery 21 | * Try it yourself: 22 | * https://www.virustotal.com/en/file/cba63594f28e69405b5075013624075ef1a538be40a7c2402f84d33f9f6c2927/analysis/ 23 | 24 | ## To Do List: 25 | * ~~CRUSH IT.~~ 26 | * Remove extraneous text in multiple line matches (improve regular expressions) 27 | * Add back in substitution / dropchar detection based on character histogramming -------------------------------------------------------------------------------- /plugin_officecrackros.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # OfficeCrackros (substitution cipher detector/decoder) plugin for oledump.py by Nick Carr, while at Mandiant 3 | # 2016/06/01 4 | # Updated 2017/02/13 for Points2Inches (tricky FIN8 macro decoder) 5 | 6 | import re 7 | import string 8 | 9 | def deobfuscate(text, key): 10 | # For every char in the key 11 | for i in range(len(key)): 12 | # Remove all instances of that char in the text 13 | while True: 14 | found = text.find(key[i]) 15 | # If we still have a char in the text, remove it 16 | if (found != -1): 17 | text = text[:found] + text[found+1:]; 18 | # Otherwise, break 19 | else: 20 | break; 21 | # Return unobfuscated string 22 | return text; 23 | 24 | class cOfficeCrackros(cPluginParent): 25 | macroOnly = True #observed Macros 26 | name = 'Sketchy cipher detected: OfficeCrackros plugin by Nick Carr' 27 | 28 | def __init__(self, name, stream, options): 29 | self.streamname = name 30 | self.stream = stream 31 | self.options = options 32 | self.ran = False 33 | 34 | def Analyze(self): 35 | result = [] 36 | 37 | if len(self.streamname) > 1: 38 | for nbi in re.findall(r'h.{0,3}t.{0,3}t.{0,3}p.{0,7}\:.{0,3}\/.{0,3}\/[^"][^)]*', self.stream): 39 | # can remove last ^ to get key, but this is more fun 40 | self.ran = True 41 | result.append('ENCODED NBI: ' + nbi) 42 | 43 | for obfuscated in re.findall(r'$\".*\"\,\s\".*\"$', self.stream): 44 | # Pattern to match: ("", "") 45 | # Substition cipher routine (dropchars) 46 | self.ran = True 47 | counter = 0 48 | for matches in re.split(', "', obfuscated): 49 | if (counter %2 == 0): 50 | text = re.sub('[()"]', '', matches) 51 | else: 52 | key = re.sub('[()"]', '', matches) 53 | result.append('DECODED STRING: ' + deobfuscate(text, key)) 54 | counter+=1 55 | 56 | for points2inches in re.findall(r'.*\=.*VBA\.Chr$PointsToInches\(.*$', self.stream): 57 | self.ran = True 58 | p2i_function = re.split(' =',points2inches)[0] # identifies function name for extraction 59 | 60 | for matchingline in re.findall(r'.*' + re.escape(p2i_function) + r'$.*,.*$', self.stream): # identifies full lines / context, excludes single item lists 61 | p2i_string = '' 62 | 63 | for encoded in re.findall(re.escape(p2i_function) + r'$[^$]*\)', matchingline): 64 | p2i_array = re.split(p2i_function, encoded)[1] 65 | for points in eval(p2i_array): 66 | p2i_string += chr(points/72) 67 | result.append(re.sub(re.escape(p2i_function) + r'$[^$]*\)', p2i_string.replace('\\','\\\\'), matchingline).replace('\\\\','\\')) 68 | return result 69 | 70 | AddPlugin(cOfficeCrackros) -------------------------------------------------------------------------------- /oledump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __description__ = 'Analyze OLE files (Compound Binary Files)' 4 | __author__ = 'Didier Stevens' 5 | __version__ = '0.0.23' 6 | __date__ = '2015/12/22' 7 | 8 | """ 9 | 10 | Source code put in public domain by Didier Stevens, no Copyright 11 | https://DidierStevens.com 12 | Use at your own risk 13 | 14 | # http://www.wordarticles.com/Articles/Formats/StreamCompression.php 15 | 16 | History: 17 | 2014/08/21: start 18 | 2014/08/22: added ZIP support 19 | 2014/08/23: added stdin support 20 | 2014/08/25: added options extract and info 21 | 2014/08/26: bugfix pipe 22 | 2014/09/01: added * as selection option 23 | 2014/09/15: exception handling for import OleFileIO_PL 24 | 2014/11/12: added plugins 25 | 2014/11/15: continued plugins 26 | 2014/11/21: added pluginoptions 27 | 2014/12/14: 0.0.3: Added YARA support; added decoders 28 | 2014/12/19: 0.0.4: fixed bug when file was not OLE 29 | 2014/12/24: 0.0.5: fixed storage bug and added MacrosContainsOnlyAttributes 30 | 2014/12/25: 0.0.6: added support for ZIP containers with OLE files, like .docx 31 | 2014/12/26: added printing of filename OLE files inside ZIP 32 | 2014/12/31: suppressed printing of filename when selecting 33 | 2015/02/09: 0.0.7: added handling of .docx, ... inside ZIP file; Added option yarastrings 34 | 2015/02/10: 0.0.8: added YARACompile 35 | 2015/02/19: 0.0.9: added option -q 36 | 2015/02/23: 0.0.10: handle errors in compressed macros 37 | 2015/02/24: continue 38 | 2015/03/02: 0.0.11: added option -M 39 | 2015/03/05: added support for .xml files 40 | 2015/03/11: 0.0.12: added code pages identification 41 | 2015/03/13: Fixed oElement.firstChild.nodeValue UnicodeEncodeError bug 42 | 2015/03/19: 0.0.13: added option -c 43 | 2015/03/24: added man page 44 | 2015/03/25: added option --decompress 45 | 2015/03/26: changed --raw option 46 | 2015/04/10: 0.0.14: fixed bug SearchAndDecompressSub 47 | 2015/05/08: 0.0.15: added direct support for ActiveMime files 48 | 2015/05/13: 0.0.16: changed HeuristicDecompress with findall; renamed MacrosContainsOnlyAttributes to MacrosContainsOnlyAttributesOrOptions 49 | 2015/06/08: 0.0.17: Fix HexAsciiDump 50 | 2015/06/14: Added exit code 51 | 2015/07/26: 0.0.18: Added option --vbadecompresscorrupt 52 | 2015/09/12: added option --cut 53 | 2015/09/13: changed exit code to 2 when macros detected 54 | 2015/09/16: Rename old OleFileIO_PL to new olefile so that local copy of the module can be used 55 | 2015/09/17: added help for pip install olefile 56 | 2015/09/22: fixed os.path.isfile(filename) bug 57 | 2015/10/30: 0.0.19 added option -E and environment variable OLEDUMP_EXTRA; added MD5 to option -i 58 | 2015/11/08: 0.0.20 added man text for option -E; changed OptionsEnvironmentVariables so option takes precedence over environment variable 59 | 2015/11/09: continued -E 60 | 2015/11/12: 0.0.21 added dslsimulationdb 61 | 2015/11/17: added support for :-number in --cut option 62 | 2015/12/16: 0.0.22 some enhancements for --raw option 63 | 2015/12/22: 0.0.23 updated cut syntax 64 | 65 | Todo: 66 | """ 67 | 68 | import optparse 69 | import sys 70 | import math 71 | import os 72 | import zipfile 73 | import cStringIO 74 | import binascii 75 | import xml.dom.minidom 76 | import zlib 77 | import hashlib 78 | import textwrap 79 | import re 80 | import string 81 | 82 | try: 83 | import dslsimulationdb 84 | except: 85 | dslsimulationdb = None 86 | 87 | try: 88 | import yara 89 | except: 90 | pass 91 | 92 | try: 93 | import olefile 94 | except: 95 | print('This program requires module olefile.\nhttp://www.decalage.info/python/olefileio\n') 96 | if sys.version >= '2.7.9': 97 | print("You can use PIP to install olefile like this: pip install olefile\npip is located in Python's Scripts folder.\n") 98 | exit(-1) 99 | 100 | dumplinelength = 16 101 | MALWARE_PASSWORD = 'infected' 102 | OLEFILE_MAGIC = '\xD0\xCF\x11\xE0' 103 | ACTIVEMIME_MAGIC = 'ActiveMime' 104 | 105 | def PrintManual(): 106 | manual = ''' 107 | Manual: 108 | 109 | oledump is a tool to analyze OLE files (also known as Compound File Binary). Many file formats are in fact OLE files, like Microsoft Office files, MSI files, ... Even the new Microsoft Office Open XML format uses OLE files for VBA macros. 110 | oledump can analyze OLE files directly, or indirectly when then are contained in some form or other (like .docm, .xml, ...). 111 | 112 | oledump uses 2 modules that are not part of Python 2: olefile (http://www.decalage.info/python/olefileio) and YARA. 113 | You need to install the olefile module for this program to work. 114 | The YARA module is not mandatory if you don't use YARA rules. 115 | 116 | Running oledump with a spreadsheet (.xls binary format) lists al the streams found in the OLE file (an OLE file is a virtual filesystem with folders and files, known as streams), like this: 117 | 118 | C:\Demo>oledump.py Book1.xls 119 | 1: 4096 '\\x05DocumentSummaryInformation' 120 | 2: 4096 '\\x05SummaryInformation' 121 | 3: 4096 'Workbook' 122 | 123 | The first column is an index assigned to the stream by oledump. This index is used to select streams. The second column is the size of the stream (number of bytes inside the stream), and the last column is the name of the stream. 124 | 125 | To select a stream for analysis, use option -s with the index (number of the stream, or a for all streams), like this: 126 | C:\Demo>oledump.py -s 1 Book1.xls 127 | 00000000: FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00 ................ 128 | 00000010: 00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5 .............i-i 129 | 00000020: 9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00 ........+,..0... 130 | 00000030: E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00 ............P... 131 | 00000040: 0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00 ....X.......p... 132 | ... 133 | 134 | When selecting a stream, its content is shown as an ASCII dump (this can also be done with option -a). 135 | Option -x produces a hexadecimal dump instead of an ASCII dump. 136 | 137 | C:\Demo>oledump.py -s 1 -x Book1.xls 138 | 00000000: FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00 139 | 00000010: 00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5 140 | 00000020: 9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00 141 | 00000030: E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00 142 | 00000040: 0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00 143 | ... 144 | 145 | Option -d produces a raw dump of the content of the stream. This content can be redirected to a file, like this: 146 | C:\Demo>oledump.py -s 1 -d Book1.xls > content.bin 147 | 148 | or it can be piped into another command, like this: 149 | C:\Demo>oledump.py -s 1 -d Book1.xls | pdfid.py -f 150 | 151 | Option -C (--cut) allows for the partial selection of a stream. Use this option to "cut out" part of the stream. 152 | The --cut option takes an argument to specify which section of bytes to select from the stream. This argument is composed of 2 terms separated by a colon (:), like this: 153 | termA:termB 154 | termA and termB can be: 155 | - nothing (an empty string) 156 | - a positive decimal number; example: 10 157 | - an hexadecimal number (to be preceded by 0x); example: 0x10 158 | - a case sensitive string to search for (surrounded by square brackets and single quotes); example: ['MZ'] 159 | - an hexadecimal string to search for (surrounded by square brackets); example: [d0cf11e0] 160 | If termA is nothing, then the cut section of bytes starts with the byte at position 0. 161 | If termA is a number, then the cut section of bytes starts with the byte at the position given by the number (first byte has index 0). 162 | If termA is a string to search for, then the cut section of bytes starts with the byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). 163 | If termB is nothing, then the cut section of bytes ends with the last byte. 164 | If termB is a number, then the cut section of bytes ends with the byte at the position given by the number (first byte has index 0). 165 | When termB is a number, it can have suffix letter l. This indicates that the number is a length (number of bytes), and not a position. 166 | termB can also be a negative number (decimal or hexademical): in that case the position is counted from the end of the file. For example, :-5 selects the complete file except the last 5 bytes. 167 | If termB is a string to search for, then the cut section of bytes ends with the last byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). 168 | No checks are made to assure that the position specified by termA is lower than the position specified by termB. This is left up to the user. 169 | Search string expressions (ASCII and hexadecimal) can be followed by an instance (a number equal to 1 or greater) to indicate which instance needs to be taken. For example, ['ABC']2 will search for the second instance of string 'ABC'. If this instance is not found, then nothing is selected. 170 | Search string expressions (ASCII and hexadecimal) can be followed by an offset (+ or - a number) to add (or substract) an offset to the found instance. For example, ['ABC']+3 will search for the first instance of string 'ABC' and then select the bytes after ABC (+ 3). 171 | Finally, search string expressions (ASCII and hexadecimal) can be followed by an instance and an offset. 172 | Examples: 173 | This argument can be used to dump the first 256 bytes of a PE file located inside the stream: ['MZ']:0x100l 174 | This argument can be used to dump the OLE file located inside the stream: [d0cf11e0]: 175 | When this option is not used, the complete stream is selected. 176 | 177 | When analyzing a Microsoft Office document with VBA macros, you will see output similar to this: 178 | 179 | C:\Demo>oledump.py Book2-vba.xls 180 | 1: 109 '\\x01CompObj' 181 | 2: 276 '\\x05DocumentSummaryInformation' 182 | 3: 224 '\\x05SummaryInformation' 183 | 4: 2484 'Workbook' 184 | 5: 529 '_VBA_PROJECT_CUR/PROJECT' 185 | 6: 104 '_VBA_PROJECT_CUR/PROJECTwm' 186 | 7: M 1196 '_VBA_PROJECT_CUR/VBA/Sheet1' 187 | 8: m 977 '_VBA_PROJECT_CUR/VBA/Sheet2' 188 | 9: m 977 '_VBA_PROJECT_CUR/VBA/Sheet3' 189 | 10: m 985 '_VBA_PROJECT_CUR/VBA/ThisWorkbook' 190 | 11: 2651 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT' 191 | 12: 549 '_VBA_PROJECT_CUR/VBA/dir' 192 | 193 | The letter M next to the index of some of the streams (streams 7, 8, 9 and 10) is a macro indicator. 194 | If you select a macro stream, the ASCII dump will not help you much. This is because of compression. VBA macros are stored inside streams using a proprietary compression method. To decompress the VBA macros source code, you use option -v, like this: 195 | C:\Demo>oledump.py -s 7 -v Book2-vba.xls 196 | Attribute VB_Name = "Sheet1" 197 | Attribute VB_Base = "0{00020820-0000-0000-C000-000000000046}" 198 | Attribute VB_GlobalNameSpace = False 199 | Attribute VB_Creatable = False 200 | Attribute VB_PredeclaredId = True 201 | Attribute VB_Exposed = True 202 | Attribute VB_TemplateDerived = False 203 | Attribute VB_Customizable = True 204 | Sub Workbook_Open() 205 | MsgBox "VBA macro" 206 | End Sub 207 | 208 | If the VBA macro code is only composed of Attribute or Option statements, and no other statements, then the indicator is a lower case letter m. Example: 209 | C:\Demo>oledump.py -s 8 -v Book2-vba.xls 210 | Attribute VB_Name = "Sheet2" 211 | Attribute VB_Base = "0{00020820-0000-0000-C000-000000000046}" 212 | Attribute VB_GlobalNameSpace = False 213 | Attribute VB_Creatable = False 214 | Attribute VB_PredeclaredId = True 215 | Attribute VB_Exposed = True 216 | Attribute VB_TemplateDerived = False 217 | Attribute VB_Customizable = True 218 | 219 | If the VBA code contains other statements than Attribute or Options statements, then the indicator is a upper case letter M. 220 | This M/m indicator allows you to focus first on interesting VBA macros. 221 | 222 | When compressed VBA code is corrupted, the status indicatore will be E (error). 223 | C:\Demo>oledump.py Book2-vba.xls 224 | 1: 109 '\\x01CompObj' 225 | 2: 276 '\\x05DocumentSummaryInformation' 226 | 3: 224 '\\x05SummaryInformation' 227 | 4: 2484 'Workbook' 228 | 5: 529 '_VBA_PROJECT_CUR/PROJECT' 229 | 6: 104 '_VBA_PROJECT_CUR/PROJECTwm' 230 | 7: E 1196 '_VBA_PROJECT_CUR/VBA/Sheet1' 231 | 8: m 977 '_VBA_PROJECT_CUR/VBA/Sheet2' 232 | 9: m 977 '_VBA_PROJECT_CUR/VBA/Sheet3' 233 | 10: m 985 '_VBA_PROJECT_CUR/VBA/ThisWorkbook' 234 | 11: 2651 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT' 235 | 12: 549 '_VBA_PROJECT_CUR/VBA/dir' 236 | 237 | To view the VBA code up til the corruption, use option --vbadecompresscorrupt. 238 | C:\Demo>oledump.py -s 7 --vbadecompresscorrupt Book2-vba.xls 239 | 240 | Option -r can be used together with option -v to decompress a VBA macro stream that was extracted through some other mean than oledump. In such case, you provide the file that contains the compressed macro, instead of the OLE file. 241 | 242 | Microsoft Office files can contain embedded objects. They show up like this (notice stream 6 Ole10Native): 243 | C:\Demo>oledump.py Book1-insert-object-calc-rol3.exe.xls 244 | 1: 109 '\\x01CompObj' 245 | 2: 276 '\\x05DocumentSummaryInformation' 246 | 3: 224 '\\x05SummaryInformation' 247 | 4: 80 'MBD0004D0D1/\\x01CompObj' 248 | 5: 20 'MBD0004D0D1/\\x01Ole' 249 | 6: 114798 'MBD0004D0D1/\\x01Ole10Native' 250 | 7: 11312 'Workbook' 251 | 252 | To get more info about the embedded object, use option -i like this: 253 | C:\Demo>oledump.py -s 6 -i Book1-insert-object-calc-rol3.exe.xls 254 | String 1: calc-rol3.exe 255 | String 2: C:\Demo\ole\CALC-R~1.EXE 256 | String 3: C:\Demo\ole\CALC-R~1.EXE 257 | Size embedded file: 114688 258 | MD5 embedded file: bef425b95e45c54d649a19a7c55556a0 259 | 260 | To extract the embedded file, use option -e and redirect the output to a file like this: 261 | C:\Demo>oledump.py -s 6 -e Book1-insert-object-calc-rol3.exe.xls > extracted.bin 262 | 263 | Analyzing the content of streams (and VBA macros) can be quite challenging. To help with the analysis, oledump provides support for plugins and YARA rules. 264 | 265 | plugins are Python programs that take the stream content as input and try to analyze it. Plugins can analyze the raw stream content or the decompressed VBA macro source code. Plugins analyze all streams, you don't need to select a particular stream. 266 | VBA macros code in malicious documents is often obfuscated, and hard to understand. plugin_http_heuristics is a plugin for VBA macros that tries to recover the URL used to download the trojan in a malicious Office document. This URL is often obfuscated, for example by using hexadecimal or base64 strings to represent the URL. plugin_http_heuristics tries several heuristics to recover a URL. 267 | Example: 268 | C:\Demo>oledump.py -p plugin_http_heuristics sample.xls 269 | 1: 104 '\\x01CompObj' 270 | 2: 256 '\\x05DocumentSummaryInformation' 271 | 3: 228 '\\x05SummaryInformation' 272 | 4: 4372 'Workbook' 273 | 5: 583 '_VBA_PROJECT_CUR/PROJECT' 274 | 6: 83 '_VBA_PROJECT_CUR/PROJECTwm' 275 | 7: m 976 '_VBA_PROJECT_CUR/VBA/????1' 276 | Plugin: HTTP Heuristics plugin 277 | 8: m 976 '_VBA_PROJECT_CUR/VBA/????2' 278 | Plugin: HTTP Heuristics plugin 279 | 9: m 976 '_VBA_PROJECT_CUR/VBA/????3' 280 | Plugin: HTTP Heuristics plugin 281 | 10: M 261251 '_VBA_PROJECT_CUR/VBA/????????' 282 | Plugin: HTTP Heuristics plugin 283 | http://???.???.???.??:8080/stat/lld.php 284 | 11: 8775 '_VBA_PROJECT_CUR/VBA/_VBA_PROJECT' 285 | 12: 1398 '_VBA_PROJECT_CUR/VBA/__SRP_0' 286 | 13: 212 '_VBA_PROJECT_CUR/VBA/__SRP_1' 287 | 14: 456 '_VBA_PROJECT_CUR/VBA/__SRP_2' 288 | 15: 385 '_VBA_PROJECT_CUR/VBA/__SRP_3' 289 | 16: 550 '_VBA_PROJECT_CUR/VBA/dir' 290 | 291 | Option -q (quiet) only displays output from the plugins, it suppresses output from oledump. This makes it easier to spot URLs: 292 | C:\Demo>oledump.py -p plugin_http_heuristics -q sample.xls 293 | http://???.???.???.??:8080/stat/lld.php 294 | 295 | When specifying plugins, you do not need to give the full path nor the .py extension (it's allowed though). If you just give the filename without a path, oledump will search for the plugin in the current directory and in the directory where oledump.py is located. You can specify more than one plugin by separating their names with a comma (,), or by using a at-file. A at-file is a text file containing the names of the plugins (one per line). To indicate to oledump that a text file is a at-file, you prefix iw with @, like this: 296 | oledump.py -p @all-plugins.txt sample.xls 297 | 298 | Some plugins take options too. Use --pluginoptions to specify these options. 299 | 300 | oledump can scan the content of the streams with YARA rules (the YARA Python module must be installed). You provide the YARA rules with option -y. You can provide one file with YARA rules, an at-file (@file containing the filenames of the YARA files) or a directory. In case of a directory, all files inside the directory are read as YARA files. All streams are scanned with the provided YARA rules, you can not use option -s to select an individual stream. 301 | 302 | Example: 303 | C:\Demo>oledump.py -y contains_pe_file.yara Book1-insert-object-exe.xls 304 | 1: 107 '\\x01CompObj' 305 | 2: 256 '\\x05DocumentSummaryInformation' 306 | 3: 216 '\\x05SummaryInformation' 307 | 4: 76 'MBD0049DB15/\\x01CompObj' 308 | 5: 60326 'MBD0049DB15/\\x01Ole10Native' 309 | YARA rule: Contains_PE_File 310 | 6: 19567 'Workbook' 311 | 312 | In this example, you use YARA rule contains_pe_file.yara to find PE files (executables) inside Microsoft Office files. The rule triggered for stream 5, because it contains an EXE file embedded as OLE object. 313 | 314 | If you want more information about what was detected by the YARA rule, use option --yarastrings like in this example: 315 | C:\Demo>oledump.py -y contains_pe_file.yara --yarastrings Book1-insert-object-exe.xls 316 | 1: 107 '\\x01CompObj' 317 | 2: 256 '\\x05DocumentSummaryInformation' 318 | 3: 216 '\\x05SummaryInformation' 319 | 4: 76 'MBD0049DB15/\\x01CompObj' 320 | 5: 60326 'MBD0049DB15/\\x01Ole10Native' 321 | YARA rule: Contains_PE_File 322 | 000064 $a: 323 | 4d5a 324 | 'MZ' 325 | 6: 19567 'Workbook' 326 | 327 | YARA rule contains_pe_file detects PE files by finding string MZ followed by string PE at the correct offset (AddressOfNewExeHeader). 328 | The rule looks like this: 329 | rule Contains_PE_File 330 | { 331 | meta: 332 | author = "Didier Stevens (https://DidierStevens.com)" 333 | description = "Detect a PE file inside a byte sequence" 334 | method = "Find string MZ followed by string PE at the correct offset (AddressOfNewExeHeader)" 335 | strings: 336 | $a = "MZ" 337 | condition: 338 | for any i in (1..#a): (uint32(@a[i] + uint32(@a[i] + 0x3C)) == 0x00004550) 339 | } 340 | 341 | Distributed together with oledump are the YARA rules maldoc.yara. These are YARA rules to detect shellcode, based on Frank Boldewin's shellcode detector used in OfficeMalScanner. 342 | 343 | When looking for traces of Windows executable code (PE files, shellcode, ...) with YARA rules, one must take into account the fact that the executable code might have been encoded (for example via XOR and a key) to evade detection. 344 | To deal with this possibility, oledump supports decoders. A decoder is another type of plugin, that will bruteforce a type of encoding on each stream. For example, decoder_xor1 will encode each stream via XOR and a key of 1 byte. So effectively, 256 different encodings of the stream will be scanned by the YARA rules. 256 encodings because: XOR key 0x00, XOR key 0x01, XOR key 0x02, ..., XOR key 0xFF 345 | Here is an example: 346 | C:\Demo>oledump.py -y contains_pe_file.yara -D decoder_xor1 Book1-insert-object-exe-xor14.xls 347 | 1: 107 '\\x01CompObj' 348 | 2: 256 '\\x05DocumentSummaryInformation' 349 | 3: 216 '\\x05SummaryInformation' 350 | 4: 76 'MBD0049DB15/\\x01CompObj' 351 | 5: 60326 'MBD0049DB15/\\x01Ole10Native' 352 | YARA rule (stream decoder: XOR 1 byte key 0x14): Contains_PE_File 353 | 6: 19567 'Workbook' 354 | 355 | The YARA rule triggers on stream 5. It contains a PE file encoded via XORing each byte with 0x14. 356 | 357 | You can specify decoders in exactly the same way as plugins, for example specifying more than one decoder separated by a comma ,. 358 | C:\Demo>oledump.py -y contains_pe_file.yara -D decoder_xor1,decoder_rol1,decoder_add1 Book1-insert-object-exe-xor14.xls 359 | 1: 107 '\\x01CompObj' 360 | 2: 256 '\\x05DocumentSummaryInformation' 361 | 3: 216 '\\x05SummaryInformation' 362 | 4: 76 'MBD0049DB15/\\x01CompObj' 363 | 5: 60326 'MBD0049DB15/\\x01Ole10Native' 364 | YARA rule (stream decoder: XOR 1 byte key 0x14): Contains_PE_File 365 | 6: 19567 'Workbook' 366 | 367 | Some decoders take options, to be provided with option --decoderoptions. 368 | 369 | OLE files contain metadata. Use option -M to display it. 370 | 371 | Example: 372 | C:\Demo>oledump.py -M Book1.xls 373 | Properties SummaryInformation: 374 | codepage: 1252 ANSI Latin 1; Western European (Windows) 375 | author: Didier Stevens 376 | last_saved_by: Didier Stevens 377 | create_time: 2014-08-21 09:16:10 378 | last_saved_time: 2014-08-21 10:26:40 379 | creating_application: Microsoft Excel 380 | security: 0 381 | Properties DocumentSummaryInformation: 382 | codepage_doc: 1252 ANSI Latin 1; Western European (Windows) 383 | scale_crop: False 384 | company: Didier Stevens Labs 385 | links_dirty: False 386 | shared_doc: False 387 | hlinks_changed: False 388 | version: 730895 389 | 390 | Option -c calculates extra data per stream. This data is displayed per stream. Only the MD5 hash of the content of the stream is calculated. 391 | Example: 392 | C:\Demo>oledump.py -c Book1.xls 393 | 1: 4096 '\\x05DocumentSummaryInformation' ff1773dce227027d410b09f8f3224a56 394 | 2: 4096 '\\x05SummaryInformation' b46068f38a3294ca9163442cb8271028 395 | 3: 4096 'Workbook' d6a5bebba74fb1adf84c4ee66b2bf8dd 396 | 397 | If you need more data than the MD5 of each stream, use option -E (extra). This option takes a parameter describing the extra data that needs to be calculated and displayed for each stream. The following variables are defined: 398 | %INDEX%: the index of the stream 399 | %INDICATOR%: macro indicator 400 | %LENGTH%': the length of the stream 401 | %NAME%: the printable name of the stream 402 | %MD5%: calculates MD5 hash 403 | %SHA1%: calculates SHA1 hash 404 | %SHA256%: calculates SHA256 hash 405 | %ENTROPY%: calculates entropy 406 | %HEADHEX%: display first 20 bytes of the stream as hexadecimal 407 | %HEADASCII%: display first 20 bytes of the stream as ASCII 408 | %TAILHEX%: display last 20 bytes of the stream as hexadecimal 409 | %TAILASCII%: display last 20 bytes of the stream as ASCII 410 | %HISTOGRAM%: calculates a histogram 411 | this is the prevalence of each byte value (0x00 through 0xFF) 412 | at least 3 numbers are displayed separated by a comma: 413 | number of values with a prevalence > 0 414 | minimum values with a prevalence > 0 415 | maximum values with a prevalence > 0 416 | each value with a prevalence > 0 417 | %BYTESTATS%: calculates byte statistics 418 | byte statistics are 5 numbers separated by a comma: 419 | number of NULL bytes 420 | number of control bytes 421 | number of whitespace bytes 422 | number of printable bytes 423 | number of high bytes 424 | 425 | The parameter for -E may contain other text than the variables, which will be printed. Escape characters \\n and \\t are supported. 426 | Example displaying the MD5 and SHA256 hash per stream, separated by a space character: 427 | C:\Demo>oledump.py -E "%MD5% %SHA256%" Book1.xls 428 | 1: 4096 '\\x05DocumentSummaryInformation' ff1773dce227027d410b09f8f3224a56 2817c0fbe2931a562be17ed163775ea5e0b12aac203a095f51ffdbd5b27e7737 429 | 2: 4096 '\\x05SummaryInformation' b46068f38a3294ca9163442cb8271028 2c3009a215346ae5163d5776ead3102e49f6b5c4d29bd1201e9a32d3bfe52723 430 | 3: 4096 'Workbook' d6a5bebba74fb1adf84c4ee66b2bf8dd 82157e87a4e70920bf8975625f636d84101bbe8f07a998bc571eb8fa32d3a498 431 | 432 | If the extra parameter starts with !, then it replaces the complete output line (in stead of being appended to the output line). 433 | Example: 434 | C:\Demo>oledump.py -E "!%INDEX% %MD5%" Book1.xls 435 | 1 ff1773dce227027d410b09f8f3224a56 436 | 2 b46068f38a3294ca9163442cb8271028 437 | 3 d6a5bebba74fb1adf84c4ee66b2bf8dd 438 | 439 | To include extra data with each use of oledump, define environment variable OLEDUMP_EXTRA with the parameter that should be passed to -E. When environment variable OLEDUMP_EXTRA is defined, option -E can be ommited. When option -E is used together with environment variable OLEDUMP_EXTRA, the parameter of option -E is used and the environment variable is ignored. 440 | 441 | Sometimes during the analysis of an OLE file, you might come across compressed data inside the stream. For example, an indicator of ZLIB compressed DATA is byte 0x78. 442 | Option --decompress instructs oledump to search for compressed data inside the selected stream, and then decompress it. If this fails, the original data is displayed. 443 | 444 | oledump can handle several types of files. OLE files are supported, but also the new Office Open XML standard: these are XML files inside a ZIP container, but VBA macros are still stored as OLE files inside the ZIP file. In such case, the name of the OLE file inside the ZIP file will be displayed, and the indices will be prefixed by a letter (A for the first OLE file, B for the second OLE file, ...). 445 | Example: 446 | C:\Demo>oledump.py Book1.xlsm 447 | A: xl/vbaProject.bin 448 | A1: 462 'PROJECT' 449 | A2: 86 'PROJECTwm' 450 | A3: M 974 'VBA/Module1' 451 | A4: m 977 'VBA/Sheet1' 452 | A5: m 985 'VBA/ThisWorkbook' 453 | A6: 2559 'VBA/_VBA_PROJECT' 454 | A7: 1111 'VBA/__SRP_0' 455 | A8: 74 'VBA/__SRP_1' 456 | A9: 136 'VBA/__SRP_2' 457 | A10: 103 'VBA/__SRP_3' 458 | A11: 566 'VBA/dir' 459 | 460 | oledump can also handle XML files that contain OLE files stored as base64 inside XML files. 461 | 462 | Finally, all of these file types may be stored inside a password protected ZIP file (password infected). Storing malicious files inside a password protected ZIP file is common practice amongst malware researchers. Not only does it prevent accidental infection, but it also prevents anti-virus programs from deleting the sample. 463 | oledump supports the analysis of samples stored in password protected ZIP files (password infected). Do not store more than one sample inside a password protected ZIP file. Each sample should be in its own ZIP container. 464 | 465 | oledump also supports input/output redirection. This way, oledump can be used in a pipe. 466 | Say for example that the sample OLE file is GZIP compressed. oledump can not handle GZIP files directly, but you can decompress and cat it with zcat and then pipe it into oledump for analysis, like this: 467 | zcat sample.gz | oledump.py 468 | 469 | The return code of oledump is 0, except when you use no options and the analyzed file contains macros. When macros are found, the return code is 2. 470 | ''' 471 | for line in manual.split('\n'): 472 | print(textwrap.fill(line)) 473 | 474 | #Convert 2 Bytes If Python 3 475 | def C2BIP3(string): 476 | if sys.version_info[0] > 2: 477 | return bytes([ord(x) for x in string]) 478 | else: 479 | return string 480 | 481 | # CIC: Call If Callable 482 | def CIC(expression): 483 | if callable(expression): 484 | return expression() 485 | else: 486 | return expression 487 | 488 | # IFF: IF Function 489 | def IFF(expression, valueTrue, valueFalse): 490 | if expression: 491 | return CIC(valueTrue) 492 | else: 493 | return CIC(valueFalse) 494 | 495 | def File2String(filename): 496 | try: 497 | f = open(filename, 'rb') 498 | except: 499 | return None 500 | try: 501 | return f.read() 502 | except: 503 | return None 504 | finally: 505 | f.close() 506 | 507 | class cDumpStream(): 508 | def __init__(self): 509 | self.text = '' 510 | 511 | def Addline(self, line): 512 | if line != '': 513 | self.text += line + '\n' 514 | 515 | def Content(self): 516 | return self.text 517 | 518 | def HexDump(data): 519 | oDumpStream = cDumpStream() 520 | hexDump = '' 521 | for i, b in enumerate(data): 522 | if i % dumplinelength == 0 and hexDump != '': 523 | oDumpStream.Addline(hexDump) 524 | hexDump = '' 525 | hexDump += IFF(hexDump == '', '', ' ') + '%02X' % ord(b) 526 | oDumpStream.Addline(hexDump) 527 | return oDumpStream.Content() 528 | 529 | def CombineHexAscii(hexDump, asciiDump): 530 | if hexDump == '': 531 | return '' 532 | return hexDump + ' ' + (' ' * (3 * (dumplinelength - len(asciiDump)))) + asciiDump 533 | 534 | def HexAsciiDump(data): 535 | oDumpStream = cDumpStream() 536 | hexDump = '' 537 | asciiDump = '' 538 | for i, b in enumerate(data): 539 | if i % dumplinelength == 0: 540 | if hexDump != '': 541 | oDumpStream.Addline(CombineHexAscii(hexDump, asciiDump)) 542 | hexDump = '%08X:' % i 543 | asciiDump = '' 544 | hexDump+= ' %02X' % ord(b) 545 | asciiDump += IFF(ord(b) >= 32, b, '.') 546 | oDumpStream.Addline(CombineHexAscii(hexDump, asciiDump)) 547 | return oDumpStream.Content() 548 | 549 | #Fix for http://bugs.python.org/issue11395 550 | def StdoutWriteChunked(data): 551 | while data != '': 552 | sys.stdout.write(data[0:10000]) 553 | try: 554 | sys.stdout.flush() 555 | except IOError: 556 | return 557 | data = data[10000:] 558 | 559 | def PrintableName(fname): 560 | return repr('/'.join(fname)) 561 | 562 | def ParseTokenSequence(data): 563 | flags = ord(data[0]) 564 | data = data[1:] 565 | result = [] 566 | for mask in [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80]: 567 | if len(data) > 0: 568 | if flags & mask: 569 | result.append(data[0:2]) 570 | data = data[2:] 571 | else: 572 | result.append(data[0]) 573 | data = data[1:] 574 | return result, data 575 | 576 | def OffsetBits(data): 577 | numberOfBits = int(math.ceil(math.log(len(data), 2))) 578 | if numberOfBits < 4: 579 | numberOfBits = 4 580 | elif numberOfBits > 12: 581 | numberOfBits = 12 582 | return numberOfBits 583 | 584 | def Bin(number): 585 | result = bin(number)[2:] 586 | while len(result) < 16: 587 | result = '0' + result 588 | return result 589 | 590 | def DecompressChunk(compressedChunk): 591 | if len(compressedChunk) < 2: 592 | return None, None 593 | header = ord(compressedChunk[0]) + ord(compressedChunk[1]) * 0x100 594 | size = (header & 0x0FFF) + 3 595 | flagCompressed = header & 0x8000 596 | data = compressedChunk[2:2 + size - 2] 597 | 598 | if flagCompressed == 0: 599 | return data, compressedChunk[size:] 600 | 601 | decompressedChunk = '' 602 | while len(data) != 0: 603 | tokens, data = ParseTokenSequence(data) 604 | for token in tokens: 605 | if len(token) == 1: 606 | decompressedChunk += token 607 | else: 608 | if decompressedChunk == '': 609 | return None, None 610 | numberOfOffsetBits = OffsetBits(decompressedChunk) 611 | copyToken = ord(token[0]) + ord(token[1]) * 0x100 612 | offset = 1 + (copyToken >> (16 - numberOfOffsetBits)) 613 | length = 3 + (((copyToken << numberOfOffsetBits) & 0xFFFF) >> numberOfOffsetBits) 614 | copy = decompressedChunk[-offset:] 615 | copy = copy[0:length] 616 | lengthCopy = len(copy) 617 | while length > lengthCopy: #a# 618 | if length - lengthCopy >= lengthCopy: 619 | copy += copy[0:lengthCopy] 620 | length -= lengthCopy 621 | else: 622 | copy += copy[0:length - lengthCopy] 623 | length -= length - lengthCopy 624 | decompressedChunk += copy 625 | return decompressedChunk, compressedChunk[size:] 626 | 627 | def Decompress(compressedData): 628 | if compressedData[0] != chr(1): 629 | return (False, None) 630 | remainder = compressedData[1:] 631 | decompressed = '' 632 | while len(remainder) != 0: 633 | decompressedChunk, remainder = DecompressChunk(remainder) 634 | if decompressedChunk == None: 635 | return (False, decompressed) 636 | decompressed += decompressedChunk 637 | return (True, decompressed) 638 | 639 | def FindCompression(data): 640 | searchString = '\x00Attribut' 641 | position = data.find(searchString) 642 | if position != -1 and data[position + len(searchString)] == 'e': 643 | position = -1 644 | return position 645 | 646 | def SearchAndDecompressSub(data): 647 | position = FindCompression(data) 648 | if position == -1: 649 | return (False, '') 650 | else: 651 | compressedData = data[position - 3:] 652 | return Decompress(compressedData) 653 | 654 | def SearchAndDecompress(data, ifError='Error: unable to decompress\n'): 655 | result, decompress = SearchAndDecompressSub(data) 656 | if result: 657 | return decompress 658 | elif ifError == None: 659 | return decompress 660 | else: 661 | return ifError 662 | 663 | def ReadWORD(data): 664 | if len(data) < 2: 665 | return None, None 666 | return ord(data[0]) + ord(data[1]) *0x100, data[2:] 667 | 668 | def ReadDWORD(data): 669 | if len(data) < 4: 670 | return None, None 671 | return ord(data[0]) + ord(data[1]) *0x100 + ord(data[2]) *0x10000 + ord(data[3]) *0x1000000, data[4:] 672 | 673 | def ReadNullTerminatedString(data): 674 | position = data.find('\x00') 675 | if position == -1: 676 | return None, None 677 | return data[:position], data[position + 1:] 678 | 679 | def ExtractOle10Native(data): 680 | size, data = ReadDWORD(data) 681 | if size == None: 682 | return [] 683 | dummy, data = ReadWORD(data) 684 | if dummy == None: 685 | return [] 686 | filename, data = ReadNullTerminatedString(data) 687 | if filename == None: 688 | return [] 689 | pathname, data = ReadNullTerminatedString(data) 690 | if pathname == None: 691 | return [] 692 | dummy, data = ReadDWORD(data) 693 | if dummy == None: 694 | return [] 695 | dummy, data = ReadDWORD(data) 696 | if dummy == None: 697 | return [] 698 | temppathname, data = ReadNullTerminatedString(data) 699 | if temppathname == None: 700 | return [] 701 | sizeEmbedded, data = ReadDWORD(data) 702 | if sizeEmbedded == None: 703 | return [] 704 | if len(data) < sizeEmbedded: 705 | return [] 706 | 707 | return [filename, pathname, temppathname, data[:sizeEmbedded]] 708 | 709 | def Extract(data): 710 | result = ExtractOle10Native(data) 711 | if result == []: 712 | return 'Error: extraction failed' 713 | return result[3] 714 | 715 | def Info(data): 716 | result = ExtractOle10Native(data) 717 | if result == []: 718 | return 'Error: extraction failed' 719 | return 'String 1: %s\nString 2: %s\nString 3: %s\nSize embedded file: %d\nMD5 embedded file: %s\n' % (result[0], result[1], result[2], len(result[3]), hashlib.md5(result[3]).hexdigest()) 720 | 721 | def IfWIN32SetBinary(io): 722 | if sys.platform == 'win32': 723 | import msvcrt 724 | msvcrt.setmode(io.fileno(), os.O_BINARY) 725 | 726 | def File2Strings(filename): 727 | try: 728 | f = open(filename, 'r') 729 | except: 730 | return None 731 | try: 732 | return map(lambda line:line.rstrip('\n'), f.readlines()) 733 | except: 734 | return None 735 | finally: 736 | f.close() 737 | 738 | def ProcessAt(argument): 739 | if argument.startswith('@'): 740 | strings = File2Strings(argument[1:]) 741 | if strings == None: 742 | raise Exception('Error reading %s' % argument) 743 | else: 744 | return strings 745 | else: 746 | return [argument] 747 | 748 | def AddPlugin(cClass): 749 | global plugins 750 | 751 | plugins.append(cClass) 752 | 753 | def ExpandFilenameArguments(filenames): 754 | return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), []))) 755 | 756 | class cPluginParent(): 757 | macroOnly = False 758 | 759 | def LoadPlugins(plugins, verbose): 760 | if plugins == '': 761 | return 762 | scriptPath = os.path.dirname(sys.argv[0]) 763 | for plugin in sum(map(ProcessAt, plugins.split(',')), []): 764 | try: 765 | if not plugin.lower().endswith('.py'): 766 | plugin += '.py' 767 | if os.path.dirname(plugin) == '': 768 | if not os.path.exists(plugin): 769 | scriptPlugin = os.path.join(scriptPath, plugin) 770 | if os.path.exists(scriptPlugin): 771 | plugin = scriptPlugin 772 | exec open(plugin, 'r') in globals(), globals() 773 | except Exception as e: 774 | print('Error loading plugin: %s' % plugin) 775 | if verbose: 776 | raise e 777 | 778 | def AddDecoder(cClass): 779 | global decoders 780 | 781 | decoders.append(cClass) 782 | 783 | class cDecoderParent(): 784 | pass 785 | 786 | def LoadDecoders(decoders, verbose): 787 | if decoders == '': 788 | return 789 | scriptPath = os.path.dirname(sys.argv[0]) 790 | for decoder in sum(map(ProcessAt, decoders.split(',')), []): 791 | try: 792 | if not decoder.lower().endswith('.py'): 793 | decoder += '.py' 794 | if os.path.dirname(decoder) == '': 795 | if not os.path.exists(decoder): 796 | scriptDecoder = os.path.join(scriptPath, decoder) 797 | if os.path.exists(scriptDecoder): 798 | decoder = scriptDecoder 799 | exec open(decoder, 'r') in globals(), globals() 800 | except Exception as e: 801 | print('Error loading decoder: %s' % decoder) 802 | if verbose: 803 | raise e 804 | 805 | class cIdentity(cDecoderParent): 806 | name = 'Identity function decoder' 807 | 808 | def __init__(self, stream, options): 809 | self.stream = stream 810 | self.options = options 811 | self.available = True 812 | 813 | def Available(self): 814 | return self.available 815 | 816 | def Decode(self): 817 | self.available = False 818 | return self.stream 819 | 820 | def Name(self): 821 | return '' 822 | 823 | def DecodeFunction(decoders, options, stream): 824 | if decoders == []: 825 | return stream 826 | return decoders[0](stream, options.decoderoptions).Decode() 827 | 828 | def MacrosContainsOnlyAttributesOrOptions(stream): 829 | lines = SearchAndDecompress(stream).split('\n') 830 | for line in [line.strip() for line in lines]: 831 | if line != '' and not line.startswith('Attribute ') and not line == 'Option Explicit': 832 | return False 833 | return True 834 | 835 | #https://msdn.microsoft.com/en-us/library/windows/desktop/dd317756%28v=vs.85%29.aspx 836 | dCodepages = { 837 | 037: 'IBM EBCDIC US-Canada', 838 | 437: 'OEM United States', 839 | 500: 'IBM EBCDIC International', 840 | 708: 'Arabic (ASMO 708)', 841 | 709: 'Arabic (ASMO-449+, BCON V4)', 842 | 710: 'Arabic - Transparent Arabic', 843 | 720: 'Arabic (Transparent ASMO); Arabic (DOS)', 844 | 737: 'OEM Greek (formerly 437G); Greek (DOS)', 845 | 775: 'OEM Baltic; Baltic (DOS)', 846 | 850: 'OEM Multilingual Latin 1; Western European (DOS)', 847 | 852: 'OEM Latin 2; Central European (DOS)', 848 | 855: 'OEM Cyrillic (primarily Russian)', 849 | 857: 'OEM Turkish; Turkish (DOS)', 850 | 858: 'OEM Multilingual Latin 1 + Euro symbol', 851 | 860: 'OEM Portuguese; Portuguese (DOS)', 852 | 861: 'OEM Icelandic; Icelandic (DOS)', 853 | 862: 'OEM Hebrew; Hebrew (DOS)', 854 | 863: 'OEM French Canadian; French Canadian (DOS)', 855 | 864: 'OEM Arabic; Arabic (864)', 856 | 865: 'OEM Nordic; Nordic (DOS)', 857 | 866: 'OEM Russian; Cyrillic (DOS)', 858 | 869: 'OEM Modern Greek; Greek, Modern (DOS)', 859 | 870: 'IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2', 860 | 874: 'ANSI/OEM Thai (ISO 8859-11); Thai (Windows)', 861 | 875: 'IBM EBCDIC Greek Modern', 862 | 932: 'ANSI/OEM Japanese; Japanese (Shift-JIS)', 863 | 936: 'ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)', 864 | 949: 'ANSI/OEM Korean (Unified Hangul Code)', 865 | 950: 'ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)', 866 | 1026: 'IBM EBCDIC Turkish (Latin 5)', 867 | 1047: 'IBM EBCDIC Latin 1/Open System', 868 | 1140: 'IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)', 869 | 1141: 'IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)', 870 | 1142: 'IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)', 871 | 1143: 'IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)', 872 | 1144: 'IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)', 873 | 1145: 'IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)', 874 | 1146: 'IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)', 875 | 1147: 'IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)', 876 | 1148: 'IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)', 877 | 1149: 'IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)', 878 | 1200: 'Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications', 879 | 1201: 'Unicode UTF-16, big endian byte order; available only to managed applications', 880 | 1250: 'ANSI Central European; Central European (Windows)', 881 | 1251: 'ANSI Cyrillic; Cyrillic (Windows)', 882 | 1252: 'ANSI Latin 1; Western European (Windows)', 883 | 1253: 'ANSI Greek; Greek (Windows)', 884 | 1254: 'ANSI Turkish; Turkish (Windows)', 885 | 1255: 'ANSI Hebrew; Hebrew (Windows)', 886 | 1256: 'ANSI Arabic; Arabic (Windows)', 887 | 1257: 'ANSI Baltic; Baltic (Windows)', 888 | 1258: 'ANSI/OEM Vietnamese; Vietnamese (Windows)', 889 | 1361: 'Korean (Johab)', 890 | 10000: 'MAC Roman; Western European (Mac)', 891 | 10001: 'Japanese (Mac)', 892 | 10002: 'MAC Traditional Chinese (Big5); Chinese Traditional (Mac)', 893 | 10003: 'Korean (Mac)', 894 | 10004: 'Arabic (Mac)', 895 | 10005: 'Hebrew (Mac)', 896 | 10006: 'Greek (Mac)', 897 | 10007: 'Cyrillic (Mac)', 898 | 10008: 'MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)', 899 | 10010: 'Romanian (Mac)', 900 | 10017: 'Ukrainian (Mac)', 901 | 10021: 'Thai (Mac)', 902 | 10029: 'MAC Latin 2; Central European (Mac)', 903 | 10079: 'Icelandic (Mac)', 904 | 10081: 'Turkish (Mac)', 905 | 10082: 'Croatian (Mac)', 906 | 12000: 'Unicode UTF-32, little endian byte order; available only to managed applications', 907 | 12001: 'Unicode UTF-32, big endian byte order; available only to managed applications', 908 | 20000: 'CNS Taiwan; Chinese Traditional (CNS)', 909 | 20001: 'TCA Taiwan', 910 | 20002: 'Eten Taiwan; Chinese Traditional (Eten)', 911 | 20003: 'IBM5550 Taiwan', 912 | 20004: 'TeleText Taiwan', 913 | 20005: 'Wang Taiwan', 914 | 20105: 'IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)', 915 | 20106: 'IA5 German (7-bit)', 916 | 20107: 'IA5 Swedish (7-bit)', 917 | 20108: 'IA5 Norwegian (7-bit)', 918 | 20127: 'US-ASCII (7-bit)', 919 | 20261: 'T.61', 920 | 20269: 'ISO 6937 Non-Spacing Accent', 921 | 20273: 'IBM EBCDIC Germany', 922 | 20277: 'IBM EBCDIC Denmark-Norway', 923 | 20278: 'IBM EBCDIC Finland-Sweden', 924 | 20280: 'IBM EBCDIC Italy', 925 | 20284: 'IBM EBCDIC Latin America-Spain', 926 | 20285: 'IBM EBCDIC United Kingdom', 927 | 20290: 'IBM EBCDIC Japanese Katakana Extended', 928 | 20297: 'IBM EBCDIC France', 929 | 20420: 'IBM EBCDIC Arabic', 930 | 20423: 'IBM EBCDIC Greek', 931 | 20424: 'IBM EBCDIC Hebrew', 932 | 20833: 'IBM EBCDIC Korean Extended', 933 | 20838: 'IBM EBCDIC Thai', 934 | 20866: 'Russian (KOI8-R); Cyrillic (KOI8-R)', 935 | 20871: 'IBM EBCDIC Icelandic', 936 | 20880: 'IBM EBCDIC Cyrillic Russian', 937 | 20905: 'IBM EBCDIC Turkish', 938 | 20924: 'IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)', 939 | 20932: 'Japanese (JIS 0208-1990 and 0212-1990)', 940 | 20936: 'Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)', 941 | 20949: 'Korean Wansung', 942 | 21025: 'IBM EBCDIC Cyrillic Serbian-Bulgarian', 943 | 21027: '(deprecated)', 944 | 21866: 'Ukrainian (KOI8-U); Cyrillic (KOI8-U)', 945 | 28591: 'ISO 8859-1 Latin 1; Western European (ISO)', 946 | 28592: 'ISO 8859-2 Central European; Central European (ISO)', 947 | 28593: 'ISO 8859-3 Latin 3', 948 | 28594: 'ISO 8859-4 Baltic', 949 | 28595: 'ISO 8859-5 Cyrillic', 950 | 28596: 'ISO 8859-6 Arabic', 951 | 28597: 'ISO 8859-7 Greek', 952 | 28598: 'ISO 8859-8 Hebrew; Hebrew (ISO-Visual)', 953 | 28599: 'ISO 8859-9 Turkish', 954 | 28603: 'ISO 8859-13 Estonian', 955 | 28605: 'ISO 8859-15 Latin 9', 956 | 29001: 'Europa 3', 957 | 38598: 'ISO 8859-8 Hebrew; Hebrew (ISO-Logical)', 958 | 50220: 'ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)', 959 | 50221: 'ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)', 960 | 50222: 'ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)', 961 | 50225: 'ISO 2022 Korean', 962 | 50227: 'ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)', 963 | 50229: 'ISO 2022 Traditional Chinese', 964 | 50930: 'EBCDIC Japanese (Katakana) Extended', 965 | 50931: 'EBCDIC US-Canada and Japanese', 966 | 50933: 'EBCDIC Korean Extended and Korean', 967 | 50935: 'EBCDIC Simplified Chinese Extended and Simplified Chinese', 968 | 50936: 'EBCDIC Simplified Chinese', 969 | 50937: 'EBCDIC US-Canada and Traditional Chinese', 970 | 50939: 'EBCDIC Japanese (Latin) Extended and Japanese', 971 | 51932: 'EUC Japanese', 972 | 51936: 'EUC Simplified Chinese; Chinese Simplified (EUC)', 973 | 51949: 'EUC Korean', 974 | 51950: 'EUC Traditional Chinese', 975 | 52936: 'HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)', 976 | 54936: 'Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)', 977 | 57002: 'ISCII Devanagari', 978 | 57003: 'ISCII Bengali', 979 | 57004: 'ISCII Tamil', 980 | 57005: 'ISCII Telugu', 981 | 57006: 'ISCII Assamese', 982 | 57007: 'ISCII Oriya', 983 | 57008: 'ISCII Kannada', 984 | 57009: 'ISCII Malayalam', 985 | 57010: 'ISCII Gujarati', 986 | 57011: 'ISCII Punjabi', 987 | 65000: 'Unicode (UTF-7)', 988 | 65001: 'Unicode (UTF-8)' 989 | } 990 | 991 | def LookupCodepage(codepage): 992 | if codepage in dCodepages: 993 | return dCodepages[codepage] 994 | else: 995 | return '' 996 | 997 | def MyRepr(stringArg): 998 | stringRepr = repr(stringArg) 999 | if "'" + stringArg + "'" != stringRepr: 1000 | return stringRepr 1001 | else: 1002 | return stringArg 1003 | 1004 | def FindAll(data, sub): 1005 | result = [] 1006 | start = 0 1007 | while True: 1008 | position = data.find(sub, start) 1009 | if position == -1: 1010 | return result 1011 | result.append(position) 1012 | start = position + 1 1013 | 1014 | def HeuristicDecompress(data): 1015 | for position in FindAll(data, '\x78'): 1016 | try: 1017 | return zlib.decompress(data[position:]) 1018 | except: 1019 | pass 1020 | return data 1021 | 1022 | CUTTERM_NOTHING = 0 1023 | CUTTERM_POSITION = 1 1024 | CUTTERM_FIND = 2 1025 | CUTTERM_LENGTH = 3 1026 | 1027 | def Replace(string, dReplacements): 1028 | if string in dReplacements: 1029 | return dReplacements[string] 1030 | else: 1031 | return string 1032 | 1033 | def ParseCutTerm(argument): 1034 | if argument == '': 1035 | return CUTTERM_NOTHING, None, '' 1036 | oMatch = re.match(r'\-?0x([0-9a-f]+)', argument, re.I) 1037 | if oMatch == None: 1038 | oMatch = re.match(r'\-?(\d+)', argument) 1039 | else: 1040 | value = int(oMatch.group(1), 16) 1041 | if argument.startswith('-'): 1042 | value = -value 1043 | return CUTTERM_POSITION, value, argument[len(oMatch.group(0)):] 1044 | if oMatch == None: 1045 | oMatch = re.match(r'\[([0-9a-f]+)\](\d+)?([+-]\d+)?', argument, re.I) 1046 | else: 1047 | value = int(oMatch.group(1)) 1048 | if argument.startswith('-'): 1049 | value = -value 1050 | return CUTTERM_POSITION, value, argument[len(oMatch.group(0)):] 1051 | if oMatch == None: 1052 | oMatch = re.match(r"\[\'(.+?)\'\](\d+)?([+-]\d+)?", argument) 1053 | else: 1054 | if len(oMatch.group(1)) % 2 == 1: 1055 | raise Exception("Uneven length hexadecimal string") 1056 | else: 1057 | return CUTTERM_FIND, (binascii.a2b_hex(oMatch.group(1)), int(Replace(oMatch.group(2), {None: '1'})), int(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] 1058 | if oMatch == None: 1059 | return None, None, argument 1060 | else: 1061 | return CUTTERM_FIND, (oMatch.group(1), int(Replace(oMatch.group(2), {None: '1'})), int(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] 1062 | 1063 | def ParseCutArgument(argument): 1064 | type, value, remainder = ParseCutTerm(argument.strip()) 1065 | if type == CUTTERM_NOTHING: 1066 | return CUTTERM_NOTHING, None, CUTTERM_NOTHING, None 1067 | elif type == None: 1068 | if remainder.startswith(':'): 1069 | typeLeft = CUTTERM_NOTHING 1070 | valueLeft = None 1071 | remainder = remainder[1:] 1072 | else: 1073 | return None, None, None, None 1074 | else: 1075 | typeLeft = type 1076 | valueLeft = value 1077 | if typeLeft == CUTTERM_POSITION and valueLeft < 0: 1078 | return None, None, None, None 1079 | if typeLeft == CUTTERM_FIND and valueLeft[1] == 0: 1080 | return None, None, None, None 1081 | if remainder.startswith(':'): 1082 | remainder = remainder[1:] 1083 | else: 1084 | return None, None, None, None 1085 | type, value, remainder = ParseCutTerm(remainder) 1086 | if type == CUTTERM_POSITION and remainder == 'l': 1087 | return typeLeft, valueLeft, CUTTERM_LENGTH, value 1088 | elif type == None or remainder != '': 1089 | return None, None, None, None 1090 | elif type == CUTTERM_FIND and value[1] == 0: 1091 | return None, None, None, None 1092 | else: 1093 | return typeLeft, valueLeft, type, value 1094 | 1095 | def Find(data, value, nth): 1096 | position = -1 1097 | while nth > 0: 1098 | position = data.find(value, position + 1) 1099 | if position == -1: 1100 | return -1 1101 | nth -= 1 1102 | return position 1103 | 1104 | def CutData(stream, cutArgument): 1105 | if cutArgument == '': 1106 | return stream 1107 | 1108 | typeLeft, valueLeft, typeRight, valueRight = ParseCutArgument(cutArgument) 1109 | 1110 | if typeLeft == None: 1111 | return stream 1112 | 1113 | if typeLeft == CUTTERM_NOTHING: 1114 | positionBegin = 0 1115 | elif typeLeft == CUTTERM_POSITION: 1116 | positionBegin = valueLeft 1117 | elif typeLeft == CUTTERM_FIND: 1118 | positionBegin = Find(stream, valueLeft[0], valueLeft[1]) 1119 | if positionBegin == -1: 1120 | return '' 1121 | positionBegin += valueLeft[2] 1122 | else: 1123 | raise Exception("Unknown value typeLeft") 1124 | 1125 | if typeRight == CUTTERM_NOTHING: 1126 | positionEnd = len(stream) 1127 | elif typeRight == CUTTERM_POSITION and valueRight < 0: 1128 | positionEnd = len(stream) + valueRight 1129 | elif typeRight == CUTTERM_POSITION: 1130 | positionEnd = valueRight + 1 1131 | elif typeRight == CUTTERM_LENGTH: 1132 | positionEnd = positionBegin + valueRight 1133 | elif typeRight == CUTTERM_FIND: 1134 | positionEnd = Find(stream, valueRight[0], valueRight[1]) 1135 | if positionEnd == -1: 1136 | return '' 1137 | else: 1138 | positionEnd += len(valueRight[0]) 1139 | positionEnd += valueRight[2] 1140 | else: 1141 | raise Exception("Unknown value typeRight") 1142 | 1143 | return stream[positionBegin:positionEnd] 1144 | 1145 | def ExtraInfoMD5(data): 1146 | return hashlib.md5(data).hexdigest() 1147 | 1148 | def ExtraInfoSHA1(data): 1149 | return hashlib.sha1(data).hexdigest() 1150 | 1151 | def ExtraInfoSHA256(data): 1152 | return hashlib.sha256(data).hexdigest() 1153 | 1154 | def CalculateByteStatistics(dPrevalence): 1155 | sumValues = sum(dPrevalence.values()) 1156 | countNullByte = dPrevalence[0] 1157 | countControlBytes = 0 1158 | countWhitespaceBytes = 0 1159 | for iter in range(1, 0x21): 1160 | if chr(iter) in string.whitespace: 1161 | countWhitespaceBytes += dPrevalence[iter] 1162 | else: 1163 | countControlBytes += dPrevalence[iter] 1164 | countControlBytes += dPrevalence[0x7F] 1165 | countPrintableBytes = 0 1166 | for iter in range(0x21, 0x7F): 1167 | countPrintableBytes += dPrevalence[iter] 1168 | countHighBytes = 0 1169 | for iter in range(0x80, 0x100): 1170 | countHighBytes += dPrevalence[iter] 1171 | entropy = 0.0 1172 | for iter in range(0x100): 1173 | if dPrevalence[iter] > 0: 1174 | prevalence = float(dPrevalence[iter]) / float(sumValues) 1175 | entropy += - prevalence * math.log(prevalence, 2) 1176 | return sumValues, entropy, countNullByte, countControlBytes, countWhitespaceBytes, countPrintableBytes, countHighBytes 1177 | 1178 | def ExtraInfoENTROPY(data): 1179 | dPrevalence = {iter: 0 for iter in range(0x100)} 1180 | for char in data: 1181 | dPrevalence[ord(char)] += 1 1182 | sumValues, entropy, countNullByte, countControlBytes, countWhitespaceBytes, countPrintableBytes, countHighBytes = CalculateByteStatistics(dPrevalence) 1183 | return '%f' % entropy 1184 | 1185 | def ExtraInfoHEADHEX(data): 1186 | return binascii.hexlify(data[:16]) 1187 | 1188 | def ExtraInfoHEADASCII(data): 1189 | return ''.join([IFF(ord(b) >= 32, b, '.') for b in data[:16]]) 1190 | 1191 | def ExtraInfoTAILHEX(data): 1192 | return binascii.hexlify(data[-16:]) 1193 | 1194 | def ExtraInfoTAILASCII(data): 1195 | return ''.join([IFF(ord(b) >= 32, b, '.') for b in data[-16:]]) 1196 | 1197 | def ExtraInfoHISTOGRAM(data): 1198 | dPrevalence = {iter: 0 for iter in range(0x100)} 1199 | for char in data: 1200 | dPrevalence[ord(char)] += 1 1201 | result = [] 1202 | count = 0 1203 | minimum = None 1204 | maximum = None 1205 | for iter in range(0x100): 1206 | if dPrevalence[iter] > 0: 1207 | result.append('0x%02x:%d' % (iter, dPrevalence[iter])) 1208 | count += 1 1209 | if minimum == None: 1210 | minimum = iter 1211 | else: 1212 | minimum = min(minimum, iter) 1213 | if maximum == None: 1214 | maximum = iter 1215 | else: 1216 | maximum = max(maximum, iter) 1217 | result.insert(0, '%d' % count) 1218 | result.insert(1, IFF(minimum == None, '', '0x%02x' % minimum)) 1219 | result.insert(2, IFF(maximum == None, '', '0x%02x' % maximum)) 1220 | return ','.join(result) 1221 | 1222 | def ExtraInfoBYTESTATS(data): 1223 | dPrevalence = {iter: 0 for iter in range(0x100)} 1224 | for char in data: 1225 | dPrevalence[ord(char)] += 1 1226 | sumValues, entropy, countNullByte, countControlBytes, countWhitespaceBytes, countPrintableBytes, countHighBytes = CalculateByteStatistics(dPrevalence) 1227 | return '%d,%d,%d,%d,%d' % (countNullByte, countControlBytes, countWhitespaceBytes, countPrintableBytes, countHighBytes) 1228 | 1229 | def GenerateExtraInfo(extra, index, indicator, name, stream): 1230 | if extra == '': 1231 | return '' 1232 | if extra.startswith('!'): 1233 | extra = extra[1:] 1234 | prefix = '' 1235 | else: 1236 | prefix = ' ' 1237 | if indicator == ' ': 1238 | indicator = '' 1239 | dExtras = {'%INDEX%': lambda x: index, 1240 | '%INDICATOR%': lambda x: indicator, 1241 | '%LENGTH%': lambda x: '%d' % len(stream), 1242 | '%NAME%': lambda x: name, 1243 | '%MD5%': ExtraInfoMD5, 1244 | '%SHA1%': ExtraInfoSHA1, 1245 | '%SHA256%': ExtraInfoSHA256, 1246 | '%ENTROPY%': ExtraInfoENTROPY, 1247 | '%HEADHEX%': ExtraInfoHEADHEX, 1248 | '%HEADASCII%': ExtraInfoHEADASCII, 1249 | '%TAILHEX%': ExtraInfoTAILHEX, 1250 | '%TAILASCII%': ExtraInfoTAILASCII, 1251 | '%HISTOGRAM%': ExtraInfoHISTOGRAM, 1252 | '%BYTESTATS%': ExtraInfoBYTESTATS, 1253 | } 1254 | for variable in dExtras: 1255 | if variable in extra: 1256 | extra = extra.replace(variable, dExtras[variable](stream)) 1257 | return prefix + extra.replace(r'\t', '\t').replace(r'\n', '\n') 1258 | 1259 | def OLESub(ole, prefix, rules, options): 1260 | global plugins 1261 | global decoders 1262 | 1263 | returnCode = 0 1264 | 1265 | if options.metadata: 1266 | metadata = ole.get_metadata() 1267 | print('Properties SummaryInformation:') 1268 | for attribute in metadata.SUMMARY_ATTRIBS: 1269 | value = getattr(metadata, attribute) 1270 | if value != None: 1271 | if attribute == 'codepage': 1272 | print(' %s: %s %s' % (attribute, value, LookupCodepage(value))) 1273 | else: 1274 | print(' %s: %s' % (attribute, value)) 1275 | print('Properties DocumentSummaryInformation:') 1276 | for attribute in metadata.DOCSUM_ATTRIBS: 1277 | value = getattr(metadata, attribute) 1278 | if value != None: 1279 | if attribute == 'codepage_doc': 1280 | print(' %s: %s %s' % (attribute, value, LookupCodepage(value))) 1281 | else: 1282 | print(' %s: %s' % (attribute, value)) 1283 | return returnCode 1284 | 1285 | if options.select == '': 1286 | counter = 1 1287 | for fname in ole.listdir(): 1288 | stream = None 1289 | indicator = ' ' 1290 | macroPresent = False 1291 | lengthString = ' ' 1292 | if ole.get_type(fname) == 1: 1293 | indicator = '.' 1294 | elif ole.get_type(fname) == 2: 1295 | stream = ole.openstream(fname).read() 1296 | lengthString = '%7d' % len(stream) 1297 | macroPresent = FindCompression(stream) != -1 1298 | if macroPresent: 1299 | returnCode = 2 1300 | if not SearchAndDecompressSub(stream)[0]: 1301 | indicator = 'E' 1302 | else: 1303 | indicator = 'M' 1304 | if MacrosContainsOnlyAttributesOrOptions(stream): 1305 | indicator = 'm' 1306 | if not options.quiet: 1307 | index = '%s%d' % (prefix, counter) 1308 | line = '%3s: %s %s %s' % (index, indicator, lengthString, PrintableName(fname)) 1309 | if options.calc: 1310 | line += ' %s' % hashlib.md5(stream).hexdigest() 1311 | if options.extra.startswith('!'): 1312 | line = '' 1313 | line += GenerateExtraInfo(options.extra, index, indicator, PrintableName(fname), stream) 1314 | print(line) 1315 | for cPlugin in plugins: 1316 | try: 1317 | if cPlugin.macroOnly and macroPresent: 1318 | oPlugin = cPlugin(fname, SearchAndDecompress(stream), options.pluginoptions) 1319 | elif not cPlugin.macroOnly: 1320 | oPlugin = cPlugin(fname, stream, options.pluginoptions) 1321 | else: 1322 | oPlugin = None 1323 | except Exception as e: 1324 | print('Error instantiating plugin: %s' % cPlugin.name) 1325 | if options.verbose: 1326 | raise e 1327 | return returnCode 1328 | if oPlugin != None: 1329 | result = oPlugin.Analyze() 1330 | if oPlugin.ran: 1331 | if options.quiet: 1332 | for line in result: 1333 | print(MyRepr(line)) 1334 | else: 1335 | print(' Plugin: %s ' % oPlugin.name) 1336 | for line in result: 1337 | print(' ' + MyRepr(line)) 1338 | counter += 1 1339 | if options.yara != None: 1340 | oDecoders = [cIdentity(stream, None)] 1341 | for cDecoder in decoders: 1342 | try: 1343 | oDecoder = cDecoder(stream, options.decoderoptions) 1344 | oDecoders.append(oDecoder) 1345 | except Exception as e: 1346 | print('Error instantiating decoder: %s' % cDecoder.name) 1347 | if options.verbose: 1348 | raise e 1349 | return returnCode 1350 | for oDecoder in oDecoders: 1351 | while oDecoder.Available(): 1352 | for result in rules.match(data=oDecoder.Decode()): 1353 | print(' YARA rule%s: %s' % (IFF(oDecoder.Name() == '', '', ' (stream decoder: %s)' % oDecoder.Name()), result.rule)) 1354 | if options.yarastrings: 1355 | for stringdata in result.strings: 1356 | print(' %06x %s:' % (stringdata[0], stringdata[1])) 1357 | print(' %s' % binascii.hexlify(C2BIP3(stringdata[2]))) 1358 | print(' %s' % repr(stringdata[2])) 1359 | else: 1360 | if len(decoders) > 1: 1361 | print('Error: provide only one decoder when using option select') 1362 | return returnCode 1363 | if options.decompress: 1364 | DecompressFunction = HeuristicDecompress 1365 | else: 1366 | DecompressFunction = lambda x:x 1367 | if options.dump: 1368 | DumpFunction = lambda x:x 1369 | IfWIN32SetBinary(sys.stdout) 1370 | elif options.hexdump: 1371 | DumpFunction = HexDump 1372 | elif options.vbadecompress: 1373 | if options.select == 'a': 1374 | DumpFunction = lambda x: SearchAndDecompress(x, '') 1375 | else: 1376 | DumpFunction = SearchAndDecompress 1377 | elif options.vbadecompresscorrupt: 1378 | DumpFunction = lambda x: SearchAndDecompress(x, None) 1379 | elif options.extract: 1380 | DumpFunction = Extract 1381 | IfWIN32SetBinary(sys.stdout) 1382 | elif options.info: 1383 | DumpFunction = Info 1384 | else: 1385 | DumpFunction = HexAsciiDump 1386 | counter = 1 1387 | for fname in ole.listdir(): 1388 | if options.select == 'a' or ('%s%d' % (prefix, counter)) == options.select: 1389 | StdoutWriteChunked(DumpFunction(DecompressFunction(DecodeFunction(decoders, options, CutData(ole.openstream(fname).read(), options.cut))))) 1390 | if options.select != 'a': 1391 | break 1392 | counter += 1 1393 | 1394 | return returnCode 1395 | 1396 | def YARACompile(fileordirname): 1397 | dFilepaths = {} 1398 | if os.path.isdir(fileordirname): 1399 | for root, dirs, files in os.walk(fileordirname): 1400 | for file in files: 1401 | filename = os.path.join(root, file) 1402 | dFilepaths[filename] = filename 1403 | else: 1404 | for filename in ProcessAt(fileordirname): 1405 | dFilepaths[filename] = filename 1406 | return yara.compile(filepaths=dFilepaths) 1407 | 1408 | def FilenameInSimulations(filename): 1409 | if dslsimulationdb == None: 1410 | return False 1411 | return filename in dslsimulationdb.dSimulations 1412 | 1413 | def OLEDump(filename, options): 1414 | returnCode = 0 1415 | 1416 | if filename != '' and not FilenameInSimulations(filename) and not os.path.isfile(filename): 1417 | print('Error: %s is not a file.' % filename) 1418 | return returnCode 1419 | 1420 | global plugins 1421 | plugins = [] 1422 | LoadPlugins(options.plugins, True) 1423 | 1424 | global decoders 1425 | decoders = [] 1426 | LoadDecoders(options.decoders, True) 1427 | 1428 | if options.raw: 1429 | if filename == '': 1430 | IfWIN32SetBinary(sys.stdin) 1431 | data = sys.stdin.read() 1432 | else: 1433 | data = File2String(filename) 1434 | if options.vbadecompress: 1435 | if options.vbadecompresscorrupt: 1436 | vba = SearchAndDecompress(data, None) 1437 | else: 1438 | vba = SearchAndDecompress(data) 1439 | if options.plugins == '': 1440 | print(vba) 1441 | return returnCode 1442 | else: 1443 | data = vba 1444 | for cPlugin in plugins: 1445 | try: 1446 | if cPlugin.macroOnly: 1447 | oPlugin = cPlugin(filename, data, options.pluginoptions) 1448 | elif not cPlugin.macroOnly: 1449 | oPlugin = cPlugin(filename, data, options.pluginoptions) 1450 | else: 1451 | oPlugin = None 1452 | except Exception as e: 1453 | print('Error instantiating plugin: %s' % cPlugin.name) 1454 | if options.verbose: 1455 | raise e 1456 | return returnCode 1457 | if oPlugin != None: 1458 | result = oPlugin.Analyze() 1459 | if oPlugin.ran: 1460 | if options.quiet: 1461 | for line in result: 1462 | print(MyRepr(line)) 1463 | else: 1464 | print('Plugin: %s ' % oPlugin.name) 1465 | for line in result: 1466 | print(' ' + MyRepr(line)) 1467 | return returnCode 1468 | 1469 | rules = None 1470 | if options.yara != None: 1471 | if not 'yara' in sys.modules: 1472 | print('Error: option yara requires the YARA Python module.') 1473 | return returnCode 1474 | rules = YARACompile(options.yara) 1475 | 1476 | if filename == '': 1477 | IfWIN32SetBinary(sys.stdin) 1478 | oStringIO = cStringIO.StringIO(sys.stdin.read()) 1479 | elif FilenameInSimulations(filename): 1480 | oZipfile = zipfile.ZipFile(dslsimulationdb.GetSimulation(filename), 'r') 1481 | oZipContent = oZipfile.open(oZipfile.infolist()[0], 'r', C2BIP3(MALWARE_PASSWORD)) 1482 | zipContent = oZipContent.read() 1483 | if zipContent.startswith('Neut'): 1484 | zipContent = OLEFILE_MAGIC + zipContent[4:] 1485 | oStringIO = cStringIO.StringIO(zipContent) 1486 | oZipContent.close() 1487 | oZipfile.close() 1488 | elif filename.lower().endswith('.zip'): 1489 | oZipfile = zipfile.ZipFile(filename, 'r') 1490 | oZipContent = oZipfile.open(oZipfile.infolist()[0], 'r', C2BIP3(MALWARE_PASSWORD)) 1491 | oStringIO = cStringIO.StringIO(oZipContent.read()) 1492 | oZipContent.close() 1493 | oZipfile.close() 1494 | else: 1495 | oStringIO = cStringIO.StringIO(open(filename, 'rb').read()) 1496 | 1497 | magic = oStringIO.read(6) 1498 | oStringIO.seek(0) 1499 | if magic[0:4] == OLEFILE_MAGIC: 1500 | ole = olefile.OleFileIO(oStringIO) 1501 | returnCode = OLESub(ole, '', rules, options) 1502 | ole.close() 1503 | elif magic[0:2] == 'PK': 1504 | oZipfile = zipfile.ZipFile(oStringIO, 'r') 1505 | counter = 0 1506 | for info in oZipfile.infolist(): 1507 | oZipContent = oZipfile.open(info, 'r') 1508 | content = oZipContent.read() 1509 | if content[0:4] == OLEFILE_MAGIC: 1510 | letter = chr(ord('A') + counter) 1511 | counter += 1 1512 | if options.select == '': 1513 | if not options.quiet: 1514 | print('%s: %s' % (letter, info.filename)) 1515 | ole = olefile.OleFileIO(cStringIO.StringIO(content)) 1516 | returnCode = OLESub(ole, letter, rules, options) 1517 | ole.close() 1518 | oZipContent.close() 1519 | oZipfile.close() 1520 | else: 1521 | data = oStringIO.read() 1522 | oStringIO.seek(0) 1523 | if ' 1: 1605 | oParser.print_help() 1606 | print('') 1607 | print(' Source code put in the public domain by Didier Stevens, no Copyright') 1608 | print(' Use at your own risk') 1609 | print(' https://DidierStevens.com') 1610 | return 0 1611 | elif len(args) == 0: 1612 | return OLEDump('', options) 1613 | else: 1614 | return OLEDump(args[0], options) 1615 | 1616 | if __name__ == '__main__': 1617 | sys.exit(Main()) 1618 | --------------------------------------------------------------------------------