├── .gitignore ├── LICENSE ├── README.md ├── officeparser.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .dump 2 | *.pyc 3 | *.xls* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 John William Davison 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # officeparser.py 2 | 3 | officeparser.py is a Python script that parses the format of OLE compound documents used by Microsoft Office applications. 4 | 5 | Some useful features of this script include: 6 | * macro extraction 7 | * embedded file extraction 8 | * format analysis 9 | 10 | Execute the script with the --help option to view all available options. 11 | 12 | ## useful options 13 | 14 | __--extract-macros__ 15 | 16 | This extracts all macro code that can be detected in an Office document. Use the -o or --output-dir option to specify the directory to store the files.
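For example, the following invocation is a minimal sketch of macro extraction (`suspicious.doc` and `extracted_macros` are placeholder names, not part of the project):

```
python officeparser.py --extract-macros -o extracted_macros suspicious.doc
```

Each recovered module is saved into the output directory under its VBA module name, with a .bas, .cls or .frm extension depending on the module type.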
17 | 18 | -------------------------------------------------------------------------------- /officeparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # CHANGELOG: 4 | # 2023-08-12: - VBA: Added check for optional PROJECTCOMPATVERSION Record (MS OVBA 2.3.4.2.1.2 Revision 11.0) 5 | # - VBA: fixed infinite loop if REFERENCE record is corrupt 6 | 7 | import sys 8 | from struct import unpack 9 | from optparse import OptionParser 10 | from cStringIO import StringIO 11 | import logging 12 | import re 13 | import os 14 | import zipfile 15 | import tempfile 16 | 17 | OLE_SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" 18 | DIFSECT = 0xFFFFFFFC; 19 | FATSECT = 0xFFFFFFFD; 20 | ENDOFCHAIN = 0xFFFFFFFE; 21 | FREESECT = 0xFFFFFFFF; 22 | 23 | MODULE_EXTENSION = "bas" 24 | CLASS_EXTENSION = "cls" 25 | FORM_EXTENSION = "frm" 26 | 27 | BINFILE_NAME = "/vbaProject.bin" 28 | 29 | def fat_value_to_str(value): 30 | if value == DIFSECT: 31 | return '0xFFFFFFFC (DIF)' 32 | elif value == FATSECT: 33 | return '0xFFFFFFFD (FAT)' 34 | elif value == ENDOFCHAIN: 35 | return '0xFFFFFFFE (EOC)' 36 | elif value == FREESECT: 37 | return '0xFFFFFFFF (FREE)' 38 | else: 39 | return '{0:08X} (PTR)'.format(value) 40 | 41 | def copytoken_help(difference): 42 | from math import ceil, log 43 | bit_count = int(ceil(log(difference, 2))) 44 | bit_count = max([bit_count, 4]) 45 | length_mask = 0xFFFF >> bit_count 46 | offset_mask = ~length_mask 47 | maximum_length = (0xFFFF >> bit_count) + 3 48 | return length_mask, offset_mask, bit_count, maximum_length 49 | 50 | def decompress_stream(compressed_container): 51 | # MS-OVBA 52 | # 2.4.1.2 53 | decompressed_container = '' # result 54 | compressed_current = 0 55 | compressed_chunk_start = None 56 | decompressed_chunk_start = None 57 | 58 | sig_byte = ord(compressed_container[compressed_current]) 59 | if sig_byte != 0x01: 60 | logging.error('invalid signature byte {0:02X}'.format(sig_byte)) 61 | return None 62 | 63 | compressed_current += 1 64 | 65 | while compressed_current < len(compressed_container): 66 | # 2.4.1.1.5 67 | compressed_chunk_start = compressed_current 68 | compressed_chunk_header = unpack("> 15 # 1 == compressed, 0 == uncompressed 72 | 73 | if chunk_is_compressed != 0 and chunk_size > 4095: 74 | logging.warning('CompressedChunkSize > 4095 but CompressedChunkFlag == 1') 75 | if chunk_is_compressed == 0 and chunk_size != 4095: 76 | logging.warning('CompressedChunkSize != 4095 but CompressedChunkFlag == 0') 77 | #if chunk_sign != 0b0110: 78 | #logging.warning('invalid CompressedChunkSignature') 79 | 80 | logging.debug("chunk size = {0}".format(chunk_size)) 81 | 82 | compressed_end = min([len(compressed_container), compressed_current + chunk_size]) 83 | compressed_current += 2 84 | 85 | if chunk_is_compressed == 0: # uncompressed 86 | decompressed_container += compressed_container[compressed_current:compressed_current + 4096] 87 | compressed_current += 4096 88 | continue 89 | 90 | decompressed_chunk_start = len(decompressed_container) 91 | while compressed_current < compressed_end: 92 | flag_byte = ord(compressed_container[compressed_current]) 93 | compressed_current += 1 94 | for bit_index in xrange(0, 8): 95 | if compressed_current >= compressed_end: 96 | break 97 | if (1 << bit_index) & flag_byte == 0: # LiteralToken 98 | decompressed_container += compressed_container[compressed_current] 99 | compressed_current += 1 100 | continue 101 | 102 | # 103 | # copy tokens 104 | # 105 | 106 | copy_token = 
unpack("> temp2) + 1 112 | copy_source = len(decompressed_container) - offset 113 | for index in xrange(copy_source, copy_source + length): 114 | decompressed_container += decompressed_container[index] 115 | compressed_current += 2 116 | 117 | return decompressed_container 118 | 119 | class ParserOptions: 120 | def __init__( 121 | self, 122 | fail_on_invalid_sig=False): 123 | self.fail_on_invalid_sig = fail_on_invalid_sig 124 | 125 | class CompoundBinaryFile: 126 | def __init__(self, file, parser_options=None): 127 | self.file = file 128 | 129 | self.f = open(self.file, 'rb') 130 | sig = self.f.read(8) 131 | is_ole_document = sig == OLE_SIGNATURE 132 | if not is_ole_document and parser_options.fail_on_invalid_sig: 133 | logging.warning('invalid OLE signature (not an office document?)') 134 | sys.exit(1) 135 | self.f.close() 136 | 137 | # if the file is a zipfile, extract the binary part to a tempfile and continue, 138 | # otherwise, proceed as if a real binary file. 139 | if not is_ole_document and zipfile.is_zipfile(self.file): 140 | zfile = zipfile.ZipFile(self.file, "r") 141 | for name in zfile.namelist(): 142 | if name.endswith(BINFILE_NAME): 143 | data = zfile.read(name) 144 | self.f = tempfile.TemporaryFile() 145 | self.f.write(data) 146 | self.f.seek(0) # rewind the data file to the beginning 147 | else: 148 | self.f = open(self.file, 'rb') 149 | 150 | # load the header 151 | self.header = Header(self.f.read(512), parser_options) 152 | self.sector_size = 2 ** self.header._uSectorShift 153 | self.mini_sector_size = 2 ** self.header._uMiniSectorShift 154 | 155 | # get a sector count 156 | if (os.path.getsize(file) - 512) % self.sector_size != 0: 157 | logging.warning("last sector has invalid size") 158 | 159 | self.sector_count = int((os.path.getsize(file) - 512) / self.sector_size) 160 | logging.debug("sector count = {0}".format(self.sector_size)) 161 | logging.debug('sector size = {0}'.format(self.sector_size)) 162 | logging.debug('mini sector size = {0}'.format(self.mini_sector_size)) 163 | 164 | # load the sectors marked as FAT 165 | self.fat_sectors = [] 166 | for fat_sect in self.header._sectFat: 167 | if fat_sect != FREESECT: 168 | self.fat_sectors.append(fat_sect) 169 | 170 | # load any DIF sectors 171 | sector = self.header._sectDifStart 172 | buffer = [sector] 173 | # NOTE I've seen this have an initial value of FREESECT -- not sure why 174 | while sector != FREESECT and sector != ENDOFCHAIN: 175 | data = self.read_sector(sector) 176 | dif_values = [x for x in unpack('<{0}L'.format(self.sector_size / 4), data)] 177 | # the last entry is actually a pointer to next DIF 178 | next = dif_values.pop() 179 | for value in dif_values: 180 | if value != FREESECT: 181 | self.fat_sectors.append(value) 182 | if next in buffer: 183 | logging.error('infinite loop detected at {0} to {1} starting at DIF'.format(sector, next)) 184 | break 185 | buffer.append(next) 186 | sector = next 187 | 188 | # load the FAT 189 | self.fat = [] 190 | for fat_sect in self.fat_sectors: 191 | data = self.read_sector(fat_sect) 192 | if len(data) != self.sector_size: 193 | logging.error('broken FAT (invalid sector size {0} != {1})'.format(len(data), self.sector_size)) 194 | else: 195 | for value in unpack('<{0}L'.format(self.sector_size / 4), data): 196 | self.fat.append(value) 197 | 198 | # get the list of directory sectors 199 | self.directory = [] 200 | buffer = self.read_chain(self.header._sectDirStart) 201 | directory_index = 0 202 | for chunk in unpack("128s" * (len(buffer) / 128), buffer): 203 | 
self.directory.append(Directory(chunk, directory_index)) 204 | directory_index += 1 205 | 206 | # load the ministream 207 | self.minifat = [] 208 | if self.directory[0]._sectStart != ENDOFCHAIN: 209 | self.ministream = self.read_chain(self.directory[0]._sectStart) 210 | #logging.debug("mini stream specified size = {0}".format(self.directory[0]._ulSize)) 211 | if len(self.ministream) < self.directory[0]._ulSize: 212 | logging.warning("specified size is larger than actual stream length {0}".format(len(self.ministream))) 213 | self.ministream = self.ministream[0:self.directory[0]._ulSize] 214 | 215 | # 2.3 The locations for MiniFat sectors are stored in a standard 216 | # chain in the Fat, with the beginning of the chain stored in the 217 | # header. 218 | 219 | data = StringIO(self.read_chain(self.header._sectMiniFatStart)) 220 | while True: 221 | chunk = data.read(self.sector_size) 222 | if chunk == '': 223 | break 224 | if len(chunk) != self.sector_size: 225 | logging.warning("encountered EOF while parsing minifat") 226 | continue 227 | for value in unpack('<{0}L'.format(self.sector_size / 4), chunk): 228 | self.minifat.append(value) 229 | 230 | def read_sector(self, sector): 231 | if sector >= self.sector_count: 232 | logging.warning("reference to invalid sector {0:04X} ({0})".format(sector)) 233 | self.f.seek(512 + (self.sector_size * sector)) 234 | return self.f.read(self.sector_size) 235 | 236 | def read_mini_sector(self, sector): 237 | offset = sector * self.mini_sector_size 238 | return self.ministream[offset:offset + self.mini_sector_size] 239 | 240 | def read_fat(self, sector): 241 | return self.fat[sector] 242 | 243 | def read_mini_fat(self, sector): 244 | return self.minifat[sector] 245 | 246 | def __impl_read_chain(self, start, read_sector_f, read_fat_f): 247 | """Returns the entire contents of a chain starting at the given sector.""" 248 | sector = start 249 | check = [ sector ] # keep a list of sectors we've already read 250 | buffer = StringIO() 251 | while sector != ENDOFCHAIN: 252 | buffer.write(read_sector_f(sector)) 253 | next = read_fat_f(sector) 254 | if next in check: 255 | logging.error('infinite loop detected at {0} to {1} starting at {2}'.format( 256 | sector, next, sector_start)) 257 | return buffer.getvalue() 258 | check.append(next) 259 | sector = next 260 | return buffer.getvalue() 261 | 262 | def read_mini_chain(self, sector_start): 263 | return self.__impl_read_chain(sector_start, self.read_mini_sector, self.read_mini_fat) 264 | 265 | def read_chain(self, sector_start): 266 | return self.__impl_read_chain(sector_start, self.read_sector, self.read_fat) 267 | 268 | def print_fat_sectors(self): 269 | for sector in self.fat_sectors: 270 | print '{0:08X}'.format(sector) 271 | 272 | def get_stream(self, index): 273 | d = self.directory[index] 274 | if d._ulSize < self.header._ulMiniSectorCutoff: 275 | data = self.read_mini_chain(d._sectStart) 276 | else: 277 | data = self.read_chain(d._sectStart) 278 | data = data[0:d._ulSize] 279 | return data 280 | 281 | def find_stream_by_name(self, name): 282 | for d in self.directory: 283 | if d.name == name: 284 | return d 285 | return None 286 | 287 | # TODO newer office documents can have 4096 byte headers 288 | class Header: 289 | def __init__(self, data, parser_options=None): 290 | # sanity checks 291 | if len(data) < 512: 292 | logging.warning('document is less than 512 bytes') 293 | 294 | self.data = data 295 | self.header = unpack("<8s16sHHHHHHLLLLLLLLLL109L", data) 296 | self._abSig = self.header[0] 297 | self._clid = 
self.header[1] 298 | self._uMinorVersion = self.header[2] 299 | self._uDllVersion = self.header[3] 300 | self._uByteOrder = self.header[4] 301 | self._uSectorShift = self.header[5] 302 | self._uMiniSectorShift = self.header[6] 303 | self._usReserved = self.header[7] 304 | self._usReserved1 = self.header[8] 305 | self._usReserved2 = self.header[9] 306 | self._csectFat = self.header[10] # number of sects in FAT chain 307 | self._sectDirStart = self.header[11] # first sect in Directory chain 308 | self._signature = self.header[12] 309 | self._ulMiniSectorCutoff = self.header[13] 310 | self._sectMiniFatStart = self.header[14] # first mini-FAT sect 311 | self._csectMiniFat = self.header[15] # number of sects in mini-FAT 312 | self._sectDifStart = self.header[16] # first sect in DIF chain 313 | self._csectDif = self.header[17] # number of sets in DIF chain 314 | self._sectFat = self.header[18:] # sects of first 109 FAT sectors 315 | 316 | def pretty_print(self): 317 | print """HEADER DUMP 318 | _abSig = {0} 319 | _clid = {1} 320 | _uMinorVersion = {2} 321 | _uDllVersion = {3} 322 | _uByteOrder = {4} 323 | _uSectorShift = {5} 324 | _uMiniSectorShift = {6} 325 | _usReserved = {7} 326 | _usReserved1 = {8} 327 | _usReserved2 = {9} 328 | _csectFat = {10} 329 | _sectDirStart = {11} 330 | _signature = {12} 331 | _ulMiniSectorCutoff = {13} 332 | _sectMiniFatStart = {14} 333 | _csectMiniFat = {15} 334 | _sectDifStart = {16} 335 | _csectDif = {17}""".format( 336 | ' '.join(['{0:02X}'.format(ord(x)) for x in self._abSig]), 337 | ' '.join(['{0:02X}'.format(ord(x)) for x in self._clid]), 338 | '{0:04X}'.format(self._uMinorVersion), 339 | '{0}'.format(self._uDllVersion), 340 | '{0:04X}'.format(self._uByteOrder), 341 | '{0} ({1} bytes)'.format(self._uSectorShift, 2 ** self._uSectorShift), 342 | '{0} ({1} bytes)'.format(self._uMiniSectorShift, 343 | 2 ** self._uMiniSectorShift), 344 | '{0:04X}'.format(self._usReserved), 345 | '{0:08X}'.format(self._usReserved1), 346 | '{0:08X}'.format(self._usReserved2), 347 | '{0:08X}'.format(self._csectFat), 348 | '{0:08X}'.format(self._sectDirStart), 349 | '{0:08X}'.format(self._signature), 350 | '{0:08X} ({1} bytes)'.format(self._ulMiniSectorCutoff, 351 | self._ulMiniSectorCutoff), 352 | '{0:08X}'.format(self._sectMiniFatStart), 353 | '{0:08X}'.format(self._csectMiniFat), 354 | '{0:08X}'.format(self._sectDifStart), 355 | '{0:08X}'.format(self._csectDif)) 356 | 357 | for fat in self._sectFat: 358 | if fat != FREESECT: 359 | print '_sectFat = {0:08X}'.format(fat) 360 | 361 | STGTY_INVALID = 0 362 | STGTY_STORAGE = 1 363 | STGTY_STREAM = 2 364 | STGTY_LOCKBYTES = 3 365 | STGTY_PROPERTY = 4 366 | STGTY_ROOT = 5 367 | 368 | def stgty_to_str(value): 369 | if value == STGTY_INVALID: 370 | return "STGTY_INVALID" 371 | elif value == STGTY_STORAGE: 372 | return "STGTY_STORAGE" 373 | elif value == STGTY_STREAM: 374 | return "STGTY_STREAM" 375 | elif value == STGTY_LOCKBYTES: 376 | return "STGTY_LOCKBYTES" 377 | elif value == STGTY_PROPERTY: 378 | return "STGTY_PROPERTY" 379 | elif value == STGTY_ROOT: 380 | return "STGTY_ROOT" 381 | else: 382 | return "UNKNOWN VALUE {0}".format(value) 383 | 384 | DE_RED = 0 385 | DE_BLACK = 1 386 | 387 | def de_to_str(value): 388 | if value == DE_RED: 389 | return "DE_RED" 390 | elif value == DE_BLACK: 391 | return "DE_BLACK" 392 | else: 393 | return "UNKNOWN VALUE {0}".format(value) 394 | 395 | class Directory: 396 | def __init__(self, data, index): 397 | self.data = data 398 | self.index = index 399 | self.directory = unpack("<64sHbbLLL16sLQQLLHH", 
data) 400 | self._ab = self.directory[0] 401 | self._cb = self.directory[1] 402 | # convert wide chars into ASCII 403 | self.name = ''.join([x for x in self._ab[0:self._cb] if ord(x) != 0]) 404 | self._mse = self.directory[2] 405 | self._bflags = self.directory[3] 406 | self._sidLeftSib = self.directory[4] 407 | self._sidRightSib = self.directory[5] 408 | self._sidChild = self.directory[6] 409 | self._clsId = self.directory[7] 410 | self._dwUserFlags = self.directory[8] 411 | self._time = [ self.directory[9], self.directory[10] ] 412 | self._sectStart = self.directory[11] 413 | self._ulSize = self.directory[12] 414 | self._dptPropType = self.directory[13] 415 | # last two bytes are padding 416 | 417 | def pretty_print(self): 418 | print """ 419 | _ab = {0} 420 | _cb = {1} 421 | _mse = {2} 422 | _bflags = {3} 423 | _sidLeftSib = {4} 424 | _sidRightSib = {5} 425 | _sidChild = {6} 426 | _clsId = {7} 427 | _dwUserFlags = {8} 428 | _time[0] = {9} 429 | _time[1] = {10} 430 | _sectStart = {11} 431 | _ulSize = {12} 432 | _dptPropType = {13}""".format( 433 | "{0}\n {1}".format(self.name, 434 | ' '.join(['{0:02X}'.format(ord(x)) for x in self._ab[0:self._cb]])), 435 | #unicode(self._ab).encode('us-ascii', 'ignore'), 436 | '{0:04X}'.format(self._cb), 437 | stgty_to_str(self._mse), 438 | de_to_str(self._bflags), 439 | '{0:04X}'.format(self._sidLeftSib), 440 | '{0:04X}'.format(self._sidRightSib), 441 | '{0:04X}'.format(self._sidChild), 442 | ' '.join(['{0:02X}'.format(ord(x)) for x in self._clsId]), 443 | '{0:04X}'.format(self._dwUserFlags), 444 | '{0}'.format(self._time[0]), 445 | '{0}'.format(self._time[1]), 446 | '{0:08X}'.format(self._sectStart), 447 | '{0:08X} ({0} bytes)'.format(self._ulSize), 448 | '{0:04X}'.format(self._dptPropType)) 449 | 450 | def _main(): 451 | 452 | parser = OptionParser() 453 | 454 | parser.add_option('-l', '--log-level', dest='log_level', 455 | type='string', default='INFO', 456 | help='Sets logging level to DEBUG, INFO (default), WARNING or ERROR.') 457 | 458 | parser.add_option('-x', '--fail-on-invalid-signature', dest='fail_on_invalid_sig', 459 | action='store_true', default=False, 460 | help='Stop processing if the document is missing the required header signature.') 461 | 462 | parser.add_option('-H', "--print-header", dest="print_header", 463 | action="store_true", default=False, 464 | help="Print header section.") 465 | 466 | parser.add_option('-d', "--print-directory", dest="print_directory", 467 | action="store_true", default=False, 468 | help="Print directory structure.") 469 | 470 | parser.add_option('-f', "--print-fat", dest="print_fat", 471 | action="store_true", default=False, 472 | help="Print FAT structure.") 473 | 474 | parser.add_option('-m', "--print-mini-fat", dest="print_mini_fat", 475 | action="store_true", default=False, 476 | help="Print mini-FAT structure.") 477 | 478 | parser.add_option('-s', '--print-expected-file-size', dest='print_expected_file_size', 479 | action='store_true', default=False, 480 | help='Print the expected file size based on the number of FAT sectors and sector size.') 481 | 482 | parser.add_option('-t', "--print-streams", dest="print_streams", 483 | action="store_true", default=False, 484 | help="Print the index and names of the streams contained in the document.") 485 | 486 | parser.add_option('-i', "--print-invalid-fat-count", dest="print_invalid_fat_count", 487 | action="store_true", default=False, 488 | help="Prints the number of invalid FAT entries.") 489 | 490 | parser.add_option('--create-manifest', 
dest='create_manifest', 491 | action='store_true', default=False, 492 | help="Create a manifest file that contains a list of all created files.") 493 | 494 | parser.add_option('-o', '--output-dir', dest='output_dir', 495 | type='string', default='.', 496 | help="Directory to store all extracted files.") 497 | 498 | parser.add_option("--dump-sector", dest="dump_sector", 499 | type="int", default=None, 500 | help="Dump the contents of the given sector.") 501 | 502 | parser.add_option('--dump-stream', dest='dump_stream', 503 | type='int', default=None, 504 | help="Dump the contents of the given stream identified by directory index.") 505 | 506 | parser.add_option('--dump-stream-by-name', dest='dump_stream_by_name', 507 | type='string', default=None, 508 | help="Dump the contents of the given stream identified by name.") 509 | 510 | parser.add_option('--dump-ministream', dest='dump_ministream', 511 | action='store_true', default=False, 512 | help='Dump the entire contents of the ministream to standard output.') 513 | 514 | parser.add_option('--extract-streams', dest='extract_streams', 515 | action='store_true', default=False, 516 | help='Store all streams as the specified files. The string {0} in the file name is replaced with the directory index.') 517 | 518 | parser.add_option('--extract-ole-streams', dest='extract_ole_streams', 519 | action='store_true', default=False, 520 | help="Extract all Ole10Native streams.") 521 | 522 | parser.add_option('--extract-macros', dest='extract_macros', 523 | action='store_true', default=False, 524 | help='Extract all macros into .vbs files.') 525 | 526 | parser.add_option('--extract-unknown-sectors', dest='extract_unknown_sectors', 527 | action='store_true', default=False, 528 | help='Extract any sectors that are not represented in the FAT to unknown_sectors.dat.') 529 | 530 | parser.add_option('--check-stream-continuity', dest='check_stream_cont', 531 | action='store_true', default=False, 532 | help='Checks that sectors beloning to FAT chains are stored in sequential order.') 533 | 534 | parser.add_option('--check-fat', dest='check_fat', 535 | action='store_true', default=False, 536 | help='Checks for FAT values that point to sectors that do not exist.') 537 | 538 | parser.add_option('--check-orphaned-chains', dest='check_orphaned_chains', 539 | action='store_true', default=False, 540 | help='Checks for chains that are not accesible from any directory entry.') 541 | 542 | (options, args) = parser.parse_args() 543 | 544 | logging.basicConfig(format='%(levelname)s: %(message)s', 545 | level=logging.__dict__[options.log_level]) 546 | 547 | parser_options = ParserOptions( 548 | fail_on_invalid_sig=options.fail_on_invalid_sig) 549 | 550 | ofdoc = CompoundBinaryFile(args[0], parser_options) 551 | 552 | if options.create_manifest: 553 | manifest = open(os.path.join(options.output_dir, 'manifest'), 'wb') 554 | 555 | # 556 | # print options 557 | # 558 | if options.print_header: 559 | ofdoc.header.pretty_print() 560 | 561 | if options.print_directory: 562 | for x in xrange(0, len(ofdoc.directory)): 563 | print "Directory Index {0:08X} ({0})".format(x) 564 | ofdoc.directory[x].pretty_print() 565 | print 566 | 567 | if options.print_fat: 568 | for sector in xrange(0, len(ofdoc.fat)): 569 | print '{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.fat[sector])) 570 | 571 | if options.print_mini_fat: 572 | for sector in xrange(0, len(ofdoc.minifat)): 573 | print '{0:08X}: {1}'.format(sector, fat_value_to_str(ofdoc.minifat[sector])) 574 | 575 | if 
options.print_streams: 576 | for d in ofdoc.directory: 577 | if d._mse == STGTY_STREAM: 578 | print '{0}: {1}'.format(d.index, d.name) 579 | 580 | if options.print_expected_file_size: 581 | expected_file_size = (len([x for x in ofdoc.fat if x != FREESECT]) * ofdoc.sector_size) + 512 582 | actual_file_size = os.path.getsize(args[0]) 583 | size_diff = abs(expected_file_size - actual_file_size) 584 | percent_diff = (float(size_diff) / float(expected_file_size)) * 100.0 585 | 586 | print "expected file size {0} actual {1} difference {2} ({3:0.2f}%)".format( 587 | expected_file_size, actual_file_size, size_diff, percent_diff) 588 | 589 | # 590 | # analysis options 591 | # 592 | if options.check_stream_cont: 593 | for d in ofdoc.directory[1:]: 594 | if d._mse == STGTY_INVALID: 595 | continue 596 | # ignore streams in the ministream 597 | if d._ulSize < ofdoc.header._ulMiniSectorCutoff: 598 | continue 599 | 600 | d.pretty_print() 601 | if d._sectStart != ENDOFCHAIN: 602 | current = d._sectStart 603 | while True: 604 | next = ofdoc.read_fat(current) 605 | logging.debug("next = {0:08X}".format(next)) 606 | if next == ENDOFCHAIN: 607 | break 608 | if next - current != 1: 609 | logging.warning('directory index {0} non-continuous at sector {1:08X} to {2:08X}'.format( 610 | d.index, current, next)) 611 | current = next 612 | 613 | invalid_fat_sectors = 0 614 | if options.check_fat or options.print_invalid_fat_count: 615 | for value in ofdoc.fat_sectors: 616 | if value > ofdoc.sector_count: 617 | invalid_fat_sectors += 1 618 | if options.check_fat: 619 | logging.warning('invalid FAT sector reference {0:08X}'.format(value)) 620 | 621 | if options.print_invalid_fat_count: 622 | print "invalid FAT sector references: {0}".format(invalid_fat_sectors) 623 | 624 | invalid_fat_entries = 0 625 | if options.check_fat or options.print_invalid_fat_count: 626 | for value in xrange(0, len(ofdoc.fat)): 627 | ptr = ofdoc.read_fat(value) 628 | if ptr == DIFSECT or ptr == FATSECT or ptr == ENDOFCHAIN or ptr == FREESECT: 629 | continue 630 | if ptr > len(ofdoc.fat): 631 | invalid_fat_entries += 1 632 | if options.check_fat: 633 | logging.warning('invalid FAT sector {0:08X} value {1:08X}'.format(value, ptr)) 634 | 635 | if options.print_invalid_fat_count: 636 | print "invalid FAT entries: {0}".format(invalid_fat_entries) 637 | 638 | if options.check_orphaned_chains: 639 | buffer = [False for fat in ofdoc.fat] 640 | # directory sectors 641 | index = ofdoc.header._sectDirStart 642 | while index != ENDOFCHAIN: 643 | buffer[index] = True 644 | index = ofdoc.read_fat(index) 645 | # minifat sectors 646 | index = ofdoc.header._sectMiniFatStart 647 | while index != ENDOFCHAIN: 648 | buffer[index] = True 649 | index = ofdoc.read_fat(index) 650 | # fat sectors specified in the header 651 | for index in ofdoc.header._sectFat: 652 | if index != FREESECT: 653 | buffer[index] = True 654 | # stream sectors 655 | for d in ofdoc.directory: 656 | if d._mse == STGTY_INVALID: 657 | continue 658 | # ignore streams in the ministream 659 | if d.index > 0 and d._ulSize < ofdoc.header._ulMiniSectorCutoff: 660 | continue 661 | 662 | index = d._sectStart 663 | while index != ENDOFCHAIN: 664 | #logging.debug('checking index {0:08X}'.format(index)) 665 | buffer[index] = True 666 | index = ofdoc.read_fat(index) 667 | 668 | for index in xrange(0, len(buffer)): 669 | #logging.debug('{0:08X} {1} {2}'.format(index, buffer[index], fat_value_to_str(ofdoc.read_fat(index)))) 670 | if ofdoc.read_fat(index) == FREESECT and buffer[index] == True: 671 | 
logging.warning('FREESECT is marked as used') 672 | if ofdoc.read_fat(index) != FREESECT and buffer[index] == False: 673 | logging.warning('non-FREESECT is not used') 674 | 675 | # 676 | # dump options 677 | # 678 | if options.dump_sector: 679 | sys.stdout.write(ofdoc.read_sector(options.dump_sector)) 680 | sys.exit(0) 681 | 682 | if options.dump_ministream: 683 | sys.stdout.write(ofdoc.ministream) 684 | sys.exit(0) 685 | 686 | if options.dump_stream: 687 | sys.stdout.write(ofdoc.get_stream(options.dump_stream)) 688 | sys.exit(0) 689 | 690 | if options.dump_stream_by_name: 691 | d = ofdoc.find_stream_by_name(options.dump_stream_by_name) 692 | sys.stdout.write(ofdoc.get_stream(d.index)) 693 | sys.exit(0) 694 | 695 | # 696 | # extraction options 697 | # 698 | if options.extract_ole_streams: 699 | for d in ofdoc.directory: 700 | if d.name == "\x01Ole10Native": 701 | data = ofdoc.get_stream(d.index) 702 | size = unpack(' 128: 911 | logging.error("PROJECTNAME_SizeOfProjectName value not in range: {0}".format(PROJECTNAME_SizeOfProjectName)) 912 | PROJECTNAME_ProjectName = dir_stream.read(PROJECTNAME_SizeOfProjectName) 913 | 914 | # PROJECTDOCSTRING Record 915 | PROJECTDOCSTRING_Id = unpack(" 2000: 919 | logging.error("PROJECTDOCSTRING_SizeOfDocString value not in range: {0}".format(PROJECTDOCSTRING_SizeOfDocString)) 920 | PROJECTDOCSTRING_DocString = dir_stream.read(PROJECTDOCSTRING_SizeOfDocString) 921 | PROJECTDOCSTRING_Reserved = unpack(" 260: 933 | logging.error("PROJECTHELPFILEPATH_SizeOfHelpFile1 value not in range: {0}".format(PROJECTHELPFILEPATH_SizeOfHelpFile1)) 934 | PROJECTHELPFILEPATH_HelpFile1 = dir_stream.read(PROJECTHELPFILEPATH_SizeOfHelpFile1) 935 | PROJECTHELPFILEPATH_Reserved = unpack(" 1015: 972 | logging.error("PROJECTCONSTANTS_SizeOfConstants value not in range: {0}".format(PROJECTCONSTANTS_SizeOfConstants)) 973 | PROJECTCONSTANTS_Constants = dir_stream.read(PROJECTCONSTANTS_SizeOfConstants) 974 | PROJECTCONSTANTS_Reserved = unpack(" 0: 1175 | code_data = decompress_stream(code_data) 1176 | # build filename 1177 | filext = code_modules[MODULENAME_ModuleName] 1178 | filename = os.path.join(options.output_dir, '{0}.{1}'.format(MODULENAME_ModuleName, filext)) 1179 | # if the file already exists, add a counter until it's unused: 1180 | counter = 1 1181 | while os.path.exists(filename): 1182 | logging.debug('Filename %s already exists' % filename) 1183 | filename = os.path.join(options.output_dir, '%s_%d.%s' % (MODULENAME_ModuleName, counter, filext)) 1184 | counter += 1 1185 | logging.info('Saving VBA code to %s' % filename) 1186 | f = open(filename, 'wb') 1187 | f.write(code_data) 1188 | f.close() 1189 | 1190 | if options.create_manifest: 1191 | manifest.write(os.path.basename(filename)) 1192 | manifest.write("\n") 1193 | 1194 | logging.debug('created file {0}'.format(filename)) 1195 | else: 1196 | logging.warning("module stream {0} has code data length 0".format(MODULESTREAMNAME_StreamName)) 1197 | break 1198 | 1199 | if options.extract_unknown_sectors: 1200 | i = 0 1201 | while True: 1202 | filename = os.path.join(options.output_dir, 'unknown_sectors_{0}.dat'.format(i)) 1203 | if not os.path.exists(filename): 1204 | break 1205 | i += 1 1206 | f_in = open(args[0], 'rb') 1207 | f_in.seek(512 + (len(ofdoc.fat) * ofdoc.sector_size)) 1208 | f_out = open(filename, 'wb') 1209 | f_out.write(f_in.read()) 1210 | f_out.close() 1211 | f_in.close() 1212 | 1213 | if options.create_manifest: 1214 | manifest.write(os.path.basename(filename)) 1215 | manifest.write("\n") 1216 | 1217 | 
logging.debug('created file {0} size = {1}'.format(filename, os.path.getsize(filename))) 1218 | logging.debug('header + fat allocation = {0}'.format(512 + (len(ofdoc.fat) * ofdoc.sector_size))) 1219 | logging.debug('file size = {0}'.format(os.path.getsize(args[0]))) 1220 | 1221 | if options.create_manifest: 1222 | manifest.close() 1223 | 1224 | if __name__ == '__main__': 1225 | _main() 1226 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """A setuptools based setup module. 5 | See: 6 | https://packaging.python.org/en/latest/distributing.html 7 | https://github.com/pypa/sampleproject 8 | """ 9 | 10 | 11 | # Always prefer setuptools over distutils 12 | from setuptools import setup 13 | # To use a consistent encoding 14 | from codecs import open 15 | from os import path 16 | 17 | __version__ = "1.0.1" 18 | description = "A python script that parses the format of OLE compound documents used by Microsoft Office applications." 19 | 20 | here = path.abspath(path.dirname(__file__)) 21 | 22 | # Get the long description from the README file 23 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 24 | long_description = f.read() 25 | 26 | setup( 27 | name='officeparser', 28 | 29 | # Versions should comply with PEP440. For a discussion on single-sourcing 30 | # the version across setup.py and the project code, see 31 | # https://packaging.python.org/en/latest/single_source_version.html 32 | version=__version__, 33 | 34 | description=description, 35 | long_description=long_description, 36 | 37 | # The project's main homepage. 38 | url='https://github.com/unixfreak0037/officeparser', 39 | 40 | # Author details 41 | author='John Davison', 42 | author_email='unixfreak0037@gmail.com', 43 | 44 | # Choose your license 45 | license='MIT', 46 | 47 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 48 | classifiers=[ 49 | # How mature is this project? Common values are 50 | # 3 - Alpha 51 | # 4 - Beta 52 | # 5 - Production/Stable 53 | 'Development Status :: 5 - Production/Stable', 54 | 55 | # Indicate who your project is intended for 56 | 'Intended Audience :: Developers', 57 | "Intended Audience :: Information Technology", 58 | 'Operating System :: OS Independent', 59 | 60 | 61 | # Pick your license as you wish (should match "license" above) 62 | 'License :: OSI Approved :: MIT License', 63 | 64 | # Specify the Python versions you support here. In particular, ensure 65 | # that you indicate whether you support Python 2, Python 3 or both. 66 | 'Programming Language :: Python :: 3 :: Only', 67 | 'Programming Language :: Python :: 3', 68 | 'Programming Language :: Python :: 3.0', 69 | 'Programming Language :: Python :: 3.1', 70 | 'Programming Language :: Python :: 3.2', 71 | 'Programming Language :: Python :: 3.3', 72 | 'Programming Language :: Python :: 3.4', 73 | 'Programming Language :: Python :: 3.5', 74 | 'Programming Language :: Python :: 3.6', 75 | ], 76 | 77 | # What does your project relate to? 78 | keywords='Microsoft Office, ole, vba, macro', 79 | 80 | # You can just specify the packages manually here if your project is 81 | # simple. Or you can use find_packages(). 82 | # packages=["msoffice_decrypt"], 83 | 84 | 85 | # Alternatively, if you want to distribute just a my_module.py, uncomment 86 | # this: 87 | py_modules=["officeparser"], 88 | 89 | # List run-time dependencies here. 
These will be installed by pip when 90 | # your project is installed. For an analysis of "install_requires" vs pip's 91 | # requirements files see: 92 | # https://packaging.python.org/en/latest/requirements.html 93 | install_requires=[], 94 | 95 | entry_points={ 96 | 'console_scripts': ['officeparser=officeparser:_main'], 97 | } 98 | ) 99 | --------------------------------------------------------------------------------
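Because setup.py declares a `console_scripts` entry point mapping `officeparser` to `officeparser:_main`, the parser can also be installed and run as a standalone command. A minimal sketch from a local checkout, assuming a Python 2 interpreter (officeparser.py itself still relies on `cStringIO`, `print` statements and `xrange`); `sample.doc` is a placeholder file name:

```
pip install .
officeparser --print-streams sample.doc
```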