├── TScopy
│   ├── __init__.py
│   ├── BinaryParser.py
│   ├── MFT.py
│   └── tscopy.py
├── dist
│   ├── TScopy_x64.exe
│   └── TScopy_x86.exe
├── README_imgs
│   └── Blog_061120.png
├── TScopy.spec
├── tscopy.py
└── README.md

/TScopy/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dist/TScopy_x64.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/dist/TScopy_x64.exe
--------------------------------------------------------------------------------
/dist/TScopy_x86.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/dist/TScopy_x86.exe
--------------------------------------------------------------------------------
/README_imgs/Blog_061120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/README_imgs/Blog_061120.png
--------------------------------------------------------------------------------
/TScopy.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 | import platform
3 | import sys
4 | 
5 | operating_sys = platform.system()
6 | 
7 | # Compute the platform-specific binary name
8 | suffix = ""
9 | if operating_sys == "Windows":
10 |     if sys.maxsize > 2**32:
11 |         suffix = '_x64.exe'
12 |     else:
13 |         suffix = '_x86.exe'
14 | 
15 | binary_name = "TScopy" + suffix
16 | block_cipher = None
17 | 
18 | 
19 | a = Analysis(['tscopy.py'],
20 |              pathex=['Z:\\'],
21 |              binaries=[],
22 |              datas=[],
23 |              hiddenimports=[],
24 |              hookspath=[],
25 |              runtime_hooks=[],
26 |              excludes=[],
27 |              win_no_prefer_redirects=False,
28 |              win_private_assemblies=False,
29 |              cipher=block_cipher,
30 |              noarchive=False)
31 | pyz = PYZ(a.pure, a.zipped_data,
32 |           cipher=block_cipher)
33 | exe = EXE(pyz,
34 |           a.scripts,
35 |           a.binaries,
36 |           a.zipfiles,
37 |           a.datas,
38 |           [],
39 |           name=binary_name,
40 |           debug=False,
41 |           bootloader_ignore_signals=False,
42 |           strip=False,
43 |           upx=True,
44 |           upx_exclude=[],
45 |           runtime_tmpdir=None,
46 |           console=True)
--------------------------------------------------------------------------------
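The spec above drives the PyInstaller build that produces the standalone executables in dist/. Assuming PyInstaller is installed, the build is typically invoked as:

```
pyinstaller TScopy.spec
```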
/tscopy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """
4 | This project is based off the work from the following projects:
5 |     * https://github.com/williballenthin/python-ntfs
6 |     * https://github.com/jschicht/RawCopy
7 | """
8 | 
9 | # TODO: Parsing of command line input for multiple files needs to be more robust
10 | import logging
11 | import sys
12 | import os
13 | import argparse
14 | import traceback
15 | import time
16 | import ctypes
17 | 
18 | from TScopy.tscopy import TScopy
19 | 
20 | log = logging.getLogger("tscopy")
21 | log.setLevel(logging.INFO)
22 | handler = logging.StreamHandler(sys.stdout)
23 | handler.setLevel(logging.DEBUG)
24 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
25 | handler.setFormatter(formatter)
26 | log.addHandler(handler)
27 | 
28 | def check_administrative_rights():
29 |     if ctypes.windll.shell32.IsUserAnAdmin() == 0:
30 |         log.info("TrustedIR Collector must run with administrative privileges")
31 |         print "ERROR: TrustedIR Collector must run with administrative privileges\nPress ENTER to finish..."
32 |         sys.stdin.readline()
33 |         return False
34 |     return True
35 | 
36 | def parseArgs():
37 |     parser = argparse.ArgumentParser( description="Copy protected files by parsing the MFT. Must be run with Administrator privileges", usage="""\
38 | 
39 | TScopy_x64.exe -r -o c:\\test -f c:\\users\\tscopy\\ntuser.dat
40 |     Description: Copies only the ntuser.dat file to the c:\\test directory
41 | TScopy_x64.exe -o c:\\test -f c:\\Windows\\system32\\config
42 |     Description: Copies all files in the config directory but does not copy the directories under it.
43 | TScopy_x64.exe -r -o c:\\test -f c:\\Windows\\system32\\config
44 |     Description: Copies all files and subdirectories in the config directory.
45 | TScopy_x64.exe -r -o c:\\test -f c:\\users\\*\\ntuser*,c:\\Windows\\system32\\config
46 |     Description: Uses wildcards and lists to copy any file beginning with ntuser under each user account and recursively copies the registry hives.
47 | """)
48 |     parser.add_argument('-f', '--file', help="Full path of the file or directory to be copied. Filenames can be grouped in a comma ',' separated list. Wildcard '*' is accepted." )
49 |     parser.add_argument('-o', '--outputdir', help="Directory to copy files to. Copy will keep the original paths" )
50 |     parser.add_argument('-i', '--ignore_saved_ref_nums', action='store_true', help="The script stores MFT reference numbers and path info to speed up later runs. This option ignores and does not save the stored MFT reference numbers and paths")
51 |     parser.add_argument('-r', '--recursive', action='store_true', help="Recursively copies a directory. Note this only works with directories.")
52 |     parser.add_argument('--debug', action='store_true', help=argparse.SUPPRESS)
53 | 
54 |     args = parser.parse_args()
55 |     if args.debug:
56 |         log.setLevel(logging.DEBUG)
57 | 
58 |     if args.file:
59 |         process_files = []
60 |         for name in args.file.split(','):
61 |             process_files.append( name )
62 |     else:
63 |         log.error("\nError: the --file option is required\n\n")
64 |         parser.print_help()
65 |         sys.exit(1)
66 | 
67 |     if args.outputdir:
68 |         tmp_dir = args.outputdir
69 |         if tmp_dir[-1] == os.sep:
70 |             tmp_dir = tmp_dir[:-1]
71 | 
72 |         if not os.path.isdir( tmp_dir ):
73 |             log.error("Error: output destination (%s) not found\n\n" % tmp_dir )
74 |             parser.print_help()
75 |             sys.exit(1)
76 |         args.outputdir = tmp_dir
77 |     return { 'files': process_files,
78 |              'outputbasedir': args.outputdir,
79 |              'debug': args.debug,
80 |              'recursive': args.recursive,
81 |              'ignore_table': args.ignore_saved_ref_nums
82 |            }
83 | 
84 | if __name__ == '__main__':
85 |     start = time.time()
86 |     args = parseArgs()
87 |     if not check_administrative_rights():
88 |         sys.exit(1)
89 | 
90 |     config = {
91 |         'pickledir': args['outputbasedir'],
92 |         'debug': args['debug'],
93 |         'logger': log,
94 |         'ignore_table': args['ignore_table']}
95 | 
96 |     try:
97 |         tscopy = TScopy()
98 |         tscopy.setConfiguration( config )
99 |         dst_path = args['outputbasedir']
100 |         for src in args['files']:
101 |             try:
102 |                 tscopy.copy( src, dst_path, bRecursive=args['recursive'])
103 |             except:
104 |                 log.error( traceback.format_exc() )
105 |     except:
106 |         log.error( traceback.format_exc() )
107 | 
108 |     log.info("Job took %r seconds" % (time.time()-start))
109 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TScopy
2 | ![TScopy Logo](/README_imgs/Blog_061120.png)
3 | 
4 | Updated 2022-03-31
5 | 
6 | ## Introducing TScopy
7 | During an Incident Response (IR) engagement, it is a requirement to be able to analyze files on the filesystem. Sometimes these files are locked by the operating system (OS) because they are in use, which is particularly frustrating with event logs and registry hives. TScopy allows a user running with administrator privileges to access locked files by parsing out their raw location in the filesystem and copying them without asking the OS.
8 | 
9 | There are other tools that perform similar functions, such as RawCopy, which we have used and which is the basis for this tool. However, RawCopy has some disadvantages that led us to develop TScopy, including performance, size, and the difficulty of incorporating it into other tools.
10 | 
11 | This blog is intended to introduce TScopy but also to ask for assistance. As in all software development, the more a tool is used, the more edge cases are found. We ask that people try out the tool and report any bugs.
12 | 
13 | ## What is TScopy?
14 | TScopy is a Python script used to parse the NTFS $MFT file to locate and copy specific files. By parsing the Master File Table (MFT), the script bypasses operating system locks on files. The script was originally based on the work of RawCopy. RawCopy is written in AutoIt and is difficult to modify for our purposes. The decision to port RawCopy to Python was driven by the need to incorporate this functionality natively into our toolset.
16 | 
16 | TScopy is designed to be run as a standalone program or included as a Python module. The Python implementation makes use of the python-ntfs tools found at https://github.com/williballenthin/python-ntfs. TScopy builds upon the base functionality of python-ntfs to isolate the location of each file on the raw disk. A sketch of module use is shown below.
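For module use, a minimal sketch that mirrors the `__main__` driver in tscopy.py above — the config keys and the `copy()` call are taken from that script, while the paths here are only illustrative:

```
import logging

from TScopy.tscopy import TScopy

log = logging.getLogger("tscopy")

# Config keys mirror those built in tscopy.py's __main__ block
config = {
    'pickledir': 'c:\\test',   # where the MFT reference-number cache is pickled
    'debug': False,
    'logger': log,
    'ignore_table': False,     # True skips loading/saving the cached reference numbers
}

tscopy = TScopy()              # a singleton, so MFT metadata is reused across copies
tscopy.setConfiguration(config)
tscopy.copy('c:\\users\\tscopy\\ntuser.dat', 'c:\\test', bRecursive=False)
```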
17 | 
18 | ## What makes TScopy different?
19 | TScopy is written in Python and organized into classes to make it more maintainable and readable than the AutoIt original. AutoIt can be flagged as malicious by anti-virus or detection software because some malware has made use of it.
20 | 
21 | The major differences between TScopy and RawCopy are the ability to copy multiple files per execution and to cache the file structure. As shown in the image below, TScopy has options to copy a single file, multiple comma-delimited files, the contents of a directory, wildcarded paths (individual files or directories), and recursive directories.
22 | 
23 | TScopy caches the location of each directory and file as it iterates the target file's full path. It then uses this cache to optimize the search for any other files, ensuring future file copies are performed much faster. This is a significant advantage over RawCopy, which iterates over the entire path for each file. A toy sketch of the idea is shown below.
24 | 
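The caching amounts to memoizing each resolved path component. The following is an illustrative toy, not TScopy's actual code; the hypothetical `lookup_child` stands in for the MFT directory-index walk that resolves one child name under a parent record:

```
# Memoize each resolved path component so later lookups reuse the
# longest already-resolved prefix instead of re-walking the MFT.
cache = {}  # path prefix -> MFT record number

def resolve(path, lookup_child):
    record = None
    prefix = ''
    for name in path.strip('\\').split('\\'):
        prefix = prefix + '\\' + name if prefix else name
        if prefix in cache:
            record = cache[prefix]               # cache hit: no MFT walk
        else:
            record = lookup_child(record, name)  # walk parent's index in the MFT
            cache[prefix] = record
    return record
```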
25 | ## TScopy Options
26 | ```
27 | .\TScopy_x64.exe -h
28 | 
29 | usage:
30 | TScopy_x64.exe -r -o c:\test -f c:\users\tscopy\ntuser.dat
31 |     Description: Copies only the ntuser.dat file to the c:\test directory
32 | TScopy_x64.exe -o c:\test -f c:\Windows\system32\config
33 |     Description: Copies all files in the config directory but does not copy the directories under it.
34 | TScopy_x64.exe -r -o c:\test -f c:\Windows\system32\config
35 |     Description: Copies all files and subdirectories in the config directory.
36 | TScopy_x64.exe -r -o c:\test -f c:\users\*\ntuser*,c:\Windows\system32\config
37 |     Description: Uses wildcards and lists to copy any file beginning with ntuser under each user account and recursively copies the registry hives.
38 | 
39 | 
40 | Copy protected files by parsing the MFT. Must be run with Administrator privileges
41 | 
42 | optional arguments:
43 |   -h, --help            show this help message and exit
44 |   -f FILE, --file FILE  Full path of the file or directory to be copied.
45 |                         Filenames can be grouped in a comma ',' separated
46 |                         list. Wildcard '*' is accepted.
47 |   -o OUTPUTDIR, --outputdir OUTPUTDIR
48 |                         Directory to copy files to. Copy will keep the original paths
49 |   -i, --ignore_saved_ref_nums
50 |                         The script stores MFT reference numbers and path info
51 |                         to speed up later runs. This option ignores and does
52 |                         not save the stored MFT reference numbers and paths
53 |   -r, --recursive       Recursively copies a directory. Note this only works
54 |                         with directories.
55 | ```
56 | There is a hidden option '--debug', which enables debug output.
57 | 
58 | ## Examples
59 | ```code
60 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM -o e:\outputdir
61 | ```
62 | Copies the SYSTEM registry hive to e:\outputdir.
63 | The new file will be located at e:\outputdir\windows\system32\config\SYSTEM
64 | ```code
65 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM -o e:\outputdir -i
66 | ```
67 | Copies the SYSTEM registry hive to e:\outputdir but ignores any previously cached MFT reference numbers and does not save the current cache to disk.
68 | 
69 | ```code
70 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM,c:\windows\system32\config\SOFTWARE -o e:\outputdir
71 | ```
72 | Copies the SYSTEM and SOFTWARE registry hives to e:\outputdir.
73 | 
74 | ```code
75 | TScopy_x64.exe -f c:\windows\system32\config\ -o e:\outputdir
76 | ```
77 | Copies the contents of the config directory to e:\outputdir.
78 | 
79 | ```code
80 | TScopy_x64.exe -r -f c:\windows\system32\config\ -o e:\outputdir
81 | ```
82 | Recursively copies the contents of the config directory to e:\outputdir.
83 | 
84 | ```code
85 | TScopy_x64.exe -f c:\users\*\ntuser.dat -o e:\outputdir
86 | ```
87 | Copies each user's NTUSER.DAT file to e:\outputdir.
88 | 
89 | ```code
90 | TScopy_x64.exe -f c:\users\*\ntuser.dat* -o e:\outputdir
91 | ```
92 | For each user, copies all files that begin with NTUSER.DAT to e:\outputdir.
93 | 
94 | ```code
95 | TScopy_x64.exe -f c:\users\*\AppData\Roaming\Microsoft\Windows\Recent,c:\windows\system32\config,c:\users\*\AppData\Roaming\Microsoft\Windows\PowerShell\PSReadLine\ConsoleHost_history.txt -o e:\outputdir
96 | ```
97 | For each user, copies all jump lists, registry hives, and PowerShell command history to e:\outputdir.
98 | 
99 | ## Bug Reporting Information
100 | Please report bugs in the issues section of the GitHub page.
101 | 
102 | ## Bug Fixes and Enhancements
103 | ### Version 4.0
104 | - Corrected copying of files containing sparse data. Issue #13 (Error copying c:\$extend\$usnjrnl$j)
105 | - Files are no longer read into memory before writing to disk. Writes are now performed one data run at a time, which should reduce memory usage on large files. A sketch of the approach is shown below.
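A minimal sketch of that per-run copy loop — illustrative, not TScopy's exact code; `volume` and `outfile` are hypothetical file objects, and the (offset, length) cluster runs are as yielded by `Runlist.runs()` in MFT.py below, where a sparse run carries offset 0:

```
def copy_runs(volume, outfile, runs, cluster_size, file_size):
    remaining = file_size
    for offset, length in runs:
        count = min(length * cluster_size, remaining)
        if offset == 0:                    # sparse run: no backing clusters
            outfile.write(b'\x00' * count)
        else:
            volume.seek(offset * cluster_size)
            outfile.write(volume.read(count))
        remaining -= count
        if remaining <= 0:
            break
```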
106 | ### Version 3.0
107 | - Added support for Alternate Data Streams (ADS). Requesting the base file also copies its ADS streams.
108 | - Wildcard for the drive letter (fixed drives only). Example: "\*:\$MFT" will find the $MFT on all local drives.
109 | - Logging issues. Failed copies are reported as failures again.
110 | - Filepath size limit of 256 characters removed.
111 | ### Version 2.0
112 | - Issue 1: Changed sys.exit calls to raise Exception
113 | - Issue 2: Fixed the double copying of files (full name and short name).
114 | - Issue 3: Added the ability to recursively copy a directory
115 | - Issue 4: Added support for wildcards in the path. Currently only supports *
116 | - Issue 5: Removed the hardcoded MFT size. The MFT size is now determined from the boot sector.
117 | - Issue 6: Converted the TScopy class into a singleton. This allows the class to be instantiated once and reuse the current MFT metadata object for all copies.
118 | - Issue 7: Attribute type ATTRIBUTE_LIST is now being handled.
119 | - Issue 9: Attribute type ATTRIBUTE_LIST was not handled for files. This caused a silent failure for files like the SOFTWARE registry hive.
120 | - Changes: General comments have been added to the code
121 | - Changes: Input parameters have changed. Reduced the three (3) different options --file, --list, and --directory to --file.
122 | - Changes: Backend restructuring to support new features.
123 | 
124 | ## TODO:
--------------------------------------------------------------------------------
/TScopy/BinaryParser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # This file is part of python-evtx.
3 | #
4 | # Copyright 2012, 2013 Willi Ballenthin
5 | # while at Mandiant
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | #   http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | # Version v.0.1
20 | import mmap
21 | import sys
22 | import types
23 | import struct
24 | import logging
25 | import cPickle
26 | from datetime import datetime
27 | 
28 | g_logger = logging.getLogger("ntfs.BinaryParser")
29 | 
30 | 
31 | def unpack_from(fmt, buf, off=0):
32 |     """
33 |     Shim struct.unpack_from and divert unpacking of __unpackable__ things.
34 | 
35 |     Otherwise, you'd get an exception like:
36 |       TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap
37 | 
38 |     So, we extract a true sub-buffer from the FileMap, and feed this
39 |     back into the old unpack function.
40 |     There's an extra allocation and copy, but there's no getting
41 |     around that.
42 |     """
43 |     if isinstance(buf, basestring):
44 |         return struct.unpack_from(fmt, buf, off)
45 |     elif not hasattr(buf, "__unpackable__"):
46 |         return struct.unpack_from(fmt, buf, off)
47 |     else:
48 |         size = struct.calcsize(fmt)
49 |         buf = buf[off:off + size]
50 |         return struct.unpack_from(fmt, buf, 0x0)
51 | 
52 | 
53 | def unpack(fmt, buf):
54 |     """
55 |     Like the shimmed unpack_from, but for struct.unpack.
56 |     """
57 |     if isinstance(buf, basestring):
58 |         return struct.unpack(fmt, buf)
59 |     elif not hasattr(buf, "__unpackable__"):
60 |         return struct.unpack(fmt, buf)
61 |     else:
62 |         size = struct.calcsize(fmt)
63 |         buf = buf[:size]
64 |         return struct.unpack(fmt, buf)  # struct.unpack takes no offset argument
65 | 
66 | 
67 | class Mmap(object):
68 |     """
69 |     Convenience class for opening a read-only memory map for a file path.
70 | """ 71 | def __init__(self, filename): 72 | super(Mmap, self).__init__() 73 | self._filename = filename 74 | self._f = None 75 | self._mmap = None 76 | 77 | def __enter__(self): 78 | self._f = open(self._filename, "rb") 79 | self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ) 80 | return self._mmap 81 | 82 | def __exit__(self, type, value, traceback): 83 | self._mmap.close() 84 | self._f.close() 85 | 86 | 87 | def hex_dump(src, start_addr=0): 88 | """ 89 | see: 90 | http://code.activestate.com/recipes/142812-hex-dumper/ 91 | @param src A bytestring containing the data to dump. 92 | @param start_addr An integer representing the start 93 | address of the data in whatever context it comes from. 94 | @return A string containing a classic hex dump with 16 95 | bytes per line. If start_addr is provided, then the 96 | data is interpreted as starting at this offset, and 97 | the offset column is updated accordingly. 98 | """ 99 | FILTER = ''.join([(len(repr(chr(x))) == 3) and 100 | chr(x) or 101 | '.' for x in range(256)]) 102 | length = 16 103 | result = [] 104 | 105 | remainder_start_addr = start_addr 106 | 107 | if start_addr % length != 0: 108 | base_addr = start_addr - (start_addr % length) 109 | num_spaces = (start_addr % length) 110 | num_chars = length - (start_addr % length) 111 | 112 | spaces = " ".join([" " for i in xrange(num_spaces)]) 113 | s = src[0:num_chars] 114 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 115 | printable = s.translate(FILTER) 116 | 117 | result.append("%04X %s %s %s%s\n" % 118 | (base_addr, spaces, hexa, 119 | " " * (num_spaces + 1), printable)) 120 | 121 | src = src[num_chars:] 122 | remainder_start_addr = base_addr + length 123 | 124 | for i in xrange(0, len(src), length): 125 | s = src[i:i + length] 126 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 127 | printable = s.translate(FILTER) 128 | result.append("%04X %-*s %s\n" % 129 | (remainder_start_addr + i, length * 3, 130 | hexa, printable)) 131 | 132 | return ''.join(result) 133 | 134 | 135 | class decoratorargs(object): 136 | def __new__(typ, *attr_args, **attr_kwargs): 137 | def decorator(orig_func): 138 | self = object.__new__(typ) 139 | self.__init__(orig_func, *attr_args, **attr_kwargs) 140 | return self 141 | return decorator 142 | 143 | 144 | class memoize(decoratorargs): 145 | class Node: 146 | __slots__ = ['key', 'value', 'older', 'newer'] 147 | 148 | def __init__(self, key, value, older=None, newer=None): 149 | self.key = key 150 | self.value = value 151 | self.older = older 152 | self.newer = newer 153 | 154 | def __init__(self, func, capacity=1000, 155 | keyfunc=lambda *args, **kwargs: cPickle.dumps((args, 156 | kwargs))): 157 | if not isinstance(func, property): 158 | self.func = func 159 | self.name = func.__name__ 160 | self.is_property = False 161 | else: 162 | self.func = func.fget 163 | self.name = func.fget.__name__ 164 | self.is_property = True 165 | self.capacity = capacity 166 | self.keyfunc = keyfunc 167 | self.reset() 168 | 169 | def reset(self): 170 | self.mru = self.Node(None, None) 171 | self.mru.older = self.mru.newer = self.mru 172 | self.nodes = {self.mru.key: self.mru} 173 | self.count = 1 174 | self.hits = 0 175 | self.misses = 0 176 | 177 | def __get__(self, inst, clas): 178 | self.obj = inst 179 | if self.is_property: 180 | return self.__call__() 181 | else: 182 | return self 183 | 184 | def __call__(self, *args, **kwargs): 185 | key = self.keyfunc(*args, **kwargs) 186 | try: 187 | node = self.nodes[key] 188 | except KeyError: 189 | # We have an 
entry not in the cache 190 | self.misses += 1 191 | func = types.MethodType(self.func, self.obj, self.name) 192 | value = func(*args, **kwargs) 193 | lru = self.mru.newer # Always true 194 | # If we haven't reached capacity 195 | if self.count < self.capacity: 196 | # Put it between the MRU and LRU - it'll be the new MRU 197 | node = self.Node(key, value, self.mru, lru) 198 | self.mru.newer = node 199 | 200 | lru.older = node 201 | self.mru = node 202 | self.count += 1 203 | else: 204 | # It's FULL! We'll make the LRU be the new MRU, but replace its 205 | # value first 206 | try: 207 | del self.nodes[lru.key] # This mapping is now invalid 208 | except KeyError: # HACK TODO: this may not work/leak 209 | pass 210 | lru.key = key 211 | lru.value = value 212 | self.mru = lru 213 | 214 | # Add the new mapping 215 | self.nodes[key] = self.mru 216 | return value 217 | 218 | # We have an entry in the cache 219 | self.hits += 1 220 | 221 | # If it's already the MRU, do nothing 222 | if node is self.mru: 223 | return node.value 224 | 225 | lru = self.mru.newer # Always true 226 | 227 | # If it's the LRU, update the MRU to be it 228 | if node is lru: 229 | self.mru = lru 230 | return node.value 231 | 232 | # Remove the node from the list 233 | node.older.newer = node.newer 234 | node.newer.older = node.older 235 | 236 | # Put it between MRU and LRU 237 | node.older = self.mru 238 | self.mru.newer = node 239 | 240 | node.newer = lru 241 | lru.older = node 242 | 243 | self.mru = node 244 | return node.value 245 | 246 | 247 | def align(offset, alignment): 248 | """ 249 | Return the offset aligned to the nearest greater given alignment 250 | Arguments: 251 | - `offset`: An integer 252 | - `alignment`: An integer 253 | """ 254 | if offset % alignment == 0: 255 | return offset 256 | return offset + (alignment - (offset % alignment)) 257 | 258 | 259 | def dosdate(dosdate, dostime): 260 | """ 261 | `dosdate`: 2 bytes, little endian. 262 | `dostime`: 2 bytes, little endian. 263 | returns: datetime.datetime or datetime.datetime.min on error 264 | """ 265 | try: 266 | t = ord(dosdate[1]) << 8 267 | t |= ord(dosdate[0]) 268 | day = t & 0b0000000000011111 269 | month = (t & 0b0000000111100000) >> 5 270 | year = (t & 0b1111111000000000) >> 9 271 | year += 1980 272 | 273 | t = ord(dostime[1]) << 8 274 | t |= ord(dostime[0]) 275 | sec = t & 0b0000000000011111 276 | sec *= 2 277 | minute = (t & 0b0000011111100000) >> 5 278 | hour = (t & 0b1111100000000000) >> 11 279 | 280 | return datetime.datetime(year, month, day, hour, minute, sec) 281 | except: 282 | return datetime.datetime.min 283 | 284 | 285 | def parse_filetime(qword): 286 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 287 | return datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 288 | 289 | 290 | class BinaryParserException(Exception): 291 | """ 292 | Base Exception class for binary parsing. 293 | """ 294 | def __init__(self, value): 295 | """ 296 | Constructor. 297 | Arguments: 298 | - `value`: A string description. 299 | """ 300 | super(BinaryParserException, self).__init__() 301 | self._value = value 302 | 303 | def __repr__(self): 304 | return "BinaryParserException(%r)" % (self._value) 305 | 306 | def __str__(self): 307 | return "Binary Parser Exception: %s" % (self._value) 308 | 309 | 310 | class ParseException(BinaryParserException): 311 | """ 312 | An exception to be thrown during binary parsing, such as 313 | when an invalid header is encountered. 
314 | """ 315 | def __init__(self, value): 316 | """ 317 | Constructor. 318 | Arguments: 319 | - `value`: A string description. 320 | """ 321 | super(ParseException, self).__init__(value) 322 | 323 | def __repr__(self): 324 | return "ParseException(%r)" % (self._value) 325 | 326 | def __str__(self): 327 | return "Parse Exception(%s)" % (self._value) 328 | 329 | 330 | class OverrunBufferException(ParseException): 331 | def __init__(self, readOffs, bufLen): 332 | tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen)) 333 | super(ParseException, self).__init__(tvalue) 334 | 335 | def __repr__(self): 336 | return "OverrunBufferException(%r)" % (self._value) 337 | 338 | def __str__(self): 339 | return "Tried to parse beyond the end of the file (%s)" % \ 340 | (self._value) 341 | 342 | 343 | def read_byte(buf, offset): 344 | """ 345 | Returns a little-endian unsigned byte from the relative offset of the given buffer. 346 | Arguments: 347 | - `buf`: The buffer from which to read the value. 348 | - `offset`: The relative offset from the start of the block. 349 | Throws: 350 | - `OverrunBufferException` 351 | """ 352 | try: 353 | return unpack_from(" 1: 438 | raise "Cannot specify both `length` and `count`." 439 | 440 | if offset is None: 441 | offset = self._implicit_offset 442 | 443 | basic_sizes = { 444 | "byte": 1, 445 | "int8": 1, 446 | "word": 2, 447 | "word_be": 2, 448 | "int16": 2, 449 | "dword": 4, 450 | "dword_be": 4, 451 | "int32": 4, 452 | "qword": 8, 453 | "int64": 8, 454 | "float": 4, 455 | "double": 8, 456 | "dosdate": 4, 457 | "filetime": 8, 458 | "systemtime": 8, 459 | "guid": 16, 460 | } 461 | 462 | handler = None 463 | 464 | if isinstance(type_, type): 465 | if not issubclass(type_, Nestable): 466 | raise TypeError("Invalid nested structure") 467 | 468 | typename = type_.__name__ 469 | 470 | if count == 0: 471 | def no_class_handler(): 472 | return 473 | handler = no_class_handler 474 | elif is_generator: 475 | def many_class_handler(): 476 | ofs = offset 477 | for _ in range(count): 478 | r = type_(self._buf, self.absolute_offset(ofs), self) 479 | ofs += len(r) 480 | yield r 481 | handler = many_class_handler 482 | 483 | if hasattr(type_, "structure_size"): 484 | ofs = offset 485 | for _ in range(count): 486 | ofs += type_.structure_size(self._buf, self.absolute_offset(ofs), self) 487 | self._implicit_offset = ofs 488 | else: 489 | ofs = offset 490 | for _ in range(count): 491 | r = type_(self._buf, self.absolute_offset(ofs), self) 492 | ofs += len(r) 493 | self._implicit_offset = ofs 494 | else: 495 | # TODO(wb): this needs to cache/memoize 496 | def class_handler(): 497 | return type_(self._buf, self.absolute_offset(offset), self) 498 | handler = class_handler 499 | 500 | if hasattr(type_, "structure_size"): 501 | size = type_.structure_size(self._buf, self.absolute_offset(offset), self) 502 | self._implicit_offset = offset + size 503 | else: 504 | temp = type_(self._buf, self.absolute_offset(offset), self) 505 | 506 | self._implicit_offset = offset + len(temp) 507 | elif isinstance(type_, basestring): 508 | typename = type_ 509 | 510 | if count == 0: 511 | def no_basic_handler(): 512 | return 513 | handler = no_basic_handler 514 | elif is_generator: 515 | # length must be in basic_sizes 516 | def many_basic_handler(): 517 | ofs = offset 518 | f = getattr(self, "unpack_" + type_) 519 | for _ in range(count): 520 | yield f(ofs) 521 | ofs += basic_sizes[type_] 522 | handler = many_basic_handler 523 | 524 | self._implicit_offset = offset + count * 
basic_sizes[type_] 525 | else: 526 | if length is None: 527 | def basic_no_length_handler(): 528 | f = getattr(self, "unpack_" + type_) 529 | return f(offset) 530 | handler = basic_no_length_handler 531 | 532 | if type_ in basic_sizes: 533 | self._implicit_offset = offset + basic_sizes[type_] 534 | elif type_ == "binary": 535 | self._implicit_offset = offset + length 536 | elif type_ == "string" and length is not None: 537 | self._implicit_offset = offset + length 538 | elif type_ == "wstring" and length is not None: 539 | self._implicit_offset = offset + (2 * length) 540 | elif "string" in type_ and length is None: 541 | raise ParseException("Implicit offset not supported for dynamic length strings") 542 | else: 543 | raise ParseException("Implicit offset not supported for type: " + type_) 544 | else: 545 | def basic_length_handler(): 546 | f = getattr(self, "unpack_" + type_) 547 | return f(offset, length) 548 | handler = basic_length_handler 549 | 550 | if type_ == "wstring": 551 | self._implicit_offset = offset + (2 * length) 552 | else: 553 | self._implicit_offset = offset + length 554 | 555 | setattr(self, name, handler) 556 | setattr(self, "_off_" + name, offset) 557 | self.add_explicit_field(offset, typename, name, length, count) 558 | 559 | def add_explicit_field(self, offset, typename, name, length=None, count=1): 560 | """ 561 | The `Block` class tracks the fields that have been added so that you can 562 | pretty print the structure. If there are other fields a subclass 563 | parses, use `add_explicit_field` to include them in the pretty printing. 564 | @type offset: int 565 | @param offset: The offset at which the field begins. 566 | @type typename: str or Block subclass 567 | @param typename: The type of the value of the field. 568 | @type name: str 569 | @param name: The name of the field. 570 | @type length: int 571 | @param length: An explicit length for the field. 572 | @type count: int 573 | @param count: The number of repetitions for the field. 574 | @rtype: None 575 | @return: None 576 | """ 577 | if type(typename) == type: 578 | typename = typename.__name__ 579 | self._declared_fields.append({ 580 | "offset": offset, 581 | "type": typename, 582 | "name": name, 583 | "length": length, 584 | "count": count, 585 | }) 586 | 587 | def get_all_string(self, indent=0): 588 | """ 589 | Get a nicely formatted, nested string of the contents of this structure 590 | and any sub-structures. If a sub-structure has a method `.string()`, then 591 | this method will use it to represent its value. 592 | Implementation note, can't look for `__str__`, because everything has this. 593 | @type indent: int 594 | @param indent: The level of nesting this objects has. 595 | @rtype: str 596 | @return A nicely formatted string that describes this structure. 
597 | """ 598 | ret = "" 599 | for field in self._declared_fields: 600 | v = getattr(self, field["name"])() 601 | if isinstance(v, Block): 602 | if hasattr(v, "string"): 603 | ret += "%s%s (%s)%s\t%s\n" % \ 604 | (" " * indent, hex(field["offset"]), field["type"], 605 | field["name"], v.string()) 606 | else: 607 | ret += "%s%s (%s)%s\n" % \ 608 | (" " * indent, hex(field["offset"]), field["type"], 609 | field["name"]) 610 | ret += v.get_all_string(indent + 1) 611 | elif isinstance(v, types.GeneratorType): 612 | ret += "%s%s (%s[])%s\n" % (" " * indent, hex(field["offset"]), field["type"], field["name"],) 613 | for i, j in enumerate(v): 614 | ret += "%s[%d] (%s) " % (" " * (indent + 1), i, field["type"]) 615 | if hasattr(j, "get_all_string"): 616 | ret += "\n" + j.get_all_string(indent + 2) 617 | else: 618 | ret += str(j) + "\n" 619 | else: 620 | if isinstance(v, int): 621 | v = hex(v) 622 | ret += "%s%s (%s)%s\t%s\n" % \ 623 | (" " * indent, hex(field["offset"]), field["type"], 624 | field["name"], str(v)) 625 | return ret 626 | 627 | def current_field_offset(self): 628 | return self._implicit_offset 629 | 630 | def unpack_byte(self, offset): 631 | """ 632 | Returns a little-endian unsigned byte from the relative offset. 633 | Arguments: 634 | - `offset`: The relative offset from the start of the block. 635 | Throws: 636 | - `OverrunBufferException` 637 | """ 638 | return read_byte(self._buf, self._offset + offset) 639 | 640 | def unpack_int8(self, offset): 641 | """ 642 | Returns a little-endian signed byte from the relative offset. 643 | Arguments: 644 | - `offset`: The relative offset from the start of the block. 645 | Throws: 646 | - `OverrunBufferException` 647 | """ 648 | o = self._offset + offset 649 | try: 650 | return unpack_from("H", self._buf, o)[0] 677 | except struct.error: 678 | raise OverrunBufferException(o, len(self._buf)) 679 | 680 | def unpack_int16(self, offset): 681 | """ 682 | Returns a little-endian signed WORD (2 bytes) from the 683 | relative offset. 684 | Arguments: 685 | - `offset`: The relative offset from the start of the block. 686 | Throws: 687 | - `OverrunBufferException` 688 | """ 689 | o = self._offset + offset 690 | try: 691 | return unpack_from("I", self._buf, o)[0] 727 | except struct.error: 728 | raise OverrunBufferException(o, len(self._buf)) 729 | 730 | def unpack_int32(self, offset): 731 | """ 732 | Returns a little-endian signed integer (4 bytes) from the 733 | relative offset. 734 | Arguments: 735 | - `offset`: The relative offset from the start of the block. 736 | Throws: 737 | - `OverrunBufferException` 738 | """ 739 | o = self._offset + offset 740 | try: 741 | return unpack_from(" recent_date and \ 192 | fn.accessed_time() > recent_date and \ 193 | fn.changed_time() > recent_date and \ 194 | fn.created_time() > recent_date and \ 195 | fn.modified_time() < future_date and \ 196 | fn.accessed_time() < future_date and \ 197 | fn.changed_time() < future_date and \ 198 | fn.created_time() < future_date 199 | except ValueError: 200 | return False 201 | 202 | 203 | class SII_INDEX_ENTRY(Block, Nestable): 204 | """ 205 | Index entry for the $SECURE:$SII index. 
206 | """ 207 | 208 | def __init__(self, buf, offset, parent): 209 | super(SII_INDEX_ENTRY, self).__init__(buf, offset) 210 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 211 | self.declare_field("dword", "security_id") 212 | 213 | @staticmethod 214 | def structure_size(buf, offset, parent): 215 | return BinaryParser.read_word(buf, offset + 0x8) 216 | 217 | def __len__(self): 218 | return self.header().length() 219 | 220 | def is_valid(self): 221 | # TODO(wb): test 222 | return 1 < self.header().length() < 0x30 and \ 223 | 1 < self.header().key_lenght() < 0x20 224 | 225 | 226 | class SDH_INDEX_ENTRY(Block, Nestable): 227 | """ 228 | Index entry for the $SECURE:$SDH index. 229 | """ 230 | 231 | def __init__(self, buf, offset, parent): 232 | super(SDH_INDEX_ENTRY, self).__init__(buf, offset) 233 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 234 | self.declare_field("dword", "hash") 235 | self.declare_field("dword", "security_id") 236 | 237 | @staticmethod 238 | def structure_size(buf, offset, parent): 239 | return BinaryParser.read_word(buf, offset + 0x8) 240 | 241 | def __len__(self): 242 | return self.header().length() 243 | 244 | def is_valid(self): 245 | # TODO(wb): test 246 | return 1 < self.header().length() < 0x30 and \ 247 | 1 < self.header().key_lenght() < 0x20 248 | 249 | 250 | class INDEX_HEADER_FLAGS: 251 | SMALL_INDEX = 0x0 # MFT: INDX_ROOT only 252 | LARGE_INDEX = 0x1 # MFT: requires INDX_ALLOCATION 253 | LEAF_NODE = 0x1 254 | INDEX_NODE = 0x2 255 | NODE_MASK = 0x1 256 | 257 | 258 | class INDEX_HEADER(Block, Nestable): 259 | def __init__(self, buf, offset, parent): 260 | super(INDEX_HEADER, self).__init__(buf, offset) 261 | self.declare_field("dword", "entries_offset", 0x0) 262 | self.declare_field("dword", "index_length") 263 | self.declare_field("dword", "allocated_size") 264 | self.declare_field("byte", "index_header_flags") # see INDEX_HEADER_FLAGS 265 | # then 3 bytes padding/reserved 266 | 267 | @staticmethod 268 | def structure_size(buf, offset, parent): 269 | return 0x1C 270 | 271 | def __len__(self): 272 | return 0x1C 273 | 274 | def is_small_index(self): 275 | return self.index_header_flags() & INDEX_HEADER_FLAGS.SMALL_INDEX 276 | 277 | def is_large_index(self): 278 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LARGE_INDEX 279 | 280 | def is_leaf_node(self): 281 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LEAF_NODE 282 | 283 | def is_index_node(self): 284 | return self.index_header_flags() & INDEX_HEADER_FLAGS.INDEX_NODE 285 | 286 | def is_NODE_MASK(self): 287 | return self.index_header_flags() & INDEX_HEADER_FLAGS.NODE_MASK 288 | 289 | 290 | class INDEX(Block, Nestable): 291 | def __init__(self, buf, offset, parent, index_entry_class): 292 | self._INDEX_ENTRY = index_entry_class 293 | super(INDEX, self).__init__(buf, offset) 294 | self.declare_field(INDEX_HEADER, "header", 0x0) 295 | self.add_explicit_field(self.header().entries_offset(), 296 | INDEX_ENTRY, "entries") 297 | slack_start = self.header().entries_offset() + self.header().index_length() 298 | # TODO: reenable 299 | # self.add_explicit_field(slack_start, INDEX_ENTRY, "slack_entries") 300 | 301 | @staticmethod 302 | def structure_size(buf, offset, parent): 303 | return BinaryParser.read_dword(buf, offset + 0x8) 304 | 305 | def __len__(self): 306 | return self.header().allocated_size() 307 | 308 | def entries(self): 309 | """ 310 | A generator that returns each INDEX_ENTRY associated with this node. 
311 | """ 312 | offset = self.header().entries_offset() 313 | if offset == 0: 314 | return 315 | while offset <= self.header().index_length() - 0x52: 316 | e = self._INDEX_ENTRY(self._buf, self.offset() + offset, self) 317 | offset += len(e) 318 | yield e 319 | 320 | def slack_entries(self): 321 | """ 322 | A generator that yields INDEX_ENTRYs found in the slack space 323 | associated with this header. 324 | """ 325 | offset = self.header().index_length() 326 | try: 327 | while offset <= self.header().allocated_size() - 0x52: 328 | try: 329 | g_logger.debug("Trying to find slack entry at %s.", hex(offset)) 330 | e = self._INDEX_ENTRY(self._buf, offset, self) 331 | if e.is_valid(): 332 | g_logger.debug("Slack entry is valid.") 333 | offset += len(e) or 1 334 | yield e 335 | else: 336 | g_logger.debug("Slack entry is invalid.") 337 | # TODO(wb): raise a custom exception 338 | raise BinaryParser.ParseException("Not a deleted entry") 339 | except BinaryParser.ParseException: 340 | g_logger.debug("Scanning one byte forward.") 341 | offset += 1 342 | except struct.error: 343 | logging.debug("Slack entry parsing overran buffer.") 344 | pass 345 | 346 | 347 | class INDEX_ROOT(Block, Nestable): 348 | def __init__(self, buf, offset, parent=None): 349 | super(INDEX_ROOT, self).__init__(buf, offset) 350 | self.declare_field("dword", "type", 0x0) 351 | self.declare_field("dword", "collation_rule") 352 | self.declare_field("dword", "index_record_size_bytes") 353 | self.declare_field("byte", "index_record_size_clusters") 354 | self.declare_field("byte", "unused1") 355 | self.declare_field("byte", "unused2") 356 | self.declare_field("byte", "unused3") 357 | self._index_offset = self.current_field_offset() 358 | self.add_explicit_field(self._index_offset, INDEX, "index") 359 | 360 | def index(self): 361 | return INDEX(self._buf, self._offset + self._index_offset, 362 | self, MFT_INDEX_ENTRY) 363 | 364 | @staticmethod 365 | def structure_size(buf, offset, parent): 366 | return 0x10 + INDEX.structure_size(buf, offset + 0x10, parent) 367 | 368 | def __len__(self): 369 | return 0x10 + len(self.index()) 370 | 371 | 372 | class NTATTR_STANDARD_INDEX_HEADER(Block): 373 | def __init__(self, buf, offset, parent): 374 | super(NTATTR_STANDARD_INDEX_HEADER, self).__init__(buf, offset) 375 | self.declare_field("dword", "entry_list_start", 0x0) 376 | self.declare_field("dword", "entry_list_end") 377 | self.declare_field("dword", "entry_list_allocation_end") 378 | self.declare_field("dword", "flags") 379 | self.declare_field("binary", "list_buffer", \ 380 | self.entry_list_start(), 381 | self.entry_list_allocation_end() - self.entry_list_start()) 382 | 383 | def entries(self): 384 | """ 385 | A generator that returns each INDX entry associated with this node. 386 | """ 387 | offset = self.entry_list_start() 388 | if offset == 0: 389 | return 390 | 391 | # 0x52 is an approximate size of a small index entry 392 | while offset <= self.entry_list_end() - 0x52: 393 | e = IndexEntry(self._buf, self.offset() + offset, self) 394 | offset += e.length() 395 | yield e 396 | 397 | def slack_entries(self): 398 | """ 399 | A generator that yields INDX entries found in the slack space 400 | associated with this header. 
401 | """ 402 | offset = self.entry_list_end() 403 | try: 404 | # 0x52 is an approximate size of a small index entry 405 | while offset <= self.entry_list_allocation_end() - 0x52: 406 | try: 407 | e = SlackIndexEntry(self._buf, offset, self) 408 | if e.is_valid(): 409 | offset += e.length() or 1 410 | yield e 411 | else: 412 | # TODO(wb): raise a custom exception 413 | raise BinaryParser.ParseException("Not a deleted entry") 414 | except BinaryParser.ParseException: 415 | # ensure we're always moving forward 416 | offset += 1 417 | except struct.error: 418 | pass 419 | 420 | 421 | class IndexRootHeader(Block): 422 | def __init__(self, buf, offset, parent): 423 | super(IndexRootHeader, self).__init__(buf, offset) 424 | self.declare_field("dword", "type", 0x0) 425 | self.declare_field("dword", "collation_rule") 426 | self.declare_field("dword", "index_record_size_bytes") 427 | self.declare_field("byte", "index_record_size_clusters") 428 | self.declare_field("byte", "unused1") 429 | self.declare_field("byte", "unused2") 430 | self.declare_field("byte", "unused3") 431 | self._node_header_offset = self.current_field_offset() 432 | 433 | def node_header(self): 434 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 435 | self.offset() + self._node_header_offset, 436 | self) 437 | 438 | 439 | class IndexRecordHeader(FixupBlock): 440 | def __init__(self, buf, offset, parent): 441 | super(IndexRecordHeader, self).__init__(buf, offset, parent) 442 | self.declare_field("dword", "magic", 0x0) 443 | self.declare_field("word", "usa_offset") 444 | self.declare_field("word", "usa_count") 445 | self.declare_field("qword", "lsn") 446 | self.declare_field("qword", "vcn") 447 | self._node_header_offset = self.current_field_offset() 448 | self.fixup(self.usa_count(), self.usa_offset()) 449 | 450 | def node_header(self): 451 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 452 | self.offset() + self._node_header_offset, 453 | self) 454 | 455 | 456 | class INDEX_BLOCK(FixupBlock): 457 | def __init__(self, buf, offset, parent=None): 458 | super(INDEX_BLOCK, self).__init__(buf, offset, parent) 459 | self.declare_field("dword", "magic", 0x0) 460 | self.declare_field("word", "usa_offset") 461 | self.declare_field("word", "usa_count") 462 | self.declare_field("qword", "lsn") 463 | self.declare_field("qword", "vcn") 464 | self._index_offset = self.current_field_offset() 465 | self.add_explicit_field(self._index_offset, INDEX, "index") 466 | self.fixup(self.usa_count(), self.usa_offset()) 467 | 468 | def index(self): 469 | return INDEX(self._buf, self._offset + self._index_offset, 470 | self, MFT_INDEX_ENTRY) 471 | 472 | @staticmethod 473 | def structure_size(buf, offset, parent): 474 | return 0x30 + INDEX.structure_size(buf, offset + 0x10, parent) 475 | 476 | def __len__(self): 477 | return 0x1000 478 | 479 | 480 | class INDEX_ALLOCATION(FixupBlock): 481 | def __init__(self, buf, offset, parent=None): 482 | super(INDEX_ALLOCATION, self).__init__(buf, offset, parent) 483 | self.add_explicit_field(0, INDEX_BLOCK, "blocks") 484 | 485 | @staticmethod 486 | def guess_num_blocks(buf, offset): 487 | count = 0 488 | # TODO: don't hardcode things 489 | BLOCK_SIZE = 0x1000 490 | try: 491 | while BinaryParser.read_dword(buf, offset) == 0x58444e49: # "INDX" 492 | offset += BLOCK_SIZE 493 | count += 1 494 | except (IndexError, BinaryParser.OverrunBufferException): 495 | return count 496 | return count 497 | 498 | def blocks(self): 499 | for i in xrange(INDEX_ALLOCATION.guess_num_blocks(self._buf, self.offset())): 500 | # TODO: don't 
hardcode things 501 | yield INDEX_BLOCK(self._buf, self._offset + 0x1000 * i) 502 | 503 | @staticmethod 504 | def structure_size(buf, offset, parent): 505 | # TODO: don't hardcode things 506 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(buf, offset) 507 | 508 | def __len__(self): 509 | # TODO: don't hardcode things 510 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(self._buf, self._offset) 511 | 512 | 513 | class IndexEntry(Block): 514 | def __init__(self, buf, offset, parent): 515 | super(IndexEntry, self).__init__(buf, offset) 516 | self.declare_field("qword", "mft_reference", 0x0) 517 | self.declare_field("word", "length") 518 | self.declare_field("word", "filename_information_length") 519 | self.declare_field("dword", "flags") 520 | self.declare_field("binary", "filename_information_buffer", \ 521 | self.current_field_offset(), 522 | self.filename_information_length()) 523 | self.declare_field("qword", "child_vcn", 524 | BinaryParser.align(self.current_field_offset(), 0x8)) 525 | 526 | def filename_information(self): 527 | return FilenameAttribute(self._buf, 528 | self.offset() + self._off_filename_information_buffer, 529 | self) 530 | 531 | 532 | class StandardInformationFieldDoesNotExist(Exception): 533 | def __init__(self, msg): 534 | self._msg = msg 535 | 536 | def __str__(self): 537 | return "Standard Information attribute field does not exist: %s" % (self._msg) 538 | 539 | 540 | class StandardInformation(Block): 541 | # TODO(wb): implement sizing so we can make this nestable 542 | def __init__(self, buf, offset, parent): 543 | super(StandardInformation, self).__init__(buf, offset) 544 | self.declare_field("filetime", "created_time", 0x0) 545 | self.declare_field("filetime", "modified_time") 546 | self.declare_field("filetime", "changed_time") 547 | self.declare_field("filetime", "accessed_time") 548 | self.declare_field("dword", "attributes") 549 | self.declare_field("binary", "reserved", self.current_field_offset(), 0xC) 550 | # self.declare_field("dword", "owner_id", 0x30) # Win2k+, NTFS 3.x 551 | # self.declare_field("dword", "security_id") # Win2k+, NTFS 3.x 552 | # self.declare_field("qword", "quota_charged") # Win2k+, NTFS 3.x 553 | # self.declare_field("qword", "usn") # Win2k+, NTFS 3.x 554 | 555 | # Can't implement this unless we know the NTFS version in use 556 | # @staticmethod 557 | # def structure_size(buf, offset, parent): 558 | # return 0x42 + (read_byte(buf, offset + 0x40) * 2) 559 | 560 | # Can't implement this unless we know the NTFS version in use 561 | # def __len__(self): 562 | # return 0x42 + (self.filename_length() * 2) 563 | 564 | def owner_id(self): 565 | """ 566 | This is an explicit method because it may not exist in OSes under Win2k 567 | 568 | @raises StandardInformationFieldDoesNotExist 569 | """ 570 | try: 571 | return self.unpack_dword(0x30) 572 | except BinaryParser.OverrunBufferException: 573 | raise StandardInformationFieldDoesNotExist("Owner ID") 574 | 575 | def security_id(self): 576 | """ 577 | This is an explicit method because it may not exist in OSes under Win2k 578 | 579 | @raises StandardInformationFieldDoesNotExist 580 | """ 581 | try: 582 | return self.unpack_dword(0x34) 583 | except BinaryParser.OverrunBufferException: 584 | raise StandardInformationFieldDoesNotExist("Security ID") 585 | 586 | def quota_charged(self): 587 | """ 588 | This is an explicit method because it may not exist in OSes under Win2k 589 | 590 | @raises StandardInformationFieldDoesNotExist 591 | """ 592 | try: 593 | return self.unpack_dword(0x38) 594 | 
except BinaryParser.OverrunBufferException: 595 | raise StandardInformationFieldDoesNotExist("Quota Charged") 596 | 597 | def usn(self): 598 | """ 599 | This is an explicit method because it may not exist in OSes under Win2k 600 | 601 | @raises StandardInformationFieldDoesNotExist 602 | """ 603 | try: 604 | return self.unpack_dword(0x40) 605 | except BinaryParser.OverrunBufferException: 606 | raise StandardInformationFieldDoesNotExist("USN") 607 | 608 | 609 | class Attribute_List(Block, Nestable): 610 | def __init__(self, buf, offset, size, logger): 611 | self.__list = [] 612 | csize = 0 613 | while csize < size: 614 | lEntry = Attribute_List_Entry(buf[csize:], 0, logger) 615 | self.__list.append(lEntry) 616 | csize += lEntry.record_length() 617 | 618 | def get(self): 619 | return self.__list 620 | 621 | 622 | class Attribute_List_Entry(Block, Nestable): 623 | def __init__(self, buf, offset, logger): 624 | super(Attribute_List_Entry, self).__init__(buf, offset) 625 | self.declare_field("dword", "type", 0x0) 626 | self.declare_field("word", "record_length", 0x4) 627 | self.declare_field("byte", "nameLength", 0x6) 628 | self.declare_field("byte", "offsetToName", 0x7) 629 | self.declare_field("qword", "startVCN", 0x8) 630 | self.declare_field("qword", "baseFileReference", 0x10) 631 | self.declare_field("word", "attributeID", 0x18) 632 | self.declare_field("wstring", "name", 0x1a, 2 * self.nameLength()) 633 | 634 | def __len__(self): 635 | return self.size() 636 | 637 | 638 | class FilenameAttribute(Block, Nestable): 639 | def __init__(self, buf, offset, parent): 640 | super(FilenameAttribute, self).__init__(buf, offset) 641 | self.declare_field("qword", "mft_parent_reference", 0x0) 642 | self.declare_field("filetime", "created_time") 643 | self.declare_field("filetime", "modified_time") 644 | self.declare_field("filetime", "changed_time") 645 | self.declare_field("filetime", "accessed_time") 646 | self.declare_field("qword", "physical_size") 647 | self.declare_field("qword", "logical_size") 648 | self.declare_field("dword", "flags") 649 | self.declare_field("dword", "reparse_value") 650 | self.declare_field("byte", "filename_length") 651 | self.declare_field("byte", "filename_type") 652 | self.declare_field("wstring", "filename", 0x42, self.filename_length()) 653 | 654 | @staticmethod 655 | def structure_size(buf, offset, parent): 656 | return 0x42 + (BinaryParser.read_byte(buf, offset + 0x40) * 2) 657 | 658 | def __len__(self): 659 | return 0x42 + (self.filename_length() * 2) 660 | 661 | 662 | class SlackIndexEntry(IndexEntry): 663 | def __init__(self, buf, offset, parent): 664 | """ 665 | Constructor. 666 | Arguments: 667 | - `buf`: Byte string containing NTFS INDX file 668 | - `offset`: The offset into the buffer at which the block starts. 669 | - `parent`: The parent NTATTR_STANDARD_INDEX_HEADER block, 670 | which links to this block. 
671 | """ 672 | super(SlackIndexEntry, self).__init__(buf, offset, parent) 673 | 674 | def is_valid(self): 675 | # this is a bit of a mess, but it should work 676 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 677 | future_date = datetime(2025, 1, 1, 0, 0, 0) 678 | try: 679 | fn = self.filename_information() 680 | except: 681 | return False 682 | if not fn: 683 | return False 684 | try: 685 | return fn.modified_time() > recent_date and \ 686 | fn.accessed_time() > recent_date and \ 687 | fn.changed_time() > recent_date and \ 688 | fn.created_time() > recent_date and \ 689 | fn.modified_time() < future_date and \ 690 | fn.accessed_time() < future_date and \ 691 | fn.changed_time() < future_date and \ 692 | fn.created_time() < future_date 693 | except ValueError: 694 | return False 695 | 696 | 697 | class Runentry(Block, Nestable): 698 | def __init__(self, buf, offset, parent): 699 | super(Runentry, self).__init__(buf, offset) 700 | self.declare_field("byte", "header") 701 | self._offset_length = self.header() >> 4 702 | self._length_length = self.header() & 0x0F 703 | self.declare_field("binary", 704 | "length_binary", 705 | self.current_field_offset(), self._length_length) 706 | self.declare_field("binary", 707 | "offset_binary", 708 | self.current_field_offset(), self._offset_length) 709 | 710 | @staticmethod 711 | def structure_size(buf, offset, parent): 712 | b = BinaryParser.read_byte(buf, offset) 713 | return (b >> 4) + (b & 0x0F) + 1 714 | 715 | def __len__(self): 716 | return 0x1 + (self._length_length + self._offset_length) 717 | 718 | def is_valid(self): 719 | return self._offset_length > 0 and self._length_length > 0 720 | 721 | def is_sparsed(self): 722 | return self._offset_length == 0 723 | 724 | def lsb2num(self, binary): 725 | count = 0 726 | ret = 0 727 | for b in binary: 728 | ret += ord(b) << (8 * count) 729 | count += 1 730 | return ret 731 | 732 | def lsb2signednum(self, binary): 733 | count = 0 734 | ret = 0 735 | working = [] 736 | 737 | is_negative = (ord(binary[-1]) & (1 << 7) != 0) 738 | if is_negative: 739 | working = [ord(b) ^ 0xFF for b in binary] 740 | else: 741 | working = [ord(b) for b in binary] 742 | for b in working: 743 | ret += b << (8 * count) 744 | count += 1 745 | if is_negative: 746 | ret += 1 747 | ret *= -1 748 | return ret 749 | 750 | def offset(self): 751 | # TODO(wb): make this run_offset 752 | if self.offset_binary() == "": 753 | return 0 754 | return self.lsb2signednum(self.offset_binary()) 755 | 756 | def length(self): 757 | # TODO(wb): make this run_offset 758 | return self.lsb2num(self.length_binary()) 759 | 760 | 761 | class Runlist(Block): 762 | def __init__(self, buf, offset, parent): 763 | super(Runlist, self).__init__(buf, offset) 764 | 765 | @staticmethod 766 | def structure_size(buf, offset, parent): 767 | length = 0 768 | while True: 769 | b = BinaryParser.read_byte(buf, offset + length) 770 | length += 1 771 | if b == 0: 772 | return length 773 | 774 | length += (b >> 4) + (b & 0x0F) 775 | 776 | def __len__(self): 777 | return sum(map(len, self._entries())) 778 | 779 | def _entries(self, length=None): 780 | ret = [] 781 | offset = self.offset() 782 | entry = Runentry(self._buf, offset, self) 783 | 784 | while entry.header() != 0 and \ 785 | (not length or offset < self.offset() + length) and \ 786 | (entry.is_valid() or entry.is_sparsed()): 787 | ret.append(entry) 788 | offset += len(entry) 789 | entry = Runentry(self._buf, offset, self) 790 | return ret 791 | 792 | def runs(self, length=None): 793 | """ 794 | Yields tuples (volume 
offset, length). 795 | Recall that the entries are relative to one another 796 | """ 797 | last_offset = 0 798 | for e in self._entries(length=length): 799 | current_offset = 0 800 | if not e.offset() == 0: 801 | current_offset = last_offset + e.offset() 802 | current_length = e.length() 803 | if not e.offset() == 0: 804 | last_offset = current_offset 805 | yield (current_offset, current_length) 806 | 807 | 808 | class ATTR_TYPE: 809 | STANDARD_INFORMATION = 0x10 810 | ATTRIBUTE_LIST = 0x20 811 | FILENAME_INFORMATION = 0x30 812 | DATA = 0x80 813 | INDEX_ROOT = 0x90 814 | INDEX_ALLOCATION = 0xA0 815 | UTILITY_STREAM = 0x100 816 | 817 | 818 | class Attribute(Block, Nestable): 819 | TYPES = { 820 | 16: "$STANDARD INFORMATION", 821 | 32: "$ATTRIBUTE LIST", 822 | 48: "$FILENAME INFORMATION", 823 | 64: "$OBJECT ID/$VOLUME VERSION", 824 | 80: "$SECURITY DESCRIPTOR", 825 | 96: "$VOLUME NAME", 826 | 112: "$VOLUME INFORMATION", 827 | 128: "$DATA", 828 | 144: "$INDEX ROOT", 829 | 160: "$INDEX ALLOCATION", 830 | 176: "$BITMAP", 831 | 192: "$SYMBOLIC LINK", 832 | 208: "$REPARSE POINT/$EA INFORMATION", 833 | 224: "$EA", 834 | 256: "$LOGGED UTILITY STREAM", 835 | } 836 | 837 | FLAGS = { 838 | 0x01: "readonly", 839 | 0x02: "hidden", 840 | 0x04: "system", 841 | 0x08: "unused-dos", 842 | 0x10: "directory-dos", 843 | 0x20: "archive", 844 | 0x40: "device", 845 | 0x80: "normal", 846 | 0x100: "temporary", 847 | 0x200: "sparse", 848 | 0x400: "reparse-point", 849 | 0x800: "compressed", 850 | 0x1000: "offline", 851 | 0x2000: "not-indexed", 852 | 0x4000: "encrypted", 853 | 0x10000000: "has-indx", 854 | 0x20000000: "has-view-index", 855 | } 856 | 857 | def __init__(self, buf, offset, parent): 858 | super(Attribute, self).__init__(buf, offset) 859 | self.declare_field("dword", "type") 860 | self.declare_field("dword", "size") # this value must rounded up to 0x8 byte alignment 861 | self.declare_field("byte", "non_resident") 862 | self.declare_field("byte", "name_length") 863 | self.declare_field("word", "name_offset") 864 | self.declare_field("word", "flags") 865 | self.declare_field("word", "instance") 866 | if self.non_resident() > 0: 867 | self.declare_field("qword", "lowest_vcn", 0x10) 868 | self.declare_field("qword", "highest_vcn") 869 | self.declare_field("word", "runlist_offset") 870 | self.declare_field("byte", "compression_unit") 871 | self.declare_field("byte", "reserved1") 872 | self.declare_field("byte", "reserved2") 873 | self.declare_field("byte", "reserved3") 874 | self.declare_field("byte", "reserved4") 875 | self.declare_field("byte", "reserved5") 876 | self.declare_field("qword", "allocated_size") 877 | self.declare_field("qword", "data_size") 878 | self.declare_field("qword", "initialized_size") 879 | self.declare_field("qword", "compressed_size") 880 | else: 881 | self.declare_field("dword", "value_length", 0x10) 882 | self.declare_field("word", "value_offset") 883 | self.declare_field("byte", "value_flags") 884 | self.declare_field("byte", "reserved") 885 | self.declare_field("binary", "value", 886 | self.value_offset(), self.value_length()) 887 | 888 | @staticmethod 889 | def structure_size(buf, offset, parent): 890 | s = BinaryParser.read_dword(buf, offset + 0x4) 891 | return s + (8 - (s % 8)) 892 | 893 | def __len__(self): 894 | return self.size() 895 | 896 | def __str__(self): 897 | return "%s" % (Attribute.TYPES[self.type()]) 898 | 899 | def runlist(self): 900 | return Runlist(self._buf, self.offset() + self.runlist_offset(), self) 901 | 902 | def size(self): 903 | s = 
self.unpack_dword(self._off_size) 904 | return s + (8 - (s % 8)) 905 | 906 | def name(self): 907 | return self.unpack_wstring(self.name_offset(), self.name_length()) 908 | 909 | 910 | class MFT_RECORD_FLAGS: 911 | MFT_RECORD_IN_USE = 0x1 912 | MFT_RECORD_IS_DIRECTORY = 0x2 913 | 914 | 915 | def MREF(mft_reference): 916 | """ 917 | Given a MREF/mft_reference, return the record number part. 918 | """ 919 | return mft_reference & 0xFFFFFFFFFFFF 920 | 921 | 922 | def MSEQNO(mft_reference): 923 | """ 924 | Given a MREF/mft_reference, return the sequence number part. 925 | """ 926 | return (mft_reference >> 48) & 0xFFFF 927 | 928 | 929 | class AttributeNotFoundError(Exception): 930 | pass 931 | 932 | 933 | class MFTRecord(FixupBlock): 934 | def __init__(self, buf, offset, parent, inode=None): 935 | super(MFTRecord, self).__init__(buf, offset, parent) 936 | 937 | # 0x0 File or BAAD 938 | self.declare_field("dword", "magic") 939 | # 0x04 Offset to fixup array 940 | self.declare_field("word", "usa_offset") 941 | # 0x06 Number of entries in fixup array 942 | self.declare_field("word", "usa_count") 943 | # 0x08 $LogFile sequence number 944 | self.declare_field("qword", "lsn") 945 | # 0x10 Sequence value 946 | self.declare_field("word", "sequence_number") 947 | # 0x12 Link Count 948 | self.declare_field("word", "link_count") 949 | # 0x14 Offset of first attribute 950 | self.declare_field("word", "attrs_offset") 951 | # 0x16 Flags: 952 | # 0x00 - not in use 953 | # 0x01 - in use 954 | # 0x02 - directory 955 | # 0x03 - directory in use 956 | self.declare_field("word", "flags") 957 | 958 | # 0x18 Used size of MFT entry 959 | self.declare_field("dword", "bytes_in_use") 960 | # 0x1c Allocated size of MFT entry 961 | self.declare_field("dword", "bytes_allocated") 962 | # 0x20 File reference to base record 963 | self.declare_field("qword", "base_mft_record") 964 | # 0x28 Next attribute identifier 965 | self.declare_field("word", "next_attr_instance") 966 | 967 | # Attributes and fixup values 968 | # 0x2a 969 | self.declare_field("word", "reserved") 970 | # 0x2c 971 | self.declare_field("dword", "mft_record_number") 972 | 973 | self.inode = inode or self.mft_record_number() 974 | # print self.sequence_number() 975 | # print self.usa_offset() 976 | self.fixup(self.usa_count(), self.usa_offset()) 977 | 978 | def attributes(self): 979 | offset = self.attrs_offset() 980 | right_border = self.offset() + self.bytes_in_use() 981 | 982 | while (self.unpack_dword(offset) != 0 and 983 | self.unpack_dword(offset) != 0xFFFFFFFF and 984 | offset + self.unpack_dword(offset + 4) <= right_border): 985 | a = Attribute(self._buf, offset, self) 986 | offset += len(a) 987 | yield a 988 | 989 | def attribute(self, attr_type): 990 | for a in self.attributes(): 991 | if a.type() == attr_type: 992 | return a 993 | raise AttributeNotFoundError() 994 | 995 | def is_directory(self): 996 | return (self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IS_DIRECTORY) == 2 997 | 998 | def is_active(self): 999 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IN_USE 1000 | 1001 | # this is a required resident attribute 1002 | def filename_informations(self): 1003 | """ 1004 | MFT Records may have more than one FN info attribute, 1005 | each with a different type of filename (8.3, POSIX, etc.) 1006 | 1007 | This function returns all of these attributes. 
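For example, a single record may carry both a DOS 8.3 name (such as DOCUME~1) and a Win32 long name (Documents); each one is returned here as a separate FilenameAttribute.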
1008 | """ 1009 | ret = [] 1010 | for a in self.attributes(): 1011 | if a.type() == ATTR_TYPE.FILENAME_INFORMATION: 1012 | try: 1013 | value = a.value() 1014 | check = FilenameAttribute(value, 0, self) 1015 | ret.append(check) 1016 | except Exception: 1017 | pass 1018 | return ret 1019 | 1020 | # this a required resident attribute 1021 | def filename_information(self): 1022 | """ 1023 | MFT Records may have more than one FN info attribute, 1024 | each with a different type of filename (8.3, POSIX, etc.) 1025 | 1026 | This function returns the attribute with the most complete name, 1027 | that is, it tends towards Win32, then POSIX, and then 8.3. 1028 | """ 1029 | fn = None 1030 | for check in self.filename_informations(): 1031 | try: 1032 | if check.filename_type() == 0x0001 or \ 1033 | check.filename_type() == 0x0003: 1034 | return check 1035 | fn = check 1036 | except Exception: 1037 | pass 1038 | return fn 1039 | 1040 | # this a required resident attribute 1041 | def standard_information(self): 1042 | try: 1043 | attr = self.attribute(ATTR_TYPE.STANDARD_INFORMATION) 1044 | return StandardInformation(attr.value(), 0, self) 1045 | except AttributeError: 1046 | return None 1047 | 1048 | def data_attribute(self): 1049 | """ 1050 | Returns None if the default $DATA attribute does not exist 1051 | """ 1052 | for attr in self.attributes(): 1053 | if attr.type() == ATTR_TYPE.DATA and attr.name() == "": 1054 | return attr 1055 | 1056 | def slack_data(self): 1057 | """ 1058 | Returns A binary string containing the MFT record slack. 1059 | """ 1060 | return self._buf[self.offset() + self.bytes_in_use():self.offset() + 1024].tostring() 1061 | 1062 | def active_data(self): 1063 | """ 1064 | Returns A binary string containing the MFT record slack. 1065 | """ 1066 | return self._buf[self.offset():self.offset() + self.bytes_in_use()].tostring() 1067 | 1068 | 1069 | class InvalidAttributeException(INDXException): 1070 | def __init__(self, value): 1071 | super(InvalidAttributeException, self).__init__(value) 1072 | 1073 | def __str__(self): 1074 | return "Invalid attribute Exception(%s)" % (self._value) 1075 | 1076 | 1077 | class InvalidMFTRecordNumber(Exception): 1078 | def __init__(self, value): 1079 | self.value = value 1080 | 1081 | 1082 | class MFTOperationNotImplementedError(Exception): 1083 | def __init__(self, msg): 1084 | super(MFTOperationNotImplementedError, self).__init__(msg) 1085 | self._msg = msg 1086 | 1087 | def __str__(self): 1088 | return "MFTOperationNotImplemented(%s)" % (self._msg) 1089 | 1090 | 1091 | class InvalidRecordException(Exception): 1092 | def __init__(self, msg): 1093 | super(InvalidRecordException, self).__init__(msg) 1094 | self._msg = msg 1095 | 1096 | def __str__(self): 1097 | return "InvalidRecordException(%s)" % (self._msg) 1098 | -------------------------------------------------------------------------------- /TScopy/tscopy.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project is based off the work from the following projects: 3 | * https://github.com/williballenthin/python-ntfs 4 | * https://github.com/jschicht/RawCopy 5 | """ 6 | # TODO: Will have issues with non ascii characters in files names 7 | # TODO: Currently only processes '\\.\' where RawCopy supported other formats 8 | import sys 9 | import os 10 | import re 11 | import pickle 12 | import traceback 13 | 14 | from math import ceil 15 | from BinaryParser import hex_dump, Block 16 | from MFT import INDXException, MFTRecord, ATTR_TYPE, Attribute_List 
17 | from MFT import INDEX_ROOT 18 | 19 | if os.name == "nt": 20 | try: 21 | import win32file, win32api, win32con 22 | except: 23 | print "Must have pywin32 installed -- pip install pywin32" 24 | sys.exit(1) 25 | 26 | #################################################################################### 27 | # BootSector structure 28 | # https://flatcap.org/linux-ntfs/ntfs/files/boot.html 29 | #################################################################################### 30 | class BootSector(Block): 31 | def __init__(self, buf, offset, logger): 32 | super(BootSector, self).__init__(buf, offset) 33 | self.declare_field("qword", "system_id", 0x3) 34 | self.declare_field("word", "bytes_per_sector", 0x0b) 35 | self.declare_field("byte", "sectors_per_cluster", 0xd) 36 | self.declare_field("word", "reserved_sectors", 0xe) 37 | self.declare_field("byte", "media_desc", 0x15) 38 | self.declare_field("word", "sectors_per_track", 0x18) 39 | self.declare_field("word", "heads", 0x1a) 40 | self.declare_field("dword", "hidden_sectors", 0x1c) 41 | self.declare_field("qword", "total_sectors", 0x28) 42 | self.declare_field("qword", "start_c_mft", 0x30) 43 | self.declare_field("qword", "start_c_mftmir", 0x38) 44 | self.declare_field("byte", "file_rec_indicator", 0x40) 45 | self.declare_field("byte", "idx_buf_size_indicator", 0x44) 46 | self.declare_field("qword", "serial_number", 0x48) 47 | self.bytes_per_cluster = self.bytes_per_sector() * self.sectors_per_cluster() 48 | #COPIED FROM RAWCOPY:: A really lame fix for a rare bug seen in certain Windows 7 x64 vm's 49 | if self.file_rec_indicator() > 127: 50 | testval = 256 - self.file_rec_indicator() 51 | self.mft_record_size = 2 52 | for i in range(testval-1): 53 | self.mft_record_size *= 2 54 | else: 55 | self.mft_record_size = self.bytes_per_cluster * self.file_rec_indicator() 56 | 57 | self.sectors_per_mft_record = self.mft_record_size / self.bytes_per_sector() 58 | self.cluster_per_file_record_segment = int(ceil(float(self.mft_record_size) / self.bytes_per_cluster)) 59 | 60 | 61 | #################################################################################### 62 | # NTFS INDX Record structure 63 | # https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html 64 | #################################################################################### 65 | class INDX( Block ): 66 | def __init__(self, buf, offset ): 67 | super(INDX, self).__init__(buf, offset) 68 | self.declare_field("dword", "magic", 0x0) 69 | self.declare_field("word", "update_seq_offset", 0x4) 70 | self.declare_field("word", "update_seq_sz", 0x6) 71 | self.declare_field("qword", "logfile_seq_num", 0x8) 72 | self.declare_field("qword", "VCN_INDX", 0x10) 73 | self.declare_field("dword", "index_entries_offset", 0x18) 74 | self.declare_field("dword", "index_entries_sz", 0x1c) 75 | self.declare_field("dword", "alloc_sz", 0x20) 76 | self.declare_field("byte", "leaf_node", 0x24) 77 | self.declare_field("word", "update_seq", 0x28) 78 | s = self.update_seq_sz() 79 | 80 | def update_seq_arr( self, idx_buf ): 81 | # TODO: Clean this up into a for loop 82 | seq_arr = idx_buf[self.update_seq_offset()+2:self.update_seq_offset()+2+self.update_seq_sz()*2] 83 | ret = idx_buf[0x0000:0x01fe] + seq_arr[0x00:0x2] 84 | ret += idx_buf[0x0200:0x03fe] + seq_arr[0x02:0x4] 85 | ret += idx_buf[0x0400:0x05fe] + seq_arr[0x04:0x6] 86 | ret += idx_buf[0x0600:0x07fe] + seq_arr[0x06:0x8] 87 | ret += idx_buf[0x0800:0x09fe] + seq_arr[0x08:0xa] 88 | ret += idx_buf[0x0a00:0x0bfe] + seq_arr[0x0a:0xc] 89 | ret += 
idx_buf[0x0c00:0x0dfe] + seq_arr[0x0c:0xe] 90 | ret += idx_buf[0x0e00:0x0ffe] + seq_arr[0x0e:0x10] 91 | ret += idx_buf[0x1000: ] 92 | return ret 93 | 94 | #################################################################################### 95 | # NTFS INDX Entry Structure 96 | # https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html 97 | #################################################################################### 98 | class INDX_ENTRY( Block ): 99 | def __init__(self, buf, offset): 100 | super(INDX_ENTRY, self).__init__(buf, offset) 101 | self.declare_field("qword", "mft_recordnum", 0) 102 | self.declare_field("word", "entry_sz", 0x08 ) 103 | if self.entry_sz() == 0x18 and self.mft_recordnum() == 0: 104 | raise INDXException("End of INDX File found") 105 | if self.entry_sz() == 0x10 and self.mft_recordnum() == 0: 106 | raise INDXException("End of INDX File found") 107 | if self.entry_sz() == 0x00 and self.mft_recordnum() == 0: 108 | raise INDXException("NULLS INDX File found") 109 | self.declare_field("word", "filename_offset", 0x0a ) 110 | self.declare_field("word", "index_flags", 0x0c ) 111 | self.declare_field("qword", "mft_parent_recordnum", 0x10 ) 112 | self.declare_field("qword", "alloc_sz", 0x38 ) 113 | self.declare_field("qword", "file_sz", 0x40 ) 114 | self.declare_field("qword", "file_flags", 0x48 ) 115 | self.declare_field("byte", "filename_sz", 0x50 ) 116 | self.declare_field("binary", "filename", 0x52, self.filename_sz()*2 ) 117 | 118 | 119 | #################################################################################### 120 | # The main class of TScopy. 121 | # * Is a singleton instance 122 | # * Example usage 123 | # config = {'outputbasedir':dst, 'pickledir':dir,'logger':log,'debug':False,'ignore_table':False} 124 | # tscopy = TScopy() 125 | # tscopy.setConfiguration( config ) 126 | # tscopy.copy( src, dst ) 127 | # 128 | # * Config key descriptions 129 | # - outputbasedir : The FULL PATH of directory where the files will be copied to. 130 | # - pickledir : The FULL PATH of directory where the pickle file will be created or used. 131 | # - logger : A preconfigured instance of the python Logger class. 132 | # - debug : Not used 133 | # - ignore_table: 134 | # * True = Rebuilds the MFT table from the root node and does not save the table at the end of the run 135 | # * False = Uses a previous mft.pickle file if found. Saves the file after every copy. 
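# * A minimal end-to-end sketch (assumes a preconfigured logging.Logger named log; the paths are hypothetical): #     config = { 'pickledir': 'c:\\test', 'logger': log, 'debug': False, 'ignore_table': False } #     tscopy = TScopy() #     tscopy.setConfiguration( config ) #     tscopy.copy( 'c:\\users\\*\\ntuser.dat', 'c:\\test' )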
136 | #################################################################################### 137 | class TScopy( object ): 138 | _instance = None 139 | def __new__( cls ): 140 | if cls._instance == None: 141 | cls._instance = super(TScopy, cls).__new__(cls) 142 | cls.__isConfigured = False 143 | cls.__pickle_filename = "mft.pickle" 144 | cls.config = { 'files': None, 145 | 'pickledir': None, 146 | 'logger': None, 147 | 'debug': True, 148 | 'ignore_table':False, 149 | } 150 | cls.__useWin32 = False 151 | return cls._instance 152 | 153 | #################################################################################### 154 | # isConfigured: Verifies that the object has been configured at least once 155 | #################################################################################### 156 | def isConfigured( self ): 157 | return self.__isConfigured 158 | 159 | #################################################################################### 160 | # setConfiguration: Parses the config dictionary to set the values for debug, logger, 161 | # lookup table and the pickle directory 162 | #################################################################################### 163 | def setConfiguration( self, config ): 164 | if self.__isConfigured == True: 165 | return 166 | self.__MFT_lookup_table = None 167 | self.__isConfigured = True 168 | self.setDebug( config['debug'] ) 169 | self.setLogger( config['logger'] ) 170 | self.setLookupTable( config['ignore_table'] ) 171 | self.setPickleDir( config['pickledir'] ) 172 | 173 | 174 | #################################################################################### 175 | # setLogger: Sets the class object logger variable 176 | # Needs to be preconfigured 177 | #################################################################################### 178 | def setLogger( self, logger ): 179 | if logger == None: 180 | raise Exception( "TSCOPY", "Invalid Logger") 181 | self.config['logger'] = logger 182 | 183 | #################################################################################### 184 | # setDebug: Sets the class object debugger variable 185 | #################################################################################### 186 | def setDebug( self, debug ): 187 | self.config['debug'] = debug 188 | 189 | #################################################################################### 190 | # setLookupTable: Sets the class object ignore_table. 191 | #################################################################################### 192 | def setLookupTable( self, tf ): 193 | self.config['ignore_table'] = tf 194 | 195 | #################################################################################### 196 | # setPickleDir: Sets the output directory to save the mft.pickle file to 197 | #################################################################################### 198 | def setPickleDir( self, directory ): 199 | if not directory == None and not os.path.isdir( directory ): 200 | self.config['logger'].error("Error pickle destination (%s) not found" % directory) 201 | raise Exception( "TSCOPY", "Error pickle destination (%s) not found" % directory) 202 | self.__pickle_fullpath = '%s%s%s' % ( directory, os.sep, self.__pickle_filename ) 203 | self.__MFT_lookup_table = self.__getLookupTableFromDisk( "c" ) 204 | 205 | #################################################################################### 206 | # __getLookupTableFromDisk: Checks the mft.pickle file. 207 | # If it exists then it loads into memory. 
208 | # If it does not exist then it creates a new basic structure 209 | #################################################################################### 210 | def __getLookupTableFromDisk( self, drive_letter ): 211 | if not os.path.isfile( self.__pickle_fullpath): 212 | return {drive_letter:{5:{'seq_num': 5, 'name':'','children':{}}}} 213 | try: 214 | self.config['logger'].debug("Using Pickle file: %s " % self.__pickle_fullpath) 215 | with open( self.__pickle_fullpath, 'rb') as fd: 216 | return pickle.loads( fd.read() ) 217 | except: 218 | raise Exception( "TSCOPY", "FAILED to parse pickle file %s" % self.__pickle_fullpath ) 219 | 220 | #################################################################################### 221 | # __saveLookuptable: Write the lookup table from memory to disk. 222 | # Overwrites previous copy if it exists. 223 | #################################################################################### 224 | def __saveLookuptable( self, lookup_table ): 225 | with open(self.__pickle_fullpath, 'wb') as fd: 226 | fd.write( pickle.dumps( lookup_table )) 227 | 228 | #################################################################################### 229 | # __getMFT: Gets the root record of the MFT 230 | #################################################################################### 231 | def __getMFT( self, index=0 ): 232 | fd = self.config['fd'] 233 | bss = self.config['bss'] 234 | mft_offset = bss.bytes_per_sector() * bss.sectors_per_cluster() * bss.start_c_mft() 235 | if self.__useWin32 == False: 236 | mft_offset = 0x400 237 | # win32file.SetFilePointer( fd, mft_offset+(index*bss.mft_record_size ), win32file.FILE_BEGIN) 238 | # buf = win32file.ReadFile( fd, bss.mft_record_size )[1] 239 | buf, buf_sz = self.__read( fd, mft_offset+(index*bss.mft_record_size ), bss.mft_record_size ) 240 | record = MFTRecord(buf, 0, None) 241 | ret = {} 242 | 243 | attribute = record.data_attribute() 244 | cnt = 0 245 | for offset, length in attribute.runlist().runs(): 246 | if length > 16 and (length%16) > 0: 247 | if offset == 0: 248 | # may be sparse section at end of Compression Signature 249 | ret[cnt] = (offset, length%16) 250 | length -= length%16 251 | cnt += 1 252 | else: 253 | #may be compressed data section at start of Compression Signature 254 | ret[cnt] = (offset, length-length%16) 255 | offset += length-length%16 256 | length = length%16 257 | cnt += 1 258 | #just normal or sparse data 259 | ret[cnt] = (offset, length) 260 | cnt += 1 261 | 262 | return ret 263 | 264 | #################################################################################### 265 | # __GenRefArray: Iterates through the $MFT dataruns 266 | #################################################################################### 267 | def __GenRefArray( self ): 268 | MFTClustersToKeep = 0 269 | ref = -1 270 | dataruns = self.config['mft_dataruns'] 271 | bytes_per_cluster = self.config['bss'].bytes_per_cluster 272 | ClustersPerFileRecordSegment = self.config['bss'].cluster_per_file_record_segment 273 | split_mft_rec = {} 274 | cnt = 0 275 | for x in dataruns: 276 | r = dataruns[x] 277 | doKeepCluster = MFTClustersToKeep 278 | MFTClustersToKeep = (r[1]+ClustersPerFileRecordSegment - MFTClustersToKeep) % ClustersPerFileRecordSegment 279 | if not MFTClustersToKeep == 0: 280 | MFTClustersToKeep = ClustersPerFileRecordSegment - MFTClustersToKeep 281 | pos = r[0] * bytes_per_cluster 282 | subtr = self.config['bss'].mft_record_size 283 | if MFTClustersToKeep or doKeepCluster: 284 | subtr = 0 285 | 
end_of_run = r[1] * bytes_per_cluster - subtr 286 | for i in range(0, end_of_run, self.config['bss'].mft_record_size): 287 | if MFTClustersToKeep: 288 | if i >= end_of_run - ((ClustersPerFileRecordSegment - MFTClustersToKeep) * bytes_per_cluster): 289 | bytesToGet = (ClustersPerFileRecordSegment - MFTClustersToKeep) * bytes_per_cluster 290 | split_mft_rec[cnt] = '%d?%d,%d' % (ref+1, pos+i, bytesToGet ) 291 | ref += 1 292 | if i == 0 and doKeepCluster: 293 | bytesToGet = doKeepCluster * bytes_per_cluster 294 | if bytesToGet > self.config['bss'].mft_record_size: 295 | bytesToGet = self.config['bss'].mft_record_size 296 | split_mft_rec[cnt] += '|%d&%d' % ( pos+i, bytesToGet ) 297 | cnt += 1 298 | self.config['split_mft_rec'] = split_mft_rec 299 | 300 | #################################################################################### 301 | # __process_image: TODO 302 | #################################################################################### 303 | def __process_image( self, targetDrive ): 304 | pass 305 | 306 | #################################################################################### 307 | # __search_mft: Iterates through the target file's path, populating the table and seq_path 308 | # with each branch of the path as it parses the MFT records. The search ends when 309 | # it fails to find the next item in the target path or the target is identified. 310 | # table: The pointer to the current location into the mft metadata table stored in memory 311 | # tmp_path: The target directory path as a list 312 | # seq_path: A list of the found target directory path with mft sequence numbers 313 | #################################################################################### 314 | def __search_mft( self, table, tmp_path, seq_path ): 315 | for name in tmp_path: 316 | index = table['seq_num'] 317 | # self.config['logger'].debug('Looking for (%s) MFT_INDEX(%016X)' % (name, index)) 318 | ret = self.__getChildIndex( index ) 319 | # self.config['logger'].debug("childindex = %r" % len(ret) ) 320 | tmp_index = index 321 | for seq_num in ret: 322 | c_index = seq_num & 0xffffffff 323 | c_name = ret[seq_num].lower() 324 | table['children'][c_name] = { 'name':c_name, 'seq_num':c_index, 'children':{}} 325 | if c_name == name.lower(): 326 | index = c_index 327 | seq_path.append( (index, c_name ) ) 328 | table = table['children'][c_name] 329 | break 330 | if tmp_index == index: 331 | # self.config['logger'].info("%s NOT FOUND" % name) 332 | return None, None, None 333 | return table, tmp_path, seq_path 334 | #################################################################################### 335 | # __find_last_known_path: Iterates through the target file's path and matches with the 336 | # currently known indexes in the table. Returns as soon as the next path item 337 | # is not found or the end target has been located. 
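# Illustrative shape of the cached lookup table this walk relies on (record 5 is the volume root, names are stored lowercase; the child seq_num shown is a made-up example value): # { 'seq_num': 5, 'name': '', 'children': { 'windows': { 'seq_num': 1234, 'name': 'windows', 'children': {} } } }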
338 | # table: The pointer to the current location into the mft metadata table stored in memory 339 | # tmp_path: The target directory path as a list 340 | # seq_path: A list of the found target directory path with mft sequence numbers 341 | #################################################################################### 342 | 343 | def __find_last_known_path( self, table, tmp_path, seq_path ): 344 | l_path = tmp_path[:] 345 | for name in l_path: 346 | name = name.lower() 347 | if not name in table['children']: 348 | break 349 | table = table['children'][name] 350 | tmp_path = tmp_path[1:] 351 | seq_path.append( ( table['seq_num'], name )) 352 | return table, tmp_path, seq_path 353 | 354 | #################################################################################### 355 | # __copydir: Copies the entire directory. If bRecursive this function calls itself with 356 | # any child directories 357 | # fname: fullpath of the directory to copy 358 | # index: Sequence number of the MFT record of the parent 359 | # table: Pointer to the current index in the MFT metadata table 360 | # bRecursive: 361 | # True: When the parent's child is a directory, __copydir is called recursively 362 | # False: Does not copy child directories 363 | #################################################################################### 364 | def __copydir( self, fname, index, table, bRecursive=False): 365 | self.config['logger'].debug('fname(%r) index(%r)' % (fname, index) ) 366 | table = self.__copydirfiles( fname, index, table ) 367 | 368 | if bRecursive == True: 369 | for dirs in table['children']: 370 | l_table = table['children'][dirs] 371 | c_index = l_table['seq_num'] 372 | buf, buf_sz = self.__calcOffset( c_index ) 373 | if buf == None or buf_sz == 0: 374 | raise Exception("Failed to process mft_offset") 375 | record = MFTRecord(buf, 0, None) 376 | if record.is_directory(): 377 | self.config['logger'].debug( "Next Directory %r %r %r" % (c_index, dirs, fname)) 378 | self.config['current_file'] = fname[2:] 379 | self.__copydir( os.path.join(fname,dirs), c_index, l_table, bRecursive=True ) 380 | 381 | #################################################################################### 382 | # __copydirfiles: Wraps __getFile and copies all the files under the current directory 383 | # fname: fullpath of the directory to copy 384 | # index: Sequence number of the MFT record of the parent 385 | # table: Pointer to the current index in the MFT metadata table 386 | #################################################################################### 387 | def __copydirfiles( self, fname, index, table ): 388 | self.config['logger'].debug( "copydirfiles \n\tfname:\t%r\n\tindex:\t%r\n\ttable %r" % (fname,index,table)) 389 | if table['children'] == {}: 390 | ret = self.__getChildIndex( index ) 391 | self.config['logger'].debug( "\tchildren: %r" % len(ret)) 392 | for seq_num in ret: 393 | c_index = seq_num & 0xffffffff 394 | c_name = ret[seq_num].lower() 395 | table['children'][c_name] = { 'name':c_name, 'seq_num':c_index, 'children':{}} 396 | 397 | if ret[seq_num].strip() == '' or seq_num == 0: 398 | continue 399 | 400 | tmp_filename = self.config['current_file'] 401 | for name in table['children']: 402 | seq_num = table['children'][name]['seq_num'] 403 | self.config['logger'].debug("\tCopying %s to %s" % (fname+os.sep+name, self.config['outputbasedir']+tmp_filename+os.sep+name)) 404 | 405 | self.config['current_file'] = fname[2:]+os.sep+name # strip the drive letter off the front 406 | if '*' in 
fname[2:]+os.sep+name: 407 | self.config['current_file'] = tmp_filename+os.sep+name # strip the drive letter off the front 408 | 409 | self.__getFile( [seq_num&0xffffffff, name] ) 410 | return table 411 | 412 | #################################################################################### 413 | # __copyfile: Internal copy function. Used to set up and parse the target filename, locate 414 | # previously identified paths in the mft metadata list, and then copy the file, 415 | # files, or directories 416 | # filename: Full path to the target file/directory or wildcarded to copy 417 | # mft_filename: TODO remove 418 | # bRecursive: 419 | # True: Copy all children from this directory on 420 | # False: Do not copy children 421 | #################################################################################### 422 | def __copyfile( self, filename, mft_filename=None, bRecursive=False ): 423 | if self.__useWin32 == True: 424 | self.config['logger'].debug( 'filename %r' % filename) 425 | if not filename[:4].lower() == '\\\\.\\': 426 | targetDrive = '\\\\.\\'+filename[:2] 427 | else: 428 | targetDrive = filename[:6] 429 | 430 | driveLetter = targetDrive[-2] 431 | self.config['logger'].debug( 'Target Drive %s' % targetDrive) 432 | self.config['logger'].debug( 'DriveLetter %s' % driveLetter) 433 | 434 | self.__process_image( targetDrive ) # TODO process this to determine correct offsets 435 | 436 | if self.config['ignore_table'] == True: 437 | self.__MFT_lookup_table = {driveLetter:{5:{'seq_num':5,'name':'','children':{}}}} 438 | elif not driveLetter in self.__MFT_lookup_table.keys(): 439 | self.__MFT_lookup_table[driveLetter] = {5:{'seq_num':5,'name':'','children':{}}} 440 | # self.config['logger'].debug( 'Target Drive %s' % driveLetter) 441 | else: 442 | self.__MFT_lookup_table = {"c":{5:{'seq_num':5,'name':'','children':{}}}} 443 | targetDrive = mft_filename 444 | driveLetter = "c" 445 | self.config['logger'].debug( 'Processing the %s MFT file' % targetDrive ) 446 | 447 | self.config['driveLetter'] = driveLetter 448 | fd = self.__open( targetDrive ) 449 | self.config['fd'] = fd 450 | buf, buf_sz = self.__read( fd, 0, 0x200 ) # buf = win32file.ReadFile( fd, 0x200)[1] 451 | self.config['bss'] = BootSector( buf, 0, self.config['logger'] ) 452 | self.config['mft_dataruns'] = self.__getMFT( 0) 453 | self.__GenRefArray() 454 | 455 | fname = filename 456 | index = 5 457 | 458 | try: 459 | # Find the last known directory in the MFT_lookup_table 460 | seq_path = [(index,None)] 461 | tmp_path = fname[3:].split(os.sep) 462 | table = self.__MFT_lookup_table[driveLetter][5] 463 | 464 | expandedWildCards = self.__process_wildcards( filename, table ) 465 | if expandedWildCards == False: 466 | cp_files = [ tmp_path ] 467 | else: 468 | cp_files = expandedWildCards 469 | 470 | 471 | for cp_file in cp_files: 472 | self.config['current_file'] = os.sep.join(cp_file) # strip the drive letter off the front 473 | l_fname = fname[:3] + self.config['current_file'] 474 | self.config['logger'].info("Copying %s to %s" % (l_fname, self.config['outputbasedir']+self.config['current_file'])) 475 | table, tmp_path, seq_path = self.__get_file_mft_seqid( cp_file ) 476 | 477 | # Index was not located; exit (error message already logged) 478 | if table == None: 479 | self.config['logger'].error("File Not Found" ) 480 | return 481 | 482 | # Check the mft structure if this is a directory 483 | index = seq_path[-1][0] 484 | buf, buf_sz = self.__calcOffset( index ) 485 | if buf == None or buf_sz == 0: 486 | 
raise Exception("Failed to process mft_offset") 487 | record = MFTRecord(buf, 0, None) 488 | if record.is_directory(): 489 | self.__copydir( l_fname, index, table, bRecursive=bRecursive ) 490 | else: 491 | self.__getFile( seq_path[-1] ) 492 | except: 493 | self.config['logger'].error(traceback.format_exc()) 494 | finally: 495 | if self.config['ignore_table'] == False: 496 | self.__saveLookuptable( self.__MFT_lookup_table) 497 | 498 | #################################################################################### 499 | # __isSplitMFT: Determines if the MFT record is split 500 | #################################################################################### 501 | def __isSplitMFT( self, array, target_seq_num ): 502 | for ind in array: 503 | i = array[ind] 504 | if not '?' in i: 505 | continue 506 | ind = i.index('?') 507 | testRef = i[0:ind] 508 | if int(testRef) == target_seq_num: 509 | return ind 510 | return None 511 | 512 | #################################################################################### 513 | # __GetChildIndex: Parses the MFT records to find all children of the current sequence ID 514 | # index: Sequence ID or seq_num of the current MFT record to extract and parse 515 | #################################################################################### 516 | def __getChildIndex( self, index ): 517 | fd = self.config['fd'] 518 | bss = self.config['bss'] 519 | bpc = bss.bytes_per_cluster 520 | 521 | buf, buf_sz = self.__calcOffset( index ) 522 | if buf == None or buf_sz == 0: 523 | raise Exception("Failed to process mft_offset") 524 | record = MFTRecord(buf, 0, None) 525 | if not record.is_directory(): 526 | return [] 527 | ret = {} 528 | for attribute in record.attributes(): 529 | if attribute.type() == ATTR_TYPE.INDEX_ROOT: 530 | for entry in INDEX_ROOT(attribute.value(), 0).index().entries(): 531 | refNum = entry.header().mft_reference() & 0xfffffffff 532 | if refNum in ret: 533 | if "~" in ret[refNum]: 534 | ret[refNum] = entry.filename_information().filename() 535 | else: 536 | ret[refNum] = entry.filename_information().filename() 537 | elif attribute.type() == ATTR_TYPE.ATTRIBUTE_LIST: 538 | self.config['logger'].debug("ATTRIBUTE_LIST HAS BEEN FOUND 0x(%08x)!!!!" % index ) 539 | attr_list = Attribute_List(attribute.value(), 0, attribute.value_length(), self.config['logger'] ) 540 | self.config['logger'].debug(hex_dump(attribute.value()[:attribute.value_length()])) 541 | a_list = [] 542 | for entry in attr_list.get(): 543 | if (entry.type() == ATTR_TYPE.INDEX_ROOT or entry.type() == ATTR_TYPE.INDEX_ALLOCATION ) and not (entry.baseFileReference()&0xffffffff) == index: 544 | if not entry.baseFileReference() in a_list: 545 | a_list.append( entry.baseFileReference() & 0xffffffff ) 546 | for next_index in a_list: 547 | # WARNING!!! Recursive 548 | if index == next_index: 549 | self.config['logger'].debug(hex_dump(attribute.value()[:attribute.value_length()])) 550 | # raise Exception("Attribute_list failed to parse.") 551 | continue 552 | rec_children = self. 
__getChildIndex( next_index ) 553 | self.config['logger'].debug("ATTRIBUTE_LIST index(%d) children (%r) " % (next_index, rec_children) ) 554 | ret.update( rec_children ) 555 | elif attribute.type() == ATTR_TYPE.INDEX_ALLOCATION: 556 | for cluster_offset, length in attribute.runlist().runs(): 557 | offset=cluster_offset*bpc 558 | buf, buf_sz = self.__read( fd, offset, length*bpc) 559 | for cnt in range(length): 560 | idx_buf = buf[cnt*bpc:(cnt+2)*bpc] 561 | ind = INDX( idx_buf, 0 ) 562 | idx_buf = ind.update_seq_arr( idx_buf ) 563 | entry_offset = ind.index_entries_offset()+0x18 564 | i = 0 565 | last_i = i 566 | while i < ind.index_entries_sz() : 567 | try: 568 | entry = INDX_ENTRY( idx_buf, entry_offset ) 569 | refNum = entry.mft_recordnum() & 0xfffffffff 570 | if refNum in ret: 571 | if "~" in ret[refNum]: 572 | ret[refNum] = entry.filename().replace('\x00','') 573 | else: 574 | ret[refNum] = entry.filename().replace('\x00','') 575 | except INDXException: 576 | break 577 | except: 578 | self.config['logger'].error(traceback.format_exc()) 579 | self.config['logger'].debug( 'len(idx_buf (%03x) entry_offset(%03x)' % ( len(idx_buf), entry_offset)) 580 | pass 581 | entry_offset += entry.entry_sz() 582 | 583 | i += entry.entry_sz() 584 | if entry.entry_sz() == 0: 585 | break 586 | return ret 587 | 588 | #################################################################################### 589 | # __calcOffset: Calculates the offset into the drive to locate the specific data 590 | # for the target sequence number 591 | # target_seq_num: Sequence ID to copy from the disk 592 | #################################################################################### 593 | def __calcOffset( self, target_seq_num ): 594 | fd = self.config['fd'] 595 | bss = self.config['bss'] 596 | mft_vcn = self.config['mft_dataruns'] 597 | image_offset = 0 # TODO: Change this when finished processing the image 598 | array = self.config['split_mft_rec'] 599 | 600 | # Handle the case where the object is split across two dataruns 601 | split = self.__isSplitMFT( array, target_seq_num ) 602 | if not split == None: 603 | # self.config['logger'].debug( 'calcOffset: a split record was detected' ) 604 | item = array[split] 605 | ind = item.index('?') 606 | testRef = item[0:ind] 607 | if not int(testRef) == target_seq_num: 608 | # self.config['logger'].debug("Error: The ref in the array did not match target ref.") 609 | return None, 0 610 | 611 | srecord3 = item[ind+1:] 612 | srecordArr = srecord3.split('|') 613 | if not len( srecordArr ) == 3: 614 | # self.config['logger'].debug("Error: Array contained more elements than expected: %d" % len( srecordArr )) 615 | return None, 0 616 | 617 | record = "" 618 | record_sz = 0 619 | for i in srecordArr: 620 | if not ',' in i: 621 | # self.config['logger'].debug('Split:: Could not find ","') 622 | continue 623 | ind = i.index(',') 624 | srOffset = int(i[:ind]) 625 | srSize = int(i[ind+1:]) 626 | # win32file.SetFilePointer( fd, srOffset + image_offset, win32file.FILE_BEGIN) 627 | # record += win32file.ReadFile( fd, srSize)[1] 628 | buf, buf_sz = self.__read( fd, srOffset + image_offset, srSize ) 629 | record += buf 630 | record_sz += buf_sz 631 | return record, record_sz 632 | else: 633 | counter = 0 634 | offset = 0 635 | recordsdivisor = bss.mft_record_size/512 636 | for indx in mft_vcn: 637 | current_cluster = mft_vcn[indx][1] 638 | offset = mft_vcn[indx][0] 639 | records_in_currentrun = (current_cluster * bss.sectors_per_cluster() ) / recordsdivisor 640 | counter += records_in_currentrun 641 | if 
counter > target_seq_num: 642 | break 643 | tryat = counter - records_in_currentrun 644 | records_per_cluster = bss.sectors_per_cluster() / recordsdivisor 645 | final = 0 646 | counter2 = 0 647 | record_jmp = 0 648 | while final < target_seq_num: 649 | record_jmp += records_per_cluster 650 | counter2 += 1 651 | final = tryat + record_jmp 652 | records_too_many = final - target_seq_num 653 | 654 | mft_offset = image_offset + offset * bss.bytes_per_cluster + ( counter2 * bss.bytes_per_cluster ) - ( records_too_many * bss.mft_record_size ) 655 | # win32file.SetFilePointer( fd, mft_offset, win32file.FILE_BEGIN) 656 | # return win32file.ReadFile( fd, bss.mft_record_size )[1] 657 | if self.__useWin32 == False: 658 | mft_offset = 0x400 + 0x400*target_seq_num 659 | # self.config['logger'].debug('Split:: mft_offset(%r) record_size(%r)' % ( mft_offset, bss.mft_record_size)) 660 | return self.__read( fd, mft_offset, bss.mft_record_size) 661 | return None, 0 662 | 663 | #################################################################################### 664 | # __parse_attribute_data: Processes the file's data sections and combines them to 665 | # create the file. 666 | # attribute: The data attribute from the MFT record 667 | # Returns the data content 668 | #################################################################################### 669 | def __parse_attribute_data( self, attribute, output_name ): 670 | ret = '' 671 | fd = self.config['fd'] 672 | out_name = output_name 673 | bpc = self.config['bss'].bytes_per_cluster 674 | filename = attribute.name() 675 | # import pdb; pdb.set_trace() 676 | try: 677 | self.config['logger'].debug("Attribute File Name %s" % attribute.name()) 678 | if attribute.name_length() > 0: 679 | out_name += "_ADS_%s" % attribute.name() 680 | fd_out = open(out_name, "wb") 681 | self.config['logger'].debug("non_resident %r" % attribute.non_resident() ) 682 | if attribute.non_resident() == 0: 683 | fd_out.write( attribute.value() ) 684 | else: 685 | cnt = 0 686 | padd = False 687 | for cluster_offset, length in attribute.runlist().runs(): 688 | read_sz = length * bpc 689 | 690 | if cluster_offset == 0: ## Sparsed file segment detected 691 | self.config['logger'].debug("parse_attribute_data:: Sparsed file segment detected length( %08x ) lengthx4096 (%08x)" % ( length, read_sz)) 692 | chunk_sz = 0x1000 693 | chunk = "\x00"*chunk_sz 694 | while cnt < read_sz: 695 | if read_sz-cnt < chunk_sz: 696 | chunk_sz = read_sz-cnt 697 | fd_out.write(chunk[:chunk_sz]) 698 | cnt += chunk_sz 699 | else: 700 | self.config['logger'].debug("GetFile:: cluster_offset( %08x ) length( %08x ) " % ( cluster_offset, length)) 701 | self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 702 | if read_sz + cnt > attribute.initialized_size(): 703 | read_sz = attribute.initialized_size() - cnt 704 | padd = True 705 | if (read_sz % 0x1000) > 0: 706 | read_sz += 0x1000 - (read_sz%0x1000) 707 | offset=cluster_offset * bpc 708 | 709 | self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 710 | name = '' 711 | 712 | # Detected ADS 713 | buf, buf_sz = self.__read( fd, offset, read_sz, fd_out ) 714 | 715 | if attribute.data_size() < cnt + read_sz: 716 | read_sz = attribute.data_size()-cnt 717 | cnt += read_sz 718 | 719 | if padd == True: 720 | padd_sz = attribute.data_size() - attribute.initialized_size() 721 | fd_out.write( '\x00' * padd_sz ) 722 | cnt += padd_sz 723 | if cnt > attribute.initialized_size(): 
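# NTFS guarantees valid on-disk data only up to initialized_size(); bytes between initialized_size() and data_size() are logically zero, which is why the zero padding is written above and the copy loop can stop here.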
724 | # self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 725 | break 726 | except: 727 | self.config['logger'].error('Failed to get file %s\n%s' % (filename, traceback.format_exc() )) 728 | 729 | #################################################################################### 730 | # __parse_file_record: Given the sequence ID, parse the contents of the file from the 731 | # MFT and write them to output_name. 732 | # mft_file_seq_id: The sequence ID of the MFT record to return the data from 733 | #################################################################################### 734 | def __parse_file_record( self, mft_file_seq_id, output_name ): 735 | self.config['logger'].debug("parse_file_record 0x%08x" % mft_file_seq_id) 736 | buf, buf_sz = self.__calcOffset( mft_file_seq_id ) 737 | if buf == None: 738 | raise Exception("Failed to process mft_offset") 739 | 740 | record = MFTRecord(buf, 0, None) 741 | if record.is_directory(): 742 | return None 743 | 744 | ret_val = {} 745 | for attribute in record.attributes(): 746 | self.config['logger'].debug("Parsing Attribute 0x%2x" % attribute.type() ) 747 | if attribute.type() == ATTR_TYPE.ATTRIBUTE_LIST: 748 | file_contents = '' 749 | self.config['logger'].debug("ATTRIBUTE_LIST HAS BEEN FOUND getting the File 0x(%08x)!!!!" % mft_file_seq_id) 750 | attr_list = Attribute_List(attribute.value(), 0, attribute.value_length(), self.config['logger'] ) 751 | a_list = [] 752 | for entry in attr_list.get(): 753 | if entry.type() == ATTR_TYPE.DATA and not (entry.baseFileReference()&0xffffffff) == mft_file_seq_id: 754 | if not entry.baseFileReference() in a_list: 755 | a_list.append( entry.baseFileReference() & 0xffffffff ) 756 | for next_index in a_list: 757 | if mft_file_seq_id == next_index: 758 | continue 759 | # WARNING RECURSION 760 | self.__parse_file_record( next_index, output_name ) 761 | elif attribute.type() == ATTR_TYPE.DATA: 762 | self.__parse_attribute_data( attribute, output_name ) 763 | 764 | #################################################################################### 765 | # __getFile: Once the required file has been identified, this function locates all the parts of 766 | # the file and writes them in order to the destination location 767 | # mft_file_object: A (seq_num, name) pair identifying the target record 768 | #################################################################################### 769 | def __getFile( self, mft_file_object ): 770 | try: 771 | fullpath = self.config['outputbasedir'] + self.config['current_file'] 772 | # self.config['logger'].debug( "GetFile:: fullpath %s" % fullpath ) 773 | # self.config['logger'].debug( "GetFile:: attributes %s" % attribute.get_all_string()) 774 | path = '\\'.join(fullpath.split('\\')[:-1]) 775 | winapi_path = self.__winapi_path(path) 776 | if not os.path.isdir(winapi_path): 777 | os.makedirs(winapi_path) 778 | self.config['logger'].debug("GetFile:: fullpath edit %s" % fullpath) 779 | self.__parse_file_record( mft_file_object[0], self.__winapi_path(fullpath) ) 780 | except: 781 | self.config['logger'].error('Failed to get file %s\n%s' % (mft_file_object[1], traceback.format_exc() )) 782 | 783 | #################################################################################### 784 | # __winapi_path: Convert the file path to Unicode extended-length form to bypass the Win32 file path length limit of 260 characters 785 | #################################################################################### 786 | def __winapi_path( self, filename, encoding=None ): 787 | if (not isinstance(filename, unicode) and encoding is not 
None): 788 | filename = filename.decode(encoding) 789 | path = os.path.abspath(filename) 790 | if path.startswith(u"\\\\"): 791 | return u"\\\\?\\UNC\\" + path[2:] 792 | return u"\\\\?\\" + path 793 | 794 | 795 | #################################################################################### 796 | # __open: Wrapper around win32file createfile. 797 | #################################################################################### 798 | def __open( self, filename ): 799 | fd = None 800 | try: 801 | if self.__useWin32 == False: 802 | fd = open(filename, 'rb') 803 | else: 804 | fd = win32file.CreateFile( filename, 805 | win32file.GENERIC_READ, 806 | win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE, 807 | None, 808 | win32con.OPEN_EXISTING, 809 | win32file.FILE_ATTRIBUTE_NORMAL, 810 | None) 811 | except: 812 | self.config['logger'].error( traceback.format_exc()) 813 | return fd 814 | 815 | #################################################################################### 816 | # __read: Wrapper around win32file set file pointer and read contents. 817 | # fd => the handle to the file to be copied 818 | # offset => number of bytes to skip into the file 819 | # read_sz => Number of bytes to read from the file 820 | # fd_output => Default None. If none then read into buffer otherwise 821 | # The handle to the output file 822 | #################################################################################### 823 | def __read( self, fd, offset, read_sz, fd_output=None ): 824 | bytes_read = 0 825 | buf = '' 826 | try: 827 | if self.__useWin32 == False: 828 | fd.seek( offset, 0) 829 | if read_sz > 0x10000000: 830 | read_step = 0x01500000 831 | buf = '' 832 | while bytes_read < read_sz: 833 | step = min( read_step, read_sz - bytes_read ) 834 | if not fd_output == None: 835 | fd_output.write(fd.read( step )) 836 | else: 837 | buf += fd.read( step ) 838 | bytes_read += step 839 | else: 840 | if not fd_output == None: 841 | fd_output.write(fd.read(read_sz)) 842 | bytes_read += read_sz 843 | else: 844 | buf += fd.read(read_sz) 845 | bytes_read += read_sz 846 | else: 847 | if read_sz > 0x10000000: 848 | read_step = 0x01500000 849 | buf = '' 850 | while bytes_read < read_sz: 851 | win32file.SetFilePointer( fd, offset + bytes_read, win32file.FILE_BEGIN) 852 | step = min( read_step, read_sz - bytes_read ) 853 | if not fd_output == None: 854 | fd_output.write( win32file.ReadFile( fd, step)[1] ) 855 | else: 856 | buf += win32file.ReadFile(fd, step)[1] 857 | bytes_read += step 858 | else: 859 | win32file.SetFilePointer( fd, offset, win32file.FILE_BEGIN) 860 | if not fd_output == None: 861 | buff = win32file.ReadFile( fd, read_sz)[1] 862 | fd_output.write( buff ) 863 | bytes_read = read_sz 864 | else: 865 | buf += win32file.ReadFile( fd, read_sz)[1] 866 | bytes_read = read_sz 867 | except: 868 | self.config['logger'].error( traceback.format_exc()) 869 | self.config['logger'].debug("offset(%08x), readsize (%08x) fd (%08x)" % ( offset, read_sz, fd)) 870 | self.config['logger'].debug("stack %s" % traceback.print_stack() ) 871 | return (buf, bytes_read) 872 | 873 | #################################################################################### 874 | # __get_wildcard_children: Get the children of the wildcarded directory location 875 | # path: is a tuple containing the base path and the wildcard 876 | # TODO Move this someplace else in the file 877 | #################################################################################### 878 | def __get_wildcard_children( self, path ): 879 | copy_list = [] 880 | 
table, x, seq_path = self.__get_file_mft_seqid( path[0] ) 881 | if seq_path == None: 882 | return copy_list 883 | # Test if the last value seq_path[-1] is the directory we are looking for 884 | if path[1] == None: 885 | if seq_path[-1][1] == path[0][-1]: 886 | copy_list.append( path[0] ) 887 | 888 | # get children of found path and find all that match wildcard. 889 | ret = self.__getChildIndex( seq_path[-1][0] ) 890 | for x in ret: 891 | if path[1] == None: 892 | break 893 | l_name = ret[x].lower() 894 | l_reg = re.escape(path[1]).replace('\\*', '.*') 895 | if not l_reg[-1] == '*': 896 | l_reg += '$' 897 | if re.match( l_reg, l_name ): 898 | l_name = path[0] + [ l_name ] 899 | copy_list.append( l_name ) 900 | return copy_list 901 | 902 | #################################################################################### 903 | # __get_file_mft_seqid: Wrapper used to search for the file in the current memory mft 904 | # metadata list then process the rest of the path from parsing the MFT 905 | # tmp_path: List of the source path 906 | #################################################################################### 907 | def __get_file_mft_seqid( self, tmp_path ): 908 | index = 5 909 | seq_path = [(index,None)] 910 | table = self.__MFT_lookup_table[self.config['driveLetter']][index] 911 | table, tmp_path, seq_path = self.__find_last_known_path( table, tmp_path, seq_path ) 912 | table, tmp_path, seq_path = self.__search_mft( table, tmp_path, seq_path ) 913 | return table, tmp_path, seq_path 914 | 915 | #################################################################################### 916 | # __process_wildcards: Called when a wildcard was detected in the source filename. 917 | # Parses the wildcards and breaks up into sections then the paths are expanded 918 | # and each matching record is copied. 919 | # filename: Filename containing the wildcards 920 | # table: Pointer to the root of the mft Metadata table 921 | #################################################################################### 922 | def __process_wildcards( self, filename, table ): 923 | filename = filename.lower() 924 | if not '*' in filename: 925 | return False 926 | if filename[1:3] == ":\\": 927 | filename = filename[3:] 928 | 929 | index = 5 930 | seq_path = [(index,None)] 931 | tmp_path = filename.split( os.sep ) 932 | path = [] 933 | path_start = 0 934 | for ind in range( len(tmp_path)): 935 | if "*" in tmp_path[ind]: 936 | path.append( ( tmp_path[ path_start : ind ], tmp_path[ind]) ) 937 | path_start = ind + 1 938 | if path_start < len(tmp_path): 939 | path.append( ( tmp_path[ path_start : ], None) ) 940 | 941 | tList = [] 942 | for iPath in path: 943 | tList = self.__regexsearch( iPath, tList ) 944 | return tList 945 | 946 | #################################################################################### 947 | # __regexsearch: Searches the path to determine if it matches the wildcard. Only the 948 | # '*' wildcard is supported. 
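# Example of the wildcard-to-regex translation performed in __get_wildcard_children above: 'ntuser*' escapes to 'ntuser.*' (trailing '*', so no '$' is appended), while 'nt*.dat' becomes 'nt.*\.dat$'; both are matched against lowercased child names.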
949 | # path: A (base path components, wildcard) tuple to match 950 | # tList: List of previously expanded path prefixes (empty on the first call) 951 | #################################################################################### 952 | def __regexsearch( self, path, tList ): 953 | if tList == []: 954 | findPaths = [ path ] 955 | else: 956 | findPaths = [] 957 | for ePath in tList: 958 | findPaths.append( ( ePath + path[0], path[1] )) 959 | ret = [] 960 | for fp in findPaths: 961 | found = self.__get_wildcard_children( fp ) 962 | ret.extend( found ) 963 | return ret 964 | 965 | 966 | def __get_local_drives(self): 967 | """Returns a list containing letters from local drives""" 968 | drive_list = win32api.GetLogicalDriveStrings() 969 | drive_list = drive_list.split("\x00")[0:-1] # the last element is "" 970 | list_local_drives = [] 971 | for letter in drive_list: 972 | if win32file.GetDriveType(letter) == win32file.DRIVE_FIXED: 973 | list_local_drives.append(letter) 974 | return list_local_drives 975 | 976 | #################################################################################### 977 | # Copy file from a single source file or directory. Wildcards (*) are acceptable 978 | # src_filename: Can be a filename, directory, or a wildcard 979 | # dest_filename: The root directory to save files to. Each will create a mirror path 980 | # Example: dest_filename = 'c:\test\' and copying "c:\windows\somefile" 981 | # the output file will have the path of "c:\test\windows\somefile" 982 | # bRecursive: Tells the copy to recursively copy a directory. Only works with directories 983 | #################################################################################### 984 | def copy( self, src_filename, dest_filename, bRecursive=False ): 985 | self.__useWin32 = True 986 | if not (dest_filename[-1] == '/' or dest_filename[-1] == '\\'): 987 | dest_filename = dest_filename+os.sep 988 | self.config['outputbasedir'] = dest_filename 989 | if type(src_filename) == unicode: 990 | src_filename = src_filename.encode('ascii', 'ignore') 991 | if not type( src_filename ) == str: 992 | self.config['logger'].error("INVALID src type (%r)" % (src_filename ) ) 993 | return 994 | src_filename = os.path.abspath( src_filename ) 995 | src_filename = [ src_filename ] 996 | for filename in src_filename: 997 | driveLetter = None 998 | if self.__useWin32 == True: 999 | self.config['logger'].debug( 'filename %r' % filename) 1000 | if not filename[:4].lower() == '\\\\.\\': 1001 | targetDrive = '\\\\.\\'+filename[:2] 1002 | else: 1003 | targetDrive = filename[:6] 1004 | 1005 | driveLetter = targetDrive[-2] 1006 | if driveLetter == '*': 1007 | for drive in self.__get_local_drives(): 1008 | self.__copyfile( filename.replace("*", drive[0], 1), bRecursive=bRecursive ) 1009 | else: 1010 | self.__copyfile( filename, bRecursive=bRecursive ) 1011 | 1012 | 1013 | 1014 | 1015 | 1016 | 1017 | --------------------------------------------------------------------------------
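Worked example (a standalone sketch, not part of the repository sources; the values are made up): the two bit-level conventions MFT.py leans on most are the 64-bit MFT reference, which packs a 48-bit record number under a 16-bit sequence number, and the runlist header byte, whose low nibble gives the size of the run-length field and whose high nibble gives the size of the (signed, relative) run-offset field.

    # MFT reference packing, mirroring MREF()/MSEQNO() above:
    mft_reference = 0x0003000000000021
    record_number = mft_reference & 0xFFFFFFFFFFFF    # low 48 bits  -> 0x21
    sequence_number = (mft_reference >> 48) & 0xFFFF  # high 16 bits -> 0x3

    # Runlist header byte, mirroring Runentry: the raw run 21 18 34 56 decodes
    # to a run of 0x18 clusters at relative cluster offset 0x5634.
    header = 0x21
    length_length = header & 0x0F  # 1 byte of run length
    offset_length = header >> 4    # 2 bytes of run offset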