├── TScopy
│   ├── __init__.py
│   ├── BinaryParser.py
│   ├── MFT.py
│   └── tscopy.py
├── dist
│   ├── TScopy_x64.exe
│   └── TScopy_x86.exe
├── README_imgs
│   └── Blog_061120.png
├── TScopy.spec
├── tscopy.py
└── README.md

/TScopy/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/dist/TScopy_x64.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/dist/TScopy_x64.exe
--------------------------------------------------------------------------------
/dist/TScopy_x86.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/dist/TScopy_x86.exe
--------------------------------------------------------------------------------
/README_imgs/Blog_061120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/trustedsec/tscopy/HEAD/README_imgs/Blog_061120.png
--------------------------------------------------------------------------------
/TScopy.spec:
--------------------------------------------------------------------------------
1 | # -*- mode: python ; coding: utf-8 -*-
2 | import platform
3 | import sys
4 | 
5 | operating_sys = platform.system()
6 | 
7 | # Compute the platform-specific binary name
8 | suffix = ""
9 | if operating_sys == "Windows":
10 |     if sys.maxsize > 2**32:
11 |         suffix = '_x64.exe'
12 |     else:
13 |         suffix = '_x86.exe'
14 | 
15 | binary_name = "TScopy" + suffix
16 | block_cipher = None
17 | 
18 | 
19 | a = Analysis(['tscopy.py'],
20 |              pathex=['Z:\\'],
21 |              binaries=[],
22 |              datas=[],
23 |              hiddenimports=[],
24 |              hookspath=[],
25 |              runtime_hooks=[],
26 |              excludes=[],
27 |              win_no_prefer_redirects=False,
28 |              win_private_assemblies=False,
29 |              cipher=block_cipher,
30 |              noarchive=False)
31 | pyz = PYZ(a.pure, a.zipped_data,
32 |           cipher=block_cipher)
33 | exe = EXE(pyz,
34 |           a.scripts,
35 |           a.binaries,
36 |           a.zipfiles,
37 |           a.datas,
38 |           [],
39 |           name=binary_name,
40 |           debug=False,
41 |           bootloader_ignore_signals=False,
42 |           strip=False,
43 |           upx=True,
44 |           upx_exclude=[],
45 |           runtime_tmpdir=None,
46 |           console=True)
--------------------------------------------------------------------------------
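The spec above drives the PyInstaller build that produces the standalone executables in dist/. Assuming PyInstaller is installed, the build is typically invoked as:

```
pyinstaller TScopy.spec
```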
/tscopy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | """
4 | This project is based off the work from the following projects:
5 |     * https://github.com/williballenthin/python-ntfs
6 |     * https://github.com/jschicht/RawCopy
7 | """
8 | 
9 | # TODO: Parsing of command line input for multiple files needs to be more robust
10 | import logging
11 | import sys
12 | import os
13 | import argparse
14 | import traceback
15 | import time
16 | import ctypes
17 | 
18 | from TScopy.tscopy import TScopy
19 | 
20 | log = logging.getLogger("tscopy")
21 | log.setLevel(logging.INFO)
22 | handler = logging.StreamHandler(sys.stdout)
23 | handler.setLevel(logging.DEBUG)
24 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
25 | handler.setFormatter(formatter)
26 | log.addHandler(handler)
27 | 
28 | def check_administrative_rights():
29 |     if ctypes.windll.shell32.IsUserAnAdmin() == 0:
30 |         log.info("TrustedIR Collector must run with administrative privileges")
31 |         print "ERROR: TrustedIR Collector must run with administrative privileges\nPress ENTER to finish..."
32 |         sys.stdin.readline()
33 |         return False
34 |     return True
35 | 
36 | def parseArgs():
37 |     parser = argparse.ArgumentParser( description="Copy protected files by parsing the MFT. Must be run with Administrator privileges", usage="""\
38 | 
39 | TScopy_x64.exe -r -o c:\\test -f c:\\users\\tscopy\\ntuser.dat
40 |     Description: Copies only the ntuser.dat file to the c:\\test directory
41 | TScopy_x64.exe -o c:\\test -f c:\\Windows\\system32\\config
42 |     Description: Copies all files in the config directory but does not copy the directories under it.
43 | TScopy_x64.exe -r -o c:\\test -f c:\\Windows\\system32\\config
44 |     Description: Copies all files and subdirectories in the config directory.
45 | TScopy_x64.exe -r -o c:\\test -f c:\\users\\*\\ntuser*,c:\\Windows\\system32\\config
46 |     Description: Uses wildcards and lists to copy any file beginning with ntuser under each user account and recursively copies the registry hives.
47 | """)
48 |     parser.add_argument('-f', '--file', help="Full path of the file or directory to be copied. Filenames can be grouped in a comma ',' separated list. Wildcard '*' is accepted." )
49 |     parser.add_argument('-o', '--outputdir', help="Directory to copy files to. Copy will keep the original paths" )
50 |     parser.add_argument('-i', '--ignore_saved_ref_nums', action='store_true', help="The script stores MFT reference numbers and path info to speed up later runs. This option ignores and does not save the stored MFT reference numbers and paths")
51 |     parser.add_argument('-r', '--recursive', action='store_true', help="Recursively copies a directory. Note this only works with directories.")
52 |     parser.add_argument('--debug', action='store_true', help=argparse.SUPPRESS)
53 | 
54 |     args = parser.parse_args()
55 |     if args.debug:
56 |         log.setLevel(logging.DEBUG)
57 | 
58 |     if args.file:
59 |         process_files = []
60 |         for name in args.file.split(','):
61 |             process_files.append( name )
62 |     else:
63 |         log.error("\nError: the --file option is required\n\n")
64 |         parser.print_help()
65 |         sys.exit(1)
66 | 
67 |     if args.outputdir:
68 |         tmp_dir = args.outputdir
69 |         if tmp_dir[-1] == os.sep:
70 |             tmp_dir = tmp_dir[:-1]
71 | 
72 |         if not os.path.isdir( tmp_dir ):
73 |             log.error("Error: output destination (%s) not found\n\n" % tmp_dir )
74 |             parser.print_help()
75 |             sys.exit(1)
76 |         args.outputdir = tmp_dir
77 |     return { 'files': process_files,
78 |              'outputbasedir': args.outputdir,
79 |              'debug': args.debug,
80 |              'recursive': args.recursive,
81 |              'ignore_table': args.ignore_saved_ref_nums
82 |            }
83 | 
84 | if __name__ == '__main__':
85 |     start = time.time()
86 |     args = parseArgs()
87 |     if not check_administrative_rights():
88 |         sys.exit(1)
89 | 
90 |     config = {
91 |         'pickledir': args['outputbasedir'],
92 |         'debug': args['debug'],
93 |         'logger': log,
94 |         'ignore_table': args['ignore_table']}
95 | 
96 |     try:
97 |         tscopy = TScopy()
98 |         tscopy.setConfiguration( config )
99 |         dst_path = args['outputbasedir']
100 |         for src in args['files']:
101 |             try:
102 |                 tscopy.copy( src, dst_path, bRecursive=args['recursive'])
103 |             except:
104 |                 log.error( traceback.format_exc() )
105 |     except:
106 |         log.error( traceback.format_exc() )
107 | 
108 |     log.info("Job took %r seconds" % (time.time()-start))
109 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TScopy
2 | ![TScopy Logo](/README_imgs/Blog_061120.png)
3 | 
4 | Updated 2022-03-31
5 | 
6 | ## Introducing TScopy
7 | During an Incident Response (IR) engagement, it is a requirement to be able to analyze files on the filesystem. Sometimes these files are locked by the operating system (OS) because they are in use, which is particularly frustrating with event logs and registry hives. TScopy allows a user running with administrator privileges to access locked files by parsing out their raw location in the filesystem and copying them without asking the OS.
8 | 
9 | There are other tools that perform similar functions, such as RawCopy, which we have used and which is the basis for this tool. However, RawCopy has some disadvantages that led us to develop TScopy, including performance, size, and the difficulty of incorporating it into other tools.
10 | 
11 | This blog is intended to introduce TScopy but also to ask for assistance. As in all software development, the more a tool is used, the more edge cases are found. We ask that people try out the tool and report any bugs.
12 | 
13 | ## What is TScopy?
14 | TScopy is a Python script used to parse the NTFS $MFT file to locate and copy specific files. By parsing the Master File Table (MFT), the script bypasses operating system locks on files. The script was originally based on the work of RawCopy. RawCopy is written in AutoIt and is difficult to modify for our purposes. The decision to port RawCopy to Python was driven by the need to incorporate this functionality natively into our toolset.
16 | 
16 | TScopy is designed to be run as a standalone program or included as a Python module. The Python implementation makes use of the python-ntfs tools found at https://github.com/williballenthin/python-ntfs. TScopy builds upon the base functionality of python-ntfs to isolate the location of each file on the raw disk. A sketch of module use is shown below.
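For module use, a minimal sketch that mirrors the `__main__` driver in tscopy.py above — the config keys and the `copy()` call are taken from that script, while the paths here are only illustrative:

```
import logging

from TScopy.tscopy import TScopy

log = logging.getLogger("tscopy")

# Config keys mirror those built in tscopy.py's __main__ block
config = {
    'pickledir': 'c:\\test',   # where the MFT reference-number cache is pickled
    'debug': False,
    'logger': log,
    'ignore_table': False,     # True skips loading/saving the cached reference numbers
}

tscopy = TScopy()              # a singleton, so MFT metadata is reused across copies
tscopy.setConfiguration(config)
tscopy.copy('c:\\users\\tscopy\\ntuser.dat', 'c:\\test', bRecursive=False)
```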
17 | 
18 | ## What makes TScopy different?
19 | TScopy is written in Python and organized into classes to make it more maintainable and readable than the AutoIt original. AutoIt can be flagged as malicious by anti-virus or detection software because some malware has made use of it.
20 | 
21 | The major differences between TScopy and RawCopy are the ability to copy multiple files per execution and to cache the file structure. As shown in the image below, TScopy has options to copy a single file, multiple comma-delimited files, the contents of a directory, wildcarded paths (individual files or directories), and recursive directories.
22 | 
23 | TScopy caches the location of each directory and file as it iterates the target file's full path. It then uses this cache to optimize the search for any other files, ensuring future file copies are performed much faster. This is a significant advantage over RawCopy, which iterates over the entire path for each file. A toy sketch of the idea is shown below.
24 | 
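The caching amounts to memoizing each resolved path component. The following is an illustrative toy, not TScopy's actual code; the hypothetical `lookup_child` stands in for the MFT directory-index walk that resolves one child name under a parent record:

```
# Memoize each resolved path component so later lookups reuse the
# longest already-resolved prefix instead of re-walking the MFT.
cache = {}  # path prefix -> MFT record number

def resolve(path, lookup_child):
    record = None
    prefix = ''
    for name in path.strip('\\').split('\\'):
        prefix = prefix + '\\' + name if prefix else name
        if prefix in cache:
            record = cache[prefix]               # cache hit: no MFT walk
        else:
            record = lookup_child(record, name)  # walk parent's index in the MFT
            cache[prefix] = record
    return record
```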
25 | ## TScopy Options
26 | ```
27 | .\TScopy_x64.exe -h
28 | 
29 | usage:
30 | TScopy_x64.exe -r -o c:\test -f c:\users\tscopy\ntuser.dat
31 |     Description: Copies only the ntuser.dat file to the c:\test directory
32 | TScopy_x64.exe -o c:\test -f c:\Windows\system32\config
33 |     Description: Copies all files in the config directory but does not copy the directories under it.
34 | TScopy_x64.exe -r -o c:\test -f c:\Windows\system32\config
35 |     Description: Copies all files and subdirectories in the config directory.
36 | TScopy_x64.exe -r -o c:\test -f c:\users\*\ntuser*,c:\Windows\system32\config
37 |     Description: Uses wildcards and lists to copy any file beginning with ntuser under each user account and recursively copies the registry hives.
38 | 
39 | 
40 | Copy protected files by parsing the MFT. Must be run with Administrator privileges
41 | 
42 | optional arguments:
43 |   -h, --help            show this help message and exit
44 |   -f FILE, --file FILE  Full path of the file or directory to be copied.
45 |                         Filenames can be grouped in a comma ',' separated
46 |                         list. Wildcard '*' is accepted.
47 |   -o OUTPUTDIR, --outputdir OUTPUTDIR
48 |                         Directory to copy files to. Copy will keep the original paths
49 |   -i, --ignore_saved_ref_nums
50 |                         The script stores MFT reference numbers and path info
51 |                         to speed up later runs. This option ignores and does
52 |                         not save the stored MFT reference numbers and paths
53 |   -r, --recursive       Recursively copies a directory. Note this only works
54 |                         with directories.
55 | ```
56 | There is a hidden option '--debug', which enables debug output.
57 | 
58 | ## Examples
59 | ```code
60 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM -o e:\outputdir
61 | ```
62 | Copies the SYSTEM registry hive to e:\outputdir.
63 | The new file will be located at e:\outputdir\windows\system32\config\SYSTEM
64 | ```code
65 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM -o e:\outputdir -i
66 | ```
67 | Copies the SYSTEM registry hive to e:\outputdir but ignores any previously cached MFT reference numbers and does not save the current cache to disk.
68 | 
69 | ```code
70 | TScopy_x64.exe -f c:\windows\system32\config\SYSTEM,c:\windows\system32\config\SOFTWARE -o e:\outputdir
71 | ```
72 | Copies the SYSTEM and SOFTWARE registry hives to e:\outputdir.
73 | 
74 | ```code
75 | TScopy_x64.exe -f c:\windows\system32\config\ -o e:\outputdir
76 | ```
77 | Copies the contents of the config directory to e:\outputdir.
78 | 
79 | ```code
80 | TScopy_x64.exe -r -f c:\windows\system32\config\ -o e:\outputdir
81 | ```
82 | Recursively copies the contents of the config directory to e:\outputdir.
83 | 
84 | ```code
85 | TScopy_x64.exe -f c:\users\*\ntuser.dat -o e:\outputdir
86 | ```
87 | Copies each user's NTUSER.DAT file to e:\outputdir.
88 | 
89 | ```code
90 | TScopy_x64.exe -f c:\users\*\ntuser.dat* -o e:\outputdir
91 | ```
92 | For each user, copies all files that begin with NTUSER.DAT to e:\outputdir.
93 | 
94 | ```code
95 | TScopy_x64.exe -f c:\users\*\AppData\Roaming\Microsoft\Windows\Recent,c:\windows\system32\config,c:\users\*\AppData\Roaming\Microsoft\Windows\PowerShell\PSReadLine\ConsoleHost_history.txt -o e:\outputdir
96 | ```
97 | For each user, copies all jump lists, registry hives, and PowerShell command history to e:\outputdir.
98 | 
99 | ## Bug Reporting Information
100 | Please report bugs in the issues section of the GitHub page.
101 | 
102 | ## Bug Fixes and Enhancements
103 | ### Version 4.0
104 | - Corrected copying of files containing sparse data. Issue #13 (Error copying c:\$extend\$usnjrnl$j)
105 | - Files are no longer read into memory before writing to disk. Writes are now performed one data run at a time, which should reduce memory usage on large files. A sketch of the approach is shown below.
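A minimal sketch of that per-run copy loop — illustrative, not TScopy's exact code; `volume` and `outfile` are hypothetical file objects, and the (offset, length) cluster runs are as yielded by `Runlist.runs()` in MFT.py below, where a sparse run carries offset 0:

```
def copy_runs(volume, outfile, runs, cluster_size, file_size):
    remaining = file_size
    for offset, length in runs:
        count = min(length * cluster_size, remaining)
        if offset == 0:                    # sparse run: no backing clusters
            outfile.write(b'\x00' * count)
        else:
            volume.seek(offset * cluster_size)
            outfile.write(volume.read(count))
        remaining -= count
        if remaining <= 0:
            break
```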
106 | ### Version 3.0
107 | - Added support for Alternate Data Streams (ADS). Requesting the base file also copies its ADS streams.
108 | - Wildcard for the drive letter (fixed drives only). Example: "\*:\$MFT" will find the $MFT on all local drives.
109 | - Logging issues. Failed copies are reported as failures again.
110 | - Filepath size limit of 256 characters removed.
111 | ### Version 2.0
112 | - Issue 1: Changed sys.exit calls to raise Exception
113 | - Issue 2: Fixed the double copying of files (full name and short name).
114 | - Issue 3: Added the ability to recursively copy a directory
115 | - Issue 4: Added support for wildcards in the path. Currently only supports *
116 | - Issue 5: Removed the hardcoded MFT size. The MFT size is now determined from the boot sector.
117 | - Issue 6: Converted the TScopy class into a singleton. This allows the class to be instantiated once and reuse the current MFT metadata object for all copies.
118 | - Issue 7: Attribute type ATTRIBUTE_LIST is now being handled.
119 | - Issue 9: Attribute type ATTRIBUTE_LIST was not handled for files. This caused a silent failure for files like the SOFTWARE registry hive.
120 | - Changes: General comments have been added to the code
121 | - Changes: Input parameters have changed. Reduced the three (3) different options --file, --list, and --directory to --file.
122 | - Changes: Backend restructuring to support new features.
123 | 
124 | ## TODO:
--------------------------------------------------------------------------------
/TScopy/BinaryParser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # This file is part of python-evtx.
3 | #
4 | # Copyright 2012, 2013 Willi Ballenthin
5 | # while at Mandiant
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | #   http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | # Version v.0.1
20 | import mmap
21 | import sys
22 | import types
23 | import struct
24 | import logging
25 | import cPickle
26 | from datetime import datetime
27 | 
28 | g_logger = logging.getLogger("ntfs.BinaryParser")
29 | 
30 | 
31 | def unpack_from(fmt, buf, off=0):
32 |     """
33 |     Shim struct.unpack_from and divert unpacking of __unpackable__ things.
34 | 
35 |     Otherwise, you'd get an exception like:
36 |       TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap
37 | 
38 |     So, we extract a true sub-buffer from the FileMap, and feed this
39 |     back into the old unpack function.
40 |     There's an extra allocation and copy, but there's no getting
41 |     around that.
42 |     """
43 |     if isinstance(buf, basestring):
44 |         return struct.unpack_from(fmt, buf, off)
45 |     elif not hasattr(buf, "__unpackable__"):
46 |         return struct.unpack_from(fmt, buf, off)
47 |     else:
48 |         size = struct.calcsize(fmt)
49 |         buf = buf[off:off + size]
50 |         return struct.unpack_from(fmt, buf, 0x0)
51 | 
52 | 
53 | def unpack(fmt, buf):
54 |     """
55 |     Like the shimmed unpack_from, but for struct.unpack.
56 |     """
57 |     if isinstance(buf, basestring):
58 |         return struct.unpack(fmt, buf)
59 |     elif not hasattr(buf, "__unpackable__"):
60 |         return struct.unpack(fmt, buf)
61 |     else:
62 |         size = struct.calcsize(fmt)
63 |         buf = buf[:size]
64 |         return struct.unpack(fmt, buf)  # struct.unpack takes no offset argument
65 | 
66 | 
67 | class Mmap(object):
68 |     """
69 |     Convenience class for opening a read-only memory map for a file path.
70 | """ 71 | def __init__(self, filename): 72 | super(Mmap, self).__init__() 73 | self._filename = filename 74 | self._f = None 75 | self._mmap = None 76 | 77 | def __enter__(self): 78 | self._f = open(self._filename, "rb") 79 | self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ) 80 | return self._mmap 81 | 82 | def __exit__(self, type, value, traceback): 83 | self._mmap.close() 84 | self._f.close() 85 | 86 | 87 | def hex_dump(src, start_addr=0): 88 | """ 89 | see: 90 | http://code.activestate.com/recipes/142812-hex-dumper/ 91 | @param src A bytestring containing the data to dump. 92 | @param start_addr An integer representing the start 93 | address of the data in whatever context it comes from. 94 | @return A string containing a classic hex dump with 16 95 | bytes per line. If start_addr is provided, then the 96 | data is interpreted as starting at this offset, and 97 | the offset column is updated accordingly. 98 | """ 99 | FILTER = ''.join([(len(repr(chr(x))) == 3) and 100 | chr(x) or 101 | '.' for x in range(256)]) 102 | length = 16 103 | result = [] 104 | 105 | remainder_start_addr = start_addr 106 | 107 | if start_addr % length != 0: 108 | base_addr = start_addr - (start_addr % length) 109 | num_spaces = (start_addr % length) 110 | num_chars = length - (start_addr % length) 111 | 112 | spaces = " ".join([" " for i in xrange(num_spaces)]) 113 | s = src[0:num_chars] 114 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 115 | printable = s.translate(FILTER) 116 | 117 | result.append("%04X %s %s %s%s\n" % 118 | (base_addr, spaces, hexa, 119 | " " * (num_spaces + 1), printable)) 120 | 121 | src = src[num_chars:] 122 | remainder_start_addr = base_addr + length 123 | 124 | for i in xrange(0, len(src), length): 125 | s = src[i:i + length] 126 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 127 | printable = s.translate(FILTER) 128 | result.append("%04X %-*s %s\n" % 129 | (remainder_start_addr + i, length * 3, 130 | hexa, printable)) 131 | 132 | return ''.join(result) 133 | 134 | 135 | class decoratorargs(object): 136 | def __new__(typ, *attr_args, **attr_kwargs): 137 | def decorator(orig_func): 138 | self = object.__new__(typ) 139 | self.__init__(orig_func, *attr_args, **attr_kwargs) 140 | return self 141 | return decorator 142 | 143 | 144 | class memoize(decoratorargs): 145 | class Node: 146 | __slots__ = ['key', 'value', 'older', 'newer'] 147 | 148 | def __init__(self, key, value, older=None, newer=None): 149 | self.key = key 150 | self.value = value 151 | self.older = older 152 | self.newer = newer 153 | 154 | def __init__(self, func, capacity=1000, 155 | keyfunc=lambda *args, **kwargs: cPickle.dumps((args, 156 | kwargs))): 157 | if not isinstance(func, property): 158 | self.func = func 159 | self.name = func.__name__ 160 | self.is_property = False 161 | else: 162 | self.func = func.fget 163 | self.name = func.fget.__name__ 164 | self.is_property = True 165 | self.capacity = capacity 166 | self.keyfunc = keyfunc 167 | self.reset() 168 | 169 | def reset(self): 170 | self.mru = self.Node(None, None) 171 | self.mru.older = self.mru.newer = self.mru 172 | self.nodes = {self.mru.key: self.mru} 173 | self.count = 1 174 | self.hits = 0 175 | self.misses = 0 176 | 177 | def __get__(self, inst, clas): 178 | self.obj = inst 179 | if self.is_property: 180 | return self.__call__() 181 | else: 182 | return self 183 | 184 | def __call__(self, *args, **kwargs): 185 | key = self.keyfunc(*args, **kwargs) 186 | try: 187 | node = self.nodes[key] 188 | except KeyError: 189 | # We have an 
entry not in the cache 190 | self.misses += 1 191 | func = types.MethodType(self.func, self.obj, self.name) 192 | value = func(*args, **kwargs) 193 | lru = self.mru.newer # Always true 194 | # If we haven't reached capacity 195 | if self.count < self.capacity: 196 | # Put it between the MRU and LRU - it'll be the new MRU 197 | node = self.Node(key, value, self.mru, lru) 198 | self.mru.newer = node 199 | 200 | lru.older = node 201 | self.mru = node 202 | self.count += 1 203 | else: 204 | # It's FULL! We'll make the LRU be the new MRU, but replace its 205 | # value first 206 | try: 207 | del self.nodes[lru.key] # This mapping is now invalid 208 | except KeyError: # HACK TODO: this may not work/leak 209 | pass 210 | lru.key = key 211 | lru.value = value 212 | self.mru = lru 213 | 214 | # Add the new mapping 215 | self.nodes[key] = self.mru 216 | return value 217 | 218 | # We have an entry in the cache 219 | self.hits += 1 220 | 221 | # If it's already the MRU, do nothing 222 | if node is self.mru: 223 | return node.value 224 | 225 | lru = self.mru.newer # Always true 226 | 227 | # If it's the LRU, update the MRU to be it 228 | if node is lru: 229 | self.mru = lru 230 | return node.value 231 | 232 | # Remove the node from the list 233 | node.older.newer = node.newer 234 | node.newer.older = node.older 235 | 236 | # Put it between MRU and LRU 237 | node.older = self.mru 238 | self.mru.newer = node 239 | 240 | node.newer = lru 241 | lru.older = node 242 | 243 | self.mru = node 244 | return node.value 245 | 246 | 247 | def align(offset, alignment): 248 | """ 249 | Return the offset aligned to the nearest greater given alignment 250 | Arguments: 251 | - `offset`: An integer 252 | - `alignment`: An integer 253 | """ 254 | if offset % alignment == 0: 255 | return offset 256 | return offset + (alignment - (offset % alignment)) 257 | 258 | 259 | def dosdate(dosdate, dostime): 260 | """ 261 | `dosdate`: 2 bytes, little endian. 262 | `dostime`: 2 bytes, little endian. 263 | returns: datetime.datetime or datetime.datetime.min on error 264 | """ 265 | try: 266 | t = ord(dosdate[1]) << 8 267 | t |= ord(dosdate[0]) 268 | day = t & 0b0000000000011111 269 | month = (t & 0b0000000111100000) >> 5 270 | year = (t & 0b1111111000000000) >> 9 271 | year += 1980 272 | 273 | t = ord(dostime[1]) << 8 274 | t |= ord(dostime[0]) 275 | sec = t & 0b0000000000011111 276 | sec *= 2 277 | minute = (t & 0b0000011111100000) >> 5 278 | hour = (t & 0b1111100000000000) >> 11 279 | 280 | return datetime.datetime(year, month, day, hour, minute, sec) 281 | except: 282 | return datetime.datetime.min 283 | 284 | 285 | def parse_filetime(qword): 286 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 287 | return datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 288 | 289 | 290 | class BinaryParserException(Exception): 291 | """ 292 | Base Exception class for binary parsing. 293 | """ 294 | def __init__(self, value): 295 | """ 296 | Constructor. 297 | Arguments: 298 | - `value`: A string description. 299 | """ 300 | super(BinaryParserException, self).__init__() 301 | self._value = value 302 | 303 | def __repr__(self): 304 | return "BinaryParserException(%r)" % (self._value) 305 | 306 | def __str__(self): 307 | return "Binary Parser Exception: %s" % (self._value) 308 | 309 | 310 | class ParseException(BinaryParserException): 311 | """ 312 | An exception to be thrown during binary parsing, such as 313 | when an invalid header is encountered. 
314 | """ 315 | def __init__(self, value): 316 | """ 317 | Constructor. 318 | Arguments: 319 | - `value`: A string description. 320 | """ 321 | super(ParseException, self).__init__(value) 322 | 323 | def __repr__(self): 324 | return "ParseException(%r)" % (self._value) 325 | 326 | def __str__(self): 327 | return "Parse Exception(%s)" % (self._value) 328 | 329 | 330 | class OverrunBufferException(ParseException): 331 | def __init__(self, readOffs, bufLen): 332 | tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen)) 333 | super(ParseException, self).__init__(tvalue) 334 | 335 | def __repr__(self): 336 | return "OverrunBufferException(%r)" % (self._value) 337 | 338 | def __str__(self): 339 | return "Tried to parse beyond the end of the file (%s)" % \ 340 | (self._value) 341 | 342 | 343 | def read_byte(buf, offset): 344 | """ 345 | Returns a little-endian unsigned byte from the relative offset of the given buffer. 346 | Arguments: 347 | - `buf`: The buffer from which to read the value. 348 | - `offset`: The relative offset from the start of the block. 349 | Throws: 350 | - `OverrunBufferException` 351 | """ 352 | try: 353 | return unpack_from(" 1: 438 | raise "Cannot specify both `length` and `count`." 439 | 440 | if offset is None: 441 | offset = self._implicit_offset 442 | 443 | basic_sizes = { 444 | "byte": 1, 445 | "int8": 1, 446 | "word": 2, 447 | "word_be": 2, 448 | "int16": 2, 449 | "dword": 4, 450 | "dword_be": 4, 451 | "int32": 4, 452 | "qword": 8, 453 | "int64": 8, 454 | "float": 4, 455 | "double": 8, 456 | "dosdate": 4, 457 | "filetime": 8, 458 | "systemtime": 8, 459 | "guid": 16, 460 | } 461 | 462 | handler = None 463 | 464 | if isinstance(type_, type): 465 | if not issubclass(type_, Nestable): 466 | raise TypeError("Invalid nested structure") 467 | 468 | typename = type_.__name__ 469 | 470 | if count == 0: 471 | def no_class_handler(): 472 | return 473 | handler = no_class_handler 474 | elif is_generator: 475 | def many_class_handler(): 476 | ofs = offset 477 | for _ in range(count): 478 | r = type_(self._buf, self.absolute_offset(ofs), self) 479 | ofs += len(r) 480 | yield r 481 | handler = many_class_handler 482 | 483 | if hasattr(type_, "structure_size"): 484 | ofs = offset 485 | for _ in range(count): 486 | ofs += type_.structure_size(self._buf, self.absolute_offset(ofs), self) 487 | self._implicit_offset = ofs 488 | else: 489 | ofs = offset 490 | for _ in range(count): 491 | r = type_(self._buf, self.absolute_offset(ofs), self) 492 | ofs += len(r) 493 | self._implicit_offset = ofs 494 | else: 495 | # TODO(wb): this needs to cache/memoize 496 | def class_handler(): 497 | return type_(self._buf, self.absolute_offset(offset), self) 498 | handler = class_handler 499 | 500 | if hasattr(type_, "structure_size"): 501 | size = type_.structure_size(self._buf, self.absolute_offset(offset), self) 502 | self._implicit_offset = offset + size 503 | else: 504 | temp = type_(self._buf, self.absolute_offset(offset), self) 505 | 506 | self._implicit_offset = offset + len(temp) 507 | elif isinstance(type_, basestring): 508 | typename = type_ 509 | 510 | if count == 0: 511 | def no_basic_handler(): 512 | return 513 | handler = no_basic_handler 514 | elif is_generator: 515 | # length must be in basic_sizes 516 | def many_basic_handler(): 517 | ofs = offset 518 | f = getattr(self, "unpack_" + type_) 519 | for _ in range(count): 520 | yield f(ofs) 521 | ofs += basic_sizes[type_] 522 | handler = many_basic_handler 523 | 524 | self._implicit_offset = offset + count * 
basic_sizes[type_] 525 | else: 526 | if length is None: 527 | def basic_no_length_handler(): 528 | f = getattr(self, "unpack_" + type_) 529 | return f(offset) 530 | handler = basic_no_length_handler 531 | 532 | if type_ in basic_sizes: 533 | self._implicit_offset = offset + basic_sizes[type_] 534 | elif type_ == "binary": 535 | self._implicit_offset = offset + length 536 | elif type_ == "string" and length is not None: 537 | self._implicit_offset = offset + length 538 | elif type_ == "wstring" and length is not None: 539 | self._implicit_offset = offset + (2 * length) 540 | elif "string" in type_ and length is None: 541 | raise ParseException("Implicit offset not supported for dynamic length strings") 542 | else: 543 | raise ParseException("Implicit offset not supported for type: " + type_) 544 | else: 545 | def basic_length_handler(): 546 | f = getattr(self, "unpack_" + type_) 547 | return f(offset, length) 548 | handler = basic_length_handler 549 | 550 | if type_ == "wstring": 551 | self._implicit_offset = offset + (2 * length) 552 | else: 553 | self._implicit_offset = offset + length 554 | 555 | setattr(self, name, handler) 556 | setattr(self, "_off_" + name, offset) 557 | self.add_explicit_field(offset, typename, name, length, count) 558 | 559 | def add_explicit_field(self, offset, typename, name, length=None, count=1): 560 | """ 561 | The `Block` class tracks the fields that have been added so that you can 562 | pretty print the structure. If there are other fields a subclass 563 | parses, use `add_explicit_field` to include them in the pretty printing. 564 | @type offset: int 565 | @param offset: The offset at which the field begins. 566 | @type typename: str or Block subclass 567 | @param typename: The type of the value of the field. 568 | @type name: str 569 | @param name: The name of the field. 570 | @type length: int 571 | @param length: An explicit length for the field. 572 | @type count: int 573 | @param count: The number of repetitions for the field. 574 | @rtype: None 575 | @return: None 576 | """ 577 | if type(typename) == type: 578 | typename = typename.__name__ 579 | self._declared_fields.append({ 580 | "offset": offset, 581 | "type": typename, 582 | "name": name, 583 | "length": length, 584 | "count": count, 585 | }) 586 | 587 | def get_all_string(self, indent=0): 588 | """ 589 | Get a nicely formatted, nested string of the contents of this structure 590 | and any sub-structures. If a sub-structure has a method `.string()`, then 591 | this method will use it to represent its value. 592 | Implementation note, can't look for `__str__`, because everything has this. 593 | @type indent: int 594 | @param indent: The level of nesting this objects has. 595 | @rtype: str 596 | @return A nicely formatted string that describes this structure. 
597 | """ 598 | ret = "" 599 | for field in self._declared_fields: 600 | v = getattr(self, field["name"])() 601 | if isinstance(v, Block): 602 | if hasattr(v, "string"): 603 | ret += "%s%s (%s)%s\t%s\n" % \ 604 | (" " * indent, hex(field["offset"]), field["type"], 605 | field["name"], v.string()) 606 | else: 607 | ret += "%s%s (%s)%s\n" % \ 608 | (" " * indent, hex(field["offset"]), field["type"], 609 | field["name"]) 610 | ret += v.get_all_string(indent + 1) 611 | elif isinstance(v, types.GeneratorType): 612 | ret += "%s%s (%s[])%s\n" % (" " * indent, hex(field["offset"]), field["type"], field["name"],) 613 | for i, j in enumerate(v): 614 | ret += "%s[%d] (%s) " % (" " * (indent + 1), i, field["type"]) 615 | if hasattr(j, "get_all_string"): 616 | ret += "\n" + j.get_all_string(indent + 2) 617 | else: 618 | ret += str(j) + "\n" 619 | else: 620 | if isinstance(v, int): 621 | v = hex(v) 622 | ret += "%s%s (%s)%s\t%s\n" % \ 623 | (" " * indent, hex(field["offset"]), field["type"], 624 | field["name"], str(v)) 625 | return ret 626 | 627 | def current_field_offset(self): 628 | return self._implicit_offset 629 | 630 | def unpack_byte(self, offset): 631 | """ 632 | Returns a little-endian unsigned byte from the relative offset. 633 | Arguments: 634 | - `offset`: The relative offset from the start of the block. 635 | Throws: 636 | - `OverrunBufferException` 637 | """ 638 | return read_byte(self._buf, self._offset + offset) 639 | 640 | def unpack_int8(self, offset): 641 | """ 642 | Returns a little-endian signed byte from the relative offset. 643 | Arguments: 644 | - `offset`: The relative offset from the start of the block. 645 | Throws: 646 | - `OverrunBufferException` 647 | """ 648 | o = self._offset + offset 649 | try: 650 | return unpack_from("H", self._buf, o)[0] 677 | except struct.error: 678 | raise OverrunBufferException(o, len(self._buf)) 679 | 680 | def unpack_int16(self, offset): 681 | """ 682 | Returns a little-endian signed WORD (2 bytes) from the 683 | relative offset. 684 | Arguments: 685 | - `offset`: The relative offset from the start of the block. 686 | Throws: 687 | - `OverrunBufferException` 688 | """ 689 | o = self._offset + offset 690 | try: 691 | return unpack_from("I", self._buf, o)[0] 727 | except struct.error: 728 | raise OverrunBufferException(o, len(self._buf)) 729 | 730 | def unpack_int32(self, offset): 731 | """ 732 | Returns a little-endian signed integer (4 bytes) from the 733 | relative offset. 734 | Arguments: 735 | - `offset`: The relative offset from the start of the block. 736 | Throws: 737 | - `OverrunBufferException` 738 | """ 739 | o = self._offset + offset 740 | try: 741 | return unpack_from(" recent_date and \ 192 | fn.accessed_time() > recent_date and \ 193 | fn.changed_time() > recent_date and \ 194 | fn.created_time() > recent_date and \ 195 | fn.modified_time() < future_date and \ 196 | fn.accessed_time() < future_date and \ 197 | fn.changed_time() < future_date and \ 198 | fn.created_time() < future_date 199 | except ValueError: 200 | return False 201 | 202 | 203 | class SII_INDEX_ENTRY(Block, Nestable): 204 | """ 205 | Index entry for the $SECURE:$SII index. 
206 | """ 207 | 208 | def __init__(self, buf, offset, parent): 209 | super(SII_INDEX_ENTRY, self).__init__(buf, offset) 210 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 211 | self.declare_field("dword", "security_id") 212 | 213 | @staticmethod 214 | def structure_size(buf, offset, parent): 215 | return BinaryParser.read_word(buf, offset + 0x8) 216 | 217 | def __len__(self): 218 | return self.header().length() 219 | 220 | def is_valid(self): 221 | # TODO(wb): test 222 | return 1 < self.header().length() < 0x30 and \ 223 | 1 < self.header().key_lenght() < 0x20 224 | 225 | 226 | class SDH_INDEX_ENTRY(Block, Nestable): 227 | """ 228 | Index entry for the $SECURE:$SDH index. 229 | """ 230 | 231 | def __init__(self, buf, offset, parent): 232 | super(SDH_INDEX_ENTRY, self).__init__(buf, offset) 233 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 234 | self.declare_field("dword", "hash") 235 | self.declare_field("dword", "security_id") 236 | 237 | @staticmethod 238 | def structure_size(buf, offset, parent): 239 | return BinaryParser.read_word(buf, offset + 0x8) 240 | 241 | def __len__(self): 242 | return self.header().length() 243 | 244 | def is_valid(self): 245 | # TODO(wb): test 246 | return 1 < self.header().length() < 0x30 and \ 247 | 1 < self.header().key_lenght() < 0x20 248 | 249 | 250 | class INDEX_HEADER_FLAGS: 251 | SMALL_INDEX = 0x0 # MFT: INDX_ROOT only 252 | LARGE_INDEX = 0x1 # MFT: requires INDX_ALLOCATION 253 | LEAF_NODE = 0x1 254 | INDEX_NODE = 0x2 255 | NODE_MASK = 0x1 256 | 257 | 258 | class INDEX_HEADER(Block, Nestable): 259 | def __init__(self, buf, offset, parent): 260 | super(INDEX_HEADER, self).__init__(buf, offset) 261 | self.declare_field("dword", "entries_offset", 0x0) 262 | self.declare_field("dword", "index_length") 263 | self.declare_field("dword", "allocated_size") 264 | self.declare_field("byte", "index_header_flags") # see INDEX_HEADER_FLAGS 265 | # then 3 bytes padding/reserved 266 | 267 | @staticmethod 268 | def structure_size(buf, offset, parent): 269 | return 0x1C 270 | 271 | def __len__(self): 272 | return 0x1C 273 | 274 | def is_small_index(self): 275 | return self.index_header_flags() & INDEX_HEADER_FLAGS.SMALL_INDEX 276 | 277 | def is_large_index(self): 278 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LARGE_INDEX 279 | 280 | def is_leaf_node(self): 281 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LEAF_NODE 282 | 283 | def is_index_node(self): 284 | return self.index_header_flags() & INDEX_HEADER_FLAGS.INDEX_NODE 285 | 286 | def is_NODE_MASK(self): 287 | return self.index_header_flags() & INDEX_HEADER_FLAGS.NODE_MASK 288 | 289 | 290 | class INDEX(Block, Nestable): 291 | def __init__(self, buf, offset, parent, index_entry_class): 292 | self._INDEX_ENTRY = index_entry_class 293 | super(INDEX, self).__init__(buf, offset) 294 | self.declare_field(INDEX_HEADER, "header", 0x0) 295 | self.add_explicit_field(self.header().entries_offset(), 296 | INDEX_ENTRY, "entries") 297 | slack_start = self.header().entries_offset() + self.header().index_length() 298 | # TODO: reenable 299 | # self.add_explicit_field(slack_start, INDEX_ENTRY, "slack_entries") 300 | 301 | @staticmethod 302 | def structure_size(buf, offset, parent): 303 | return BinaryParser.read_dword(buf, offset + 0x8) 304 | 305 | def __len__(self): 306 | return self.header().allocated_size() 307 | 308 | def entries(self): 309 | """ 310 | A generator that returns each INDEX_ENTRY associated with this node. 
311 | """ 312 | offset = self.header().entries_offset() 313 | if offset == 0: 314 | return 315 | while offset <= self.header().index_length() - 0x52: 316 | e = self._INDEX_ENTRY(self._buf, self.offset() + offset, self) 317 | offset += len(e) 318 | yield e 319 | 320 | def slack_entries(self): 321 | """ 322 | A generator that yields INDEX_ENTRYs found in the slack space 323 | associated with this header. 324 | """ 325 | offset = self.header().index_length() 326 | try: 327 | while offset <= self.header().allocated_size() - 0x52: 328 | try: 329 | g_logger.debug("Trying to find slack entry at %s.", hex(offset)) 330 | e = self._INDEX_ENTRY(self._buf, offset, self) 331 | if e.is_valid(): 332 | g_logger.debug("Slack entry is valid.") 333 | offset += len(e) or 1 334 | yield e 335 | else: 336 | g_logger.debug("Slack entry is invalid.") 337 | # TODO(wb): raise a custom exception 338 | raise BinaryParser.ParseException("Not a deleted entry") 339 | except BinaryParser.ParseException: 340 | g_logger.debug("Scanning one byte forward.") 341 | offset += 1 342 | except struct.error: 343 | logging.debug("Slack entry parsing overran buffer.") 344 | pass 345 | 346 | 347 | class INDEX_ROOT(Block, Nestable): 348 | def __init__(self, buf, offset, parent=None): 349 | super(INDEX_ROOT, self).__init__(buf, offset) 350 | self.declare_field("dword", "type", 0x0) 351 | self.declare_field("dword", "collation_rule") 352 | self.declare_field("dword", "index_record_size_bytes") 353 | self.declare_field("byte", "index_record_size_clusters") 354 | self.declare_field("byte", "unused1") 355 | self.declare_field("byte", "unused2") 356 | self.declare_field("byte", "unused3") 357 | self._index_offset = self.current_field_offset() 358 | self.add_explicit_field(self._index_offset, INDEX, "index") 359 | 360 | def index(self): 361 | return INDEX(self._buf, self._offset + self._index_offset, 362 | self, MFT_INDEX_ENTRY) 363 | 364 | @staticmethod 365 | def structure_size(buf, offset, parent): 366 | return 0x10 + INDEX.structure_size(buf, offset + 0x10, parent) 367 | 368 | def __len__(self): 369 | return 0x10 + len(self.index()) 370 | 371 | 372 | class NTATTR_STANDARD_INDEX_HEADER(Block): 373 | def __init__(self, buf, offset, parent): 374 | super(NTATTR_STANDARD_INDEX_HEADER, self).__init__(buf, offset) 375 | self.declare_field("dword", "entry_list_start", 0x0) 376 | self.declare_field("dword", "entry_list_end") 377 | self.declare_field("dword", "entry_list_allocation_end") 378 | self.declare_field("dword", "flags") 379 | self.declare_field("binary", "list_buffer", \ 380 | self.entry_list_start(), 381 | self.entry_list_allocation_end() - self.entry_list_start()) 382 | 383 | def entries(self): 384 | """ 385 | A generator that returns each INDX entry associated with this node. 386 | """ 387 | offset = self.entry_list_start() 388 | if offset == 0: 389 | return 390 | 391 | # 0x52 is an approximate size of a small index entry 392 | while offset <= self.entry_list_end() - 0x52: 393 | e = IndexEntry(self._buf, self.offset() + offset, self) 394 | offset += e.length() 395 | yield e 396 | 397 | def slack_entries(self): 398 | """ 399 | A generator that yields INDX entries found in the slack space 400 | associated with this header. 
401 | """ 402 | offset = self.entry_list_end() 403 | try: 404 | # 0x52 is an approximate size of a small index entry 405 | while offset <= self.entry_list_allocation_end() - 0x52: 406 | try: 407 | e = SlackIndexEntry(self._buf, offset, self) 408 | if e.is_valid(): 409 | offset += e.length() or 1 410 | yield e 411 | else: 412 | # TODO(wb): raise a custom exception 413 | raise BinaryParser.ParseException("Not a deleted entry") 414 | except BinaryParser.ParseException: 415 | # ensure we're always moving forward 416 | offset += 1 417 | except struct.error: 418 | pass 419 | 420 | 421 | class IndexRootHeader(Block): 422 | def __init__(self, buf, offset, parent): 423 | super(IndexRootHeader, self).__init__(buf, offset) 424 | self.declare_field("dword", "type", 0x0) 425 | self.declare_field("dword", "collation_rule") 426 | self.declare_field("dword", "index_record_size_bytes") 427 | self.declare_field("byte", "index_record_size_clusters") 428 | self.declare_field("byte", "unused1") 429 | self.declare_field("byte", "unused2") 430 | self.declare_field("byte", "unused3") 431 | self._node_header_offset = self.current_field_offset() 432 | 433 | def node_header(self): 434 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 435 | self.offset() + self._node_header_offset, 436 | self) 437 | 438 | 439 | class IndexRecordHeader(FixupBlock): 440 | def __init__(self, buf, offset, parent): 441 | super(IndexRecordHeader, self).__init__(buf, offset, parent) 442 | self.declare_field("dword", "magic", 0x0) 443 | self.declare_field("word", "usa_offset") 444 | self.declare_field("word", "usa_count") 445 | self.declare_field("qword", "lsn") 446 | self.declare_field("qword", "vcn") 447 | self._node_header_offset = self.current_field_offset() 448 | self.fixup(self.usa_count(), self.usa_offset()) 449 | 450 | def node_header(self): 451 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 452 | self.offset() + self._node_header_offset, 453 | self) 454 | 455 | 456 | class INDEX_BLOCK(FixupBlock): 457 | def __init__(self, buf, offset, parent=None): 458 | super(INDEX_BLOCK, self).__init__(buf, offset, parent) 459 | self.declare_field("dword", "magic", 0x0) 460 | self.declare_field("word", "usa_offset") 461 | self.declare_field("word", "usa_count") 462 | self.declare_field("qword", "lsn") 463 | self.declare_field("qword", "vcn") 464 | self._index_offset = self.current_field_offset() 465 | self.add_explicit_field(self._index_offset, INDEX, "index") 466 | self.fixup(self.usa_count(), self.usa_offset()) 467 | 468 | def index(self): 469 | return INDEX(self._buf, self._offset + self._index_offset, 470 | self, MFT_INDEX_ENTRY) 471 | 472 | @staticmethod 473 | def structure_size(buf, offset, parent): 474 | return 0x30 + INDEX.structure_size(buf, offset + 0x10, parent) 475 | 476 | def __len__(self): 477 | return 0x1000 478 | 479 | 480 | class INDEX_ALLOCATION(FixupBlock): 481 | def __init__(self, buf, offset, parent=None): 482 | super(INDEX_ALLOCATION, self).__init__(buf, offset, parent) 483 | self.add_explicit_field(0, INDEX_BLOCK, "blocks") 484 | 485 | @staticmethod 486 | def guess_num_blocks(buf, offset): 487 | count = 0 488 | # TODO: don't hardcode things 489 | BLOCK_SIZE = 0x1000 490 | try: 491 | while BinaryParser.read_dword(buf, offset) == 0x58444e49: # "INDX" 492 | offset += BLOCK_SIZE 493 | count += 1 494 | except (IndexError, BinaryParser.OverrunBufferException): 495 | return count 496 | return count 497 | 498 | def blocks(self): 499 | for i in xrange(INDEX_ALLOCATION.guess_num_blocks(self._buf, self.offset())): 500 | # TODO: don't 
hardcode things 501 | yield INDEX_BLOCK(self._buf, self._offset + 0x1000 * i) 502 | 503 | @staticmethod 504 | def structure_size(buf, offset, parent): 505 | # TODO: don't hardcode things 506 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(buf, offset) 507 | 508 | def __len__(self): 509 | # TODO: don't hardcode things 510 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(self._buf, self._offset) 511 | 512 | 513 | class IndexEntry(Block): 514 | def __init__(self, buf, offset, parent): 515 | super(IndexEntry, self).__init__(buf, offset) 516 | self.declare_field("qword", "mft_reference", 0x0) 517 | self.declare_field("word", "length") 518 | self.declare_field("word", "filename_information_length") 519 | self.declare_field("dword", "flags") 520 | self.declare_field("binary", "filename_information_buffer", \ 521 | self.current_field_offset(), 522 | self.filename_information_length()) 523 | self.declare_field("qword", "child_vcn", 524 | BinaryParser.align(self.current_field_offset(), 0x8)) 525 | 526 | def filename_information(self): 527 | return FilenameAttribute(self._buf, 528 | self.offset() + self._off_filename_information_buffer, 529 | self) 530 | 531 | 532 | class StandardInformationFieldDoesNotExist(Exception): 533 | def __init__(self, msg): 534 | self._msg = msg 535 | 536 | def __str__(self): 537 | return "Standard Information attribute field does not exist: %s" % (self._msg) 538 | 539 | 540 | class StandardInformation(Block): 541 | # TODO(wb): implement sizing so we can make this nestable 542 | def __init__(self, buf, offset, parent): 543 | super(StandardInformation, self).__init__(buf, offset) 544 | self.declare_field("filetime", "created_time", 0x0) 545 | self.declare_field("filetime", "modified_time") 546 | self.declare_field("filetime", "changed_time") 547 | self.declare_field("filetime", "accessed_time") 548 | self.declare_field("dword", "attributes") 549 | self.declare_field("binary", "reserved", self.current_field_offset(), 0xC) 550 | # self.declare_field("dword", "owner_id", 0x30) # Win2k+, NTFS 3.x 551 | # self.declare_field("dword", "security_id") # Win2k+, NTFS 3.x 552 | # self.declare_field("qword", "quota_charged") # Win2k+, NTFS 3.x 553 | # self.declare_field("qword", "usn") # Win2k+, NTFS 3.x 554 | 555 | # Can't implement this unless we know the NTFS version in use 556 | # @staticmethod 557 | # def structure_size(buf, offset, parent): 558 | # return 0x42 + (read_byte(buf, offset + 0x40) * 2) 559 | 560 | # Can't implement this unless we know the NTFS version in use 561 | # def __len__(self): 562 | # return 0x42 + (self.filename_length() * 2) 563 | 564 | def owner_id(self): 565 | """ 566 | This is an explicit method because it may not exist in OSes under Win2k 567 | 568 | @raises StandardInformationFieldDoesNotExist 569 | """ 570 | try: 571 | return self.unpack_dword(0x30) 572 | except BinaryParser.OverrunBufferException: 573 | raise StandardInformationFieldDoesNotExist("Owner ID") 574 | 575 | def security_id(self): 576 | """ 577 | This is an explicit method because it may not exist in OSes under Win2k 578 | 579 | @raises StandardInformationFieldDoesNotExist 580 | """ 581 | try: 582 | return self.unpack_dword(0x34) 583 | except BinaryParser.OverrunBufferException: 584 | raise StandardInformationFieldDoesNotExist("Security ID") 585 | 586 | def quota_charged(self): 587 | """ 588 | This is an explicit method because it may not exist in OSes under Win2k 589 | 590 | @raises StandardInformationFieldDoesNotExist 591 | """ 592 | try: 593 | return self.unpack_dword(0x38) 594 | 
except BinaryParser.OverrunBufferException: 595 | raise StandardInformationFieldDoesNotExist("Quota Charged") 596 | 597 | def usn(self): 598 | """ 599 | This is an explicit method because it may not exist in OSes under Win2k 600 | 601 | @raises StandardInformationFieldDoesNotExist 602 | """ 603 | try: 604 | return self.unpack_dword(0x40) 605 | except BinaryParser.OverrunBufferException: 606 | raise StandardInformationFieldDoesNotExist("USN") 607 | 608 | 609 | class Attribute_List(Block, Nestable): 610 | def __init__(self, buf, offset, size, logger): 611 | self.__list = [] 612 | csize = 0 613 | while csize < size: 614 | lEntry = Attribute_List_Entry(buf[csize:], 0, logger) 615 | self.__list.append(lEntry) 616 | csize += lEntry.record_length() 617 | 618 | def get(self): 619 | return self.__list 620 | 621 | 622 | class Attribute_List_Entry(Block, Nestable): 623 | def __init__(self, buf, offset, logger): 624 | super(Attribute_List_Entry, self).__init__(buf, offset) 625 | self.declare_field("dword", "type", 0x0) 626 | self.declare_field("word", "record_length", 0x4) 627 | self.declare_field("byte", "nameLength", 0x6) 628 | self.declare_field("byte", "offsetToName", 0x7) 629 | self.declare_field("qword", "startVCN", 0x8) 630 | self.declare_field("qword", "baseFileReference", 0x10) 631 | self.declare_field("word", "attributeID", 0x18) 632 | self.declare_field("wstring", "name", 0x1a, 2 * self.nameLength()) 633 | 634 | def __len__(self): 635 | return self.size() 636 | 637 | 638 | class FilenameAttribute(Block, Nestable): 639 | def __init__(self, buf, offset, parent): 640 | super(FilenameAttribute, self).__init__(buf, offset) 641 | self.declare_field("qword", "mft_parent_reference", 0x0) 642 | self.declare_field("filetime", "created_time") 643 | self.declare_field("filetime", "modified_time") 644 | self.declare_field("filetime", "changed_time") 645 | self.declare_field("filetime", "accessed_time") 646 | self.declare_field("qword", "physical_size") 647 | self.declare_field("qword", "logical_size") 648 | self.declare_field("dword", "flags") 649 | self.declare_field("dword", "reparse_value") 650 | self.declare_field("byte", "filename_length") 651 | self.declare_field("byte", "filename_type") 652 | self.declare_field("wstring", "filename", 0x42, self.filename_length()) 653 | 654 | @staticmethod 655 | def structure_size(buf, offset, parent): 656 | return 0x42 + (BinaryParser.read_byte(buf, offset + 0x40) * 2) 657 | 658 | def __len__(self): 659 | return 0x42 + (self.filename_length() * 2) 660 | 661 | 662 | class SlackIndexEntry(IndexEntry): 663 | def __init__(self, buf, offset, parent): 664 | """ 665 | Constructor. 666 | Arguments: 667 | - `buf`: Byte string containing NTFS INDX file 668 | - `offset`: The offset into the buffer at which the block starts. 669 | - `parent`: The parent NTATTR_STANDARD_INDEX_HEADER block, 670 | which links to this block. 
671 | """ 672 | super(SlackIndexEntry, self).__init__(buf, offset, parent) 673 | 674 | def is_valid(self): 675 | # this is a bit of a mess, but it should work 676 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 677 | future_date = datetime(2025, 1, 1, 0, 0, 0) 678 | try: 679 | fn = self.filename_information() 680 | except: 681 | return False 682 | if not fn: 683 | return False 684 | try: 685 | return fn.modified_time() > recent_date and \ 686 | fn.accessed_time() > recent_date and \ 687 | fn.changed_time() > recent_date and \ 688 | fn.created_time() > recent_date and \ 689 | fn.modified_time() < future_date and \ 690 | fn.accessed_time() < future_date and \ 691 | fn.changed_time() < future_date and \ 692 | fn.created_time() < future_date 693 | except ValueError: 694 | return False 695 | 696 | 697 | class Runentry(Block, Nestable): 698 | def __init__(self, buf, offset, parent): 699 | super(Runentry, self).__init__(buf, offset) 700 | self.declare_field("byte", "header") 701 | self._offset_length = self.header() >> 4 702 | self._length_length = self.header() & 0x0F 703 | self.declare_field("binary", 704 | "length_binary", 705 | self.current_field_offset(), self._length_length) 706 | self.declare_field("binary", 707 | "offset_binary", 708 | self.current_field_offset(), self._offset_length) 709 | 710 | @staticmethod 711 | def structure_size(buf, offset, parent): 712 | b = BinaryParser.read_byte(buf, offset) 713 | return (b >> 4) + (b & 0x0F) + 1 714 | 715 | def __len__(self): 716 | return 0x1 + (self._length_length + self._offset_length) 717 | 718 | def is_valid(self): 719 | return self._offset_length > 0 and self._length_length > 0 720 | 721 | def is_sparsed(self): 722 | return self._offset_length == 0 723 | 724 | def lsb2num(self, binary): 725 | count = 0 726 | ret = 0 727 | for b in binary: 728 | ret += ord(b) << (8 * count) 729 | count += 1 730 | return ret 731 | 732 | def lsb2signednum(self, binary): 733 | count = 0 734 | ret = 0 735 | working = [] 736 | 737 | is_negative = (ord(binary[-1]) & (1 << 7) != 0) 738 | if is_negative: 739 | working = [ord(b) ^ 0xFF for b in binary] 740 | else: 741 | working = [ord(b) for b in binary] 742 | for b in working: 743 | ret += b << (8 * count) 744 | count += 1 745 | if is_negative: 746 | ret += 1 747 | ret *= -1 748 | return ret 749 | 750 | def offset(self): 751 | # TODO(wb): make this run_offset 752 | if self.offset_binary() == "": 753 | return 0 754 | return self.lsb2signednum(self.offset_binary()) 755 | 756 | def length(self): 757 | # TODO(wb): make this run_offset 758 | return self.lsb2num(self.length_binary()) 759 | 760 | 761 | class Runlist(Block): 762 | def __init__(self, buf, offset, parent): 763 | super(Runlist, self).__init__(buf, offset) 764 | 765 | @staticmethod 766 | def structure_size(buf, offset, parent): 767 | length = 0 768 | while True: 769 | b = BinaryParser.read_byte(buf, offset + length) 770 | length += 1 771 | if b == 0: 772 | return length 773 | 774 | length += (b >> 4) + (b & 0x0F) 775 | 776 | def __len__(self): 777 | return sum(map(len, self._entries())) 778 | 779 | def _entries(self, length=None): 780 | ret = [] 781 | offset = self.offset() 782 | entry = Runentry(self._buf, offset, self) 783 | 784 | while entry.header() != 0 and \ 785 | (not length or offset < self.offset() + length) and \ 786 | (entry.is_valid() or entry.is_sparsed()): 787 | ret.append(entry) 788 | offset += len(entry) 789 | entry = Runentry(self._buf, offset, self) 790 | return ret 791 | 792 | def runs(self, length=None): 793 | """ 794 | Yields tuples (volume 
offset, length). 795 | Recall that the entries are relative to one another 796 | """ 797 | last_offset = 0 798 | for e in self._entries(length=length): 799 | current_offset = 0 800 | if not e.offset() == 0: 801 | current_offset = last_offset + e.offset() 802 | current_length = e.length() 803 | if not e.offset() == 0: 804 | last_offset = current_offset 805 | yield (current_offset, current_length) 806 | 807 | 808 | class ATTR_TYPE: 809 | STANDARD_INFORMATION = 0x10 810 | ATTRIBUTE_LIST = 0x20 811 | FILENAME_INFORMATION = 0x30 812 | DATA = 0x80 813 | INDEX_ROOT = 0x90 814 | INDEX_ALLOCATION = 0xA0 815 | UTILITY_STREAM = 0x100 816 | 817 | 818 | class Attribute(Block, Nestable): 819 | TYPES = { 820 | 16: "$STANDARD INFORMATION", 821 | 32: "$ATTRIBUTE LIST", 822 | 48: "$FILENAME INFORMATION", 823 | 64: "$OBJECT ID/$VOLUME VERSION", 824 | 80: "$SECURITY DESCRIPTOR", 825 | 96: "$VOLUME NAME", 826 | 112: "$VOLUME INFORMATION", 827 | 128: "$DATA", 828 | 144: "$INDEX ROOT", 829 | 160: "$INDEX ALLOCATION", 830 | 176: "$BITMAP", 831 | 192: "$SYMBOLIC LINK", 832 | 208: "$REPARSE POINT/$EA INFORMATION", 833 | 224: "$EA", 834 | 256: "$LOGGED UTILITY STREAM", 835 | } 836 | 837 | FLAGS = { 838 | 0x01: "readonly", 839 | 0x02: "hidden", 840 | 0x04: "system", 841 | 0x08: "unused-dos", 842 | 0x10: "directory-dos", 843 | 0x20: "archive", 844 | 0x40: "device", 845 | 0x80: "normal", 846 | 0x100: "temporary", 847 | 0x200: "sparse", 848 | 0x400: "reparse-point", 849 | 0x800: "compressed", 850 | 0x1000: "offline", 851 | 0x2000: "not-indexed", 852 | 0x4000: "encrypted", 853 | 0x10000000: "has-indx", 854 | 0x20000000: "has-view-index", 855 | } 856 | 857 | def __init__(self, buf, offset, parent): 858 | super(Attribute, self).__init__(buf, offset) 859 | self.declare_field("dword", "type") 860 | self.declare_field("dword", "size") # this value must rounded up to 0x8 byte alignment 861 | self.declare_field("byte", "non_resident") 862 | self.declare_field("byte", "name_length") 863 | self.declare_field("word", "name_offset") 864 | self.declare_field("word", "flags") 865 | self.declare_field("word", "instance") 866 | if self.non_resident() > 0: 867 | self.declare_field("qword", "lowest_vcn", 0x10) 868 | self.declare_field("qword", "highest_vcn") 869 | self.declare_field("word", "runlist_offset") 870 | self.declare_field("byte", "compression_unit") 871 | self.declare_field("byte", "reserved1") 872 | self.declare_field("byte", "reserved2") 873 | self.declare_field("byte", "reserved3") 874 | self.declare_field("byte", "reserved4") 875 | self.declare_field("byte", "reserved5") 876 | self.declare_field("qword", "allocated_size") 877 | self.declare_field("qword", "data_size") 878 | self.declare_field("qword", "initialized_size") 879 | self.declare_field("qword", "compressed_size") 880 | else: 881 | self.declare_field("dword", "value_length", 0x10) 882 | self.declare_field("word", "value_offset") 883 | self.declare_field("byte", "value_flags") 884 | self.declare_field("byte", "reserved") 885 | self.declare_field("binary", "value", 886 | self.value_offset(), self.value_length()) 887 | 888 | @staticmethod 889 | def structure_size(buf, offset, parent): 890 | s = BinaryParser.read_dword(buf, offset + 0x4) 891 | return s + (8 - (s % 8)) 892 | 893 | def __len__(self): 894 | return self.size() 895 | 896 | def __str__(self): 897 | return "%s" % (Attribute.TYPES[self.type()]) 898 | 899 | def runlist(self): 900 | return Runlist(self._buf, self.offset() + self.runlist_offset(), self) 901 | 902 | def size(self): 903 | s = 
self.unpack_dword(self._off_size) 904 | return s + (8 - (s % 8)) 905 | 906 | def name(self): 907 | return self.unpack_wstring(self.name_offset(), self.name_length()) 908 | 909 | 910 | class MFT_RECORD_FLAGS: 911 | MFT_RECORD_IN_USE = 0x1 912 | MFT_RECORD_IS_DIRECTORY = 0x2 913 | 914 | 915 | def MREF(mft_reference): 916 | """ 917 | Given a MREF/mft_reference, return the record number part. 918 | """ 919 | return mft_reference & 0xFFFFFFFFFFFF 920 | 921 | 922 | def MSEQNO(mft_reference): 923 | """ 924 | Given a MREF/mft_reference, return the sequence number part. 925 | """ 926 | return (mft_reference >> 48) & 0xFFFF 927 | 928 | 929 | class AttributeNotFoundError(Exception): 930 | pass 931 | 932 | 933 | class MFTRecord(FixupBlock): 934 | def __init__(self, buf, offset, parent, inode=None): 935 | super(MFTRecord, self).__init__(buf, offset, parent) 936 | 937 | # 0x0 File or BAAD 938 | self.declare_field("dword", "magic") 939 | # 0x04 Offset to fixup array 940 | self.declare_field("word", "usa_offset") 941 | # 0x06 Number of entries in fixup array 942 | self.declare_field("word", "usa_count") 943 | # 0x08 $LogFile sequence number 944 | self.declare_field("qword", "lsn") 945 | # 0x10 Sequence value 946 | self.declare_field("word", "sequence_number") 947 | # 0x12 Link Count 948 | self.declare_field("word", "link_count") 949 | # 0x14 Offset of first attribute 950 | self.declare_field("word", "attrs_offset") 951 | # 0x16 Flags: 952 | # 0x00 - not in use 953 | # 0x01 - in use 954 | # 0x02 - directory 955 | # 0x03 - directory in use 956 | self.declare_field("word", "flags") 957 | 958 | # 0x18 Used size of MFT entry 959 | self.declare_field("dword", "bytes_in_use") 960 | # 0x1c Allocated size of MFT entry 961 | self.declare_field("dword", "bytes_allocated") 962 | # 0x20 File reference to base record 963 | self.declare_field("qword", "base_mft_record") 964 | # 0x28 Next attribute identifier 965 | self.declare_field("word", "next_attr_instance") 966 | 967 | # Attributes and fixup values 968 | # 0x2a 969 | self.declare_field("word", "reserved") 970 | # 0x2c 971 | self.declare_field("dword", "mft_record_number") 972 | 973 | self.inode = inode or self.mft_record_number() 974 | # print self.sequence_number() 975 | # print self.usa_offset() 976 | self.fixup(self.usa_count(), self.usa_offset()) 977 | 978 | def attributes(self): 979 | offset = self.attrs_offset() 980 | right_border = self.offset() + self.bytes_in_use() 981 | 982 | while (self.unpack_dword(offset) != 0 and 983 | self.unpack_dword(offset) != 0xFFFFFFFF and 984 | offset + self.unpack_dword(offset + 4) <= right_border): 985 | a = Attribute(self._buf, offset, self) 986 | offset += len(a) 987 | yield a 988 | 989 | def attribute(self, attr_type): 990 | for a in self.attributes(): 991 | if a.type() == attr_type: 992 | return a 993 | raise AttributeNotFoundError() 994 | 995 | def is_directory(self): 996 | return (self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IS_DIRECTORY) == 2 997 | 998 | def is_active(self): 999 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IN_USE 1000 | 1001 | # this is a required resident attribute 1002 | def filename_informations(self): 1003 | """ 1004 | MFT Records may have more than one FN info attribute, 1005 | each with a different type of filename (8.3, POSIX, etc.) 1006 | 1007 | This function returns all of these attributes. 
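For example, a single record may carry both a DOS 8.3 name (such as DOCUME~1) and a Win32 long name (Documents); each one is returned here as a separate FilenameAttribute.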
1008 | """ 1009 | ret = [] 1010 | for a in self.attributes(): 1011 | if a.type() == ATTR_TYPE.FILENAME_INFORMATION: 1012 | try: 1013 | value = a.value() 1014 | check = FilenameAttribute(value, 0, self) 1015 | ret.append(check) 1016 | except Exception: 1017 | pass 1018 | return ret 1019 | 1020 | # this a required resident attribute 1021 | def filename_information(self): 1022 | """ 1023 | MFT Records may have more than one FN info attribute, 1024 | each with a different type of filename (8.3, POSIX, etc.) 1025 | 1026 | This function returns the attribute with the most complete name, 1027 | that is, it tends towards Win32, then POSIX, and then 8.3. 1028 | """ 1029 | fn = None 1030 | for check in self.filename_informations(): 1031 | try: 1032 | if check.filename_type() == 0x0001 or \ 1033 | check.filename_type() == 0x0003: 1034 | return check 1035 | fn = check 1036 | except Exception: 1037 | pass 1038 | return fn 1039 | 1040 | # this a required resident attribute 1041 | def standard_information(self): 1042 | try: 1043 | attr = self.attribute(ATTR_TYPE.STANDARD_INFORMATION) 1044 | return StandardInformation(attr.value(), 0, self) 1045 | except AttributeError: 1046 | return None 1047 | 1048 | def data_attribute(self): 1049 | """ 1050 | Returns None if the default $DATA attribute does not exist 1051 | """ 1052 | for attr in self.attributes(): 1053 | if attr.type() == ATTR_TYPE.DATA and attr.name() == "": 1054 | return attr 1055 | 1056 | def slack_data(self): 1057 | """ 1058 | Returns A binary string containing the MFT record slack. 1059 | """ 1060 | return self._buf[self.offset() + self.bytes_in_use():self.offset() + 1024].tostring() 1061 | 1062 | def active_data(self): 1063 | """ 1064 | Returns A binary string containing the MFT record slack. 1065 | """ 1066 | return self._buf[self.offset():self.offset() + self.bytes_in_use()].tostring() 1067 | 1068 | 1069 | class InvalidAttributeException(INDXException): 1070 | def __init__(self, value): 1071 | super(InvalidAttributeException, self).__init__(value) 1072 | 1073 | def __str__(self): 1074 | return "Invalid attribute Exception(%s)" % (self._value) 1075 | 1076 | 1077 | class InvalidMFTRecordNumber(Exception): 1078 | def __init__(self, value): 1079 | self.value = value 1080 | 1081 | 1082 | class MFTOperationNotImplementedError(Exception): 1083 | def __init__(self, msg): 1084 | super(MFTOperationNotImplementedError, self).__init__(msg) 1085 | self._msg = msg 1086 | 1087 | def __str__(self): 1088 | return "MFTOperationNotImplemented(%s)" % (self._msg) 1089 | 1090 | 1091 | class InvalidRecordException(Exception): 1092 | def __init__(self, msg): 1093 | super(InvalidRecordException, self).__init__(msg) 1094 | self._msg = msg 1095 | 1096 | def __str__(self): 1097 | return "InvalidRecordException(%s)" % (self._msg) 1098 | -------------------------------------------------------------------------------- /TScopy/tscopy.py: -------------------------------------------------------------------------------- 1 | """ 2 | This project is based off the work from the following projects: 3 | * https://github.com/williballenthin/python-ntfs 4 | * https://github.com/jschicht/RawCopy 5 | """ 6 | # TODO: Will have issues with non ascii characters in files names 7 | # TODO: Currently only processes '\\.\' where RawCopy supported other formats 8 | import sys 9 | import os 10 | import re 11 | import pickle 12 | import traceback 13 | 14 | from math import ceil 15 | from BinaryParser import hex_dump, Block 16 | from MFT import INDXException, MFTRecord, ATTR_TYPE, Attribute_List 
17 | from MFT import INDEX_ROOT 18 | 19 | if os.name == "nt": 20 | try: 21 | import win32file, win32api, win32con 22 | except: 23 | print "Must have pywin32 installed -- pip install pywin32" 24 | sys.exit(1) 25 | 26 | #################################################################################### 27 | # BootSector structure 28 | # https://flatcap.org/linux-ntfs/ntfs/files/boot.html 29 | #################################################################################### 30 | class BootSector(Block): 31 | def __init__(self, buf, offset, logger): 32 | super(BootSector, self).__init__(buf, offset) 33 | self.declare_field("qword", "system_id", 0x3) 34 | self.declare_field("word", "bytes_per_sector", 0x0b) 35 | self.declare_field("byte", "sectors_per_cluster", 0xd) 36 | self.declare_field("word", "reserved_sectors", 0xe) 37 | self.declare_field("byte", "media_desc", 0x15) 38 | self.declare_field("word", "sectors_per_track", 0x18) 39 | self.declare_field("word", "heads", 0x1a) 40 | self.declare_field("dword", "hidden_sectors", 0x1c) 41 | self.declare_field("qword", "total_sectors", 0x28) 42 | self.declare_field("qword", "start_c_mft", 0x30) 43 | self.declare_field("qword", "start_c_mftmir", 0x38) 44 | self.declare_field("byte", "file_rec_indicator", 0x40) 45 | self.declare_field("byte", "idx_buf_size_indicator", 0x44) 46 | self.declare_field("qword", "serial_number", 0x48) 47 | self.bytes_per_cluster = self.bytes_per_sector() * self.sectors_per_cluster() 48 | #COPIED FROM RAWCOPY:: A really lame fix for a rare bug seen in certain Windows 7 x64 vm's 49 | if self.file_rec_indicator() > 127: 50 | testval = 256 - self.file_rec_indicator() 51 | self.mft_record_size = 2 52 | for i in range(testval-1): 53 | self.mft_record_size *= 2 54 | else: 55 | self.mft_record_size = self.bytes_per_cluster * self.file_rec_indicator() 56 | 57 | self.sectors_per_mft_record = self.mft_record_size / self.bytes_per_sector() 58 | self.cluster_per_file_record_segment = int(ceil(float(self.mft_record_size) / self.bytes_per_cluster)) 59 | 60 | 61 | #################################################################################### 62 | # NTFS INDX Record structure 63 | # https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html 64 | #################################################################################### 65 | class INDX( Block ): 66 | def __init__(self, buf, offset ): 67 | super(INDX, self).__init__(buf, offset) 68 | self.declare_field("dword", "magic", 0x0) 69 | self.declare_field("word", "update_seq_offset", 0x4) 70 | self.declare_field("word", "update_seq_sz", 0x6) 71 | self.declare_field("qword", "logfile_seq_num", 0x8) 72 | self.declare_field("qword", "VCN_INDX", 0x10) 73 | self.declare_field("dword", "index_entries_offset", 0x18) 74 | self.declare_field("dword", "index_entries_sz", 0x1c) 75 | self.declare_field("dword", "alloc_sz", 0x20) 76 | self.declare_field("byte", "leaf_node", 0x24) 77 | self.declare_field("word", "update_seq", 0x28) 78 | s = self.update_seq_sz() 79 | 80 | def update_seq_arr( self, idx_buf ): 81 | # TODO: Clean this up into a for loop 82 | seq_arr = idx_buf[self.update_seq_offset()+2:self.update_seq_offset()+2+self.update_seq_sz()*2] 83 | ret = idx_buf[0x0000:0x01fe] + seq_arr[0x00:0x2] 84 | ret += idx_buf[0x0200:0x03fe] + seq_arr[0x02:0x4] 85 | ret += idx_buf[0x0400:0x05fe] + seq_arr[0x04:0x6] 86 | ret += idx_buf[0x0600:0x07fe] + seq_arr[0x06:0x8] 87 | ret += idx_buf[0x0800:0x09fe] + seq_arr[0x08:0xa] 88 | ret += idx_buf[0x0a00:0x0bfe] + seq_arr[0x0a:0xc] 89 | ret += 
idx_buf[0x0c00:0x0dfe] + seq_arr[0x0c:0xe] 90 | ret += idx_buf[0x0e00:0x0ffe] + seq_arr[0x0e:0x10] 91 | ret += idx_buf[0x1000: ] 92 | return ret 93 | 94 | #################################################################################### 95 | # NTFS INDX Entry Structure 96 | # https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html 97 | #################################################################################### 98 | class INDX_ENTRY( Block ): 99 | def __init__(self, buf, offset): 100 | super(INDX_ENTRY, self).__init__(buf, offset) 101 | self.declare_field("qword", "mft_recordnum", 0) 102 | self.declare_field("word", "entry_sz", 0x08 ) 103 | if self.entry_sz() == 0x18 and self.mft_recordnum() == 0: 104 | raise INDXException("End of INDX File found") 105 | if self.entry_sz() == 0x10 and self.mft_recordnum() == 0: 106 | raise INDXException("End of INDX File found") 107 | if self.entry_sz() == 0x00 and self.mft_recordnum() == 0: 108 | raise INDXException("NULLS INDX File found") 109 | self.declare_field("word", "filename_offset", 0x0a ) 110 | self.declare_field("word", "index_flags", 0x0c ) 111 | self.declare_field("qword", "mft_parent_recordnum", 0x10 ) 112 | self.declare_field("qword", "alloc_sz", 0x38 ) 113 | self.declare_field("qword", "file_sz", 0x40 ) 114 | self.declare_field("qword", "file_flags", 0x48 ) 115 | self.declare_field("byte", "filename_sz", 0x50 ) 116 | self.declare_field("binary", "filename", 0x52, self.filename_sz()*2 ) 117 | 118 | 119 | #################################################################################### 120 | # The main class of TScopy. 121 | # * Is a singleton instance 122 | # * Example usage 123 | # config = {'outputbasedir':dst, 'pickledir':dir,'logger':log,'debug':False,'ignore_table':False} 124 | # tscopy = TScopy() 125 | # tscopy.setConfiguration( config ) 126 | # tscopy.copy( src, dst ) 127 | # 128 | # * Config key descriptions 129 | # - outputbasedir : The FULL PATH of directory where the files will be copied to. 130 | # - pickledir : The FULL PATH of directory where the pickle file will be created or used. 131 | # - logger : A preconfigured instance of the python Logger class. 132 | # - debug : Not used 133 | # - ignore_table: 134 | # * True = Rebuilds the MFT table from the root node and does not save the table at the end of the run 135 | # * False = Uses a previous mft.pickle file if found. Saves the file after every copy. 
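# * A minimal end-to-end sketch (assumes a preconfigured logging.Logger named log; the paths are hypothetical): #     config = { 'pickledir': 'c:\\test', 'logger': log, 'debug': False, 'ignore_table': False } #     tscopy = TScopy() #     tscopy.setConfiguration( config ) #     tscopy.copy( 'c:\\users\\*\\ntuser.dat', 'c:\\test' )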
136 | #################################################################################### 137 | class TScopy( object ): 138 | _instance = None 139 | def __new__( cls ): 140 | if cls._instance == None: 141 | cls._instance = super(TScopy, cls).__new__(cls) 142 | cls.__isConfigured = False 143 | cls.__pickle_filename = "mft.pickle" 144 | cls.config = { 'files': None, 145 | 'pickledir': None, 146 | 'logger': None, 147 | 'debug': True, 148 | 'ignore_table':False, 149 | } 150 | cls.__useWin32 = False 151 | return cls._instance 152 | 153 | #################################################################################### 154 | # isConfigured: Verifies that the object has been configured at least once 155 | #################################################################################### 156 | def isConfigured( self ): 157 | return self.__isConfigured 158 | 159 | #################################################################################### 160 | # setConfiguration: Parses the config dictionary to set the values for debug, logger, 161 | # lookup table and the pickle directory 162 | #################################################################################### 163 | def setConfiguration( self, config ): 164 | if self.__isConfigured == True: 165 | return 166 | self.__MFT_lookup_table = None 167 | self.__isConfigured = True 168 | self.setDebug( config['debug'] ) 169 | self.setLogger( config['logger'] ) 170 | self.setLookupTable( config['ignore_table'] ) 171 | self.setPickleDir( config['pickledir'] ) 172 | 173 | 174 | #################################################################################### 175 | # setLogger: Sets the class object logger variable 176 | # Needs to be preconfigured 177 | #################################################################################### 178 | def setLogger( self, logger ): 179 | if logger == None: 180 | raise Exception( "TSCOPY", "Invalid Logger") 181 | self.config['logger'] = logger 182 | 183 | #################################################################################### 184 | # setDebug: Sets the class object debugger variable 185 | #################################################################################### 186 | def setDebug( self, debug ): 187 | self.config['debug'] = debug 188 | 189 | #################################################################################### 190 | # setLookupTable: Sets the class object ignore_table. 191 | #################################################################################### 192 | def setLookupTable( self, tf ): 193 | self.config['ignore_table'] = tf 194 | 195 | #################################################################################### 196 | # setPickleDir: Sets the output directory to save the mft.pickle file to 197 | #################################################################################### 198 | def setPickleDir( self, directory ): 199 | if not directory == None and not os.path.isdir( directory ): 200 | self.config['logger'].error("Error pickle destination (%s) not found" % directory) 201 | raise Exception( "TSCOPY", "Error pickle destination (%s) not found" % directory) 202 | self.__pickle_fullpath = '%s%s%s' % ( directory, os.sep, self.__pickle_filename ) 203 | self.__MFT_lookup_table = self.__getLookupTableFromDisk( "c" ) 204 | 205 | #################################################################################### 206 | # __getLookupTableFromDisk: Checks the mft.pickle file. 207 | # If it exists then it loads into memory. 
208 | # If it does not exist then it creates a new basic structure 209 | #################################################################################### 210 | def __getLookupTableFromDisk( self, drive_letter ): 211 | if not os.path.isfile( self.__pickle_fullpath): 212 | return {drive_letter:{5:{'seq_num': 5, 'name':'','children':{}}}} 213 | try: 214 | self.config['logger'].debug("Using Pickle file: %s " % self.__pickle_fullpath) 215 | with open( self.__pickle_fullpath, 'rb') as fd: 216 | return pickle.loads( fd.read() ) 217 | except: 218 | raise Exception( "TSCOPY", "FAILED to parse pickle file %s" % self.__pickle_fullpath ) 219 | 220 | #################################################################################### 221 | # __saveLookuptable: Write the lookup table from memory to disk. 222 | # Overwrites previous copy if it exists. 223 | #################################################################################### 224 | def __saveLookuptable( self, lookup_table ): 225 | with open(self.__pickle_fullpath, 'wb') as fd: 226 | fd.write( pickle.dumps( lookup_table )) 227 | 228 | #################################################################################### 229 | # __getMFT: Gets the root record of the MFT 230 | #################################################################################### 231 | def __getMFT( self, index=0 ): 232 | fd = self.config['fd'] 233 | bss = self.config['bss'] 234 | mft_offset = bss.bytes_per_sector() * bss.sectors_per_cluster() * bss.start_c_mft() 235 | if self.__useWin32 == False: 236 | mft_offset = 0x400 237 | # win32file.SetFilePointer( fd, mft_offset+(index*bss.mft_record_size ), win32file.FILE_BEGIN) 238 | # buf = win32file.ReadFile( fd, bss.mft_record_size )[1] 239 | buf, buf_sz = self.__read( fd, mft_offset+(index*bss.mft_record_size ), bss.mft_record_size ) 240 | record = MFTRecord(buf, 0, None) 241 | ret = {} 242 | 243 | attribute = record.data_attribute() 244 | cnt = 0 245 | for offset, length in attribute.runlist().runs(): 246 | if length > 16 and (length%16) > 0: 247 | if offset == 0: 248 | # may be sparse section at end of Compression Signature 249 | ret[cnt] = (offset, length%16) 250 | length -= length%16 251 | cnt += 1 252 | else: 253 | #may be compressed data section at start of Compression Signature 254 | ret[cnt] = (offset, length-length%16) 255 | offset += length-length%16 256 | length = length%16 257 | cnt += 1 258 | #just normal or sparse data 259 | ret[cnt] = (offset, length) 260 | cnt += 1 261 | 262 | return ret 263 | 264 | #################################################################################### 265 | # __GenRefArray: Iterates through the $MFT dataruns 266 | #################################################################################### 267 | def __GenRefArray( self ): 268 | MFTClustersToKeep = 0 269 | ref = -1 270 | dataruns = self.config['mft_dataruns'] 271 | bytes_per_cluster = self.config['bss'].bytes_per_cluster 272 | ClustersPerFileRecordSegment = self.config['bss'].cluster_per_file_record_segment 273 | split_mft_rec = {} 274 | cnt = 0 275 | for x in dataruns: 276 | r = dataruns[x] 277 | doKeepCluster = MFTClustersToKeep 278 | MFTClustersToKeep = (r[1]+ClustersPerFileRecordSegment - MFTClustersToKeep) % ClustersPerFileRecordSegment 279 | if not MFTClustersToKeep == 0: 280 | MFTClustersToKeep = ClustersPerFileRecordSegment - MFTClustersToKeep 281 | pos = r[0] * bytes_per_cluster 282 | subtr = self.config['bss'].mft_record_size 283 | if MFTClustersToKeep or doKeepCluster: 284 | subtr = 0 285 | 
end_of_run = r[1] * bytes_per_cluster - subtr 286 | for i in range(0, end_of_run, self.config['bss'].mft_record_size): 287 | if MFTClustersToKeep: 288 | if i >= end_of_run - ((ClustersPerFileRecordSegment - MFTClustersToKeep) * bytes_per_cluster): 289 | bytesToGet = (ClustersPerFileRecordSegment - MFTClustersToKeep) * bytes_per_cluster 290 | split_mft_rec[cnt] = '%d?%d,%d' % (ref+1, pos+i, bytesToGet ) 291 | ref += 1 292 | if i == 0 and doKeepCluster: 293 | bytesToGet = doKeepCluster * bytes_per_cluster 294 | if bytesToGet > self.config['bss'].mft_record_size: 295 | bytesToGet = self.config['bss'].mft_record_size 296 | split_mft_rec[cnt] += '|%d&%d' % ( pos+i, bytesToGet ) 297 | cnt += 1 298 | self.config['split_mft_rec'] = split_mft_rec 299 | 300 | #################################################################################### 301 | # __process_image: TODO 302 | #################################################################################### 303 | def __process_image( self, targetDrive ): 304 | pass 305 | 306 | #################################################################################### 307 | # __search_mft: Iterates through the target file's path, populating the table and seq_path 308 | # with each branch of the path as it parses the MFT records. The search ends when 309 | # it fails to find the next item in the target path or the target is identified. 310 | # table: The pointer to the current location into the mft metadata table stored in memory 311 | # tmp_path: The target directory path as a list 312 | # seq_path: A list of the found target directory path with mft sequence numbers 313 | #################################################################################### 314 | def __search_mft( self, table, tmp_path, seq_path ): 315 | for name in tmp_path: 316 | index = table['seq_num'] 317 | # self.config['logger'].debug('Looking for (%s) MFT_INDEX(%016X)' % (name, index)) 318 | ret = self.__getChildIndex( index ) 319 | # self.config['logger'].debug("childindex = %r" % len(ret) ) 320 | tmp_index = index 321 | for seq_num in ret: 322 | c_index = seq_num & 0xffffffff 323 | c_name = ret[seq_num].lower() 324 | table['children'][c_name] = { 'name':c_name, 'seq_num':c_index, 'children':{}} 325 | if c_name == name.lower(): 326 | index = c_index 327 | seq_path.append( (index, c_name ) ) 328 | table = table['children'][c_name] 329 | break 330 | if tmp_index == index: 331 | # self.config['logger'].info("%s NOT FOUND" % name) 332 | return None, None, None 333 | return table, tmp_path, seq_path 334 | #################################################################################### 335 | # __find_last_known_path: Iterates through the target file's path and matches with the 336 | # currently known indexes in the table. Returns as soon as the next path item 337 | # is not found or the end target has been located. 
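# Illustrative shape of the cached lookup table this walk relies on (record 5 is the volume root, names are stored lowercase; the child seq_num shown is a made-up example value): # { 'seq_num': 5, 'name': '', 'children': { 'windows': { 'seq_num': 1234, 'name': 'windows', 'children': {} } } }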
338 | # table: The pointer to the current location into the mft metadata table stored in memory 339 | # tmp_path: The target directory path as a list 340 | # seq_path: A list of the found target directory path with mft sequence numbers 341 | #################################################################################### 342 | 343 | def __find_last_known_path( self, table, tmp_path, seq_path ): 344 | l_path = tmp_path[:] 345 | for name in l_path: 346 | name = name.lower() 347 | if not name in table['children']: 348 | break 349 | table = table['children'][name] 350 | tmp_path = tmp_path[1:] 351 | seq_path.append( ( table['seq_num'], name )) 352 | return table, tmp_path, seq_path 353 | 354 | #################################################################################### 355 | # __copydir: Copies the entire directory. If bRecursive this function calls itself with 356 | # any child directories 357 | # fname: fullpath of the directory to copy 358 | # index: Sequence number of the MFT record of the parent 359 | # table: Pointer to the current index in the MFT metadata table 360 | # bRecursive: 361 | # True: When the parent's child is a directory, __copydir is called recursively 362 | # False: Does not copy child directories 363 | #################################################################################### 364 | def __copydir( self, fname, index, table, bRecursive=False): 365 | self.config['logger'].debug('fname(%r) index(%r)' % (fname, index) ) 366 | table = self.__copydirfiles( fname, index, table ) 367 | 368 | if bRecursive == True: 369 | for dirs in table['children']: 370 | l_table = table['children'][dirs] 371 | c_index = l_table['seq_num'] 372 | buf, buf_sz = self.__calcOffset( c_index ) 373 | if buf == None or buf_sz == 0: 374 | raise Exception("Failed to process mft_offset") 375 | record = MFTRecord(buf, 0, None) 376 | if record.is_directory(): 377 | self.config['logger'].debug( "Next Directory %r %r %r" % (c_index, dirs, fname)) 378 | self.config['current_file'] = fname[2:] 379 | self.__copydir( os.path.join(fname,dirs), c_index, l_table, bRecursive=True ) 380 | 381 | #################################################################################### 382 | # __copydirfiles: Wraps __getFile and copies all the files under the current directory 383 | # fname: fullpath of the directory to copy 384 | # index: Sequence number of the MFT record of the parent 385 | # table: Pointer to the current index in the MFT metadata table 386 | #################################################################################### 387 | def __copydirfiles( self, fname, index, table ): 388 | self.config['logger'].debug( "copydirfiles \n\tfname:\t%r\n\tindex:\t%r\n\ttable %r" % (fname,index,table)) 389 | if table['children'] == {}: 390 | ret = self.__getChildIndex( index ) 391 | self.config['logger'].debug( "\tchildren: %r" % len(ret)) 392 | for seq_num in ret: 393 | c_index = seq_num & 0xffffffff 394 | c_name = ret[seq_num].lower() 395 | table['children'][c_name] = { 'name':c_name, 'seq_num':c_index, 'children':{}} 396 | 397 | if ret[seq_num].strip() == '' or seq_num == 0: 398 | continue 399 | 400 | tmp_filename = self.config['current_file'] 401 | for name in table['children']: 402 | seq_num = table['children'][name]['seq_num'] 403 | self.config['logger'].debug("\tCopying %s to %s" % (fname+os.sep+name, self.config['outputbasedir']+tmp_filename+os.sep+name)) 404 | 405 | self.config['current_file'] = fname[2:]+os.sep+name # strip the drive letter off the front 406 | if '*' in 
fname[2:]+os.sep+name: 407 | self.config['current_file'] = tmp_filename+os.sep+name # strip the drive letter off the front 408 | 409 | self.__getFile( [seq_num&0xffffffff, name] ) 410 | return table 411 | 412 | #################################################################################### 413 | # __copyfile: Internal copy function. Used to set up and parse the target filename, locate 414 | # previously identified paths in the mft metadata list, and then copy the file, 415 | # files, or directories 416 | # filename: Full path to the target file/directory or wildcarded to copy 417 | # mft_filename: TODO remove 418 | # bRecursive: 419 | # True: Copy all children from this directory on 420 | # False: Do not copy children 421 | #################################################################################### 422 | def __copyfile( self, filename, mft_filename=None, bRecursive=False ): 423 | if self.__useWin32 == True: 424 | self.config['logger'].debug( 'filename %r' % filename) 425 | if not filename[:4].lower() == '\\\\.\\': 426 | targetDrive = '\\\\.\\'+filename[:2] 427 | else: 428 | targetDrive = filename[:6] 429 | 430 | driveLetter = targetDrive[-2] 431 | self.config['logger'].debug( 'Target Drive %s' % targetDrive) 432 | self.config['logger'].debug( 'DriveLetter %s' % driveLetter) 433 | 434 | self.__process_image( targetDrive ) # TODO process this to determine correct offsets 435 | 436 | if self.config['ignore_table'] == True: 437 | self.__MFT_lookup_table = {driveLetter:{5:{'seq_num':5,'name':'','children':{}}}} 438 | elif not driveLetter in self.__MFT_lookup_table.keys(): 439 | self.__MFT_lookup_table[driveLetter] = {5:{'seq_num':5,'name':'','children':{}}} 440 | # self.config['logger'].debug( 'Target Drive %s' % driveLetter) 441 | else: 442 | self.__MFT_lookup_table = {"c":{5:{'seq_num':5,'name':'','children':{}}}} 443 | targetDrive = mft_filename 444 | driveLetter = "c" 445 | self.config['logger'].debug( 'Processing the %s MFT file' % targetDrive ) 446 | 447 | self.config['driveLetter'] = driveLetter 448 | fd = self.__open( targetDrive ) 449 | self.config['fd'] = fd 450 | buf, buf_sz = self.__read( fd, 0, 0x200 ) # buf = win32file.ReadFile( fd, 0x200)[1] 451 | self.config['bss'] = BootSector( buf, 0, self.config['logger'] ) 452 | self.config['mft_dataruns'] = self.__getMFT( 0) 453 | self.__GenRefArray() 454 | 455 | fname = filename 456 | index = 5 457 | 458 | try: 459 | # Find the last known directory in the MFT_lookup_table 460 | seq_path = [(index,None)] 461 | tmp_path = fname[3:].split(os.sep) 462 | table = self.__MFT_lookup_table[driveLetter][5] 463 | 464 | expandedWildCards = self.__process_wildcards( filename, table ) 465 | if expandedWildCards == False: 466 | cp_files = [ tmp_path ] 467 | else: 468 | cp_files = expandedWildCards 469 | 470 | 471 | for cp_file in cp_files: 472 | self.config['current_file'] = os.sep.join(cp_file) # strip the drive letter off the front 473 | l_fname = fname[:3] + self.config['current_file'] 474 | self.config['logger'].info("Copying %s to %s" % (l_fname, self.config['outputbasedir']+self.config['current_file'])) 475 | table, tmp_path, seq_path = self.__get_file_mft_seqid( cp_file ) 476 | 477 | # Index was not located; exit (error message already logged) 478 | if table == None: 479 | self.config['logger'].error("File Not Found" ) 480 | return 481 | 482 | # Check the mft structure if this is a directory 483 | index = seq_path[-1][0] 484 | buf, buf_sz = self.__calcOffset( index ) 485 | if buf == None or buf_sz == 0: 486 | 
raise Exception("Failed to process mft_offset") 487 | record = MFTRecord(buf, 0, None) 488 | if record.is_directory(): 489 | self.__copydir( l_fname, index, table, bRecursive=bRecursive ) 490 | else: 491 | self.__getFile( seq_path[-1] ) 492 | except: 493 | self.config['logger'].error(traceback.format_exc()) 494 | finally: 495 | if self.config['ignore_table'] == False: 496 | self.__saveLookuptable( self.__MFT_lookup_table) 497 | 498 | #################################################################################### 499 | # __isSplitMFT: Determines if the MFT record is split 500 | #################################################################################### 501 | def __isSplitMFT( self, array, target_seq_num ): 502 | for ind in array: 503 | i = array[ind] 504 | if not '?' in i: 505 | continue 506 | ind = i.index('?') 507 | testRef = i[0:ind] 508 | if int(testRef) == target_seq_num: 509 | return ind 510 | return None 511 | 512 | #################################################################################### 513 | # __GetChildIndex: Parses the MFT records to find all children of the current sequence ID 514 | # index: Sequence ID or seq_num of the current MFT record to extract and parse 515 | #################################################################################### 516 | def __getChildIndex( self, index ): 517 | fd = self.config['fd'] 518 | bss = self.config['bss'] 519 | bpc = bss.bytes_per_cluster 520 | 521 | buf, buf_sz = self.__calcOffset( index ) 522 | if buf == None or buf_sz == 0: 523 | raise Exception("Failed to process mft_offset") 524 | record = MFTRecord(buf, 0, None) 525 | if not record.is_directory(): 526 | return [] 527 | ret = {} 528 | for attribute in record.attributes(): 529 | if attribute.type() == ATTR_TYPE.INDEX_ROOT: 530 | for entry in INDEX_ROOT(attribute.value(), 0).index().entries(): 531 | refNum = entry.header().mft_reference() & 0xfffffffff 532 | if refNum in ret: 533 | if "~" in ret[refNum]: 534 | ret[refNum] = entry.filename_information().filename() 535 | else: 536 | ret[refNum] = entry.filename_information().filename() 537 | elif attribute.type() == ATTR_TYPE.ATTRIBUTE_LIST: 538 | self.config['logger'].debug("ATTRIBUTE_LIST HAS BEEN FOUND 0x(%08x)!!!!" % index ) 539 | attr_list = Attribute_List(attribute.value(), 0, attribute.value_length(), self.config['logger'] ) 540 | self.config['logger'].debug(hex_dump(attribute.value()[:attribute.value_length()])) 541 | a_list = [] 542 | for entry in attr_list.get(): 543 | if (entry.type() == ATTR_TYPE.INDEX_ROOT or entry.type() == ATTR_TYPE.INDEX_ALLOCATION ) and not (entry.baseFileReference()&0xffffffff) == index: 544 | if not entry.baseFileReference() in a_list: 545 | a_list.append( entry.baseFileReference() & 0xffffffff ) 546 | for next_index in a_list: 547 | # WARNING!!! Recursive 548 | if index == next_index: 549 | self.config['logger'].debug(hex_dump(attribute.value()[:attribute.value_length()])) 550 | # raise Exception("Attribute_list failed to parse.") 551 | continue 552 | rec_children = self. 
__getChildIndex( next_index ) 553 | self.config['logger'].debug("ATTRIBUTE_LIST index(%d) children (%r) " % (next_index, rec_children) ) 554 | ret.update( rec_children ) 555 | elif attribute.type() == ATTR_TYPE.INDEX_ALLOCATION: 556 | for cluster_offset, length in attribute.runlist().runs(): 557 | offset=cluster_offset*bpc 558 | buf, buf_sz = self.__read( fd, offset, length*bpc) 559 | for cnt in range(length): 560 | idx_buf = buf[cnt*bpc:(cnt+2)*bpc] 561 | ind = INDX( idx_buf, 0 ) 562 | idx_buf = ind.update_seq_arr( idx_buf ) 563 | entry_offset = ind.index_entries_offset()+0x18 564 | i = 0 565 | last_i = i 566 | while i < ind.index_entries_sz() : 567 | try: 568 | entry = INDX_ENTRY( idx_buf, entry_offset ) 569 | refNum = entry.mft_recordnum() & 0xfffffffff 570 | if refNum in ret: 571 | if "~" in ret[refNum]: 572 | ret[refNum] = entry.filename().replace('\x00','') 573 | else: 574 | ret[refNum] = entry.filename().replace('\x00','') 575 | except INDXException: 576 | break 577 | except: 578 | self.config['logger'].error(traceback.format_exc()) 579 | self.config['logger'].debug( 'len(idx_buf (%03x) entry_offset(%03x)' % ( len(idx_buf), entry_offset)) 580 | pass 581 | entry_offset += entry.entry_sz() 582 | 583 | i += entry.entry_sz() 584 | if entry.entry_sz() == 0: 585 | break 586 | return ret 587 | 588 | #################################################################################### 589 | # __calcOffset: Calculates the offset into the drive to locate the specific data 590 | # for the target sequence number 591 | # target_seq_num: Sequence ID to copy from the disk 592 | #################################################################################### 593 | def __calcOffset( self, target_seq_num ): 594 | fd = self.config['fd'] 595 | bss = self.config['bss'] 596 | mft_vcn = self.config['mft_dataruns'] 597 | image_offset = 0 # TODO: Change this when finished processing the image 598 | array = self.config['split_mft_rec'] 599 | 600 | # Handle the case where the object is split across two dataruns 601 | split = self.__isSplitMFT( array, target_seq_num ) 602 | if not split == None: 603 | # self.config['logger'].debug( 'calcOffset: a split record was detected' ) 604 | item = array[split] 605 | ind = item.index('?') 606 | testRef = item[0:ind] 607 | if not int(testRef) == target_seq_num: 608 | # self.config['logger'].debug("Error: The ref in the array did not match target ref.") 609 | return None, 0 610 | 611 | srecord3 = item[ind+1:] 612 | srecordArr = srecord3.split('|') 613 | if not len( srecordArr ) == 3: 614 | # self.config['logger'].debug("Error: Array contained more elements than expected: %d" % len( srecordArr )) 615 | return None, 0 616 | 617 | record = "" 618 | record_sz = 0 619 | for i in srecordArr: 620 | if not ',' in i: 621 | # self.config['logger'].debug('Split:: Could not find ","') 622 | continue 623 | ind = i.index(',') 624 | srOffset = int(i[:ind]) 625 | srSize = int(i[ind+1:]) 626 | # win32file.SetFilePointer( fd, srOffset + image_offset, win32file.FILE_BEGIN) 627 | # record += win32file.ReadFile( fd, srSize)[1] 628 | buf, buf_sz = self.__read( fd, srOffset + image_offset, srSize ) 629 | record += buf 630 | record_sz += buf_sz 631 | return record, record_sz 632 | else: 633 | counter = 0 634 | offset = 0 635 | recordsdivisor = bss.mft_record_size/512 636 | for indx in mft_vcn: 637 | current_cluster = mft_vcn[indx][1] 638 | offset = mft_vcn[indx][0] 639 | records_in_currentrun = (current_cluster * bss.sectors_per_cluster() ) / recordsdivisor 640 | counter += records_in_currentrun 641 | if 
counter > target_seq_num: 642 | break 643 | tryat = counter - records_in_currentrun 644 | records_per_cluster = bss.sectors_per_cluster() / recordsdivisor 645 | final = 0 646 | counter2 = 0 647 | record_jmp = 0 648 | while final < target_seq_num: 649 | record_jmp += records_per_cluster 650 | counter2 += 1 651 | final = tryat + record_jmp 652 | records_too_many = final - target_seq_num 653 | 654 | mft_offset = image_offset + offset * bss.bytes_per_cluster + ( counter2 * bss.bytes_per_cluster ) - ( records_too_many * bss.mft_record_size ) 655 | # win32file.SetFilePointer( fd, mft_offset, win32file.FILE_BEGIN) 656 | # return win32file.ReadFile( fd, bss.mft_record_size )[1] 657 | if self.__useWin32 == False: 658 | mft_offset = 0x400 + 0x400*target_seq_num 659 | # self.config['logger'].debug('Split:: mft_offset(%r) record_size(%r)' % ( mft_offset, bss.mft_record_size)) 660 | return self.__read( fd, mft_offset, bss.mft_record_size) 661 | return None, 0 662 | 663 | #################################################################################### 664 | # __parse_attribute_data: Processes the file's data sections and combines them to 665 | # create the file. 666 | # attribute: The data attribute from the MFT record 667 | # Returns the data content 668 | #################################################################################### 669 | def __parse_attribute_data( self, attribute, output_name ): 670 | ret = '' 671 | fd = self.config['fd'] 672 | out_name = output_name 673 | bpc = self.config['bss'].bytes_per_cluster 674 | filename = attribute.name() 675 | # import pdb; pdb.set_trace() 676 | try: 677 | self.config['logger'].debug("Attribute File Name %s" % attribute.name()) 678 | if attribute.name_length() > 0: 679 | out_name += "_ADS_%s" % attribute.name() 680 | fd_out = open(out_name, "wb") 681 | self.config['logger'].debug("non_resident %r" % attribute.non_resident() ) 682 | if attribute.non_resident() == 0: 683 | fd_out.write( attribute.value() ) 684 | else: 685 | cnt = 0 686 | padd = False 687 | for cluster_offset, length in attribute.runlist().runs(): 688 | read_sz = length * bpc 689 | 690 | if cluster_offset == 0: ## Sparsed file segment detected 691 | self.config['logger'].debug("parse_attribute_data:: Sparsed file segment detected length( %08x ) lengthx4096 (%08x)" % ( length, read_sz)) 692 | chunk_sz = 0x1000 693 | chunk = "\x00"*chunk_sz 694 | while cnt < read_sz: 695 | if read_sz-cnt < chunk_sz: 696 | chunk_sz = read_sz-cnt 697 | fd_out.write(chunk[:chunk_sz]) 698 | cnt += chunk_sz 699 | else: 700 | self.config['logger'].debug("GetFile:: cluster_offset( %08x ) length( %08x ) " % ( cluster_offset, length)) 701 | self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 702 | if read_sz + cnt > attribute.initialized_size(): 703 | read_sz = attribute.initialized_size() - cnt 704 | padd = True 705 | if (read_sz % 0x1000) > 0: 706 | read_sz += 0x1000 - (read_sz%0x1000) 707 | offset=cluster_offset * bpc 708 | 709 | self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 710 | name = '' 711 | 712 | # Detected ADS 713 | buf, buf_sz = self.__read( fd, offset, read_sz, fd_out ) 714 | 715 | if attribute.data_size() < cnt + read_sz: 716 | read_sz = attribute.data_size()-cnt 717 | cnt += read_sz 718 | 719 | if padd == True: 720 | padd_sz = attribute.data_size() - attribute.initialized_size() 721 | fd_out.write( '\x00' * padd_sz ) 722 | cnt += padd_sz 723 | if cnt > attribute.initialized_size(): 
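# NTFS guarantees valid on-disk data only up to initialized_size(); bytes between initialized_size() and data_size() are logically zero, which is why the zero padding is written above and the copy loop can stop here.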
724 | # self.config['logger'].debug("readsize %08x cnt %08x init_sz %08x" % ( read_sz, cnt, attribute.initialized_size())) 725 | break 726 | except: 727 | self.config['logger'].error('Failed to get file %s\n%s' % (filename, traceback.format_exc() )) 728 | 729 | #################################################################################### 730 | # __parse_file_record: Given the sequence ID, parse the contents of the file from the 731 | # MFT and write them to output_name. 732 | # mft_file_seq_id: The sequence ID of the MFT record to return the data from 733 | #################################################################################### 734 | def __parse_file_record( self, mft_file_seq_id, output_name ): 735 | self.config['logger'].debug("parse_file_record 0x%08x" % mft_file_seq_id) 736 | buf, buf_sz = self.__calcOffset( mft_file_seq_id ) 737 | if buf == None: 738 | raise Exception("Failed to process mft_offset") 739 | 740 | record = MFTRecord(buf, 0, None) 741 | if record.is_directory(): 742 | return None 743 | 744 | ret_val = {} 745 | for attribute in record.attributes(): 746 | self.config['logger'].debug("Parsing Attribute 0x%2x" % attribute.type() ) 747 | if attribute.type() == ATTR_TYPE.ATTRIBUTE_LIST: 748 | file_contents = '' 749 | self.config['logger'].debug("ATTRIBUTE_LIST HAS BEEN FOUND getting the File 0x(%08x)!!!!" % mft_file_seq_id) 750 | attr_list = Attribute_List(attribute.value(), 0, attribute.value_length(), self.config['logger'] ) 751 | a_list = [] 752 | for entry in attr_list.get(): 753 | if entry.type() == ATTR_TYPE.DATA and not (entry.baseFileReference()&0xffffffff) == mft_file_seq_id: 754 | if not entry.baseFileReference() in a_list: 755 | a_list.append( entry.baseFileReference() & 0xffffffff ) 756 | for next_index in a_list: 757 | if mft_file_seq_id == next_index: 758 | continue 759 | # WARNING RECURSION 760 | self.__parse_file_record( next_index, output_name ) 761 | elif attribute.type() == ATTR_TYPE.DATA: 762 | self.__parse_attribute_data( attribute, output_name ) 763 | 764 | #################################################################################### 765 | # __getFile: Once the required file has been identified, this function locates all the parts of 766 | # the file and writes them in order to the destination location 767 | # mft_file_object: A (seq_num, name) pair identifying the target record 768 | #################################################################################### 769 | def __getFile( self, mft_file_object ): 770 | try: 771 | fullpath = self.config['outputbasedir'] + self.config['current_file'] 772 | # self.config['logger'].debug( "GetFile:: fullpath %s" % fullpath ) 773 | # self.config['logger'].debug( "GetFile:: attributes %s" % attribute.get_all_string()) 774 | path = '\\'.join(fullpath.split('\\')[:-1]) 775 | winapi_path = self.__winapi_path(path) 776 | if not os.path.isdir(winapi_path): 777 | os.makedirs(winapi_path) 778 | self.config['logger'].debug("GetFile:: fullpath edit %s" % fullpath) 779 | self.__parse_file_record( mft_file_object[0], self.__winapi_path(fullpath) ) 780 | except: 781 | self.config['logger'].error('Failed to get file %s\n%s' % (mft_file_object[1], traceback.format_exc() )) 782 | 783 | #################################################################################### 784 | # __winapi_path: Convert the file path to Unicode extended-length form to bypass the Win32 file path length limit of 260 characters 785 | #################################################################################### 786 | def __winapi_path( self, filename, encoding=None ): 787 | if (not isinstance(filename, unicode) and encoding is not 
None): 788 | filename = filename.decode(encoding) 789 | path = os.path.abspath(filename) 790 | if path.startswith(u"\\\\"): 791 | return u"\\\\?\\UNC\\" + path[2:] 792 | return u"\\\\?\\" + path 793 | 794 | 795 | #################################################################################### 796 | # __open: Wrapper around win32file createfile. 797 | #################################################################################### 798 | def __open( self, filename ): 799 | fd = None 800 | try: 801 | if self.__useWin32 == False: 802 | fd = open(filename, 'rb') 803 | else: 804 | fd = win32file.CreateFile( filename, 805 | win32file.GENERIC_READ, 806 | win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE, 807 | None, 808 | win32con.OPEN_EXISTING, 809 | win32file.FILE_ATTRIBUTE_NORMAL, 810 | None) 811 | except: 812 | self.config['logger'].error( traceback.format_exc()) 813 | return fd 814 | 815 | #################################################################################### 816 | # __read: Wrapper around win32file set file pointer and read contents. 817 | # fd => the handle to the file to be copied 818 | # offset => number of bytes to skip into the file 819 | # read_sz => Number of bytes to read from the file 820 | # fd_output => Default None. If none then read into buffer otherwise 821 | # The handle to the output file 822 | #################################################################################### 823 | def __read( self, fd, offset, read_sz, fd_output=None ): 824 | bytes_read = 0 825 | buf = '' 826 | try: 827 | if self.__useWin32 == False: 828 | fd.seek( offset, 0) 829 | if read_sz > 0x10000000: 830 | read_step = 0x01500000 831 | buf = '' 832 | while bytes_read < read_sz: 833 | step = min( read_step, read_sz - bytes_read ) 834 | if not fd_output == None: 835 | fd_output.write(fd.read( step )) 836 | else: 837 | buf += fd.read( step ) 838 | bytes_read += step 839 | else: 840 | if not fd_output == None: 841 | fd_output.write(fd.read(read_sz)) 842 | bytes_read += read_sz 843 | else: 844 | buf += fd.read(read_sz) 845 | bytes_read += read_sz 846 | else: 847 | if read_sz > 0x10000000: 848 | read_step = 0x01500000 849 | buf = '' 850 | while bytes_read < read_sz: 851 | win32file.SetFilePointer( fd, offset + bytes_read, win32file.FILE_BEGIN) 852 | step = min( read_step, read_sz - bytes_read ) 853 | if not fd_output == None: 854 | fd_output.write( win32file.ReadFile( fd, step)[1] ) 855 | else: 856 | buf += win32file.ReadFile(fd, step)[1] 857 | bytes_read += step 858 | else: 859 | win32file.SetFilePointer( fd, offset, win32file.FILE_BEGIN) 860 | if not fd_output == None: 861 | buff = win32file.ReadFile( fd, read_sz)[1] 862 | fd_output.write( buff ) 863 | bytes_read = read_sz 864 | else: 865 | buf += win32file.ReadFile( fd, read_sz)[1] 866 | bytes_read = read_sz 867 | except: 868 | self.config['logger'].error( traceback.format_exc()) 869 | self.config['logger'].debug("offset(%08x), readsize (%08x) fd (%08x)" % ( offset, read_sz, fd)) 870 | self.config['logger'].debug("stack %s" % traceback.print_stack() ) 871 | return (buf, bytes_read) 872 | 873 | #################################################################################### 874 | # __get_wildcard_children: Get the children of the wildcarded directory location 875 | # path: is a tuple containing the base path and the wildcard 876 | # TODO Move this someplace else in the file 877 | #################################################################################### 878 | def __get_wildcard_children( self, path ): 879 | copy_list = [] 880 | 
table, x, seq_path = self.__get_file_mft_seqid( path[0] ) 881 | if seq_path == None: 882 | return copy_list 883 | # Test if the last value seq_path[-1] is the directory we are looking for 884 | if path[1] == None: 885 | if seq_path[-1][1] == path[0][-1]: 886 | copy_list.append( path[0] ) 887 | 888 | # get children of found path and find all that match wildcard. 889 | ret = self.__getChildIndex( seq_path[-1][0] ) 890 | for x in ret: 891 | if path[1] == None: 892 | break 893 | l_name = ret[x].lower() 894 | l_reg = re.escape(path[1]).replace('\\*', '.*') 895 | if not l_reg[-1] == '*': 896 | l_reg += '$' 897 | if re.match( l_reg, l_name ): 898 | l_name = path[0] + [ l_name ] 899 | copy_list.append( l_name ) 900 | return copy_list 901 | 902 | #################################################################################### 903 | # __get_file_mft_seqid: Wrapper used to search for the file in the current memory mft 904 | # metadata list then process the rest of the path from parsing the MFT 905 | # tmp_path: List of the source path 906 | #################################################################################### 907 | def __get_file_mft_seqid( self, tmp_path ): 908 | index = 5 909 | seq_path = [(index,None)] 910 | table = self.__MFT_lookup_table[self.config['driveLetter']][index] 911 | table, tmp_path, seq_path = self.__find_last_known_path( table, tmp_path, seq_path ) 912 | table, tmp_path, seq_path = self.__search_mft( table, tmp_path, seq_path ) 913 | return table, tmp_path, seq_path 914 | 915 | #################################################################################### 916 | # __process_wildcards: Called when a wildcard was detected in the source filename. 917 | # Parses the wildcards and breaks up into sections then the paths are expanded 918 | # and each matching record is copied. 919 | # filename: Filename containing the wildcards 920 | # table: Pointer to the root of the mft Metadata table 921 | #################################################################################### 922 | def __process_wildcards( self, filename, table ): 923 | filename = filename.lower() 924 | if not '*' in filename: 925 | return False 926 | if filename[1:3] == ":\\": 927 | filename = filename[3:] 928 | 929 | index = 5 930 | seq_path = [(index,None)] 931 | tmp_path = filename.split( os.sep ) 932 | path = [] 933 | path_start = 0 934 | for ind in range( len(tmp_path)): 935 | if "*" in tmp_path[ind]: 936 | path.append( ( tmp_path[ path_start : ind ], tmp_path[ind]) ) 937 | path_start = ind + 1 938 | if path_start < len(tmp_path): 939 | path.append( ( tmp_path[ path_start : ], None) ) 940 | 941 | tList = [] 942 | for iPath in path: 943 | tList = self.__regexsearch( iPath, tList ) 944 | return tList 945 | 946 | #################################################################################### 947 | # __regexsearch: Searches the path to determine if it matches the wildcard. Only the 948 | # '*' wildcard is supported. 
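# Example of the wildcard-to-regex translation performed in __get_wildcard_children above: 'ntuser*' escapes to 'ntuser.*' (trailing '*', so no '$' is appended), while 'nt*.dat' becomes 'nt.*\.dat$'; both are matched against lowercased child names.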
949 | # path: A (base path components, wildcard) tuple to match 950 | # tList: List of previously expanded path prefixes (empty on the first call) 951 | #################################################################################### 952 | def __regexsearch( self, path, tList ): 953 | if tList == []: 954 | findPaths = [ path ] 955 | else: 956 | findPaths = [] 957 | for ePath in tList: 958 | findPaths.append( ( ePath + path[0], path[1] )) 959 | ret = [] 960 | for fp in findPaths: 961 | found = self.__get_wildcard_children( fp ) 962 | ret.extend( found ) 963 | return ret 964 | 965 | 966 | def __get_local_drives(self): 967 | """Returns a list containing letters from local drives""" 968 | drive_list = win32api.GetLogicalDriveStrings() 969 | drive_list = drive_list.split("\x00")[0:-1] # the last element is "" 970 | list_local_drives = [] 971 | for letter in drive_list: 972 | if win32file.GetDriveType(letter) == win32file.DRIVE_FIXED: 973 | list_local_drives.append(letter) 974 | return list_local_drives 975 | 976 | #################################################################################### 977 | # Copy file from a single source file or directory. Wildcards (*) are acceptable 978 | # src_filename: Can be a filename, directory, or a wildcard 979 | # dest_filename: The root directory to save files to. Each will create a mirror path 980 | # Example: dest_filename = 'c:\test\' and copying "c:\windows\somefile" 981 | # the output file will have the path of "c:\test\windows\somefile" 982 | # bRecursive: Tells the copy to recursively copy a directory. Only works with directories 983 | #################################################################################### 984 | def copy( self, src_filename, dest_filename, bRecursive=False ): 985 | self.__useWin32 = True 986 | if not (dest_filename[-1] == '/' or dest_filename[-1] == '\\'): 987 | dest_filename = dest_filename+os.sep 988 | self.config['outputbasedir'] = dest_filename 989 | if type(src_filename) == unicode: 990 | src_filename = src_filename.encode('ascii', 'ignore') 991 | if not type( src_filename ) == str: 992 | self.config['logger'].error("INVALID src type (%r)" % (src_filename ) ) 993 | return 994 | src_filename = os.path.abspath( src_filename ) 995 | src_filename = [ src_filename ] 996 | for filename in src_filename: 997 | driveLetter = None 998 | if self.__useWin32 == True: 999 | self.config['logger'].debug( 'filename %r' % filename) 1000 | if not filename[:4].lower() == '\\\\.\\': 1001 | targetDrive = '\\\\.\\'+filename[:2] 1002 | else: 1003 | targetDrive = filename[:6] 1004 | 1005 | driveLetter = targetDrive[-2] 1006 | if driveLetter == '*': 1007 | for drive in self.__get_local_drives(): 1008 | self.__copyfile( filename.replace("*", drive[0], 1), bRecursive=bRecursive ) 1009 | else: 1010 | self.__copyfile( filename, bRecursive=bRecursive ) 1011 | 1012 | 1013 | 1014 | 1015 | 1016 | 1017 | --------------------------------------------------------------------------------
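Worked example (a standalone sketch, not part of the repository sources; the values are made up): the two bit-level conventions MFT.py leans on most are the 64-bit MFT reference, which packs a 48-bit record number under a 16-bit sequence number, and the runlist header byte, whose low nibble gives the size of the run-length field and whose high nibble gives the size of the (signed, relative) run-offset field.

    # MFT reference packing, mirroring MREF()/MSEQNO() above:
    mft_reference = 0x0003000000000021
    record_number = mft_reference & 0xFFFFFFFFFFFF    # low 48 bits  -> 0x21
    sequence_number = (mft_reference >> 48) & 0xFFFF  # high 16 bits -> 0x3

    # Runlist header byte, mirroring Runentry: the raw run 21 18 34 56 decodes
    # to a run of 0x18 clusters at relative cluster offset 0x5634.
    header = 0x21
    length_length = header & 0x0F  # 1 byte of run length
    offset_length = header >> 4    # 2 bytes of run offset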