└── parseNTFS.py


/parseNTFS.py:
--------------------------------------------------------------------------------
   1 | import struct
   2 | import datetime
   3 | import os
   4 | import sys
   5 | 
   6 | global debug
   7 | global rootFile
   8 | global setRootFile
   9 | global mftCluster
  10 | global fdDisk
  11 | global clusterSize
  12 | global mftIdx
  13 | global mftSize
  14 | global mftEntries
  15 | global mftRecords
  16 | global mftEntryFromPath
  17 | global maxFileSize
  18 | global partitionOffset
  19 | 
  20 | mftSize = 0
  21 | maxFileSize = 0x8000000
  22 | rootFile = "C"
  23 | setRootFile = ""
  24 | clusterSize = 4096
  25 | partitionOffset = 0
  26 | 
  27 | verbose = 0
  28 | 
  29 | debug = False
  30 | # debug = True
  31 | mftIdx = {}
  32 | mftEntries = {}
  33 | mftRecords = {}
  34 | mftEntryFromPath = {}
  35 | fdDisk = None
  36 | 
  37 | if (len(sys.argv) < 3):
  38 |     print "+-------------------------------+"
  39 |     print "|      NTFS Crawler v0.1        |"
  40 |     print "+-------------------------------+"
  41 |     
  42 |     print "usage : parseNtfs.py [options]"
  43 |     print r"         -f suce.dmp                : Choice another file for root (c:) NTFS Header, not MBR"
  44 |     print r"         -ls c:5                    : List entries in MFT node 5 of c:"
  45 |     print r"         -ls c:\windows             : List entries in c:\windows"
  46 |     print r"         -lsd c:\windows            : List entries in MFT node 5"
  47 |     print r"         -indexOffset c:5           : Get hard-disk offset of node 5"
  48 |     print r"         -deleted                   : Found removed files but node already existing"
  49 |     print r"         -getFile c:1234 output.dll : Copy from NTFS datas node"
  50 |     print r"         -sz 0x1000000              : Set a maximum size for NTFS copy"
  51 |     print r"         -v                         : Verbose"
  52 |     sys.exit()
  53 | 
  54 | class WindowsTime:
  55 |     "Convert the Windows time in 100 nanosecond intervals since Jan 1, 1601 to time in seconds since Jan 1, 1970"
  56 |     def __init__(self, low, high):
  57 |         self.low = long(low)
  58 |         self.high = long(high)
  59 |         
  60 |         if (low == 0) and (high == 0):
  61 |             self.dt = 0
  62 |             self.dtstr = "Not defined"
  63 |             self.unixtime = 0
  64 |             return
  65 |         
  66 |         # Windows NT time is specified as the number of 100 nanosecond intervals since January 1, 1601.
  67 |         # UNIX time is specified as the number of seconds since January 1, 1970. 
  68 |         # There are 134,774 days (or 11,644,473,600 seconds) between these dates.
  69 |         self.unixtime = self.GetUnixTime()
  70 |               
  71 |         try:
  72 |             self.dt = datetime.datetime.utcfromtimestamp(self.unixtime)
  73 |             # Pass isoformat a delimiter if you don't like the default "T".
  74 |             self.dtstr = str(self.dt)
  75 |           
  76 |         except:
  77 |             self.dt = 0
  78 |             self.dtstr = "Invalid timestamp"
  79 |             self.unixtime = 0
  80 |           
  81 |         
  82 |     def GetUnixTime(self):
  83 |         t=float(self.high)*2**32 + self.low
  84 | 
  85 |      # The '//' does a floor on the float value, where *1e-7 does not, resulting in an off by one second error
  86 |      # However, doing the floor loses the usecs....
  87 |         return (t*1e-7 - 11644473600)
  88 |      #return((t//10000000)-11644473600)
  89 | 
  90 | 
  91 | # Code from: https://pypi.python.org/pypi/LaZy_NT
  92 | def process_comp_stream(to_decompress):
  93 |         """
  94 |         Follow a compressed data stream from the beginning of this chunk,
  95 |         stored in the `__working_cluster`. Decompress the stream into the
  96 |         `__decompressed_stream`, then send the data to all `CarveObject`s in
  97 |         the 'Processing' state.
  98 |         """
  99 |         
 100 |         working_cluster = to_decompress
 101 |         decompressed_stream_all = ""
 102 |         decompressed_stream = ""
 103 |         abs_offset = 0
 104 |         comp_header = working_cluster[0:2]
 105 |         # 0x0000 header indicates the end of this compression stream
 106 |         while not (comp_header[0] == chr(0x00) and
 107 |                    comp_header[1] == chr(0x00)):
 108 |             # Otherwise decode the header to determine this segment's length
 109 |             header_decode = ((ord(comp_header[1]) << 8) +
 110 |                              ord(comp_header[0]))
 111 |             header_len = int((header_decode & 0x0FFF) + 1)
 112 |             if (header_decode & 0x8000) == 0x8000:
 113 |                 header_cbit = True
 114 |             else:
 115 |                 header_cbit = False
 116 |             # Grab the data length indicated, plus the next two-byte header
 117 |             # self.read_bytes(header_len + 4, seek=abs_offset)
 118 |             
 119 |             working_cluster = to_decompress[abs_offset:abs_offset+header_len + 4]
 120 |             
 121 |             # Uncompressed data in the stream can be copied directly into
 122 |             # the decompressed stream (minus the two headers)
 123 |             if not header_cbit:
 124 |                 decompressed_stream = "".join(
 125 |                     [decompressed_stream, working_cluster[2:-2]])
 126 |                 # Move abs_offset, extract the next header and repeat
 127 |                 abs_offset += len(working_cluster) - 2
 128 |                 comp_header = (working_cluster[-2] +
 129 |                                       working_cluster[-1])
 130 |                 continue
 131 |             # Compressed data in the stream must be decompressed first
 132 |             else:
 133 |                 rel_offset = 2 # account for the 2 byte tag we already passed
 134 |                 # For the length of this segment (until you hit the next header)
 135 |                 while rel_offset < len(working_cluster) - 2:
 136 |                     # Extract and decode the next tag byte
 137 |                     tag_byte = working_cluster[rel_offset]
 138 |                     rel_offset += 1
 139 |                     tag_byte_decode = format(ord(tag_byte), '008b')
 140 |                     # if len(working_cluster) < 10:
 141 |                         # break
 142 |                     # For of the 8 bits in the tag byte
 143 |                     for i in range(7, -1, -1):
 144 |                         # Safely handle a 'partial' tag byte at end of segment
 145 |                         if rel_offset >= len(working_cluster) - 2:
 146 |                             continue
 147 |                         # '0' bit indicates an uncompressed byte
 148 |                         if tag_byte_decode[i] == '0':
 149 |                             decompressed_stream = "".join(
 150 |                                 [decompressed_stream,
 151 |                                  working_cluster[rel_offset]])
 152 |                             rel_offset += 1
 153 |                         # '1' bit indicates a compression tuple
 154 |                         else:
 155 |                             comp_tuple = ord(
 156 |                                 working_cluster[rel_offset + 1])
 157 |                             comp_tuple = (comp_tuple << 8) + \
 158 |                                          ord(working_cluster[rel_offset])
 159 |                             # Tuple decoding changes based on offset within
 160 |                             # the orignal 4K cluster (now dec_stream length)
 161 |                             len_mask = 0x0FFF
 162 |                             shift_amt = 12
 163 |                             rel_cluster_offset = len(
 164 |                                 decompressed_stream) % 4096 - 1
 165 |                             while rel_cluster_offset >= 16:
 166 |                                 rel_cluster_offset >>= 1
 167 |                                 len_mask >>= 1
 168 |                                 shift_amt -= 1
 169 |                             # Decode the tuple into backref and length
 170 |                             tuple_len = (comp_tuple & len_mask) + 3
 171 |                             tuple_backref = (
 172 |                                 ((len_mask ^ 0xFFFF) & comp_tuple)
 173 |                                 >> shift_amt) + 1
 174 |                             # Locate the backref starting offset
 175 |                             backref_start = (len(decompressed_stream) -
 176 |                                              tuple_backref)
 177 |                             # Is the stream long enough to fulfill the length?
 178 |                             if (backref_start + tuple_len <=
 179 |                                     len(decompressed_stream)):
 180 |                                 decompressed_stream = "".join(
 181 |                                     [decompressed_stream,
 182 |                                      decompressed_stream[
 183 |                                          backref_start:
 184 |                                          backref_start + tuple_len]])
 185 |                             # If not, we have to repeat bytes to meet length
 186 |                             else:
 187 |                                 backref_len = (len(decompressed_stream) -
 188 |                                                backref_start)
 189 |                                 remainder_len = tuple_len % backref_len
 190 |                                 if not remainder_len == 0:
 191 |                                     backref_remainder = \
 192 |                                         decompressed_stream[
 193 |                                             backref_start:
 194 |                                             backref_start + remainder_len]
 195 |                                 else:
 196 |                                     backref_remainder = ""
 197 |                                 temp_stream = decompressed_stream[
 198 |                                     backref_start:
 199 |                                     len(decompressed_stream)] * \
 200 |                                     (tuple_len // backref_len)
 201 |                                 decompressed_stream = "".join(
 202 |                                     [decompressed_stream,
 203 |                                      temp_stream, backref_remainder])
 204 |                             rel_offset += 2
 205 |                 # Finished with this compression header
 206 |                 decompressed_stream_all += decompressed_stream
 207 |                 decompressed_stream = ""
 208 |                 # Move abs_offset, extract the next header and repeat
 209 |                 abs_offset += len(working_cluster) - 2
 210 |                 comp_header = (working_cluster[-2] +
 211 |                                       working_cluster[-1])
 212 |                 continue
 213 |         # Found 0x0000 header - this stream is finished.
 214 |         rel_offset = abs_offset % 4096
 215 |         if rel_offset == 0:
 216 |             return decompressed_stream_all+decompressed_stream
 217 |         # Re-align abs_offset to the next 4K cluster boundary. If this
 218 |         # file isn't fragmented, its stream will continue there...
 219 |         else:
 220 |             rel_offset = 4096 - rel_offset - 1
 221 |             abs_offset += rel_offset
 222 |             # self.read_bytes(1, seek=abs_offset)
 223 |             working_cluster = to_decompress[abs_offset:abs_offset+header_len + 4]
 224 |             abs_offset += 1
 225 |             return decompressed_stream_all+decompressed_stream
 226 | 
 227 | def hexprint(string, no_print = False):
 228 |     result = ""
 229 |     if len(string) == 0:
 230 |         return
 231 |     ascii = list("."*256)
 232 |     for i in range(1,0x7f):
 233 |         ascii[i] = chr(i)
 234 |     ascii[0x0] = "."
 235 |     ascii[0x7] = "."
 236 |     ascii[0x8] = "."
 237 |     ascii[0x9] = "."
 238 |     ascii[0xa] = "."
 239 |     ascii[0x1b] = "."
 240 |     ascii[0xd] = "."
 241 |     ascii[0xff] = "\xfe"
 242 |     ascii = "".join(ascii)
 243 |     offset = 0
 244 |     while (offset+0x10) <= len(string):
 245 |         line = string[offset:(offset+0x10)]
 246 |         linebuf = " %08X " % offset
 247 |         for i in range(0,16):
 248 |             if i == 8:
 249 |                 linebuf += " "
 250 |             linebuf += "%02X " % ord(line[i])
 251 |         linebuf += " "
 252 |         for i in range(0,16):
 253 |             linebuf += ascii[ord(line[i])]
 254 |         if no_print == True:
 255 |             result += linebuf+"\n"
 256 |         else:
 257 |             print linebuf
 258 |         offset += 0x10
 259 |     if (len(string) % 0x10) > 0:
 260 |         linebuf = " %08X " % offset
 261 |         for i in range((len(string)-(len(string) % 0x10)),(len(string))):
 262 |             if i == 8:
 263 |                 linebuf += " "
 264 |             linebuf += "%02X " % ord(string[i])
 265 |         linebuf += "   "*(0x10-(len(string) % 0x10))
 266 |         linebuf += " "
 267 |         for i in range((len(string)-(len(string) % 0x10)),(len(string))):
 268 |             linebuf += ascii[ord(string[i])]
 269 |         if no_print == True:
 270 |             result += linebuf+"\n"
 271 |         else:
 272 |             print linebuf
 273 |     return result.decode("cp1252")
 274 | 
 275 | 
 276 | def raw_to_int(strNumber):
 277 |     result = 0
 278 |     i = 0
 279 |     while (i<len(strNumber)):
 280 |         result += ord(strNumber[i]) << (i*8)
 281 |         i += 1
 282 |     return result
 283 | 
 284 | def parse_little_endian_signed_positive(buf):
 285 |     ret = 0
 286 |     for i, b in enumerate(buf):
 287 |         ret += ord(b) * (1 << (i * 8))
 288 |     return ret
 289 | 
 290 | def parse_little_endian_signed_negative(buf):
 291 |     ret = 0
 292 |     for i, b in enumerate(buf):
 293 |         ret += (ord(b) ^ 0xFF) * (1 << (i * 8))
 294 |     ret += 1
 295 |         
 296 |     ret *= -1
 297 |     return ret
 298 | 
 299 | def parse_little_endian_signed(buf):
 300 |     try:
 301 |         if not ord(buf[-1]) & 0b10000000:
 302 |             return parse_little_endian_signed_positive(buf)
 303 |         else:
 304 |             return parse_little_endian_signed_negative(buf)
 305 |     except Exception:
 306 |         return ''
 307 | 
 308 | def quotechars( chars ):
 309 |        return ''.join( ['.', c][c.isalnum()] for c in chars )
 310 | 
 311 | def decodeATRHeader(s):
 312 |     d = {}
 313 |     d['type'] = struct.unpack("<L",s[:4])[0]
 314 |     if d['type'] == 0xffffffff:
 315 |         return d
 316 |     d['len'] = struct.unpack("<L",s[4:8])[0]
 317 |     d['res'] = struct.unpack("B",s[8])[0] # Non-resident flag
 318 |     d['nlen'] = struct.unpack("B",s[9])[0]
 319 |     d['name_off'] = struct.unpack("<H",s[10:12])[0]
 320 |     d['flags'] = struct.unpack("<H",s[12:14])[0]
 321 |     d['id'] = struct.unpack("<H",s[14:16])[0]
 322 |     if d['res'] == 0:
 323 |         d['ssize'] = struct.unpack("<L",s[0x10:0x14])[0]            # dwLength
 324 |         d['soff'] = struct.unpack("<H",s[0x14:0x16])[0]             # wAttrOffset
 325 |         d['idxflag'] = struct.unpack("B",s[0x16])[0]              # uchIndexedTag
 326 |         padding = struct.unpack("B",s[0x17])[0]                   # Padding
 327 |     else:
 328 |         d['start_vcn'] = struct.unpack("<Q",s[0x10:0x18])[0]    # n64StartVCN
 329 |         d['last_vcn'] = struct.unpack("<Q",s[0x18:0x20])[0]     # n64EndVCN
 330 |         d['run_off'] = struct.unpack("<H",s[0x20:0x22])[0]          # wDataRunOffset (in clusters, from start of partition?)
 331 |         d['compsize'] = struct.unpack("<H",s[0x22:0x24])[0]         # wCompressionSize
 332 |         padding = struct.unpack("<I",s[0x24:0x28])[0]               # Padding
 333 |         d['allocsize'] = struct.unpack("<Lxxxx",s[0x28:0x30])[0]    # n64AllocSize
 334 |         d['realsize'] = struct.unpack("<Lxxxx",s[0x30:0x38])[0]     # n64RealSize
 335 |         d['streamsize'] = struct.unpack("<Lxxxx",s[0x38:0x40])[0]   # n64StreamSize
 336 |         if d['nlen'] > 0:
 337 |             d['name'] = s[d['name_off']:d['name_off']+(d['nlen']*2)].decode('utf-16').encode('utf-8')
 338 |         else:
 339 |             d['name'] = ''
 340 |         (d['ndataruns'],d['dataruns'],d['drunerror']) = unpack_dataruns(s[d['run_off']:d['len']])
 341 | 
 342 |     return d
 343 | 
 344 | def decodeSIAttribute(s):
 345 |     d = {}
 346 |     d['crtime'] = WindowsTime(struct.unpack("<L",s[:4])[0],struct.unpack("<L",s[4:8])[0]).dtstr
 347 |     d['mtime'] = WindowsTime(struct.unpack("<L",s[8:0xc])[0],struct.unpack("<L",s[0xc:0x10])[0]).dtstr
 348 |     d['ctime'] = WindowsTime(struct.unpack("<L",s[0x10:0x14])[0],struct.unpack("<L",s[0x14:0x18])[0]).dtstr
 349 |     d['atime'] = WindowsTime(struct.unpack("<L",s[0x18:0x1c])[0],struct.unpack("<L",s[0x1c:0x20])[0]).dtstr
 350 |     d['dos'] = struct.unpack("<I",s[0x20:0x24])[0]          # 4
 351 |     d['maxver'] = struct.unpack("<I",s[0x24:0x28])[0]       # 4
 352 |     d['ver'] = struct.unpack("<I",s[0x28:0x2c])[0]          # 4
 353 |     d['class_id'] = struct.unpack("<I",s[0x2c:0x30])[0]     # 4
 354 |     d['own_id'] = struct.unpack("<I",s[0x30:0x34])[0]       # 4
 355 |     d['sec_id'] = struct.unpack("<I",s[0x34:0x38])[0]       # 4
 356 |     d['quota'] = struct.unpack("<d",s[0x38:0x40])[0]        # 8
 357 |     d['usn'] = struct.unpack("<d",s[0x40:0x48])[0]          # 8 - end of date to here is 40
 358 | 
 359 |     return d
 360 | 
 361 | def decodeFNAttribute(s, record):
 362 |     d = {}
 363 |     d['par_ref'] = struct.unpack("<Lxx", s[:6])[0]      # Parent reference nummber + seq number = 8 byte "File reference to the parent directory."
 364 |     d['par_seq'] = struct.unpack("<H",s[6:8])[0]        # Parent sequence number
 365 |     d['crtime'] = WindowsTime(struct.unpack("<L",s[8:0xc])[0],struct.unpack("<L",s[0xc:0x10])[0]).dtstr
 366 |     d['mtime'] = WindowsTime(struct.unpack("<L",s[0x10:0x14])[0],struct.unpack("<L",s[0x14:0x18])[0]).dtstr
 367 |     d['ctime'] = WindowsTime(struct.unpack("<L",s[0x18:0x1c])[0],struct.unpack("<L",s[0x1c:0x20])[0]).dtstr
 368 |     d['atime'] = WindowsTime(struct.unpack("<L",s[0x20:0x24])[0],struct.unpack("<L",s[0x24:0x28])[0]).dtstr
 369 |     d['alloc_fsize'] = struct.unpack("<q",s[0x28:0x30])[0]
 370 |     d['real_fsize'] = struct.unpack("<q",s[0x30:0x38])[0]
 371 |     d['flags'] = struct.unpack("<d",s[0x38:0x40])[0]            # 0x01=NTFS, 0x02=DOS
 372 |     d['nlen'] = struct.unpack("B",s[0x40])[0]
 373 |     d['nspace'] = struct.unpack("B",s[0x41])[0]
 374 | 
 375 |     bytes = s[0x42:0x42 + d['nlen']*2]
 376 |     try:
 377 |         d['name'] = bytes.decode('utf-16').encode('utf-8').lower()
 378 |     except:
 379 |         d['name'] = 'UnableToDecodeFilename'
 380 | 
 381 |     return d
 382 | 
 383 | def decodeIndexRoot(s):
 384 |     d = {}
 385 |     d['type'] = struct.unpack("<L", s[:4])[0]
 386 |     d['collection'] = struct.unpack("<L",s[4:8])[0]
 387 |     d['rec_size_byte'] = struct.unpack("<L",s[8:0xc])[0]
 388 |     d['rec_size_clust'] = struct.unpack("B",s[0xc])[0]
 389 |     # 3 bytes of padding
 390 |     d['off_start'] = struct.unpack("<L",s[0x10:0x14])[0]
 391 |     d['off_end_use'] = struct.unpack("<L",s[0x14:0x18])[0]
 392 |     d['off_end_alloc'] = struct.unpack("<L",s[0x18:0x1c])[0]
 393 |     d['flags'] = struct.unpack("<L",s[0x1c:0x20])[0]
 394 |     
 395 |     return d
 396 | 
 397 | def decodeReparsePoint(s):
 398 |     d = {}
 399 |     # 8 bytes of WTF
 400 |     d['full_path_size'] = struct.unpack("<H",s[0xc:0xe])[0]
 401 |     d['path_size'] = struct.unpack("<H",s[0xe:0x10])[0]
 402 |     
 403 |     # Not a good idea but not have the time to identify how it defined the offset
 404 |     offset = 0x10
 405 |     while offset < len(s) and s[offset:offset+4] != "\\\x00?\x00":
 406 |         offset += 4
 407 |     
 408 |     d['full_path'] = s[offset:offset+d['full_path_size']].decode('utf-16').encode('utf-8').lower()
 409 |     d['path'] = s[offset+d['full_path_size']:offset+d['full_path_size']+d['path_size']].decode('utf-16').encode('utf-8').lower()
 410 |     
 411 |     return d
 412 | 
 413 | def decodeIndex(s):
 414 |     d = {}
 415 |     # 8 bytes of WTF
 416 |     d['entry_len'] = struct.unpack("<H",s[8:0xa])[0]
 417 |     d['fn_len'] = struct.unpack("<H",s[0xa:0xc])[0]
 418 |     d['idx_flags'] = struct.unpack("<L",s[0xc:0x10])[0]
 419 |     
 420 |     return d
 421 | 
 422 | def decodeEaInfo(s):
 423 |     d = {}
 424 |     # 8 bytes of WTF
 425 |     # print s[:8]
 426 |     d['ea_packet_len'] = struct.unpack("<H",s[0:2])[0] # Size of the packed Extended Attributes
 427 |     d['len_ea'] = struct.unpack("<H",s[2:4])[0] # Number of Extended Attributes which have NEED_EA set
 428 |     d['unpack_ea_size'] = struct.unpack("<L",s[4:8])[0] # Size of the unpacked Extended Attributes
 429 |     
 430 |     return d
 431 | 
 432 | def decodeEa(s):
 433 |     
 434 |     result = []
 435 |     ptr = 0
 436 |     
 437 |     while (ptr+8) < len(s):
 438 |         d = {}
 439 |         d['next_ea_off'] = struct.unpack("<L",s[ptr+0:ptr+4])[0] # Offset to next Extended Attribute
 440 |         d['flags'] = struct.unpack("<B",s[ptr+4])[0] # Flags
 441 |         d['nlen'] = struct.unpack("<B",s[ptr+5])[0] # Name Length
 442 |         d['nval'] = struct.unpack("<H",s[ptr+6:ptr+8])[0] # Value Length
 443 |         d['name'] = s[ptr+8:ptr+8+d['nlen']]
 444 |         d['value'] = s[ptr+8+d['nlen']:ptr+8+d['nlen']+d['nval']]
 445 |         result.append(d)
 446 |         if (ptr+8+d['nlen']+d['nval']) > d['next_ea_off']:
 447 |             break
 448 |         ptr += d['next_ea_off']
 449 |     return result
 450 | 
 451 | def decodeAttributeList(s, record):
 452 |     hexFlag = False
 453 | 
 454 |     d = {}
 455 |     d['type'] = struct.unpack("<I",s[:4])[0]                # 4
 456 |     d['len'] = struct.unpack("<H",s[4:6])[0]                # 2
 457 |     d['nlen'] = struct.unpack("B",s[6])[0]                  # 1
 458 |     d['f1'] = struct.unpack("B",s[7])[0]                    # 1
 459 |     d['start_vcn'] = struct.unpack("<d",s[8:16])[0]         # 8
 460 |     d['file_ref'] = struct.unpack("<Lxx",s[16:22])[0]       # 6
 461 |     d['seq'] = struct.unpack("<H",s[22:24])[0]              # 2
 462 |     d['id'] = struct.unpack("<H",s[24:26])[0]               # 4
 463 |     
 464 |     bytes = s[26:26 + d['nlen']*2]
 465 |     d['name'] = bytes.decode('utf-16').encode('utf-8')
 466 | 
 467 |     return d
 468 | 
 469 | def decodeVolumeInfo(s):
 470 |     d = {}
 471 |     d['f1'] = struct.unpack("<d",s[:8])[0]                  # 8
 472 |     d['maj_ver'] = struct.unpack("B",s[8])[0]               # 1
 473 |     d['min_ver'] = struct.unpack("B",s[9])[0]               # 1
 474 |     d['flags'] = struct.unpack("<H",s[10:12])[0]            # 2
 475 |     d['f2'] = struct.unpack("<I",s[12:16])[0]               # 4
 476 | 
 477 |     if (debug):
 478 |         print "+Volume Info"
 479 |         print "++F1%d" % d['f1']
 480 |         print "++Major Version: %d" % d['maj_ver']
 481 |         print "++Minor Version: %d" % d['min_ver']
 482 |         print "++Flags: %d" % d['flags']
 483 |         print "++F2: %d" % d['f2']
 484 | 
 485 |     return d
 486 | 
 487 | # Decode a Resident Data Attribute
 488 | def decodeDataAttribute(s):
 489 |     d = {}
 490 |     
 491 |     d['dataSize'] = struct.unpack("<L",s[0x4:0x8])[0]
 492 |     d['non_resid_flag'] = struct.unpack("<B",s[0x8])[0] # Non-resident flag
 493 |     d['nlen'] = struct.unpack("<B",s[0x9])[0] # Name length
 494 |     d['name_off'] = struct.unpack("<H",s[0xa:0xc])[0] # Offset to the Name
 495 |     d['flags'] = struct.unpack("<H",s[0xc:0xe])[0] # Flags
 496 |     d['attrib_id'] = struct.unpack("<H",s[0xe:0x10])[0] # Attribute Id
 497 |     if d['non_resid_flag'] == 1:
 498 |         d['vcn_start'] = struct.unpack("<Q",s[0x10:0x18])[0]
 499 |         d['vcn_end'] = struct.unpack("<Q",s[0x18:0x20])[0]
 500 |         d['dataruns_offset'] = struct.unpack("<H",s[0x20:0x22])[0]
 501 |         d['compress_unit_size'] = struct.unpack("<H",s[0x22:0x24])[0]
 502 |         d['alloc_size'] = struct.unpack("<Q",s[0x28:0x30])[0]
 503 |         d['real_size'] = struct.unpack("<Q",s[0x30:0x38])[0]
 504 |         d['init_size'] = struct.unpack("<Q",s[0x38:0x40])[0]
 505 |         if d['nlen'] == 0:
 506 |             d['attrib_name'] = ''
 507 |         else:
 508 |             d['attrib_name'] = s[d['name_off']:d['name_off']+(d['nlen']*2)].decode('utf-16').encode('utf-8')
 509 |         datas = unpack_dataruns(s[d['dataruns_offset']:d['dataSize']])
 510 |         d['numruns'] = datas[0]
 511 |         d['dataruns'] = datas[1]
 512 |         d['error'] = datas[2]
 513 |     elif d['non_resid_flag'] == 0:
 514 |         d['attrib_len'] = struct.unpack("<L",s[0x10:0x14])[0] # Length of the Attribute
 515 |         d['attrib_off'] = struct.unpack("<H",s[0x14:0x16])[0] # Offset to the Attribute
 516 |         d['indexed_flag'] = struct.unpack("<B",s[0x16])[0] # Indexed flag
 517 |         # 1 byte of padding
 518 |         d['attrib_name'] = s[0x18:0x18+(d['nlen']*2)].decode('utf-16').encode('utf-8')
 519 |         d['attrib_value'] = s[d['attrib_off']:d['attrib_off']+d['attrib_len']]
 520 |     
 521 |     return d
 522 |     
 523 | def ObjectID(s):
 524 |     objstr = ''
 525 |     if s == 0:
 526 |         objstr = 'Undefined'
 527 |     else:
 528 |         objstr = "%08x-%04x-%04x-%04x-%06x" % (raw_to_int(s[0:4]),raw_to_int(s[4:6]),
 529 |             raw_to_int(s[6:8]),raw_to_int(s[8:10]),raw_to_int(s[10:16]))
 530 | 
 531 |     return objstr
 532 | 
 533 | def decodeObjectID(s):
 534 |     d = {}
 535 |     d['objid'] = ObjectID(s[0:16])
 536 |     d['orig_volid'] = ObjectID(s[16:32])
 537 |     d['orig_objid'] = ObjectID(s[32:48])
 538 |     d['orig_domid'] = ObjectID(s[48:64])
 539 | 
 540 |     return d
 541 | 
 542 | def unpack_dataruns(str):
 543 |     # hexprint(str)
 544 |     dataruns = []
 545 |     
 546 |     lenOfRun = 0
 547 |     lcnElem = 0
 548 |     offset = 0
 549 |     while offset < len(str):
 550 |         sizeLen = (struct.unpack("<B", str[offset])[0] & 0xF)
 551 |         sizeOffset = (struct.unpack("<B", str[offset])[0] & 0xF0)>>4
 552 |         if sizeLen == 0:
 553 |             lenOfRun = 0
 554 |         else:
 555 |             lenOfRun = raw_to_int(str[offset+1:offset+1+sizeLen])
 556 |         if sizeOffset == 0:
 557 |             lcnElem += 0
 558 |         else:
 559 |             relativeLCN = raw_to_int(str[offset+1+sizeLen:offset+1+sizeLen+sizeOffset])
 560 |             if (relativeLCN & ((2**(8*sizeOffset))>>1)): # is negative ?
 561 |                 relativeLCN = -((2**(8*sizeOffset))-relativeLCN)
 562 |             lcnElem += relativeLCN
 563 |             dataruns.append([lcnElem,lenOfRun])
 564 |         # print "sizeLen : %x" % sizeLen
 565 |         # print "relativeLCN : %x" % relativeLCN
 566 |         # print "lenOfRun : %x" % lenOfRun
 567 |         # print "lcnElem : %x (new : %x)" % (lcnElem,raw_to_int(str[offset+1+sizeLen:offset+1+sizeLen+sizeOffset]))
 568 |         offset += 1+sizeLen+sizeOffset
 569 |     numruns = len(dataruns)
 570 |     error = 0
 571 |     
 572 |     return numruns, dataruns, error
 573 | 
 574 | def decodeIndexHeader(s):
 575 |     hexFlag = False
 576 |     # File name attributes can have null dates.
 577 | 
 578 |     d = {}
 579 |     
 580 |     if s[:4] != "INDX":
 581 |         return d
 582 |     
 583 |     d['off_udp_seq'] = struct.unpack("<H", s[4:6])[0] # Offset to the Update Sequence
 584 |     d['size_udp'] = struct.unpack("<H", s[6:8])[0] * 2 # Size in words of the Update Sequence Number
 585 |     d['lf_num'] = struct.unpack("<Q", s[8:0x10])[0] # $LogFile sequence number
 586 |     d['vcn_idx'] = struct.unpack("<Q", s[0x10:0x18])[0] # VCN of this INDX buffer in the Index Allocation
 587 |     d['idx_entry_off'] = struct.unpack("<L", s[0x18:0x1c])[0] # Offset to the Index Entries (relative to 0x18)
 588 |     d['idx_size'] = struct.unpack("<L", s[0x1c:0x20])[0] # Size of Index Entries (relative to 0x18)
 589 |     d['idx_alloc_size'] = struct.unpack("<L", s[0x20:0x24])[0] # Allocated size of the Index Entries (relative to 0x18)
 590 |     d['is_not_leaf'] = struct.unpack("<B", s[0x24])[0] # 1 if not leaf node (b)
 591 |     # 3 bytes of padding
 592 |     d['udp_seq'] = struct.unpack("<H", s[0x28:0x2a])[0] # Update sequence
 593 |     if ((d['size_udp']-2)/2) > 0:
 594 |         d['upd_array'] = struct.unpack("<"+str((d['size_udp']/2)-1)+"H", s[0x2a:0x2a+(d['size_udp']-2)])
 595 |     else:
 596 |         d['upd_array'] = []
 597 |     
 598 |     return d
 599 | 
 600 | def decodeIndexRecord(s,idx_size):
 601 |     hexFlag = False
 602 |     # File name attributes can have null dates.
 603 |     
 604 |     blobFiles = {}
 605 |     offset = 0
 606 |     
 607 |     while (offset+0x52) < idx_size:
 608 |         d = {}
 609 |         
 610 |         d['mft_ref'] = raw_to_int(s[offset:offset+6]) # MFT Reference of the file
 611 |         d['mft_seq'] = raw_to_int(s[offset+6:offset+8]) # Sequence number
 612 |         d['idx_entry_size'] = struct.unpack("<H", s[offset+8:offset+0xa])[0] # Size of this index entry
 613 |         d['name_off'] = struct.unpack("<H", s[offset+0xa:offset+0xc])[0] # Offset to the filename
 614 |         d['idx_flags'] = struct.unpack("<H", s[offset+0xc:offset+0xe])[0] # Small Index / Large index
 615 |         # 2 bytes of padding
 616 |         d['mft_parent_ref'] = raw_to_int(s[offset+0x10:offset+0x16]) # MFT File Reference of the parent
 617 |         d['ctrime'] = WindowsTime(struct.unpack("<L",s[offset+0x18:offset+0x1c])[0],struct.unpack("<L",s[offset+0x1c:offset+0x20])[0]).dtstr # File creation time
 618 |         d['mtime'] = WindowsTime(struct.unpack("<L",s[offset+0x20:offset+0x24])[0],struct.unpack("<L",s[offset+0x24:offset+0x28])[0]).dtstr # Last modification time
 619 |         d['ctime'] = WindowsTime(struct.unpack("<L",s[offset+0x28:offset+0x2c])[0],struct.unpack("<L",s[offset+0x2c:offset+0x30])[0]).dtstr # Last modification time for FILE record
 620 |         d['atime'] = WindowsTime(struct.unpack("<L",s[offset+0x30:offset+0x34])[0],struct.unpack("<L",s[offset+0x34:offset+0x38])[0]).dtstr # Last access time
 621 |         d['file_size'] = struct.unpack("<Q", s[offset+0x38:offset+0x40])[0] # Allocated size of file
 622 |         d['file_size_real'] = struct.unpack("<Q", s[offset+0x40:offset+0x48])[0] # Real size of file
 623 |         d['file_flags'] = struct.unpack("<Q", s[offset+0x48:offset+0x50])[0] # File Flags
 624 |         d['fn_len'] = struct.unpack("<B", s[offset+0x50])[0] # Length of filename
 625 |         d['fn_namespace'] = struct.unpack("<B", s[offset+0x51])[0] # Length of filename
 626 |         try:
 627 |             d['name'] = s[offset+0x52:offset+0x52+(d['fn_len']*2)].decode('utf-16').encode('utf-8').lower()
 628 |         except:
 629 |             return d
 630 |         offset += 0x52+(d['fn_len']*2)
 631 |         if d['idx_flags'] == 1:
 632 |             # print "idx_flags !"
 633 |             offset += 8
 634 |         if (offset % 8) != 0:
 635 |             offset += 8-(offset % 8) # 8 bytes aling
 636 |         # print d
 637 |         if offset > idx_size:
 638 |             continue
 639 |         blobFiles[d['name']] = d
 640 |     
 641 |     return blobFiles
 642 | 
 643 | def getDirList(indx_datas):
 644 |     global clusterSize
 645 |     indx_blob = 0
 646 |     files = {}
 647 |     while indx_blob < len(indx_datas):
 648 |         indx_infos = decodeIndexHeader(indx_datas[indx_blob:])
 649 |         if len(indx_infos) == 0:
 650 |            indx_blob += clusterSize
 651 |            continue
 652 |         upd_offset = 0
 653 |         newIndexBuf = bytearray(indx_datas[indx_blob:])
 654 |         while upd_offset < ((indx_infos['size_udp']-2)/2) :
 655 |             newIndexBuf[(upd_offset*512)+512-1] = (indx_infos['upd_array'][upd_offset] & 0xff00)>>8
 656 |             newIndexBuf[(upd_offset*512)+512-2] = (indx_infos['upd_array'][upd_offset] & 0xff)
 657 |             upd_offset += 1
 658 |         curFiles = decodeIndexRecord(str(newIndexBuf)[0x18+indx_infos['idx_entry_off']:],indx_infos['idx_size']-indx_infos['idx_entry_off']+8)
 659 |         for curkey in curFiles.keys():
 660 |             if curFiles[curkey]['fn_namespace'] != 2 and not curkey.lower() in files.keys():
 661 |                 files[curkey] = curFiles[curkey]
 662 |         indx_blob += clusterSize
 663 |     return files
 664 | 
 665 | def decodeMFTHeader(record, raw_record):
 666 |     record['magic'] = struct.unpack("<I", raw_record[:4])[0]
 667 |     record['upd_off'] = struct.unpack("<H",raw_record[4:6])[0]
 668 |     record['upd_cnt'] = struct.unpack("<H",raw_record[6:8])[0]
 669 |     record['lsn'] = struct.unpack("<d",raw_record[8:0x10])[0]
 670 |     record['seq'] = struct.unpack("<H",raw_record[0x10:0x12])[0]
 671 |     record['link'] = struct.unpack("<H",raw_record[0x12:0x14])[0]
 672 |     record['attr_off'] = struct.unpack("<H",raw_record[0x14:0x16])[0]
 673 |     record['flags'] = struct.unpack("<H", raw_record[0x16:0x18])[0]
 674 |     record['size'] = struct.unpack("<I",raw_record[0x18:0x1c])[0]
 675 |     record['alloc_sizef'] = struct.unpack("<I",raw_record[0x1c:0x20])[0]
 676 |     record['base_ref'] = struct.unpack("<Lxx",raw_record[0x20:0x26])[0]
 677 |     record['base_seq'] = struct.unpack("<H",raw_record[0x26:0x28])[0]
 678 |     record['next_attrid'] = struct.unpack("<H",raw_record[0x28:0x2a])[0]
 679 |     record['f1'] = raw_record[0x2a:0x2c]                            # Padding
 680 |     record['recordnum'] = struct.unpack("<I", raw_record[0x2c:0x30])[0]  # Number of this MFT Record
 681 |     record['seq_number'] = struct.unpack("<H",raw_record[0x30:0x32])[0]  # Sequence number
 682 |     if record['upd_off'] == 42:
 683 |         record['seq_attr1'] = raw_record[0x2c:0x2e]  # Sequence attribute for sector 1
 684 |         record['seq_attr2'] = raw_record[0x2e:0x3a]  # Sequence attribute for sector 2
 685 |     else:
 686 |         record['seq_attr1'] = raw_record[0x32:0x34]  # Sequence attribute for sector 1
 687 |         record['seq_attr2'] = raw_record[0x34:0x36]  # Sequence attribute for sector 2
 688 |     record['fncnt'] = 0                              # Counter for number of FN attributes
 689 |     record['datacnt'] = 0                            # Counter for number of $DATA attributes
 690 | 
 691 | def parse_record(raw_record, operation = "None"):
 692 |     record = {}
 693 |     record['filename'] = ''
 694 |     record['notes'] = ''
 695 |     record['ads'] = {}
 696 |     record['datacnt'] = 0
 697 |     
 698 |     clistFiles = {}
 699 |     
 700 |     if (not raw_record) or len(raw_record) < 1024:
 701 |         return record
 702 |     
 703 |     decodeMFTHeader(record, raw_record)
 704 | 
 705 |     if (record['seq_number'] == raw_record[0x1fe:0x200] and record['seq_number'] == raw_record[0x3fe:0x400]):
 706 |         raw_record = raw_record[:0x1fe]+record['seq_attr1']+raw_record[0x200:0x3fe]+record['seq_attr2']
 707 |     
 708 |     new_raw_record = bytearray(raw_record)
 709 |     new_raw_record[0x1fe] = record['seq_attr1'][0]
 710 |     new_raw_record[0x1ff] = record['seq_attr1'][1]
 711 |     new_raw_record[0x3fe] = record['seq_attr2'][0]
 712 |     new_raw_record[0x3ff] = record['seq_attr2'][1]
 713 |     raw_record = str(new_raw_record)
 714 |     
 715 |     record_number = record['recordnum']
 716 |     
 717 |     if debug:
 718 |         print '-->Record number: %d\n\tMagic: %s Attribute offset: %d Flags: %s Size:%d' % (record_number, record['magic'],
 719 |             record['attr_off'], hex(int(record['flags'])), record['size'])
 720 | 
 721 |     if record['magic'] == 0x44414142:
 722 |         if debug:
 723 |             print "BAAD MFT Record"
 724 |         record['baad'] = True
 725 |         return record
 726 | 
 727 |     if record['magic'] != 0x454c4946:
 728 |         if debug:
 729 |             print "Corrupt MFT Record"
 730 |         record['corrupt'] = True
 731 |         return record
 732 | 
 733 |     read_ptr = record['attr_off']
 734 |     listFiles = {}
 735 |     
 736 |     if record['size'] > 1024:
 737 |         record['size'] = 1024
 738 |     
 739 |     while (read_ptr < record['size']):
 740 | 
 741 |         ATRrecord = decodeATRHeader(raw_record[read_ptr:])
 742 |         
 743 |         if ATRrecord['type'] == 0xffffffff:             # End of attributes
 744 |             break
 745 | 
 746 |         if ATRrecord['nlen'] > 0:
 747 |             bytes = raw_record[read_ptr+ATRrecord['name_off']:read_ptr+ATRrecord['name_off'] + ATRrecord['nlen']*2]
 748 |             ATRrecord['name'] = bytes.decode('utf-16').encode('utf-8')
 749 |         else:
 750 |             ATRrecord['name'] = ''
 751 | 
 752 |         if debug:
 753 |             print "Attribute type: %x Length: %d Res: %x" % (ATRrecord['type'], ATRrecord['len'], ATRrecord['res'])
 754 | 
 755 |         if ATRrecord['type'] == 0x10:                   # Standard Information
 756 |             if debug:
 757 |                 print "Stardard Information:\n++Type: %s Length: %d Resident: %s Name Len:%d Name Offset: %d" % \
 758 |                      (hex(int(ATRrecord['type'])),ATRrecord['len'],ATRrecord['res'],ATRrecord['nlen'],ATRrecord['name_off'])
 759 |             SIrecord = decodeSIAttribute(raw_record[read_ptr+ATRrecord['soff']:])
 760 |             record['si'] = SIrecord
 761 |             if debug:
 762 |                 print "++CRTime: %s\n++MTime: %s\n++ATime: %s\n++EntryTime: %s" % \
 763 |                    (SIrecord['crtime'], SIrecord['mtime'], SIrecord['atime'], SIrecord['ctime'])
 764 | 
 765 |         elif ATRrecord['type'] == 0x20:                 # Attribute list
 766 |             if debug:
 767 |                 print "Attribute list"
 768 |             if ATRrecord['res'] == 0:
 769 |                 ALrecord = decodeAttributeList(raw_record[read_ptr+ATRrecord['soff']:], record)
 770 |                 record['al'] = ALrecord
 771 |                 if debug:
 772 |                     print "Name: %s"  % (ALrecord['name'])
 773 |             else:
 774 |                 if debug:
 775 |                     print "Non-resident Attribute List?"
 776 |                 record['al'] = None
 777 | 
 778 |         elif ATRrecord['type'] == 0x30:                 # File name
 779 |             if debug: print "File name record"
 780 |             FNrecord = decodeFNAttribute(raw_record[read_ptr+ATRrecord['soff']:], record)
 781 |             if not('fn' in record):
 782 |                 record['fn'] = {}
 783 |             record['fn'] = record['fn'][record['fncnt']] = FNrecord
 784 |             if debug: print "Name: %s (%d)" % (FNrecord['name'],record['fncnt'])
 785 |             record['fncnt'] = record['fncnt'] + 1
 786 |             if FNrecord['crtime'] != 0:
 787 |                 if debug: print "\tCRTime: %s MTime: %s ATime: %s EntryTime: %s" % (FNrecord['crtime'],
 788 |                         FNrecord['mtime'], FNrecord['atime'], FNrecord['ctime'])
 789 | 
 790 |         elif ATRrecord['type'] == 0x40:                 #  Object ID
 791 |             ObjectIDRecord = decodeObjectID(raw_record[read_ptr+ATRrecord['soff']:])
 792 |             record['objid'] = ObjectIDRecord
 793 |             if debug: print "Object ID"
 794 | 
 795 |         elif ATRrecord['type'] == 0x50:                 # Security descriptor
 796 |             record['sd'] = True
 797 |             if debug: print "Security descriptor"
 798 | 
 799 |         elif ATRrecord['type'] == 0x60:                 # Volume name
 800 |             record['volname'] = True
 801 |             if debug: print "Volume name"
 802 | 
 803 |         elif ATRrecord['type'] == 0x70:                 # Volume information
 804 |             if debug: print "Volume info attribute"
 805 |             VolumeInfoRecord = decodeVolumeInfo(raw_record[read_ptr+ATRrecord['soff']:])
 806 |             record['volinfo'] = VolumeInfoRecord
 807 | 
 808 |         elif ATRrecord['type'] == 0x80:                 # Data
 809 |             DataAttribute = decodeDataAttribute(raw_record[read_ptr:])
 810 |             if DataAttribute['attrib_name'] == '':
 811 |                 record['data'] = DataAttribute
 812 |             else:
 813 |                 record['ads'][DataAttribute['attrib_name']] = DataAttribute
 814 |             record['datacnt'] = record['datacnt'] + 1
 815 |             
 816 |             if debug: print "Data attribute"
 817 | 
 818 |         elif ATRrecord['type'] == 0x90:                 # Index root
 819 |             record['indexroot'] = decodeIndexRoot(raw_record[read_ptr+ATRrecord['soff']:])
 820 |             if (operation == "DIR" or operation == "FILE"):
 821 |                 if record['indexroot']['flags'] == 1: # Child node exist
 822 |                     cOffset = record['indexroot']['off_start']
 823 |                     baseOffset = read_ptr+ATRrecord['soff']+0x10 # 0x10 is size of Attrib header
 824 |                     
 825 |                     while cOffset < record['indexroot']['off_end_alloc']:
 826 |                         cIndex = decodeIndex(raw_record[baseOffset+cOffset:])
 827 |                         if debug:
 828 |                             if (cIndex['idx_flags'] & 1):
 829 |                                 print "    + Entry has a child"
 830 |                             if (cIndex['idx_flags'] & 2):
 831 |                                 print "    + Last entry"
 832 |                         cOffset += cIndex['entry_len']
 833 |                 clistFiles = decodeIndexRecord(raw_record[read_ptr+ATRrecord['soff']+0x20:],ATRrecord['ssize']-0x20)
 834 |                 for ckey in clistFiles.keys():
 835 |                     if clistFiles[ckey]['fn_namespace'] != 2:
 836 |                         listFiles[ckey] = clistFiles[ckey]
 837 |                 
 838 |             record['indexroot'] = ATRrecord
 839 |             if debug: print "Index root"
 840 | 
 841 |         elif ATRrecord['type'] == 0xA0:                 # Index allocation
 842 |             record['indexallocation'] = ATRrecord
 843 |             if (operation == "DIR" or operation == "FILE"):
 844 |                 if ATRrecord['res'] == 1: # Non resident datas
 845 |                     parsedSize = 0
 846 |                     for clust_index, idxSz in ATRrecord['dataruns']:
 847 |                         if parsedSize < ATRrecord['allocsize']:
 848 |                             parsedSize += (idxSz * clusterSize)
 849 |                             index_offset = clust_index * clusterSize
 850 |                             if idxSz > 0x8000:
 851 |                                 # print "LIMIT!"
 852 |                                 # sys.exit()
 853 |                                 continue
 854 |                             indx_datas = getRawDatas(index_offset,idxSz * clusterSize)
 855 |                             clistFiles = getDirList(indx_datas)
 856 |                             for ckey in clistFiles.keys():
 857 |                                 listFiles[ckey] = clistFiles[ckey]
 858 |                         else:
 859 |                             idxSz = ATRrecord['allocsize'] - parsedSize
 860 |                             index_offset = clust_index * clusterSize
 861 |                             if idxSz > 0x1000000:
 862 |                                 # print "LIMIT!"
 863 |                                 # sys.exit()
 864 |                                 continue
 865 |                             indx_datas = getRawDatas(index_offset,idxSz)
 866 |                             clistFiles = getDirList(indx_datas)
 867 |                             for ckey in clistFiles.keys():
 868 |                                 listFiles[ckey] = clistFiles[ckey]
 869 |                             parsedSize += ATRrecord['allocsize'] - parsedSize
 870 | 
 871 |         elif ATRrecord['type'] == 0xB0:                 # Bitmap
 872 |             record['bitmap'] = True
 873 |             if debug: print "Bitmap"
 874 | 
 875 |         elif ATRrecord['type'] == 0xC0:                 # Reparse point
 876 |             record['reparsepoint'] = True
 877 |             target = decodeReparsePoint(raw_record[read_ptr+ATRrecord['soff']:read_ptr+ATRrecord['soff']+ATRrecord['len']])
 878 |             record['reparsepoint_dest'] = target
 879 |             if debug: print "Reparse point"
 880 | 
 881 |         elif ATRrecord['type'] == 0xD0:                 # EA Information
 882 |             # record['eainfo'] = True
 883 |             record['eainfo'] = decodeEaInfo(raw_record[read_ptr+ATRrecord['soff']:read_ptr+ATRrecord['soff']+ATRrecord['len']])
 884 |             if debug: print "EA Information"
 885 | 
 886 |         elif ATRrecord['type'] == 0xE0:                 # EA
 887 |             # record['ea'] = True
 888 |             
 889 |             if ATRrecord['res'] == 1: # is a non-resident data store ?
 890 |                 datas = getDatasFromAttribute(ATRrecord)
 891 |             else:
 892 |                 datas = raw_record[read_ptr+ATRrecord['soff']:read_ptr+ATRrecord['len']]
 893 |             record['ea'] = decodeEa(datas)
 894 |             if debug: print "EA"
 895 | 
 896 |         elif ATRrecord['type'] == 0xF0:                 # Property set
 897 |             record['propertyset'] = True
 898 |             if debug: print "Property set"
 899 | 
 900 |         elif ATRrecord['type'] == 0x100:                 # Logged utility stream
 901 |             record['loggedutility'] = True
 902 |             if debug: print "Logged utility stream"
 903 | 
 904 |         else:
 905 |             if debug: print "Found an unknown attribute"
 906 | 
 907 |         if ATRrecord['len'] > 0:
 908 |             read_ptr = read_ptr + ATRrecord['len']
 909 |         else:
 910 |             if debug: print "ATRrecord->len < 0, exiting loop"
 911 |             break
 912 |     if operation == "DIR" and not 'reparsepoint' in record:
 913 |         return listFiles
 914 |     return record
 915 | 
 916 | def init():
 917 |     global fdDisk
 918 |     global clusterSize
 919 |     global mftCluster
 920 |     global setRootFile
 921 |     global partitionOffset
 922 |     
 923 |     try:
 924 |         if setRootFile == "":
 925 |             fdDisk = open("\\\\.\\"+rootFile+":", "rb")
 926 |         else:
 927 |             fdDisk = open(setRootFile, "rb")
 928 |     except:
 929 |         print "[!] Device \\\\.\\"+rootFile+": isn't accessible"
 930 |         sys.exit()
 931 |     dump = fdDisk.read(512)
 932 |     
 933 |     if "Invalid partition table" in dump:
 934 |         # This is a MBR and not VBR
 935 |         partitionOffset = struct.unpack("L",dump[0x1c6:0x1ca])[0] * 512
 936 |         fdDisk.seek(partitionOffset)
 937 |         dump = fdDisk.read(512)
 938 | 
 939 |     if dump[0x10:0x13] != "\x00\x00\x00":
 940 |         print "BAD Sector !"
 941 |         sys.exit()
 942 |     sectorSize = struct.unpack("H",dump[0xb:0xd])[0]
 943 |     sectorPerCluster = struct.unpack("b",dump[0xd:0xe])[0]
 944 |     sectorsPerTrack = struct.unpack("H",dump[0x18:0x1a])[0]
 945 |     nbHead = struct.unpack("H",dump[0x1a:0x1c])[0]
 946 |     hiddenSectors = struct.unpack("L",dump[0x1c:0x20])[0]
 947 |     totalSectors = struct.unpack("Q",dump[0x28:0x30])[0]
 948 |     mftCluster = struct.unpack("Q",dump[0x30:0x38])[0]
 949 |     mftMirrCluster = struct.unpack("Q",dump[0x38:0x40])[0]
 950 |     clusterPerFilerecordSegment = struct.unpack("L",dump[0x40:0x44])[0]
 951 |     clusterPerIndexBuffer = struct.unpack("b",dump[0x44:0x45])[0]
 952 | 
 953 |     clusterSize = (sectorSize*sectorPerCluster)
 954 | 
 955 |     # print " + Bytes per sector : "+str(sectorSize)
 956 |     # print " + Sectors per Cluser : "+str(sectorPerCluster)
 957 |     # print " + Sector per track : "+str(sectorsPerTrack)
 958 |     # print " + Number of Head : "+str(nbHead)
 959 |     # print " + Hidden sectors : "+str(hiddenSectors)
 960 |     # print " + Number of sectors : "+str(totalSectors)
 961 |     # print " + $MFT Index : "+hex(mftCluster)
 962 | 
 963 | global mftMap
 964 | mftMap = []
 965 | 
 966 | def printDetails(parserEntry, level = 1):
 967 |     if level > 3:
 968 |         print ('  '*level)+"  - (...)"
 969 |         return
 970 |     
 971 |     if type(parserEntry) == dict:
 972 |         for key in sorted(parserEntry.keys()):
 973 |             if type(parserEntry[key]) == dict:
 974 |                 print ('  '*level)+"  - "+str(key)+" :"
 975 |                 try:
 976 |                     printDetails(parserEntry[key], level + 1)
 977 |                 except:
 978 |                     print ('  '*level)+"  - "+key+" : "+str(parserEntry[key])
 979 |             else:
 980 |                 print ('  '*level)+"  - "+key+" : "+str(parserEntry[key])
 981 |     elif type(parserEntry) == list:
 982 |         for key in sorted(parserEntry):
 983 |             if type(key) == dict:
 984 |                 print ('  '*level)+"  - "+str(key)+" :"
 985 |                 try:
 986 |                     printDetails(key, level + 1)
 987 |                 except:
 988 |                     print ('  '*level)+"  - "+key+" : "+str(key)
 989 |             else:
 990 |                 print ('  '*level)+"  - "+key+" : "+str(key)
 991 | 
 992 | def seekIndex(idx,getOffset = False):
 993 |     global fdDisk
 994 |     global mftCluster
 995 |     global clusterSize
 996 |     global mftMap
 997 |     global mftSize
 998 |     global partitionOffset
 999 |     
1000 |     if fdDisk == None:
1001 |         init()
1002 |     
1003 |     if mftMap == []:
1004 |         fdDisk.seek(partitionOffset + (mftCluster*clusterSize), os.SEEK_SET)
1005 |         dump = fdDisk.read(1024)
1006 |         mftInfos = parse_record(dump)
1007 |         mftMap = mftInfos['data']['dataruns']
1008 |         for fileChunkOffset, chunkSize in mftMap:
1009 |             mftSize += (chunkSize * clusterSize)
1010 |     idxOffset = idx * 1024
1011 |     realOffset = 0
1012 |     mft_offset = 0
1013 |     for fileChunkOffset, chunkSize in mftMap:
1014 |         if idxOffset < (mft_offset+(chunkSize*clusterSize)) and idxOffset >= mft_offset:
1015 |             realOffset += (fileChunkOffset*clusterSize) + (idxOffset-mft_offset)
1016 |             break
1017 |         mft_offset += (chunkSize*clusterSize)
1018 |     if getOffset == True:
1019 |         # print "Real Offset of 0x%x : 0x%x" % (idx,realOffset)
1020 |         return realOffset
1021 |     fdDisk.seek(partitionOffset + realOffset, os.SEEK_SET)
1022 | 
1023 | def getRawDatas(offset, size):
1024 |     global fdDisk
1025 |     global partitionOffset
1026 |     try:
1027 |         fdDisk.seek(partitionOffset + offset, os.SEEK_SET)
1028 |         if size < maxFileSize:
1029 |             dump = fdDisk.read(size)
1030 |         else:
1031 |             dump = fdDisk.read(maxFileSize)
1032 |         return dump
1033 |     except:
1034 |         return ""
1035 | 
1036 | def getEntry(idx):
1037 |     global fdDisk
1038 |     seekIndex(idx)
1039 |     dump = fdDisk.read(1024)
1040 |     return dump
1041 | 
1042 | def getRecordFromIndex(index):
1043 |     if index in mftRecords:
1044 |         return mftRecords[index]
1045 |     dump = getEntry(index)
1046 |     mftRecords[index] = parse_record(dump)
1047 |     return mftRecords[index]
1048 | 
1049 | def getNodeFromPath(path,node=5):
1050 |     path = path.lower()
1051 |     if path[-1] == "\\":
1052 |         path = path[:-1]
1053 |     if path in mftEntryFromPath:
1054 |         return mftEntryFromPath[path]
1055 |     spath = path.split("\\")
1056 |     if len(spath[0]) == 0:
1057 |         return node
1058 |     index = 0
1059 |     if spath[0][-1] == ':':
1060 |         if len(spath) < 2 or spath[1] == '':
1061 |             return 5
1062 |         else:
1063 |             index = 1
1064 |     cpath = spath[index]
1065 |     dump = getEntry(node)
1066 |     files = parse_record(dump,"DIR")
1067 |     if cpath.lower() in files:
1068 |         if len(spath) > (index+1):
1069 |             destNode = getNodeFromPath("\\".join(spath[index+1:]),files[cpath.lower()]['mft_ref'])
1070 |             if node == 5:
1071 |                 mftEntryFromPath[path] = destNode
1072 |             return destNode
1073 |         else:
1074 |             destNode = files[cpath.lower()]['mft_ref']
1075 |             if node == 5:
1076 |                 mftEntryFromPath[path] = destNode
1077 |             return destNode
1078 |     else:
1079 |         return -1
1080 | 
1081 | def printMftEntry(objfile):
1082 |     strResult = ""
1083 |     
1084 |     if 'fn' in objfile:
1085 |         file = objfile['fn']
1086 |         if 'indexroot' in objfile:
1087 |             file['file_flags'] = 0x10000000
1088 |         else:
1089 |             file['file_flags'] = 0
1090 |         file['file_size'] = file['real_fsize']
1091 |         file['mft_ref'] = objfile['recordnum']
1092 |     else:
1093 |         file = objfile
1094 |     if file['mtime'] == 'Not defined':
1095 |         file['mtime'] = "0000-00-00 00:00:00"
1096 |     if file['file_flags'] & 0x10000000:
1097 |         strResult += "<DIR> "+file['mtime'][:19]+" "+("%13d" % (file['file_size']))+" "+file['name']+" ("+str(file['mft_ref'])+")"#+" "+str(file['mft_parent_ref'])
1098 |     else:
1099 |         strResult += "      "+file['mtime'][:19]+" "+("%13d" % (file['file_size']))+" "+file['name']+" ("+str(file['mft_ref'])+")"#+" "+str(file['mft_parent_ref'])
1100 |     dump = getEntry(file['mft_ref'])
1101 |     ccfile = parse_record(dump)
1102 |     for cads in ccfile['ads']:
1103 |         strResult += "\n<ADS>                                   "+file['name']+":"+cads
1104 |     return strResult
1105 | 
1106 | def listFromNode(node):
1107 |     dump = getEntry(node)
1108 |     files = parse_record(dump,"DIR")
1109 |     delFiles = []
1110 |     if node in mftEntries:
1111 |         delFiles = mftEntries[node]
1112 |     
1113 |     if ('fn' in files) and ('reparsepoint_dest' in files):
1114 |         print "<LNK> "+files['fn']['name']+" -> "+str(files['reparsepoint_dest']['path'])
1115 |         return
1116 |     for cfile in sorted(files.keys()):
1117 |         strEntry = printMftEntry(files[cfile])
1118 |         print strEntry
1119 |     for cdelFile, cdelNode in delFiles:
1120 |         if "~" in cdelFile:
1121 |             continue
1122 |         if not cdelFile in files.keys():
1123 |             print "<DEL> "+files[cfile]['mtime'][:19]+" "+("%13d" % (files[cfile]['file_size']))+" "+cdelFile+" ("+str(cdelNode)+")"#+" "+str(files[cfile]['mft_parent_ref'])
1124 | 
1125 | def getMftParentRef(mftBlob):
1126 |     if mftBlob[:4] != "FILE":
1127 |         return -1
1128 |     coff = 0x38
1129 |     
1130 |     recordSize = struct.unpack("<L",mftBlob[0x1c:0x20])[0]
1131 |     while (coff < recordSize) and (coff < 1024):
1132 |         if mftBlob[coff:coff+4] != "\x30\x00\x00\x00":
1133 |             coff += struct.unpack("<L",mftBlob[coff+4:coff+8])[0]
1134 |             if mftBlob[coff:coff+4] == "\xFF\xFF\xFF\xFF" or coff == 0:
1135 |                 return -1
1136 |             continue
1137 |         return struct.unpack("<L",mftBlob[coff+0x18:coff+0x1c])[0]
1138 |     return -1
1139 | 
1140 | def isDeletedNode(mftBlob):
1141 |     if mftBlob[:4] != "FILE":
1142 |         return -1
1143 |     flags = struct.unpack("<H",mftBlob[0x16:0x18])[0]
1144 |     if flags == 0:
1145 |         return True
1146 |     return False
1147 | 
1148 | def getParentPath(parentNode):
1149 |     parentPath = ""
1150 |     parentList = []
1151 |     while parentNode != -1:
1152 |         if parentNode == 5:
1153 |             parentPath = "c:\\"+parentPath
1154 |             break
1155 |         if parentNode in parentList:
1156 |             parentPath = "...\\"+parentPath
1157 |             break
1158 |         parentList.append(parentNode)
1159 |         file = getRecordFromIndex(parentNode)
1160 |         if 'fn' in file:
1161 |             parentPath = file['fn']['name']+"\\"+parentPath
1162 |             parentNode = file['fn']['par_ref']
1163 |         else:
1164 |             parentPath = "...\\"+parentPath
1165 |             break
1166 |     return parentPath
1167 | 
1168 | def getDeletedFiles():
1169 |     global clusterSize
1170 |     
1171 |     seekIndex(0)
1172 |     sizeOfChunkCopy = 0x1000000
1173 |     
1174 |     globOffset = 0
1175 |     seekOffset = 0
1176 |     
1177 |     for hdOffset, hdSize in mftMap:
1178 |         hdSize = hdSize * clusterSize
1179 |         hdOffset = hdOffset * clusterSize
1180 |         coffset = 0
1181 |         seekIndex(globOffset/1024)
1182 |         while (coffset+sizeOfChunkCopy) < (hdSize):
1183 |             # print "seekOffset = %x" % seekOffset
1184 |             seekIndex((globOffset+coffset)/1024)
1185 |             dump = fdDisk.read(sizeOfChunkCopy)
1186 |             ccoffset = 0
1187 |             while ccoffset < len(dump):
1188 |                 if isDeletedNode(dump[ccoffset:ccoffset+1024]):
1189 |                     cobj = parse_record(dump[ccoffset:ccoffset+1024])
1190 |                     if 'fn' in cobj:
1191 |                         strEntry = printMftEntry(cobj)
1192 |                         print strEntry
1193 |                 ccoffset += 1024
1194 |                 seekOffset += 1024
1195 |             coffset += len(dump)
1196 |         if coffset < (hdSize):
1197 |             seekIndex((globOffset+coffset)/1024)
1198 |             dump = fdDisk.read(hdSize-coffset)
1199 |             ccoffset = 0
1200 |             while ccoffset < len(dump):
1201 |                 if isDeletedNode(dump[ccoffset:ccoffset+1024]):
1202 |                     cobj = parse_record(dump[ccoffset:ccoffset+1024])
1203 |                     if 'fn' in cobj:
1204 |                         strEntry = printMftEntry(cobj)
1205 |                         print strEntry
1206 |                 ccoffset += 1024
1207 |                 seekOffset += 1024
1208 |             coffset += len(dump)
1209 |         globOffset += hdSize
1210 |     return mftEntries
1211 | 
1212 | def getMftList(node):
1213 |     global clusterSize
1214 |     
1215 |     seekIndex(0)
1216 |     sizeOfChunkCopy = 0x1000000
1217 |     
1218 |     globOffset = 0
1219 |     seekOffset = 0
1220 |     
1221 |     if not node in mftEntries:
1222 |         mftEntries[node] = []
1223 |     
1224 |     for hdOffset, hdSize in mftMap:
1225 |         hdSize = hdSize * clusterSize
1226 |         hdOffset = hdOffset * clusterSize
1227 |         coffset = 0
1228 |         seekIndex(globOffset/1024)
1229 |         while (coffset+sizeOfChunkCopy) < (hdSize):
1230 |             seekIndex((globOffset+coffset)/1024)
1231 |             dump = fdDisk.read(sizeOfChunkCopy)
1232 |             ccoffset = 0
1233 |             while ccoffset < len(dump):
1234 |                 if getMftParentRef(dump[ccoffset:ccoffset+1024]) == node:
1235 |                     cobj = parse_record(dump[ccoffset:ccoffset+1024])
1236 |                     if 'fn' in cobj:
1237 |                         mftEntries[node].append([cobj['fn']['name'].lower(), ((globOffset+coffset+ccoffset)/1024)])
1238 |                 ccoffset += 1024
1239 |                 seekOffset += 1024
1240 |             coffset += len(dump)
1241 |         if coffset < (hdSize):
1242 |             seekIndex((globOffset+coffset)/1024)
1243 |             dump = fdDisk.read(hdSize-coffset)
1244 |             ccoffset = 0
1245 |             while ccoffset < len(dump):
1246 |                 if getMftParentRef(dump[ccoffset:ccoffset+1024]) == node:
1247 |                     cobj = parse_record(dump[ccoffset:ccoffset+1024])
1248 |                     if 'fn' in cobj:
1249 |                         mftEntries[node].append([cobj['fn']['name'].lower(), ((globOffset+coffset+ccoffset)/1024)])
1250 |                 ccoffset += 1024
1251 |                 seekOffset += 1024
1252 |             coffset += len(dump)
1253 |         globOffset += hdSize
1254 |     return mftEntries
1255 | 
1256 | def listFromPath(path):
1257 |     if not path in mftEntryFromPath:
1258 |         node = getNodeFromPath(path)
1259 |         mftEntryFromPath[path] = node
1260 |     else:
1261 |         node = mftEntryFromPath[path]
1262 |     if node <= 0:
1263 |         print path+" is not accessible"
1264 |         sys.exit()
1265 |     listFromNode(node)
1266 | 
1267 | def dumpFile(node, outputFile, ads = ""):
1268 |     global maxFileSize
1269 |     seekIndex(node)
1270 |     dump = fdDisk.read(1024)
1271 |     fileInfos = parse_record(dump)
1272 |     if ads == "":
1273 |         if 'data' in fileInfos:
1274 |             dump = getDatasFromAttribute(fileInfos['data'])
1275 |             if 'compress_unit_size' in fileInfos['data'] and fileInfos['data']['compress_unit_size'] > 0:
1276 |                 dump = process_comp_stream(dump)[:fileInfos['data']['init_size']]
1277 |             try:
1278 |                 destFile = open(outputFile,"wb")
1279 |             except:
1280 |                 print outputFile+" not exist :-("
1281 |                 sys.exit()
1282 |             destFile.write(dump)
1283 |             destFile.close()
1284 |             print str(len(dump))+" write in "+outputFile
1285 |         else:
1286 |             if fileInfos['fn']['alloc_fsize'] == 0:
1287 |                 print "Size of the file is 0."
1288 |             else:
1289 |                 print "[!] No datas accessible"
1290 |     else:
1291 |         if 'ads' in fileInfos:
1292 |             dump = getDatasFromAttribute(fileInfos['ads'][ads])
1293 |             try:
1294 |                 destFile = open(outputFile,"wb")
1295 |             except:
1296 |                 print outputFile+" not exist :-("
1297 |                 sys.exit()
1298 |             destFile.write(dump)
1299 |             destFile.close()
1300 |             print str(len(dump))+" write in "+outputFile
1301 | 
1302 | def getDatasFromAttribute(fn_attrib):
1303 |     dump = ""
1304 |     
1305 |     if 'dataruns' in fn_attrib:
1306 |         copySize = 0
1307 |         for offset, size in fn_attrib['dataruns']:
1308 |             fdDisk.seek(partitionOffset + (offset*clusterSize), os.SEEK_SET)
1309 |             if maxFileSize < (copySize + (size * clusterSize)):
1310 |                 dump += fdDisk.read(maxFileSize-copySize)
1311 |                 break
1312 |             dump += fdDisk.read(size * clusterSize)
1313 |             copySize += size * clusterSize
1314 |     elif 'attrib_value' in fn_attrib:
1315 |         dump = fn_attrib['attrib_value']
1316 |     
1317 |     return dump
1318 | 
1319 | curr_opt = 1
1320 | 
1321 | while (curr_opt < len(sys.argv)) and (sys.argv[curr_opt][0] == '-'):
1322 |     if (sys.argv[curr_opt] == '-ls'):
1323 |         curr_opt += 1
1324 |         if len(sys.argv[curr_opt]) < 3:
1325 |             print sys.argv[curr_opt]+" isn't a correct argument"
1326 |             sys.exit()
1327 |         if sys.argv[curr_opt][1] != ":":
1328 |             print sys.argv[curr_opt]+" isn't a correct argument"
1329 |             sys.exit()
1330 |         rootFile = sys.argv[curr_opt][0]
1331 |         if sys.argv[curr_opt][2] == "\\":
1332 |             listFromPath(sys.argv[curr_opt].lower())
1333 |         else:
1334 |             try:
1335 |                 node = int(sys.argv[curr_opt][2:],0)
1336 |             except:
1337 |                 print sys.argv[curr_opt]+" isn't a correct argument"
1338 |                 sys.exit()
1339 |             listFromNode(node)
1340 |     elif (sys.argv[curr_opt] == '-lsd'):
1341 |         curr_opt += 1
1342 |         if len(sys.argv[curr_opt]) < 3:
1343 |             print sys.argv[curr_opt]+" isn't a correct argument"
1344 |             sys.exit()
1345 |         if sys.argv[curr_opt][1] != ":":
1346 |             print sys.argv[curr_opt]+" isn't a correct argument"
1347 |             sys.exit()
1348 |         rootFile = sys.argv[curr_opt][0]
1349 |         if sys.argv[curr_opt][2] == "\\":
1350 |             prevNode = getNodeFromPath(sys.argv[curr_opt].lower())
1351 |             getMftList(prevNode)
1352 |             listFromPath(sys.argv[curr_opt].lower())
1353 |         else:
1354 |             try:
1355 |                 node = int(sys.argv[curr_opt][2:],0)
1356 |             except:
1357 |                 print sys.argv[curr_opt]+" isn't a correct argument"
1358 |                 sys.exit()
1359 |             getMftList(node)
1360 |             listFromNode(node)
1361 |     elif (sys.argv[curr_opt] == '-v'):
1362 |         verbose = 1
1363 |     elif (sys.argv[curr_opt] == '-f'):
1364 |         curr_opt += 1
1365 |         setRootFile = sys.argv[curr_opt]
1366 |     elif (sys.argv[curr_opt] == '-indexOffset'):
1367 |         curr_opt += 1
1368 |         if sys.argv[curr_opt][1] != ":":
1369 |             print sys.argv[curr_opt]+" isn't a correct argument"
1370 |             sys.exit()
1371 |         rootFile = sys.argv[curr_opt][0]
1372 |         offset = seekIndex(int(sys.argv[curr_opt].split(":")[1],0),True)
1373 |         print "MFT node offset : 0x%x" % offset
1374 |         fdDisk.seek(partitionOffset + offset, os.SEEK_SET)
1375 |         nodeDump = fdDisk.read(1024)
1376 |         hexprint(nodeDump)
1377 |         printDetails(getRecordFromIndex(int(sys.argv[curr_opt].split(":")[1],0)))
1378 |         sys.exit()
1379 |     elif (sys.argv[curr_opt] == '-deleted'):
1380 |         curr_opt += 1
1381 |         if sys.argv[curr_opt][1] != ":":
1382 |             print sys.argv[curr_opt]+" isn't a correct argument"
1383 |             sys.exit()
1384 |         rootFile = sys.argv[curr_opt][0]
1385 |         getDeletedFiles()
1386 |     elif (sys.argv[curr_opt] == '-sz'):
1387 |         curr_opt += 1
1388 |         maxFileSize = int(sys.argv[curr_opt],0)
1389 |     elif (sys.argv[curr_opt] == '-getFile'):
1390 |         curr_opt += 1
1391 |         if len(sys.argv) < 4:
1392 |             print "Too few arguments"
1393 |             sys.exit()
1394 |         if len(sys.argv[curr_opt]) < 3:
1395 |             print sys.argv[curr_opt]+" isn't a correct argument"
1396 |             sys.exit()
1397 |         if sys.argv[curr_opt][1] != ":":
1398 |             print sys.argv[curr_opt]+" isn't a correct argument"
1399 |             sys.exit()
1400 |         rootFile = sys.argv[curr_opt][0]
1401 |         fileNode = sys.argv[curr_opt][2:]
1402 |         if ":" in fileNode:
1403 |             dumpFile(int(fileNode.split(":")[0],0),sys.argv[curr_opt+1],fileNode.split(":")[1])
1404 |         else:
1405 |             dumpFile(int(fileNode,0),sys.argv[curr_opt+1])
1406 |     curr_opt += 1
1407 | 
1408 | 
1409 | 


--------------------------------------------------------------------------------