├── .gitignore ├── CHANGELOG ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── examples ├── indxparse │ └── indxparse.py ├── inspect_directory │ └── inspect_directory.py ├── inspect_file │ └── inspect_file.py ├── inspect_record │ └── inspect_record.py ├── inspect_vbr │ └── inspect_vbr.py ├── mount │ ├── mount.py │ ├── readme.md │ └── requirements.txt └── parse_usnjrnl │ └── parse_usnjrnl.py ├── ntfs ├── BinaryParser.py ├── FileMap.py ├── Progress.py ├── SortedCollection.py ├── __init__.py ├── filesystem │ └── __init__.py ├── logfile │ └── __init__.py ├── mft │ ├── .MFT.py.swp │ ├── MFT.py │ └── __init__.py ├── secure │ ├── SDS.py │ └── __init__.py ├── usnjrnl │ └── __init__.py └── volume │ └── __init__.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-ntfs 2 | =========== 3 | 4 | Open source Python library for NTFS analysis 5 | -------------------------------------------------------------------------------- /examples/indxparse/indxparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clone of INDXParse.py that processes an entire file system. 3 | """ 4 | import argparse 5 | from datetime import datetime 6 | import logging 7 | 8 | from ntfs.volume import FlatVolume 9 | from ntfs.BinaryParser import Mmap 10 | from ntfs.filesystem import NTFSFilesystem 11 | from ntfs.mft.MFT import AttributeNotFoundError 12 | from ntfs.mft.MFT import ATTR_TYPE 13 | from ntfs.mft.MFT import INDEX_ALLOCATION 14 | from ntfs.mft.MFT import INDEX_ROOT 15 | 16 | 17 | g_logger = logging.getLogger("ntfs.examples.indxparse") 18 | 19 | 20 | class InvalidArgumentError(Exception): 21 | pass 22 | 23 | 24 | def get_directory_index_active_entries(fs, directory): 25 | """ 26 | get the active MFT_INDEX_ENTRYs from a directory's 27 | INDEX_ROOT and INDEX_ALLOCATION attributes 28 | """ 29 | if not directory.is_directory(): 30 | raise InvalidArgumentError() 31 | 32 | # sorry, reaching 33 | record = directory._record 34 | 35 | ret = [] 36 | 37 | try: 38 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 39 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 40 | for block in indx_alloc.blocks(): 41 | for entry in block.index().entries(): 42 | ret.append(entry) 43 | except AttributeNotFoundError: 44 | pass 45 | 46 | try: 47 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 48 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 49 | for entry in indx_root.index().entries(): 50 | ret.append(entry) 51 | except AttributeNotFoundError: 52 | pass 53 | 54 | return ret 55 | 56 | 57 | def get_directory_index_inactive_entries(fs, directory): 58 | """ 59 | get the inactive (slack) MFT_INDEX_ENTRYs from a directory's 60 | INDEX_ROOT and INDEX_ALLOCATION attributes 61 | """ 62 | if not directory.is_directory(): 63 | raise InvalidArgumentError() 64 | 65 | # sorry, reaching 66 | record = directory._record 67 | 68 | ret = [] 69 | 70 | try: 71 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 72 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 73 | for block in indx_alloc.blocks(): 74 | for entry in block.index().slack_entries(): 75 | ret.append(entry) 76 | except AttributeNotFoundError: 77 | pass 78 | 79 | try: 80 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 81 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 82 | for entry in indx_root.index().slack_entries(): 83 | ret.append(entry) 84 | except AttributeNotFoundError: 85 | pass 86 | 87 | return ret 88 | 89 | 90 | def make_dump_directory_indices_visitor(formatter): 91 | """ 92 | `formatter` is a function that accepts a dict, and returns a string. 93 | the string is dumped via print(). 94 | the schema for the dict is: 95 | active: bool 96 | path: str 97 | entry: MFT_INDEX_ENTRY 98 | 99 | this function returns a function that applies the format to the 100 | given FileSystem and Directory and dumps it out. 
101 | """ 102 | def dump_directory_indices_visitor(fs, directory): 103 | for e in get_directory_index_active_entries(fs, directory): 104 | try: 105 | print(formatter({ 106 | "active": True, 107 | "path": directory.get_full_path(), 108 | "entry": e})) 109 | except Exception as e: 110 | g_logger.warning("Failed to output entry: %s", e) 111 | for e in get_directory_index_inactive_entries(fs, directory): 112 | try: 113 | print(formatter({ 114 | "active": False, 115 | "path": directory.get_full_path(), 116 | "entry": e})) 117 | except Exception as e: 118 | g_logger.warning("Failed to output entry: %s", e) 119 | return dump_directory_indices_visitor 120 | 121 | 122 | def walk_directories(fs, directory, visitor): 123 | """ 124 | `visitor` is a function that accepts two parameters: a FileSystem 125 | and a Directory 126 | 127 | this function applies the function `visitor` to each directory 128 | in the file system recursively. 129 | """ 130 | visitor(fs, directory) 131 | for d in directory.get_directories(): 132 | walk_directories(fs, d, visitor) 133 | 134 | 135 | def safe_date(f): 136 | try: 137 | return f() 138 | except ValueError: 139 | return datetime(1970, 1, 1, 0, 0, 0) 140 | 141 | 142 | def csv_directory_index_formatter(e): 143 | entry = e["entry"].filename_information() 144 | fn = entry.filename() 145 | f = (u"{status},{path},{filename},{physical_size},{logical_size},{mtime}," 146 | u"{atime},{ctime},{crtime}") 147 | if e["active"]: 148 | status = "active" 149 | else: 150 | status = "slack" 151 | 152 | return f.format( 153 | status=status, 154 | path=e["path"], 155 | filename=entry.filename(), 156 | physical_size=entry.physical_size(), 157 | logical_size=entry.logical_size(), 158 | mtime=safe_date(entry.modified_time), 159 | atime=safe_date(entry.accessed_time), 160 | ctime=safe_date(entry.changed_time), 161 | crtime=safe_date(entry.created_time)) 162 | 163 | 164 | def bodyfile_directory_index_formatter(e): 165 | # TODO 166 | pass 167 | 168 | 169 | def main(image_filename, volume_offset, path): 170 | 171 | with Mmap(image_filename) as buf: 172 | v = FlatVolume(buf, volume_offset) 173 | fs = NTFSFilesystem(v) 174 | root = fs.get_root_directory() 175 | 176 | if path == "/": 177 | entry = root 178 | else: 179 | entry = root.get_path_entry(path) 180 | 181 | v = make_dump_directory_indices_visitor(csv_directory_index_formatter) 182 | walk_directories(fs, entry, v) 183 | 184 | if __name__ == '__main__': 185 | 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument('img_file', help='Path to image file') 188 | parser.add_argument('volume_offset', help='Offset in bytes ' 189 | 'to Boot Sector Section', 190 | type=int) 191 | parser.add_argument('path', help='Path') 192 | parser.add_argument('-d', '--debug', default=False, action='store_true') 193 | args = parser.parse_args() 194 | 195 | if args.debug: 196 | logging.basicConfig(level=logging.DEBUG) 197 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 198 | 199 | main(args.img_file, args.volume_offset, args.path) 200 | -------------------------------------------------------------------------------- /examples/inspect_directory/inspect_directory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump the directory index for a directory. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSFilesystem 9 | from ntfs.mft.MFT import AttributeNotFoundError 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import MREF 12 | from ntfs.mft.MFT import INDEX_ALLOCATION 13 | from ntfs.mft.MFT import INDEX_ROOT 14 | 15 | 16 | g_logger = logging.getLogger("ntfs.examples.inspect_directory") 17 | 18 | 19 | def main(image_filename, volume_offset, path): 20 | logging.basicConfig(level=logging.DEBUG) 21 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 22 | 23 | with Mmap(image_filename) as buf: 24 | v = FlatVolume(buf, volume_offset) 25 | fs = NTFSFilesystem(v) 26 | root = fs.get_root_directory() 27 | 28 | if path == "/": 29 | entry = root 30 | else: 31 | entry = root.get_path_entry(path) 32 | 33 | if not entry.is_directory(): 34 | g_logger.error("not a directory") 35 | return 36 | 37 | # sorry, reaching 38 | record = entry._record 39 | 40 | entries = {} 41 | try: 42 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 43 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 44 | g_logger.debug("INDEX_ALLOCATION len: %s", hex(len(indx_alloc))) 45 | g_logger.debug("alloc:\n%s", indx_alloc.get_all_string(indent=2)) 46 | indx = indx_alloc 47 | 48 | g_logger.info("found:") 49 | for block in indx.blocks(): 50 | for entry in block.index().entries(): 51 | ref = MREF(entry.header().mft_reference()) 52 | entries[ref] = entry.filename_information().filename() 53 | 54 | except AttributeNotFoundError: 55 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 56 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 57 | g_logger.debug("INDEX_ROOT len: %s", hex(len(indx_root))) 58 | g_logger.debug("root:\n%s", indx_root.get_all_string(indent=2)) 59 | indx = indx_root 60 | 61 | g_logger.info("found:") 62 | for entry in indx.index().entries(): 63 | ref = MREF(entry.header().mft_reference()) 64 | entries[ref] = entry.filename_information().filename() 65 | 66 | for k, v in entries.iteritems(): 67 | g_logger.info(" - %s", v) 68 | 69 | 70 | if __name__ == '__main__': 71 | import sys 72 | main(sys.argv[1], int(sys.argv[2]), sys.argv[3]) 73 | 74 | -------------------------------------------------------------------------------- /examples/inspect_file/inspect_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump stuff related to a single record. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSFilesystem 9 | from ntfs.mft.MFT import AttributeNotFoundError 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import MREF 12 | from ntfs.mft.MFT import INDEX_ALLOCATION 13 | from ntfs.mft.MFT import INDEX_ROOT 14 | 15 | 16 | g_logger = logging.getLogger("ntfs.examples.inspect_record") 17 | 18 | 19 | def main(image_filename, volume_offset, record_number): 20 | logging.basicConfig(level=logging.DEBUG) 21 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 22 | 23 | with Mmap(image_filename) as buf: 24 | v = FlatVolume(buf, volume_offset) 25 | fs = NTFSFilesystem(v) 26 | record = fs.get_record(record_number) 27 | print(record.get_all_string()) 28 | 29 | 30 | if __name__ == '__main__': 31 | import sys 32 | main(sys.argv[1], int(sys.argv[2]), int(sys.argv[3])) 33 | 34 | -------------------------------------------------------------------------------- /examples/inspect_record/inspect_record.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump stuff related to a single record. 3 | """ 4 | import logging 5 | 6 | from ntfs.BinaryParser import Mmap 7 | from ntfs.mft.MFT import MFTRecord 8 | from ntfs.mft.MFT import Attribute 9 | from ntfs.mft.MFT import ATTR_TYPE 10 | from ntfs.mft.MFT import StandardInformation 11 | from ntfs.mft.MFT import FilenameAttribute 12 | 13 | 14 | g_logger = logging.getLogger("ntfs.examples.inspect_record") 15 | 16 | 17 | def main(record_filename): 18 | logging.basicConfig(level=logging.DEBUG) 19 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 20 | 21 | with Mmap(record_filename) as buf: 22 | record = MFTRecord(buf, 0, None) 23 | print("=== MFT Record Header") 24 | print(record.get_all_string()) 25 | 26 | for attribute in record.attributes(): 27 | print("=== Attribute Header (type: {:s}) at offset {:s}".format( 28 | Attribute.TYPES[attribute.type()], 29 | hex(attribute.offset()))) 30 | print(attribute.get_all_string()) 31 | 32 | if attribute.type() == ATTR_TYPE.STANDARD_INFORMATION: 33 | print("=== STANDARD INFORMATION value") 34 | si = StandardInformation(attribute.value(), 0, None) 35 | print(si.get_all_string()) 36 | 37 | elif attribute.type() == ATTR_TYPE.FILENAME_INFORMATION: 38 | print("=== FILENAME INFORMATION value") 39 | fn = FilenameAttribute(attribute.value(), 0, None) 40 | print(fn.get_all_string()) 41 | 42 | if __name__ == '__main__': 43 | import sys 44 | main(sys.argv[1]) 45 | 46 | -------------------------------------------------------------------------------- /examples/inspect_vbr/inspect_vbr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump the NTFS VBR for a volume. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSVBR 9 | 10 | 11 | g_logger = logging.getLogger("ntfs.examples.inspect_vbr") 12 | 13 | 14 | def main(image_filename, volume_offset): 15 | logging.basicConfig(level=logging.DEBUG) 16 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 17 | 18 | with Mmap(image_filename) as buf: 19 | v = FlatVolume(buf, volume_offset) 20 | vbr = NTFSVBR(v) 21 | print(vbr.get_all_string()) 22 | 23 | 24 | if __name__ == '__main__': 25 | import sys 26 | main(sys.argv[1], int(sys.argv[2])) 27 | 28 | -------------------------------------------------------------------------------- /examples/mount/mount.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | 3 | import os 4 | import sys 5 | import stat 6 | import errno 7 | import inspect 8 | import logging 9 | import calendar 10 | 11 | from fuse import FUSE, FuseOSError, Operations, fuse_get_context 12 | 13 | from ntfs.filesystem import NTFSFilesystem 14 | from ntfs.filesystem import ChildNotFoundError 15 | 16 | PERMISSION_ALL_READ = int("444", 8) 17 | 18 | g_logger = logging.getLogger("ntfs.examples.mount") 19 | 20 | 21 | def unixtimestamp(ts): 22 | """ 23 | unixtimestamp converts a datetime.datetime to a UNIX timestamp. 24 | @type ts: datetime.datetime 25 | @rtype: int 26 | """ 27 | return calendar.timegm(ts.utctimetuple()) 28 | 29 | 30 | def log(func): 31 | """ 32 | log is a decorator that logs the a function call with its 33 | parameters and return value. 34 | """ 35 | def inner(*args, **kwargs): 36 | func_name = inspect.stack()[3][3] 37 | if func_name == "_wrapper": 38 | func_name = inspect.stack()[2][3] 39 | (uid, gid, pid) = fuse_get_context() 40 | pre = "(%s: UID=%d GID=%d PID=%d ARGS=(%s) KWARGS=(%s))" % ( 41 | func_name, uid, gid, pid, 42 | ", ".join(map(str, list(args)[1:])), str(**kwargs)) 43 | try: 44 | g_logger.debug("log: call: %s", pre) 45 | ret = func(*args, **kwargs) 46 | g_logger.debug("log: result: %s", ret) 47 | return ret 48 | except Exception as e: 49 | g_logger.warning("log: exception: %s", str(e)) 50 | raise e 51 | return inner 52 | 53 | 54 | class NTFSFuseOperations(Operations): 55 | def __init__(self, filesystem): 56 | self._fs = filesystem 57 | self._opened_files = {} 58 | 59 | def _get_path_entry(self, path): 60 | root = self._fs.get_root_directory() 61 | if path == "/": 62 | g_logger.debug("asking for root") 63 | entry = root 64 | else: 65 | _, __, rest = path.partition("/") 66 | g_logger.debug("asking for: %s", rest) 67 | try: 68 | entry = root.get_path_entry(rest) 69 | except ChildNotFoundError: 70 | raise FuseOSError(errno.ENOENT) 71 | return entry 72 | 73 | # Filesystem methods 74 | # ================== 75 | @log 76 | def getattr(self, path, fh=None): 77 | (uid, gid, pid) = fuse_get_context() 78 | entry = self._get_path_entry(path) 79 | 80 | if entry.is_directory(): 81 | mode = (stat.S_IFDIR | PERMISSION_ALL_READ) 82 | nlink = 2 83 | else: 84 | mode = (stat.S_IFREG | PERMISSION_ALL_READ) 85 | nlink = 1 86 | 87 | return { 88 | "st_atime": unixtimestamp(entry.get_si_accessed_timestamp()), 89 | "st_ctime": unixtimestamp(entry.get_si_changed_timestamp()), 90 | "st_crtime": unixtimestamp(entry.get_si_created_timestamp()), 91 | "st_mtime": unixtimestamp(entry.get_si_modified_timestamp()), 92 | "st_size": entry.get_size(), 93 | "st_uid": uid, 94 | "st_gid": gid, 95 | "st_mode": mode, 96 | "st_nlink": nlink, 97 | } 98 | 
99 | @log 100 | def readdir(self, path, fh): 101 | dirents = ['.', '..'] 102 | entry = self._get_path_entry(path) 103 | 104 | dirents.extend(map(lambda r: r.get_name(), entry.get_children())) 105 | return dirents 106 | 107 | @log 108 | def readlink(self, path): 109 | return path 110 | 111 | @log 112 | def statfs(self, path): 113 | return dict((key, 0) for key in ('f_bavail', 'f_bfree', 114 | 'f_blocks', 'f_bsize', 'f_favail', 115 | 'f_ffree', 'f_files', 'f_flag', 116 | 'f_frsize', 'f_namemax')) 117 | 118 | @log 119 | def chmod(self, path, mode): 120 | return errno.EROFS 121 | 122 | @log 123 | def chown(self, path, uid, gid): 124 | return errno.EROFS 125 | 126 | @log 127 | def mknod(self, path, mode, dev): 128 | return errno.EROFS 129 | 130 | @log 131 | def rmdir(self, path): 132 | return errno.EROFS 133 | 134 | @log 135 | def mkdir(self, path, mode): 136 | return errno.EROFS 137 | 138 | @log 139 | def unlink(self, path): 140 | return errno.EROFS 141 | 142 | @log 143 | def symlink(self, target, name): 144 | return errno.EROFS 145 | 146 | @log 147 | def rename(self, old, new): 148 | return errno.EROFS 149 | 150 | @log 151 | def link(self, target, name): 152 | return errno.EROFS 153 | 154 | @log 155 | def utimens(self, path, times=None): 156 | return errno.EROFS 157 | 158 | # File methods 159 | # ============ 160 | 161 | def _get_available_fh(self): 162 | """ 163 | _get_available_fh returns an unused fh 164 | The caller must be careful to handle race conditions. 165 | @rtype: int 166 | """ 167 | for i in xrange(65534): 168 | if i not in self._opened_files: 169 | return i 170 | 171 | @log 172 | def open(self, path, flags): 173 | if flags & os.O_WRONLY > 0: 174 | return errno.EROFS 175 | if flags & os.O_RDWR > 0: 176 | return errno.EROFS 177 | 178 | entry = self._get_path_entry(path) 179 | 180 | # TODO(wb): race here on fh used/unused 181 | fh = self._get_available_fh() 182 | self._opened_files[fh] = entry 183 | 184 | return fh 185 | 186 | @log 187 | def read(self, path, length, offset, fh): 188 | entry = self._opened_files[fh] 189 | return entry.read(offset, length) 190 | 191 | @log 192 | def flush(self, path, fh): 193 | return "" 194 | 195 | @log 196 | def release(self, path, fh): 197 | del self._opened_files[fh] 198 | 199 | @log 200 | def create(self, path, mode, fi=None): 201 | return errno.EROFS 202 | 203 | @log 204 | def write(self, path, buf, offset, fh): 205 | return errno.EROFS 206 | 207 | @log 208 | def truncate(self, path, length, fh=None): 209 | return errno.EROFS 210 | 211 | @log 212 | def fsync(self, path, fdatasync, fh): 213 | return errno.EPERM 214 | 215 | 216 | def main(image_filename, volume_offset, mountpoint): 217 | from ntfs.volume import FlatVolume 218 | from ntfs.BinaryParser import Mmap 219 | 220 | logging.basicConfig(level=logging.DEBUG) 221 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 222 | 223 | with Mmap(image_filename) as buf: 224 | v = FlatVolume(buf, volume_offset) 225 | fs = NTFSFilesystem(v) 226 | handler = NTFSFuseOperations(fs) 227 | FUSE(handler, mountpoint, foreground=True) 228 | 229 | 230 | if __name__ == '__main__': 231 | import sys 232 | main(sys.argv[1], int(sys.argv[2]), sys.argv[3]) 233 | 234 | -------------------------------------------------------------------------------- /examples/mount/readme.md: -------------------------------------------------------------------------------- 1 | This is a read-only NTFS FUSE driver written in pure Python 2 | -------------------------------------------------------------------------------- 
/examples/mount/requirements.txt: -------------------------------------------------------------------------------- 1 | fusepy 2 | -------------------------------------------------------------------------------- /examples/parse_usnjrnl/parse_usnjrnl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parse a UsnJrnl:$J object into a CSV file. 3 | 4 | inspired by parser-usnjrnl by Seth Nazarro (http://code.google.com/p/parser-usnjrnl/) 5 | """ 6 | import logging 7 | import sys, struct, datetime 8 | 9 | class BadRecordException(Exception): 10 | def __init__(self): 11 | pass 12 | 13 | 14 | flag_def = { 15 | 0x00: " ", 16 | 0x01: "The data in the file or directory was overwritten.", 17 | 0x02: "The file or directory was added to.", 18 | 0x04: "The file or directory was truncated.", 19 | 0x10: "Data in one or more named data streams for the file was overwritten.", 20 | 0x20: "One or more named data streams for the file were added to.", 21 | 0x40: "One or more named data streams for the file were truncated.", 22 | 0x100: "The file or directory was created for the first time.", 23 | 0x200: "The file or directory was deleted.", 24 | 0x400: "The user made a change to the file's or directory's extended attributes.", 25 | 0x800: "A change was made in the access rights to the file or directory.", 26 | 0x1000: "The file or directory was renamed and the file name in this structure is the previous name.", 27 | 0x2000: "The file or directory was renamed and the file name in this structure is the new name.", 28 | 0x4000: "A user toggled the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED attribute.", 29 | 0x8000: "A user has either changed one or more file or directory attributes or one or more time stamps.", 30 | 0x10000: "An NTFS hard link was added to or removed from the file or directory.", 31 | 0x20000: "The compression state of the file or directory was changed from or to compressed.", 32 | 0x40000: "The file or directory was encrypted or decrypted.", 33 | 0x80000: "The object identifier of the file or directory was changed.", 34 | 0x100000: "The reparse point contained in the file or directory was changed, or a reparse point was added to or deleted from the file or directory.", 35 | 0x200000: "A named stream has been added to or removed from the file or a named stream has been renamed.", 36 | 0x80000000: "The file or directory was closed." 37 | } 38 | 39 | 40 | attrs_def = { 41 | 1: 'READONLY', 42 | 2: 'HIDDEN', 43 | 4: 'SYSTEM', 44 | 8: '???', 45 | 16: 'DIRECTORY', 46 | 32: 'ARCHIVE', 47 | 64: 'DEVICE', 48 | 128: 'NORMAL', 49 | 256: 'TEMPORARY', 50 | 512: 'SPARSE_FILE', 51 | 1024: 'REPARSE_POINT', 52 | 2048: 'COMPRESSED', 53 | 4096: 'OFFLINE', 54 | 8192: 'NOT_CONTENT_INDEXED', 55 | 16384:'ENCRYPTED', 56 | 65536:'VIRTUAL' 57 | } 58 | 59 | 60 | def parse_windows_timestamp(qword): 61 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 62 | return datetime.datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 63 | 64 | 65 | def MREF(mft_reference): 66 | """ 67 | Given a MREF/mft_reference, return the record number part. 68 | """ 69 | return mft_reference & 0xFFFFFFFFFFFF 70 | 71 | 72 | def MSEQNO(mft_reference): 73 | """ 74 | Given a MREF/mft_reference, return the sequence number part.
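    For example, given the 64-bit reference 0x0005000000000010, MSEQNO() returns 0x5 (the high 16 bits) and MREF() returns 0x10 (the low 48 bits, the MFT record number).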
75 | """ 76 | return (mft_reference >> 48) & 0xFFFF 77 | 78 | 79 | def process_record(buf): 80 | offset = 0 81 | while True: 82 | record_size = struct.unpack_from(" 655360: 113 | f_offset -= 655360 114 | 115 | f.seek(f_offset) 116 | 117 | # data starts within the next 655360 118 | buf = f.read(655360) 119 | for i in range(len(buf)): 120 | if buf[i] != "\x00": 121 | f_offset += i 122 | f.seek(f_offset) 123 | break 124 | i += 1 125 | 126 | # we are at the main records now 127 | print '"size", "major", "minor", "file_ref", "file_ref_seq", "file_ref_mft_record_num", "parent_ref", "parent_ref_seq", "parent_ref_mft_record_num", "usn", "timestamp", "flags", "source", "sid", "attrs", "name_length", "unknown", "name"' 128 | while True: 129 | buf = f.read(min((f_length - f_offset), 800)) 130 | f.seek(f_offset) 131 | 132 | (gap, size, major, minor, file_ref, parent_ref, usn, timestamp, flags, source, sid, attrs, name_length, unknown, name) = process_record(buf) 133 | print '"{size:d}", "{major:d}", "{minor:d}", "{file_ref:d}", "{file_ref_seq:d}", "{file_ref_mft_record_num:d}", "{parent_ref:d}", "{parent_ref_seq:d}", "{parent_ref_mft_record_num:d}", "{usn:d}", "{timestamp:s}", "{flags:s}", "{source:d}", "{sid:d}", "{attrs:s}", "{name_length:d}", "{unknown:d}", "{name:s}"'.format( 134 | size=size, 135 | major=major, 136 | minor=minor, 137 | file_ref=file_ref, 138 | file_ref_seq=MSEQNO(file_ref), 139 | file_ref_mft_record_num=MREF(file_ref), 140 | parent_ref=parent_ref, 141 | parent_ref_seq=MSEQNO(parent_ref), 142 | parent_ref_mft_record_num=MREF(parent_ref), 143 | usn=usn, 144 | timestamp=parse_windows_timestamp(timestamp).isoformat("T") + "Z", 145 | flags=" ".join([v for (k, v) in flag_def.items() if flags & k]), 146 | source=source, 147 | sid=sid, 148 | attrs=" ".join([v for (k, v) in attrs_def.items() if attrs & k]), 149 | name_length=name_length, 150 | unknown=unknown, 151 | name=name) 152 | 153 | f_offset += gap + size 154 | if f_offset == f_length: 155 | break 156 | f.seek(f_offset) 157 | 158 | if __name__ == '__main__': 159 | offset = 0 160 | 161 | if len(sys.argv) > 2: 162 | offset = int(sys.argv[2]) 163 | main(sys.argv[1], offset) 164 | -------------------------------------------------------------------------------- /ntfs/BinaryParser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | # Version v.0.1 20 | import mmap 21 | import sys 22 | import types 23 | import struct 24 | import logging 25 | import cPickle 26 | from datetime import datetime 27 | 28 | g_logger = logging.getLogger("ntfs.BinaryParser") 29 | 30 | 31 | def unpack_from(fmt, buf, off=0): 32 | """ 33 | Shim struct.unpack_from and divert unpacking of __unpackable__ things. 
34 | 35 | Otherwise, you'd get an exception like: 36 | TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap 37 | 38 | So, we extract a true sub-buffer from the FileMap, and feed this 39 | back into the old unpack function. 40 | There's an extra allocation and copy, but there's no getting 41 | around that. 42 | """ 43 | if isinstance(buf, basestring): 44 | return struct.unpack_from(fmt, buf, off) 45 | elif not hasattr(buf, "__unpackable__"): 46 | return struct.unpack_from(fmt, buf, off) 47 | else: 48 | size = struct.calcsize(fmt) 49 | buf = buf[off:off + size] 50 | return struct.unpack_from(fmt, buf, 0x0) 51 | 52 | 53 | def unpack(fmt, string): 54 | """ 55 | Like the shimmed unpack_from, but for struct.unpack. 56 | """ 57 | if isinstance(string, basestring): 58 | return struct.unpack(fmt, string) 59 | elif not hasattr(string, "__unpackable__"): 60 | return struct.unpack(fmt, string) 61 | else: 62 | size = struct.calcsize(fmt) 63 | buf = string[:size] 64 | return struct.unpack(fmt, buf) 65 | 66 | 67 | class Mmap(object): 68 | """ 69 | Convenience class for opening a read-only memory map for a file path. 70 | """ 71 | def __init__(self, filename): 72 | super(Mmap, self).__init__() 73 | self._filename = filename 74 | self._f = None 75 | self._mmap = None 76 | 77 | def __enter__(self): 78 | self._f = open(self._filename, "rb") 79 | self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ) 80 | return self._mmap 81 | 82 | def __exit__(self, type, value, traceback): 83 | self._mmap.close() 84 | self._f.close() 85 | 86 | 87 | def hex_dump(src, start_addr=0): 88 | """ 89 | see: 90 | http://code.activestate.com/recipes/142812-hex-dumper/ 91 | @param src A bytestring containing the data to dump. 92 | @param start_addr An integer representing the start 93 | address of the data in whatever context it comes from. 94 | @return A string containing a classic hex dump with 16 95 | bytes per line. If start_addr is provided, then the 96 | data is interpreted as starting at this offset, and 97 | the offset column is updated accordingly. 98 | """ 99 | FILTER = ''.join([(len(repr(chr(x))) == 3) and 100 | chr(x) or 101 | '.'
for x in range(256)]) 102 | length = 16 103 | result = [] 104 | 105 | remainder_start_addr = start_addr 106 | 107 | if start_addr % length != 0: 108 | base_addr = start_addr - (start_addr % length) 109 | num_spaces = (start_addr % length) 110 | num_chars = length - (start_addr % length) 111 | 112 | spaces = " ".join([" " for i in xrange(num_spaces)]) 113 | s = src[0:num_chars] 114 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 115 | printable = s.translate(FILTER) 116 | 117 | result.append("%04X %s %s %s%s\n" % 118 | (base_addr, spaces, hexa, 119 | " " * (num_spaces + 1), printable)) 120 | 121 | src = src[num_chars:] 122 | remainder_start_addr = base_addr + length 123 | 124 | for i in xrange(0, len(src), length): 125 | s = src[i:i + length] 126 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 127 | printable = s.translate(FILTER) 128 | result.append("%04X %-*s %s\n" % 129 | (remainder_start_addr + i, length * 3, 130 | hexa, printable)) 131 | 132 | return ''.join(result) 133 | 134 | 135 | class decoratorargs(object): 136 | def __new__(typ, *attr_args, **attr_kwargs): 137 | def decorator(orig_func): 138 | self = object.__new__(typ) 139 | self.__init__(orig_func, *attr_args, **attr_kwargs) 140 | return self 141 | return decorator 142 | 143 | 144 | class memoize(decoratorargs): 145 | class Node: 146 | __slots__ = ['key', 'value', 'older', 'newer'] 147 | 148 | def __init__(self, key, value, older=None, newer=None): 149 | self.key = key 150 | self.value = value 151 | self.older = older 152 | self.newer = newer 153 | 154 | def __init__(self, func, capacity=1000, 155 | keyfunc=lambda *args, **kwargs: cPickle.dumps((args, 156 | kwargs))): 157 | if not isinstance(func, property): 158 | self.func = func 159 | self.name = func.__name__ 160 | self.is_property = False 161 | else: 162 | self.func = func.fget 163 | self.name = func.fget.__name__ 164 | self.is_property = True 165 | self.capacity = capacity 166 | self.keyfunc = keyfunc 167 | self.reset() 168 | 169 | def reset(self): 170 | self.mru = self.Node(None, None) 171 | self.mru.older = self.mru.newer = self.mru 172 | self.nodes = {self.mru.key: self.mru} 173 | self.count = 1 174 | self.hits = 0 175 | self.misses = 0 176 | 177 | def __get__(self, inst, clas): 178 | self.obj = inst 179 | if self.is_property: 180 | return self.__call__() 181 | else: 182 | return self 183 | 184 | def __call__(self, *args, **kwargs): 185 | key = self.keyfunc(*args, **kwargs) 186 | try: 187 | node = self.nodes[key] 188 | except KeyError: 189 | # We have an entry not in the cache 190 | self.misses += 1 191 | func = types.MethodType(self.func, self.obj, self.name) 192 | value = func(*args, **kwargs) 193 | lru = self.mru.newer # Always true 194 | # If we haven't reached capacity 195 | if self.count < self.capacity: 196 | # Put it between the MRU and LRU - it'll be the new MRU 197 | node = self.Node(key, value, self.mru, lru) 198 | self.mru.newer = node 199 | 200 | lru.older = node 201 | self.mru = node 202 | self.count += 1 203 | else: 204 | # It's FULL! 
We'll make the LRU be the new MRU, but replace its 205 | # value first 206 | try: 207 | del self.nodes[lru.key] # This mapping is now invalid 208 | except KeyError: # HACK TODO: this may not work/leak 209 | pass 210 | lru.key = key 211 | lru.value = value 212 | self.mru = lru 213 | 214 | # Add the new mapping 215 | self.nodes[key] = self.mru 216 | return value 217 | 218 | # We have an entry in the cache 219 | self.hits += 1 220 | 221 | # If it's already the MRU, do nothing 222 | if node is self.mru: 223 | return node.value 224 | 225 | lru = self.mru.newer # Always true 226 | 227 | # If it's the LRU, update the MRU to be it 228 | if node is lru: 229 | self.mru = lru 230 | return node.value 231 | 232 | # Remove the node from the list 233 | node.older.newer = node.newer 234 | node.newer.older = node.older 235 | 236 | # Put it between MRU and LRU 237 | node.older = self.mru 238 | self.mru.newer = node 239 | 240 | node.newer = lru 241 | lru.older = node 242 | 243 | self.mru = node 244 | return node.value 245 | 246 | 247 | def align(offset, alignment): 248 | """ 249 | Return the offset rounded up to the nearest multiple of the given alignment. 250 | Arguments: 251 | - `offset`: An integer 252 | - `alignment`: An integer 253 | """ 254 | if offset % alignment == 0: 255 | return offset 256 | return offset + (alignment - (offset % alignment)) 257 | 258 | 259 | def dosdate(dosdate, dostime): 260 | """ 261 | `dosdate`: 2 bytes, little endian. 262 | `dostime`: 2 bytes, little endian. 263 | returns: datetime.datetime, or datetime.min on error 264 | """ 265 | try: 266 | t = ord(dosdate[1]) << 8 267 | t |= ord(dosdate[0]) 268 | day = t & 0b0000000000011111 269 | month = (t & 0b0000000111100000) >> 5 270 | year = (t & 0b1111111000000000) >> 9 271 | year += 1980 272 | 273 | t = ord(dostime[1]) << 8 274 | t |= ord(dostime[0]) 275 | sec = t & 0b0000000000011111 276 | sec *= 2 277 | minute = (t & 0b0000011111100000) >> 5 278 | hour = (t & 0b1111100000000000) >> 11 279 | 280 | return datetime(year, month, day, hour, minute, sec) 281 | except: 282 | return datetime.min 283 | 284 | 285 | def parse_filetime(qword): 286 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 287 | return datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 288 | 289 | 290 | class BinaryParserException(Exception): 291 | """ 292 | Base Exception class for binary parsing. 293 | """ 294 | def __init__(self, value): 295 | """ 296 | Constructor. 297 | Arguments: 298 | - `value`: A string description. 299 | """ 300 | super(BinaryParserException, self).__init__() 301 | self._value = value 302 | 303 | def __repr__(self): 304 | return "BinaryParserException(%r)" % (self._value) 305 | 306 | def __str__(self): 307 | return "Binary Parser Exception: %s" % (self._value) 308 | 309 | 310 | class ParseException(BinaryParserException): 311 | """ 312 | An exception to be thrown during binary parsing, such as 313 | when an invalid header is encountered. 314 | """ 315 | def __init__(self, value): 316 | """ 317 | Constructor. 318 | Arguments: 319 | - `value`: A string description.
320 | """ 321 | super(ParseException, self).__init__(value) 322 | 323 | def __repr__(self): 324 | return "ParseException(%r)" % (self._value) 325 | 326 | def __str__(self): 327 | return "Parse Exception(%s)" % (self._value) 328 | 329 | 330 | class OverrunBufferException(ParseException): 331 | def __init__(self, readOffs, bufLen): 332 | tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen)) 333 | super(ParseException, self).__init__(tvalue) 334 | 335 | def __repr__(self): 336 | return "OverrunBufferException(%r)" % (self._value) 337 | 338 | def __str__(self): 339 | return "Tried to parse beyond the end of the file (%s)" % \ 340 | (self._value) 341 | 342 | 343 | def read_byte(buf, offset): 344 | """ 345 | Returns a little-endian unsigned byte from the relative offset of the given buffer. 346 | Arguments: 347 | - `buf`: The buffer from which to read the value. 348 | - `offset`: The relative offset from the start of the block. 349 | Throws: 350 | - `OverrunBufferException` 351 | """ 352 | try: 353 | return unpack_from(" 1: 438 | raise "Cannot specify both `length` and `count`." 439 | 440 | if offset is None: 441 | offset = self._implicit_offset 442 | 443 | basic_sizes = { 444 | "byte": 1, 445 | "int8": 1, 446 | "word": 2, 447 | "word_be": 2, 448 | "int16": 2, 449 | "dword": 4, 450 | "dword_be": 4, 451 | "int32": 4, 452 | "qword": 8, 453 | "int64": 8, 454 | "float": 4, 455 | "double": 8, 456 | "dosdate": 4, 457 | "filetime": 8, 458 | "systemtime": 8, 459 | "guid": 16, 460 | } 461 | 462 | handler = None 463 | 464 | if isinstance(type_, type): 465 | if not issubclass(type_, Nestable): 466 | raise TypeError("Invalid nested structure") 467 | 468 | typename = type_.__name__ 469 | 470 | if count == 0: 471 | def no_class_handler(): 472 | return 473 | handler = no_class_handler 474 | elif is_generator: 475 | def many_class_handler(): 476 | ofs = offset 477 | for _ in range(count): 478 | r = type_(self._buf, self.absolute_offset(ofs), self) 479 | ofs += len(r) 480 | yield r 481 | handler = many_class_handler 482 | 483 | if hasattr(type_, "structure_size"): 484 | ofs = offset 485 | for _ in range(count): 486 | ofs += type_.structure_size(self._buf, self.absolute_offset(ofs), self) 487 | self._implicit_offset = ofs 488 | else: 489 | ofs = offset 490 | for _ in range(count): 491 | r = type_(self._buf, self.absolute_offset(ofs), self) 492 | ofs += len(r) 493 | self._implicit_offset = ofs 494 | else: 495 | # TODO(wb): this needs to cache/memoize 496 | def class_handler(): 497 | return type_(self._buf, self.absolute_offset(offset), self) 498 | handler = class_handler 499 | 500 | if hasattr(type_, "structure_size"): 501 | size = type_.structure_size(self._buf, self.absolute_offset(offset), self) 502 | self._implicit_offset = offset + size 503 | else: 504 | temp = type_(self._buf, self.absolute_offset(offset), self) 505 | 506 | self._implicit_offset = offset + len(temp) 507 | elif isinstance(type_, basestring): 508 | typename = type_ 509 | 510 | if count == 0: 511 | def no_basic_handler(): 512 | return 513 | handler = no_basic_handler 514 | elif is_generator: 515 | # length must be in basic_sizes 516 | def many_basic_handler(): 517 | ofs = offset 518 | f = getattr(self, "unpack_" + type_) 519 | for _ in range(count): 520 | yield f(ofs) 521 | ofs += basic_sizes[type_] 522 | handler = many_basic_handler 523 | 524 | self._implicit_offset = offset + count * basic_sizes[type_] 525 | else: 526 | if length is None: 527 | def basic_no_length_handler(): 528 | f = getattr(self, "unpack_" + type_) 529 | 
return f(offset) 530 | handler = basic_no_length_handler 531 | 532 | if type_ in basic_sizes: 533 | self._implicit_offset = offset + basic_sizes[type_] 534 | elif type_ == "binary": 535 | self._implicit_offset = offset + length 536 | elif type_ == "string" and length is not None: 537 | self._implicit_offset = offset + length 538 | elif type_ == "wstring" and length is not None: 539 | self._implicit_offset = offset + (2 * length) 540 | elif "string" in type_ and length is None: 541 | raise ParseException("Implicit offset not supported for dynamic length strings") 542 | else: 543 | raise ParseException("Implicit offset not supported for type: " + type_) 544 | else: 545 | def basic_length_handler(): 546 | f = getattr(self, "unpack_" + type_) 547 | return f(offset, length) 548 | handler = basic_length_handler 549 | 550 | if type_ == "wstring": 551 | self._implicit_offset = offset + (2 * length) 552 | else: 553 | self._implicit_offset = offset + length 554 | 555 | setattr(self, name, handler) 556 | setattr(self, "_off_" + name, offset) 557 | self.add_explicit_field(offset, typename, name, length, count) 558 | 559 | def add_explicit_field(self, offset, typename, name, length=None, count=1): 560 | """ 561 | The `Block` class tracks the fields that have been added so that you can 562 | pretty print the structure. If there are other fields a subclass 563 | parses, use `add_explicit_field` to include them in the pretty printing. 564 | @type offset: int 565 | @param offset: The offset at which the field begins. 566 | @type typename: str or Block subclass 567 | @param typename: The type of the value of the field. 568 | @type name: str 569 | @param name: The name of the field. 570 | @type length: int 571 | @param length: An explicit length for the field. 572 | @type count: int 573 | @param count: The number of repetitions for the field. 574 | @rtype: None 575 | @return: None 576 | """ 577 | if type(typename) == type: 578 | typename = typename.__name__ 579 | self._declared_fields.append({ 580 | "offset": offset, 581 | "type": typename, 582 | "name": name, 583 | "length": length, 584 | "count": count, 585 | }) 586 | 587 | def get_all_string(self, indent=0): 588 | """ 589 | Get a nicely formatted, nested string of the contents of this structure 590 | and any sub-structures. If a sub-structure has a method `.string()`, then 591 | this method will use it to represent its value. 592 | Implementation note, can't look for `__str__`, because everything has this. 593 | @type indent: int 594 | @param indent: The level of nesting this objects has. 595 | @rtype: str 596 | @return A nicely formatted string that describes this structure. 
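        A sketch of one line of the output, per the format strings below (field name and value are illustrative): 0x10 (dword)logical_size    0x1000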
597 | """ 598 | ret = "" 599 | for field in self._declared_fields: 600 | v = getattr(self, field["name"])() 601 | if isinstance(v, Block): 602 | if hasattr(v, "string"): 603 | ret += "%s%s (%s)%s\t%s\n" % \ 604 | (" " * indent, hex(field["offset"]), field["type"], 605 | field["name"], v.string()) 606 | else: 607 | ret += "%s%s (%s)%s\n" % \ 608 | (" " * indent, hex(field["offset"]), field["type"], 609 | field["name"]) 610 | ret += v.get_all_string(indent + 1) 611 | elif isinstance(v, types.GeneratorType): 612 | ret += "%s%s (%s[])%s\n" % (" " * indent, hex(field["offset"]), field["type"], field["name"],) 613 | for i, j in enumerate(v): 614 | ret += "%s[%d] (%s) " % (" " * (indent + 1), i, field["type"]) 615 | if hasattr(j, "get_all_string"): 616 | ret += "\n" + j.get_all_string(indent + 2) 617 | else: 618 | ret += str(j) + "\n" 619 | else: 620 | if isinstance(v, int): 621 | v = hex(v) 622 | ret += "%s%s (%s)%s\t%s\n" % \ 623 | (" " * indent, hex(field["offset"]), field["type"], 624 | field["name"], str(v)) 625 | return ret 626 | 627 | def current_field_offset(self): 628 | return self._implicit_offset 629 | 630 | def unpack_byte(self, offset): 631 | """ 632 | Returns a little-endian unsigned byte from the relative offset. 633 | Arguments: 634 | - `offset`: The relative offset from the start of the block. 635 | Throws: 636 | - `OverrunBufferException` 637 | """ 638 | return read_byte(self._buf, self._offset + offset) 639 | 640 | def unpack_int8(self, offset): 641 | """ 642 | Returns a little-endian signed byte from the relative offset. 643 | Arguments: 644 | - `offset`: The relative offset from the start of the block. 645 | Throws: 646 | - `OverrunBufferException` 647 | """ 648 | o = self._offset + offset 649 | try: 650 | return unpack_from("H", self._buf, o)[0] 677 | except struct.error: 678 | raise OverrunBufferException(o, len(self._buf)) 679 | 680 | def unpack_int16(self, offset): 681 | """ 682 | Returns a little-endian signed WORD (2 bytes) from the 683 | relative offset. 684 | Arguments: 685 | - `offset`: The relative offset from the start of the block. 686 | Throws: 687 | - `OverrunBufferException` 688 | """ 689 | o = self._offset + offset 690 | try: 691 | return unpack_from("I", self._buf, o)[0] 727 | except struct.error: 728 | raise OverrunBufferException(o, len(self._buf)) 729 | 730 | def unpack_int32(self, offset): 731 | """ 732 | Returns a little-endian signed integer (4 bytes) from the 733 | relative offset. 734 | Arguments: 735 | - `offset`: The relative offset from the start of the block. 
736 | Throws: 737 | - `OverrunBufferException` 738 | """ 739 | o = self._offset + offset 740 | try: 741 | return unpack_from(" self._capacity: 115 | return self._q.pop() 116 | 117 | def pop(self): 118 | return self._q.pop() 119 | 120 | def touch(self, v): 121 | self._q.touch(v) 122 | 123 | def size(self): 124 | return len(self._q) 125 | 126 | def __len__(self): 127 | return self.size() 128 | 129 | @staticmethod 130 | def test(): 131 | q = BoundedLRUQueue(5) 132 | assert q.size() == 0 133 | assert len(q) == 0 134 | 135 | q.push(0) 136 | assert q.size() == 1 137 | assert len(q) == 1 138 | 139 | assert q.pop() == 0 140 | assert q.size() == 0 141 | assert len(q) == 0 142 | 143 | q.push(0) 144 | q.push(1) 145 | assert q.pop() == 0 146 | assert q.pop() == 1 147 | 148 | q.push(0) 149 | q.push(1) 150 | q.touch(0) 151 | assert q.pop() == 1 152 | assert q.pop() == 0 153 | 154 | q = BoundedLRUQueue(5, key=lambda n: n[0]) 155 | q.push([0]) 156 | assert q.pop() == [0] 157 | 158 | q.push([0]) 159 | q.push([1]) 160 | assert q.pop() == [0] 161 | assert q.pop() == [1] 162 | 163 | q = BoundedLRUQueue(2) 164 | assert q.push(0) is None 165 | assert q.push(1) is None 166 | assert q.push(2) == 0 167 | assert q.pop() == 1 168 | assert q.pop() == 2 169 | return True 170 | 171 | 172 | class RangeCache(object): 173 | """ 174 | RangeCache is a data structure that tracks a finite set of 175 | ranges (a range is a 2-tuple consisting of a numeric start 176 | and numeric length). New ranges can be added via the `push` 177 | method, and if such a call causes the capacity to be exceeded, 178 | then the "oldest" range is removed. The `get` method implements 179 | an efficient lookup for a single value that may be found within 180 | one of the ranges. 181 | """ 182 | def __init__(self, capacity, 183 | start_key=lambda o: o[0], 184 | length_key=lambda o: o[1]): 185 | """ 186 | @param key: A function that fetches the range start from an item. 187 | """ 188 | super(RangeCache, self).__init__() 189 | self._ranges = SortedCollection(key=start_key) 190 | self._lru = BoundedLRUQueue(capacity, key=start_key) 191 | self._start_key = start_key 192 | self._length_key = length_key 193 | 194 | def push(self, o): 195 | """ 196 | Add a range to the cache. 197 | 198 | If `key` is not provided to the constructor, then 199 | `o` should be a 3-tuple: 200 | - range start (numeric) 201 | - range length (numeric) 202 | - range item (object) 203 | """ 204 | self._ranges.insert(o) 205 | popped = self._lru.push(o) 206 | if popped is not None: 207 | self._ranges.remove(popped) 208 | 209 | def touch(self, o): 210 | self._lru.touch(o) 211 | 212 | def get(self, value): 213 | """ 214 | Search for the numeric `value` within the ranges 215 | tracked by this cache. 216 | @raise ValueError: if the value is not found in the range cache. 
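        For example, after push((10, 5, buf)), get(12) returns the (10, 5, buf) tuple, while get(15) raises ValueError: each range covers [start, start + length).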
217 | """ 218 | hit = self._ranges.find_le(value) 219 | if value < self._start_key(hit) + self._length_key(hit): 220 | return hit 221 | raise ValueError("%s not found in range cache" % value) 222 | 223 | @staticmethod 224 | def test(): 225 | q = RangeCache(2) 226 | 227 | x = None 228 | try: x = q.get(0) 229 | except ValueError: pass 230 | assert x is None 231 | 232 | x = None 233 | try: x = q.get(1) 234 | except ValueError: pass 235 | assert x is None 236 | 237 | q.push((1, 1, [0])) 238 | 239 | x = None 240 | try: x = q.get(0) 241 | except ValueError: pass 242 | assert x is None 243 | 244 | assert q.get(1) == (1, 1, [0]) 245 | assert q.get(1.99) == (1, 1, [0]) 246 | x = None 247 | try: x = q.get(2.01) 248 | except ValueError: pass 249 | assert x is None 250 | 251 | q.push((3, 1, [1])) 252 | assert q.get(1) == (1, 1, [0]) 253 | assert q.get(3) == (3, 1, [1]) 254 | 255 | q.push((5, 1, [2])) 256 | x = None 257 | try: x = q.get(1) 258 | except ValueError: pass 259 | assert x is None 260 | 261 | assert q.get(3) == (3, 1, [1]) 262 | assert q.get(5) == (5, 1, [2]) 263 | 264 | q.touch((3, 1, [1])) 265 | q.push((7, 1, [3])) 266 | 267 | assert q.get(3) == (3, 1, [1]) 268 | assert q.get(7) == (7, 1, [3]) 269 | x = None 270 | try: x = q.get(5) 271 | except ValueError: pass 272 | assert x is None 273 | 274 | return True 275 | 276 | 277 | class FileMap(object): 278 | """ 279 | FileMap is a wrapper for a file-like object that satisfies the 280 | buffer interface. This is essentially the inverse of StringIO. 281 | It implements a caching layer over the calls to the OS seek/read 282 | functions for improved performance. 283 | 284 | Q: Why might you want this over mmap? 285 | A: 1) Its pure Python 286 | 2) You can stack this over any Python file-like objects. 287 | eg. FileMap over ZipFile gives you a random access buffer 288 | thats backed by a compressed image on the file system. 289 | """ 290 | __unpackable__ = True 291 | def __init__(self, filelike, block_size=MEGABYTE, 292 | cache_size=10, size=None): 293 | """ 294 | If `size` is not provided, then `filelike` must have the 295 | `seek` and `tell` methods implemented. 296 | """ 297 | super(FileMap, self).__init__() 298 | if size is None: 299 | import os 300 | filelike.seek(0, os.SEEK_END) 301 | size = filelike.tell() 302 | self._f = filelike 303 | self._block_size = block_size 304 | self._size = size 305 | self._block_cache = RangeCache(cache_size) 306 | 307 | def __getitem__(self, index): 308 | if index < 0: 309 | index = self._size + index 310 | block_index = index % self._block_size 311 | block_start = index - block_index 312 | 313 | try: 314 | hit = self._block_cache.get(index) 315 | buf = hit[2] 316 | self._block_cache.touch(hit) 317 | return buf[block_index] 318 | except ValueError: 319 | self._f.seek(block_start) 320 | buf = self._f.read(self._block_size) 321 | self._block_cache.push((block_start, self._block_size, buf)) 322 | return buf[block_index] 323 | 324 | def _get_containing_block(self, index): 325 | """ 326 | Given an index, return block-aligned block that contains it, 327 | updating the appropriate caches. 
328 | """ 329 | block_index = index % self._block_size 330 | block_start = index - block_index 331 | 332 | try: 333 | hit = self._block_cache.get(block_start) 334 | buf = hit[2] 335 | self._block_cache.touch(hit) 336 | return buf 337 | except ValueError: 338 | self._f.seek(block_start) 339 | buf = self._f.read(self._block_size) 340 | self._block_cache.push((block_start, self._block_size, buf)) 341 | return buf 342 | 343 | def __getslice__(self, start, end): 344 | if end == sys.maxint: 345 | end = self._size 346 | 347 | start_block_index = start % self._block_size 348 | start_block_start = start - start_block_index 349 | 350 | end_block_index = end % self._block_size 351 | end_block_start = end - end_block_index 352 | 353 | if start_block_start == end_block_start: 354 | # easy case, everything falls within the same block 355 | buf = self._get_containing_block(start) 356 | return buf[start_block_index:end_block_index] 357 | else: 358 | # hard case, slice goes over one or more block boundaries 359 | ret = "" 360 | 361 | # phase 1, start to block boundary 362 | buf = self._get_containing_block(start_block_start) 363 | s = start_block_index 364 | e = start_block_start + self._block_size 365 | ret += buf[s:e] 366 | 367 | # phase 2, any complete blocks 368 | cur_block_start = start_block_start + self._block_size 369 | while cur_block_start + self._block_size < end_block_start: 370 | buf = self._get_containing_block(cur_block_start) 371 | ret += buf 372 | cur_block_start += self._block_size 373 | 374 | # phase 3, block boundary to end 375 | buf = self._get_containing_block(cur_block_start) 376 | s = 0 377 | e = end_block_index or self._block_size 378 | ret += buf[0:e] 379 | return ret 380 | 381 | def __len__(self): 382 | return self._size 383 | 384 | @staticmethod 385 | def test(): 386 | from cStringIO import StringIO 387 | f = StringIO("0123abcd4567efgh") 388 | buf = FileMap(f, block_size=4, cache_size=2) 389 | 390 | assert len(buf) == 16 391 | 392 | assert buf[0] == "0" 393 | assert buf[1] == "1" 394 | assert buf[0:2] == "01" 395 | 396 | assert buf[4] == "a" 397 | assert buf[5] == "b" 398 | assert buf[4:6] == "ab" 399 | 400 | assert buf[2:6] == "23ab" 401 | assert buf[0:8] == "0123abcd" 402 | 403 | assert buf[0:12] == "0123abcd4567" 404 | assert buf[0:16] == "0123abcd4567efgh" 405 | assert buf[:] == "0123abcd4567efgh" 406 | 407 | assert buf[-1] == "h" 408 | assert buf[-2:] == "gh" 409 | assert buf[-4:] == "efgh" 410 | assert buf[-8:] == "4567efgh" 411 | 412 | return True 413 | 414 | 415 | def unpack_from(fmt, buffer, off=0): 416 | """ 417 | Shim struct.unpack_from and divert unpacking of FileMaps. 418 | 419 | Otherwise, you'd get an exception like: 420 | TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap 421 | 422 | So, we extract a true sub-buffer from the FileMap, and feed this 423 | back into the old unpack function. 424 | Theres an extra allocation and copy, but there's no getting 425 | around that. 426 | """ 427 | if not isinstance(buffer, FileMap): 428 | return old_unpack_from(fmt, buffer, off) 429 | size = calcsize(fmt) 430 | buf = buffer[off:off + size] 431 | return old_unpack_from(fmt, buf, 0x0) 432 | 433 | 434 | def unpack(fmt, string): 435 | """ 436 | Like the shimmed unpack_from, but for struct.unpack. 
437 | """ 438 | if not isinstance(buffer, FileMap): 439 | return old_unpack(fmt, string) 440 | size = calcsize(fmt) 441 | buf = string[:size] 442 | return old_unpack(fmt, buf, 0x0) 443 | 444 | 445 | def struct_test(): 446 | from cStringIO import StringIO 447 | f = StringIO("\x04\x03\x02\x01") 448 | buf = FileMap(f) 449 | assert unpack_from(" 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | class Progress(object): 21 | """ 22 | An interface to things that track the progress of a long running task. 23 | """ 24 | def __init__(self, max_): 25 | super(Progress, self).__init__() 26 | self._max = max_ 27 | self._current = 0 28 | 29 | def set_current(self, current): 30 | """ 31 | Set the number of steps that this task has completed. 32 | 33 | @type current: int 34 | """ 35 | self._current = current 36 | 37 | def set_complete(self): 38 | """ 39 | Convenience method to set the task as having completed all steps. 40 | """ 41 | self._current = self._max 42 | 43 | 44 | class NullProgress(Progress): 45 | """ 46 | A Progress class that ignores any updates. 47 | """ 48 | def __init__(self, max_): 49 | super(NullProgress, self).__init__(max_) 50 | 51 | def set_current(self, current): 52 | pass 53 | 54 | 55 | class ProgressBarProgress(Progress): 56 | def __init__(self, max_): 57 | from progressbar import Bar 58 | from progressbar import ETA 59 | from progressbar import ProgressBar 60 | super(ProgressBarProgress, self).__init__(max_) 61 | 62 | widgets = ["Progress: ", 63 | Bar(marker="=", left="[", right="]"), " ", 64 | ETA(), " ", ] 65 | self._pbar = ProgressBar(widgets=widgets, maxval=self._max) 66 | self._has_notified_started = False 67 | 68 | def set_current(self, current): 69 | if not self._has_notified_started: 70 | self._pbar.start() 71 | self._has_notified_started = True 72 | 73 | self._pbar.update(current) 74 | 75 | def set_complete(self): 76 | self._pbar.finish() 77 | -------------------------------------------------------------------------------- /ntfs/SortedCollection.py: -------------------------------------------------------------------------------- 1 | """ 2 | From http://code.activestate.com/recipes/577197-sortedcollection/ 3 | """ 4 | from bisect import bisect_left, bisect_right 5 | 6 | class SortedCollection(object): 7 | '''Sequence sorted by a key function. 8 | 9 | SortedCollection() is much easier to work with than using bisect() directly. 10 | It supports key functions like those use in sorted(), min(), and max(). 11 | The result of the key function call is saved so that keys can be searched 12 | efficiently. 13 | 14 | Instead of returning an insertion-point which can be hard to interpret, the 15 | five find-methods return a specific item in the sequence. They can scan for 16 | exact matches, the last item less-than-or-equal to a key, or the first item 17 | greater-than-or-equal to a key. 18 | 19 | Once found, an item's ordinal position can be located with the index() method. 
20 | New items can be added with the insert() and insert_right() methods. 21 | Old items can be deleted with the remove() method. 22 | 23 | The usual sequence methods are provided to support indexing, slicing, 24 | length lookup, clearing, copying, forward and reverse iteration, contains 25 | checking, item counts, item removal, and a nice looking repr. 26 | 27 | Finding and indexing are O(log n) operations while iteration and insertion 28 | are O(n). The initial sort is O(n log n). 29 | 30 | The key function is stored in the 'key' attibute for easy introspection or 31 | so that you can assign a new key function (triggering an automatic re-sort). 32 | 33 | In short, the class was designed to handle all of the common use cases for 34 | bisect but with a simpler API and support for key functions. 35 | 36 | >>> from pprint import pprint 37 | >>> from operator import itemgetter 38 | 39 | >>> s = SortedCollection(key=itemgetter(2)) 40 | >>> for record in [ 41 | ... ('roger', 'young', 30), 42 | ... ('angela', 'jones', 28), 43 | ... ('bill', 'smith', 22), 44 | ... ('david', 'thomas', 32)]: 45 | ... s.insert(record) 46 | 47 | >>> pprint(list(s)) # show records sorted by age 48 | [('bill', 'smith', 22), 49 | ('angela', 'jones', 28), 50 | ('roger', 'young', 30), 51 | ('david', 'thomas', 32)] 52 | 53 | >>> s.find_le(29) # find oldest person aged 29 or younger 54 | ('angela', 'jones', 28) 55 | >>> s.find_lt(28) # find oldest person under 28 56 | ('bill', 'smith', 22) 57 | >>> s.find_gt(28) # find youngest person over 28 58 | ('roger', 'young', 30) 59 | 60 | >>> r = s.find_ge(32) # find youngest person aged 32 or older 61 | >>> s.index(r) # get the index of their record 62 | 3 63 | >>> s[3] # fetch the record at that index 64 | ('david', 'thomas', 32) 65 | 66 | >>> s.key = itemgetter(0) # now sort by first name 67 | >>> pprint(list(s)) 68 | [('angela', 'jones', 28), 69 | ('bill', 'smith', 22), 70 | ('david', 'thomas', 32), 71 | ('roger', 'young', 30)] 72 | 73 | ''' 74 | 75 | def __init__(self, iterable=(), key=None): 76 | self._given_key = key 77 | key = (lambda x: x) if key is None else key 78 | decorated = sorted((key(item), item) for item in iterable) 79 | self._keys = [k for k, item in decorated] 80 | self._items = [item for k, item in decorated] 81 | self._key = key 82 | 83 | def _getkey(self): 84 | return self._key 85 | 86 | def _setkey(self, key): 87 | if key is not self._key: 88 | self.__init__(self._items, key=key) 89 | 90 | def _delkey(self): 91 | self._setkey(None) 92 | 93 | key = property(_getkey, _setkey, _delkey, 'key function') 94 | 95 | def clear(self): 96 | self.__init__([], self._key) 97 | 98 | def copy(self): 99 | return self.__class__(self, self._key) 100 | 101 | def __len__(self): 102 | return len(self._items) 103 | 104 | def __getitem__(self, i): 105 | return self._items[i] 106 | 107 | def __iter__(self): 108 | return iter(self._items) 109 | 110 | def __reversed__(self): 111 | return reversed(self._items) 112 | 113 | def __repr__(self): 114 | return '%s(%r, key=%s)' % ( 115 | self.__class__.__name__, 116 | self._items, 117 | getattr(self._given_key, '__name__', repr(self._given_key)) 118 | ) 119 | 120 | def __reduce__(self): 121 | return self.__class__, (self._items, self._given_key) 122 | 123 | def __contains__(self, item): 124 | k = self._key(item) 125 | i = bisect_left(self._keys, k) 126 | j = bisect_right(self._keys, k) 127 | return item in self._items[i:j] 128 | 129 | def index(self, item): 130 | 'Find the position of an item. Raise ValueError if not found.' 
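# sketch of the approach: bisect the precomputed keys to find the window
# [i, j) of items sharing this item's key, then search only that window;
# e.g. for key 28 in keys [22, 28, 28, 30], the window is [1, 3).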
131 | k = self._key(item) 132 | i = bisect_left(self._keys, k) 133 | j = bisect_right(self._keys, k) 134 | return self._items[i:j].index(item) + i 135 | 136 | def count(self, item): 137 | 'Return number of occurrences of item' 138 | k = self._key(item) 139 | i = bisect_left(self._keys, k) 140 | j = bisect_right(self._keys, k) 141 | return self._items[i:j].count(item) 142 | 143 | def insert(self, item): 144 | 'Insert a new item. If equal keys are found, add to the left' 145 | k = self._key(item) 146 | i = bisect_left(self._keys, k) 147 | self._keys.insert(i, k) 148 | self._items.insert(i, item) 149 | 150 | def insert_right(self, item): 151 | 'Insert a new item. If equal keys are found, add to the right' 152 | k = self._key(item) 153 | i = bisect_right(self._keys, k) 154 | self._keys.insert(i, k) 155 | self._items.insert(i, item) 156 | 157 | def remove(self, item): 158 | 'Remove first occurrence of item. Raise ValueError if not found' 159 | i = self.index(item) 160 | del self._keys[i] 161 | del self._items[i] 162 | 163 | def find(self, k): 164 | 'Return first item with a key == k. Raise ValueError if not found.' 165 | i = bisect_left(self._keys, k) 166 | if i != len(self) and self._keys[i] == k: 167 | return self._items[i] 168 | raise ValueError('No item found with key equal to: %r' % (k,)) 169 | 170 | def find_le(self, k): 171 | 'Return last item with a key <= k. Raise ValueError if not found.' 172 | i = bisect_right(self._keys, k) 173 | if i: 174 | return self._items[i-1] 175 | raise ValueError('No item found with key at or below: %r' % (k,)) 176 | 177 | def find_lt(self, k): 178 | 'Return last item with a key < k. Raise ValueError if not found.' 179 | i = bisect_left(self._keys, k) 180 | if i: 181 | return self._items[i-1] 182 | raise ValueError('No item found with key below: %r' % (k,)) 183 | 184 | def find_ge(self, k): 185 | 'Return first item with a key >= k. Raise ValueError if not found.' 186 | i = bisect_left(self._keys, k) 187 | if i != len(self): 188 | return self._items[i] 189 | raise ValueError('No item found with key at or above: %r' % (k,)) 190 | 191 | def find_gt(self, k): 192 | 'Return first item with a key > k.
Raise ValueError if not found' 193 | i = bisect_right(self._keys, k) 194 | if i != len(self): 195 | return self._items[i] 196 | raise ValueError('No item found with key above: %r' % (k,)) 197 | -------------------------------------------------------------------------------- /ntfs/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = 0.1 2 | 3 | __all__ = [ 4 | "secure", 5 | "mft", 6 | "logfile", 7 | "usnjrnl", 8 | ] 9 | -------------------------------------------------------------------------------- /ntfs/filesystem/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | 4 | from ntfs.BinaryParser import Block 5 | from ntfs.BinaryParser import OverrunBufferException 6 | from ntfs.mft.MFT import InvalidRecordException 7 | from ntfs.mft.MFT import MREF 8 | from ntfs.mft.MFT import MSEQNO 9 | from ntfs.mft.MFT import MFTRecord 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import INDEX_ROOT 12 | from ntfs.mft.MFT import MFTEnumerator 13 | from ntfs.mft.MFT import MFT_RECORD_SIZE 14 | from ntfs.mft.MFT import INDEX_ALLOCATION 15 | from ntfs.mft.MFT import AttributeNotFoundError 16 | 17 | 18 | g_logger = logging.getLogger("ntfs.filesystem") 19 | 20 | 21 | class FileSystemError(Exception): 22 | def __init__(self, msg="no details"): 23 | super(FileSystemError, self).__init__(self) 24 | self._msg = msg 25 | 26 | def __str__(self): 27 | return "%s(%s)" % (self.__class__.__name__, self._msg) 28 | 29 | 30 | class CorruptNTFSFilesystemError(FileSystemError): 31 | pass 32 | 33 | 34 | class NoParentError(FileSystemError): 35 | pass 36 | 37 | 38 | class UnsupportedPathError(FileSystemError): 39 | pass 40 | 41 | 42 | class File(object): 43 | """ 44 | interface 45 | """ 46 | def get_name(self): 47 | raise NotImplementedError() 48 | 49 | def get_parent_directory(self): 50 | """ 51 | @raise NoParentError: 52 | """ 53 | raise NotImplementedError() 54 | 55 | def read(self, offset, length): 56 | raise NotImplementedError() 57 | 58 | def get_full_path(self): 59 | raise NotImplementedError() 60 | 61 | 62 | class NTFSFileMetadataMixin(object): 63 | def __init__(self, record): 64 | self._record = record 65 | 66 | def get_filenames(self): 67 | ret = [] 68 | for fn in self._record.filename_informations(): 69 | ret.append(fn.filename()) 70 | return ret 71 | 72 | def get_si_created_timestamp(self): 73 | return self._record.standard_information().created_time() 74 | 75 | def get_si_accessed_timestamp(self): 76 | return self._record.standard_information().accessed_time() 77 | 78 | def get_si_changed_timestamp(self): 79 | return self._record.standard_information().changed_time() 80 | 81 | def get_si_modified_timestamp(self): 82 | return self._record.standard_information().modified_time() 83 | 84 | def get_fn_created_timestamp(self): 85 | return self._record.filename_information().created_time() 86 | 87 | def get_fn_accessed_timestamp(self): 88 | return self._record.filename_information().accessed_time() 89 | 90 | def get_fn_changed_timestamp(self): 91 | return self._record.filename_information().changed_time() 92 | 93 | def get_fn_modified_timestamp(self): 94 | return self._record.filename_information().modified_time() 95 | 96 | def is_file(self): 97 | return self._record.is_file() 98 | 99 | def is_directory(self): 100 | return self._record.is_directory() 101 | 102 | def get_size(self): 103 | if self.is_directory(): 104 | return 0 105 | else: 106 | data_attribute = 
self._record.data_attribute() 107 | if data_attribute is not None: 108 | if data_attribute.non_resident() == 0: 109 | size = len(data_attribute.value()) 110 | else: 111 | size = data_attribute.data_size() 112 | else: 113 | size = self._record.filename_information().logical_size() 114 | return size 115 | 116 | 117 | class NTFSFile(File, NTFSFileMetadataMixin): 118 | def __init__(self, filesystem, mft_record): 119 | File.__init__(self) 120 | NTFSFileMetadataMixin.__init__(self, mft_record) 121 | self._fs = filesystem 122 | self._record = mft_record 123 | 124 | def get_name(self): 125 | return self._record.filename_information().filename() 126 | 127 | def get_parent_directory(self): 128 | return self._fs.get_record_parent(self._record) 129 | 130 | def __str__(self): 131 | return "File(name: %s)" % (self.get_name()) 132 | 133 | def read(self, offset, length): 134 | data_attribute = self._record.data_attribute() 135 | data = self._fs.get_attribute_data(data_attribute) 136 | return data[offset:offset+length] 137 | 138 | def get_full_path(self): 139 | return self._fs.get_record_path(self._record) 140 | 141 | 142 | class ChildNotFoundError(Exception): 143 | pass 144 | 145 | 146 | class Directory(object): 147 | """ 148 | interface 149 | """ 150 | def get_name(self): 151 | raise NotImplementedError() 152 | 153 | def get_children(self): 154 | raise NotImplementedError() 155 | 156 | def get_files(self): 157 | raise NotImplementedError() 158 | 159 | def get_directories(self): 160 | raise NotImplementedError() 161 | 162 | def get_parent_directory(self): 163 | """ 164 | @raise NoParentError: 165 | """ 166 | raise NotImplementedError() 167 | 168 | def get_child(self, name): 169 | """ 170 | @raise ChildNotFoundError: if the given filename is not found. 171 | """ 172 | raise NotImplementedError() 173 | 174 | def get_full_path(self): 175 | raise NotImplementedError() 176 | 177 | 178 | class PathDoesNotExistError(Exception): 179 | pass 180 | 181 | 182 | class DirectoryDoesNotExistError(PathDoesNotExistError): 183 | pass 184 | 185 | 186 | class NTFSDirectory(Directory, NTFSFileMetadataMixin): 187 | def __init__(self, filesystem, mft_record): 188 | Directory.__init__(self) 189 | NTFSFileMetadataMixin.__init__(self, mft_record) 190 | self._fs = filesystem 191 | self._record = mft_record 192 | 193 | def get_name(self): 194 | return self._record.filename_information().filename() 195 | 196 | def get_children(self): 197 | ret = [] 198 | for child in self._fs.get_record_children(self._record): 199 | if child.is_directory(): 200 | ret.append(NTFSDirectory(self._fs, child)) 201 | else: 202 | ret.append(NTFSFile(self._fs, child)) 203 | return ret 204 | 205 | def get_files(self): 206 | return filter(lambda c: isinstance(c, NTFSFile), 207 | self.get_children()) 208 | 209 | def get_directories(self): 210 | return filter(lambda c: isinstance(c, NTFSDirectory), 211 | self.get_children()) 212 | 213 | def get_parent_directory(self): 214 | return self._fs.get_record_parent(self._record) 215 | 216 | def __str__(self): 217 | return "Directory(name: %s)" % (self.get_name()) 218 | 219 | def get_child(self, name): 220 | name_lower = name.lower() 221 | for child in self.get_children(): 222 | if len(child.get_filenames()) > 1: 223 | g_logger.debug("file names: %s -> %s", 224 | child.get_name(), child.get_filenames()) 225 | for fn in child.get_filenames(): 226 | if name_lower == fn.lower(): 227 | return child 228 | raise ChildNotFoundError() 229 | 230 | def _split_path(self, path): 231 | """ 232 | Hack to try to support both types of 
file system paths: 233 | - forward slash, /etc 234 | - backslash, C:\windows\system32 235 | 236 | Linux uses forward slashes, so we'd like that when working with FUSE. 237 | The original file system used backslashes, so we'd also like that. 238 | 239 | This is a poor attempt at doing both: 240 | - detect which slash type is in use 241 | - don't support both at the same time 242 | 243 | This works like string.partition(PATH_SEPARATOR) 244 | """ 245 | if "\\" in path: 246 | if "/" in path: 247 | raise UnsupportedPathError(path) 248 | return path.partition("\\") 249 | 250 | elif "/" in path: 251 | if "\\" in path: 252 | raise UnsupportedPathError(path) 253 | return path.partition("/") 254 | else: 255 | return path, "", "" 256 | 257 | def get_path_entry(self, path): 258 | g_logger.debug("get_path_entry: path: %s", path) 259 | imm, slash, rest = self._split_path(path) 260 | if slash == "": 261 | return self.get_child(path) 262 | else: 263 | if rest == "": 264 | return self 265 | 266 | child = self.get_child(imm) 267 | if not isinstance(child, NTFSDirectory): 268 | raise DirectoryDoesNotExistError() 269 | 270 | return child.get_path_entry(rest) 271 | 272 | def get_full_path(self): 273 | return self._fs.get_record_path(self._record) 274 | 275 | 276 | class Filesystem(object): 277 | """ 278 | interface 279 | """ 280 | def get_root_directory(self): 281 | raise NotImplementedError() 282 | 283 | 284 | class NTFSVBR(Block): 285 | """ 286 | NTFS Volume Boot Record 287 | """ 288 | def __init__(self, volume): 289 | super(NTFSVBR, self).__init__(volume, 0) 290 | # 0x0 291 | self.declare_field("byte", "jump", offset=0x0, count=3) 292 | # 0x3 OEM ID 293 | self.declare_field("qword", "oem_id") 294 | 295 | # The BIOS parameter block (BPB) 296 | # 0x0b Bytes Per Sector 297 | self.declare_field("word", "bytes_per_sector") 298 | # 0x0d Sectors Per Cluster. The number of sectors in a cluster 299 | self.declare_field("byte", "sectors_per_cluster") 300 | # Must be 0 301 | # 0x0e 302 | self.declare_field("word", "reserved_sectors") 303 | # 0x10 304 | self.declare_field("byte", "zero0", count=3) 305 | # 0x13 306 | self.declare_field("word", "unused0") 307 | # 0x15 Media Descriptor. Legacy 308 | self.declare_field("byte", "media_descriptor") 309 | # 0x16 310 | self.declare_field("word", "zero1") 311 | # 0x18 312 | self.declare_field("word", "sectors_per_track") 313 | # 0x1a 314 | self.declare_field("word", "number_of_heads") 315 | # 0x1c 316 | self.declare_field("dword", "hidden_sectors") 317 | # 0x20 Unused 318 | self.declare_field("dword", "unused1") 319 | 320 | # 0x24 Extended BPB 321 | self.declare_field("dword", "unused2") 322 | # 0x28 Total Sectors. 
The total number of sectors on the hard disk 323 | self.declare_field("qword", "total_sectors") 324 | # 0x30 Logical Cluster Number for the File $MFT 325 | self.declare_field("qword", "mft_lcn") 326 | # 0x38 Logical Cluster Number for the File $MFTMirr 327 | self.declare_field("qword", "mftmirr_lcn") 328 | # 0x40 Cluster Per MFT Record 329 | # The Number of Clusters for each MFT record, 330 | # which can be a negative number when the cluster size is larger 331 | # than the MFT File record 332 | # if the value is negative number, 333 | # the MFT record size in bytes equals 2**value 334 | self.declare_field("byte", "clusters_per_file_record_segment") 335 | # 0x41 Unused 336 | self.declare_field("byte", "unused3", count=3) 337 | # 0x44 Cluster Per Index Buffer.` 338 | self.declare_field("byte", "clusters_per_index_buffer") 339 | # 0x45 Unused 340 | self.declare_field("byte", "unused4", count=3) 341 | # 0x48 Volume Serial Number 342 | self.declare_field("qword", "volume_serial_number") 343 | # 0x50 Checksum. Not used by NTFS. 344 | self.declare_field("dword", "checksum") 345 | 346 | # 0x54 Bootstrap code 347 | self.declare_field("byte", "bootstrap_code", count=426) 348 | # 0x01fe End of sector 349 | self.declare_field("word", "end_of_sector") 350 | 351 | 352 | class ClusterAccessor(object): 353 | """ 354 | index volume data using `cluster_size` units 355 | """ 356 | def __init__(self, volume, cluster_size): 357 | super(ClusterAccessor, self).__init__() 358 | self._volume = volume 359 | self._cluster_size = cluster_size 360 | 361 | def __getitem__(self, index): 362 | size = self._cluster_size 363 | start, end = index * size, (index + 1) * size 364 | g_logger.debug('Get clusters %s:%s', start, end) 365 | return self._volume[start:end] 366 | 367 | def __getslice__(self, start, end): 368 | size = self._cluster_size 369 | start, end = start * size, end * size 370 | g_logger.debug('Get clusters %s:%s', start, end) 371 | return self._volume[start:end] 372 | 373 | def __len__(self): 374 | return len(self._volume) / self._cluster_size 375 | 376 | def get_cluster_size(self): 377 | return self._cluster_size 378 | 379 | 380 | INODE_MFT = 0 381 | INODE_MFTMIRR = 1 382 | INODE_LOGFILE = 2 383 | INODE_VOLUME = 3 384 | INODE_ATTR_DEF = 4 385 | INODE_ROOT = 5 386 | INODE_BITMAP = 6 387 | INODE_BOOT = 7 388 | INODE_BADCLUS = 8 389 | INODE_SECURE = 9 390 | INODE_UPCASE = 10 391 | INODE_EXTEND = 11 392 | INODE_RESERVED0 = 12 393 | INODE_RESERVED1 = 13 394 | INODE_RESERVED2 = 14 395 | INODE_RESERVED3 = 15 396 | INODE_FIRST_USER = 16 397 | 398 | 399 | class NonResidentAttributeData(object): 400 | """ 401 | expose a potentially non-continuous set of data runs as a single 402 | logical buffer 403 | 404 | once constructed, use this like a bytestring. 405 | you can unpack from it, slice it, etc. 
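for example (hypothetical run layout: two runs of two 1024-byte
clusters each):

    data = NonResidentAttributeData(clusters, runlist)
    len(data)      # 4096, summed across both runs
    data[0:4096]   # reads transparently across the run boundary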
406 | 407 | implementation note: this is likely a good place to optimize 408 | """ 409 | __unpackable__ = True 410 | def __init__(self, clusters, runlist): 411 | self._clusters = clusters 412 | self._runlist = runlist 413 | self._runentries = list(self._runlist.runs()) 414 | self._len = None 415 | 416 | def __getitem__(self, index): 417 | # TODO: clarify variable names and their units 418 | # units: bytes 419 | current_run_start_offset = 0 420 | 421 | if index < 0: 422 | index = len(self) + index 423 | 424 | clusters = self._clusters 425 | csize = clusters.get_cluster_size() 426 | 427 | # units: clusters 428 | for cluster_offset, num_clusters in self._runentries: 429 | # units: bytes 430 | run_length = num_clusters * csize 431 | right_border = current_run_start_offset + run_length 432 | 433 | # Check if the target byte in the run entry 434 | if current_run_start_offset <= index < right_border: 435 | # units: bytes 436 | target_idx = index - current_run_start_offset 437 | # The index of the cluster that contains the target byte 438 | target_cluster_idx = int(target_idx/csize) 439 | # The index of the target byte relative to the cluster 440 | byte_relative_idx = (target_idx - target_cluster_idx * csize) 441 | cluster = clusters[cluster_offset+target_cluster_idx] 442 | return cluster[byte_relative_idx] 443 | # else looking at next run entry 444 | current_run_start_offset += run_length 445 | raise IndexError("%d is greater than the non resident " 446 | "attribute data length %s", index, len(self)) 447 | 448 | def __getslice__(self, start, stop): 449 | """ 450 | 451 | :param start: start byte 452 | :param stop: stop byte 453 | :return: 454 | """ 455 | # TODO: there are some pretty bad inefficiencies here, i believe 456 | # TODO: clarify variable names and their units 457 | ret = bytearray() 458 | virt_byte_offset = 0 459 | have_found_start = False 460 | 461 | g_logger.debug("NonResidentAttributeData: getslice: " 462 | "start: %x end: %x", start, stop) 463 | _len = len(self) 464 | if stop == sys.maxint: 465 | stop = _len 466 | 467 | if stop < 0: 468 | stop = _len + stop 469 | 470 | if start < 0: 471 | start = _len + start 472 | 473 | if max(start, stop) > _len: 474 | raise IndexError("(%d, %d) is greater " 475 | "than the non resident attribute data length %s", 476 | start, stop, _len) 477 | clusters = self._clusters 478 | csize = clusters.get_cluster_size() 479 | for cluster_offset, num_clusters in self._runentries: 480 | g_logger.debug("NonResidentAttributeData: " 481 | "getslice: runentry: start: %x len: %x", 482 | cluster_offset * csize, num_clusters * csize) 483 | run_byte_len = num_clusters * csize 484 | # check if start byte in the data run 485 | virt_byte_stop = virt_byte_offset + run_byte_len 486 | is_start_in_run = (virt_byte_offset <= start < virt_byte_stop) 487 | 488 | if not have_found_start: 489 | if is_start_in_run: 490 | have_found_start = True 491 | else: 492 | virt_byte_offset += run_byte_len 493 | continue 494 | 495 | cluster_stop = cluster_offset + num_clusters 496 | _bytes = clusters[cluster_offset:cluster_stop] 497 | 498 | is_stop_in_run = stop <= virt_byte_stop 499 | # This is the situation when we have only one data run 500 | # everything is in this run 501 | if is_start_in_run and is_stop_in_run: 502 | return _bytes[start:stop] 503 | 504 | _start = _stop = None 505 | if is_start_in_run: 506 | _start = start - virt_byte_offset 507 | if is_stop_in_run: 508 | _stop = stop - virt_byte_offset 509 | # if start and stop are not in the data run, 510 | # then copy all bytes from 
the data run's clusters 511 | # _bytes[None:None] === _bytes[:] 512 | ret.extend(_bytes[_start:_stop]) 513 | virt_byte_offset += run_byte_len 514 | 515 | return ret 516 | 517 | def __len__(self): 518 | if self._len is not None: 519 | return self._len 520 | ret = 0 521 | for cluster_start, num_clusters in self._runentries: 522 | g_logger.debug("NonResidentAttributeData: len: run: " 523 | "cluster: %x len: %x", cluster_start, num_clusters) 524 | ret += num_clusters * self._clusters.get_cluster_size() 525 | self._len = ret 526 | return ret 527 | 528 | 529 | class NTFSFilesystem(object): 530 | def __init__(self, volume, cluster_size=None): 531 | oem_id = volume[3:7] 532 | assert oem_id == 'NTFS', 'Wrong OEM signature' 533 | 534 | super(NTFSFilesystem, self).__init__() 535 | self._volume = volume 536 | self._cluster_size = cluster_size 537 | vbr = self._vbr = NTFSVBR(volume) 538 | self._cluster_size = cluster_size = (cluster_size or 539 | vbr.bytes_per_sector() * 540 | vbr.sectors_per_cluster()) 541 | 542 | self._clusters = ClusterAccessor(volume, cluster_size) 543 | self._logger = logging.getLogger("NTFSFilesystem") 544 | 545 | # balance memory usage with performance 546 | try: 547 | b = self.get_mft_buffer() 548 | 549 | # test we can access last MFT byte, demonstrating we can 550 | # reach all runs 551 | _ = b[-1] 552 | except OverrunBufferException as e: 553 | g_logger.warning("failed to read MFT from image, will fall back to MFTMirr: %s", e) 554 | try: 555 | b = self.get_mftmirr_buffer() 556 | 557 | # test we can access last MFTMirr byte, demonstrating 558 | # we can reach all runs 559 | _ = b[-1] 560 | except OverrunBufferException as e: 561 | g_logger.error("failed to read MFTMirr from image: %s", e) 562 | raise CorruptNTFSFilesystemError("failed to read MFT or MFTMirr from image") 563 | 564 | if len(b) > 1024 * 1024 * 500: 565 | self._mft_data = b 566 | else: 567 | # note optimization: copy entire mft buffer from NonResidentNTFSAttribute 568 | # to avoid getslice lookups 569 | self._mft_data = b[:] 570 | self._enumerator = MFTEnumerator(self._mft_data) 571 | 572 | # test there's at least some user content (aside from root), or we'll 573 | # assume something's up 574 | try: 575 | _ = self.get_record(INODE_FIRST_USER) 576 | except OverrunBufferException: 577 | g_logger.error("overrun reading first user MFT record") 578 | raise CorruptNTFSFilesystemError("failed to read first user record (MFT not large enough)") 579 | 580 | def get_attribute_data(self, attribute): 581 | if attribute.non_resident() == 0: 582 | return attribute.value() 583 | else: 584 | return NonResidentAttributeData(self._clusters, attribute.runlist()) 585 | 586 | def get_mft_record(self): 587 | mft_lcn = self._vbr.mft_lcn() 588 | g_logger.debug("mft: %x", mft_lcn * 4096) 589 | mft_chunk = self._clusters[mft_lcn] 590 | mft_record = MFTRecord(mft_chunk, 0, None, inode=INODE_MFT) 591 | return mft_record 592 | 593 | def get_mft_buffer(self): 594 | mft_lcn = self._vbr.mft_lcn() 595 | g_logger.debug("mft: %x", mft_lcn * 4096) 596 | mft_chunk = self._clusters[mft_lcn] 597 | mft_record = MFTRecord(mft_chunk, 0, None, inode=INODE_MFT) 598 | mft_data_attribute = mft_record.data_attribute() 599 | return self.get_attribute_data(mft_data_attribute) 600 | 601 | def get_mftmirr_buffer(self): 602 | g_logger.debug("mft mirr: %s", hex(self._vbr.mftmirr_lcn() * 4096)) 603 | mftmirr_chunk = self._clusters[self._vbr.mftmirr_lcn()] 604 | mftmirr_mft_record = MFTRecord(mftmirr_chunk, INODE_MFTMIRR * MFT_RECORD_SIZE, None, inode=INODE_MFTMIRR) 
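# note: the mirror chunk opens with the copy of record 0 ($MFT), so the
# record describing $MFTMirr itself sits one record further in; hence the
# INODE_MFTMIRR * MFT_RECORD_SIZE offset above.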
605 | mftmirr_data_attribute = mftmirr_mft_record.data_attribute() 606 | return self.get_attribute_data(mftmirr_data_attribute) 607 | 608 | def get_root_directory(self): 609 | return NTFSDirectory(self, self._enumerator.get_record(INODE_ROOT)) 610 | 611 | def get_record(self, record_number): 612 | g_logger.debug("get_record: %d", record_number) 613 | return self._enumerator.get_record(record_number) 614 | 615 | def get_record_path(self, record): 616 | return self._enumerator.get_path(record) 617 | 618 | def get_record_parent(self, record): 619 | """ 620 | @raises NoParentError: on various error conditions 621 | """ 622 | if record.mft_record_number() == 5: 623 | raise NoParentError("Root directory has no parent") 624 | 625 | fn = record.filename_information() 626 | if not fn: 627 | raise NoParentError("File has no filename attribute") 628 | 629 | parent_record_num = MREF(fn.mft_parent_reference()) 630 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 631 | 632 | try: 633 | parent_record = self._enumerator.get_record(parent_record_num) 634 | except (OverrunBufferException, InvalidRecordException): 635 | raise NoParentError("Invalid parent MFT record") 636 | 637 | if parent_record.sequence_number() != parent_seq_num: 638 | raise NoParentError("Invalid parent MFT record (bad sequence number)") 639 | 640 | return NTFSDirectory(self, parent_record) 641 | 642 | def get_record_children(self, record): 643 | # we use a map here to de-dup entries with different filename types 644 | # such as 8.3, POSIX, or Windows, but the same ultimate MFT reference 645 | ret = {} # type: dict(int, MFTRecord) 646 | if not record.is_directory(): 647 | return ret.values() 648 | 649 | # TODO: cleanup the duplication here 650 | try: 651 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 652 | indx_alloc = INDEX_ALLOCATION(self.get_attribute_data(indx_alloc_attr), 0) 653 | #g_logger.debug("INDEX_ALLOCATION len: %s", hex(len(indx_alloc))) 654 | #g_logger.debug("alloc:\n%s", indx_alloc.get_all_string(indent=2)) 655 | indx = indx_alloc 656 | 657 | for block in indx.blocks(): 658 | for entry in block.index().entries(): 659 | ref = MREF(entry.header().mft_reference()) 660 | if ref == INODE_ROOT and \ 661 | entry.filename_information().filename() == ".": 662 | continue 663 | ret[ref] = self._enumerator.get_record(ref) 664 | 665 | except AttributeNotFoundError: 666 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 667 | indx_root = INDEX_ROOT(self.get_attribute_data(indx_root_attr), 0) 668 | indx = indx_root 669 | 670 | for entry in indx.index().entries(): 671 | ref = MREF(entry.header().mft_reference()) 672 | if ref == INODE_ROOT and \ 673 | entry.filename_information().filename() == ".": 674 | continue 675 | ret[ref] = self._enumerator.get_record(ref) 676 | 677 | return ret.values() 678 | 679 | 680 | def main(): 681 | import sys 682 | from ntfs.volume import FlatVolume 683 | from ntfs.BinaryParser import Mmap 684 | from ntfs.mft.MFT import MFTEnumerator 685 | logging.basicConfig(level=logging.DEBUG) 686 | 687 | with Mmap(sys.argv[1]) as buf: 688 | v = FlatVolume(buf, int(sys.argv[2])) 689 | fs = NTFSFilesystem(v) 690 | root = fs.get_root_directory() 691 | g_logger.info("root dir: %s", root) 692 | for c in root.get_children(): 693 | g_logger.info(" - %s", c.get_name()) 694 | 695 | sys32 = root.get_path_entry("windows\\system32") 696 | g_logger.info("sys32: %s", sys32) 697 | 698 | 699 | if __name__ == "__main__": 700 | main() 701 | 
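As a quick orientation to the filesystem API above, here is a minimal sketch (not part of the package; the image path "image.dd" and the volume offset are hypothetical) that walks the directory tree of an NTFS image, much as main() does:

import logging

from ntfs.volume import FlatVolume
from ntfs.BinaryParser import Mmap
from ntfs.filesystem import NTFSFilesystem, NTFSDirectory


def walk(entry, depth=0):
    # print each entry indented by its depth, recursing into directories
    print("  " * depth + entry.get_name())
    if isinstance(entry, NTFSDirectory):
        for child in entry.get_children():
            walk(child, depth + 1)


logging.basicConfig(level=logging.INFO)
with Mmap("image.dd") as buf:   # hypothetical image path
    v = FlatVolume(buf, 0)      # 0 is the byte offset of the volume in the image
    fs = NTFSFilesystem(v)
    walk(fs.get_root_directory())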
-------------------------------------------------------------------------------- /ntfs/logfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/logfile/__init__.py -------------------------------------------------------------------------------- /ntfs/mft/.MFT.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/mft/.MFT.py.swp -------------------------------------------------------------------------------- /ntfs/mft/MFT.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import array 4 | import os 5 | import sys 6 | import struct 7 | import logging 8 | from datetime import datetime 9 | from collections import OrderedDict # python 2.7 only 10 | 11 | from .. import Progress 12 | from .. import BinaryParser 13 | from ..BinaryParser import Block 14 | from ..BinaryParser import Nestable 15 | 16 | 17 | g_logger = logging.getLogger("ntfs.mft") 18 | 19 | 20 | class INDXException(Exception): 21 | """ 22 | Base Exception class for INDX parsing. 23 | """ 24 | def __init__(self, value): 25 | """ 26 | Constructor. 27 | Arguments: 28 | - `value`: A string description. 29 | """ 30 | super(INDXException, self).__init__() 31 | self._value = value 32 | 33 | def __str__(self): 34 | return "INDX Exception: %s" % (self._value) 35 | 36 | 37 | class FixupBlock(Block): 38 | """ 39 | a fixup block requires modification to the underlying buffer. 40 | - we don't want to do it to the underlying buffer 41 | - if its mmapped, we'd change the source file 42 | - if its a string, then this would raise an exception 43 | - we can keep a shadow file/buffer for writes to the underlying storage 44 | - this is most complete 45 | - also most complex to implement 46 | - we can make a copy of the buffer, and work with that 47 | we take the third option for ease of implementation 48 | 49 | some notes: 50 | - we change the buffer for this object from whats passed to the constructor 51 | - we change the offset for this object from whats passed to the constructor 52 | - we assume the total object size is no greater than the size of the fixups! 
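a concrete example of the scheme (values hypothetical): with
num_fixups = 3, the update sequence array holds one check word followed
by two replacement words; the last word of each 512-byte sector must
match the check word, and fixup() patches it back to the corresponding
replacement before parsing continues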
53 | """ 54 | def __init__(self, buf, offset, parent): 55 | super(FixupBlock, self).__init__(buf, offset) 56 | 57 | def fixup(self, num_fixups, fixup_value_offset): 58 | fixup_buffer = array.array("b", self.unpack_binary(0, length=(num_fixups - 1) * 512)) 59 | self._buf = fixup_buffer 60 | self._offset = 0 61 | 62 | fixup_value = self.unpack_word(fixup_value_offset) 63 | 64 | for i in range(0, num_fixups - 1): 65 | fixup_offset = 512 * (i + 1) - 2 66 | check_value = self.unpack_word(fixup_offset) 67 | 68 | if check_value != fixup_value: 69 | logging.warning("Bad fixup at %s", hex(self.offset() + fixup_offset)) 70 | continue 71 | 72 | new_value = self.unpack_word(fixup_value_offset + 2 + 2 * i) 73 | self.pack_word(fixup_offset, new_value) 74 | 75 | check_value = self.unpack_word(fixup_offset) 76 | g_logger.debug("Fixup verified at %s and patched from %s to %s.", 77 | hex(self.offset() + fixup_offset), 78 | hex(fixup_value), hex(check_value)) 79 | 80 | 81 | class INDEX_ENTRY_FLAGS: 82 | """ 83 | sizeof() == WORD 84 | """ 85 | INDEX_ENTRY_NODE = 0x1 86 | INDEX_ENTRY_END = 0x2 87 | INDEX_ENTRY_SPACE_FILLER = 0xFFFF 88 | 89 | 90 | class INDEX_ENTRY_HEADER(Block, Nestable): 91 | def __init__(self, buf, offset, parent): 92 | super(INDEX_ENTRY_HEADER, self).__init__(buf, offset) 93 | self.declare_field("word", "length", 0x8) 94 | self.declare_field("word", "key_length") 95 | self.declare_field("word", "index_entry_flags") # see INDEX_ENTRY_FLAGS 96 | self.declare_field("word", "reserved") 97 | 98 | @staticmethod 99 | def structure_size(buf, offset, parent): 100 | return 0x10 101 | 102 | def __len__(self): 103 | return 0x10 104 | 105 | def is_index_entry_node(self): 106 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_NODE 107 | 108 | def is_index_entry_end(self): 109 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_END 110 | 111 | def is_index_entry_space_filler(self): 112 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_SPACE_FILLER 113 | 114 | 115 | class MFT_INDEX_ENTRY_HEADER(INDEX_ENTRY_HEADER): 116 | """ 117 | Index used by the MFT for INDX attributes. 118 | """ 119 | def __init__(self, buf, offset, parent): 120 | super(MFT_INDEX_ENTRY_HEADER, self).__init__(buf, offset, parent) 121 | self.declare_field("qword", "mft_reference", 0x0) 122 | 123 | 124 | class SECURE_INDEX_ENTRY_HEADER(INDEX_ENTRY_HEADER): 125 | """ 126 | Index used by the $SECURE file indices SII and SDH 127 | """ 128 | def __init__(self, buf, offset, parent): 129 | super(SECURE_INDEX_ENTRY_HEADER, self).__init__(buf, offset, parent) 130 | self.declare_field("word", "data_offset", 0x0) 131 | self.declare_field("word", "data_length") 132 | self.declare_field("dword", "reserved") 133 | 134 | 135 | class INDEX_ENTRY(Block, Nestable): 136 | """ 137 | NOTE: example structure. See the more specific classes below. 138 | Probably do not instantiate. 
139 | """ 140 | def __init__(self, buf, offset, parent): 141 | super(INDEX_ENTRY, self).__init__(buf, offset) 142 | self.declare_field(INDEX_ENTRY_HEADER, "header", 0x0) 143 | self.add_explicit_field(0x10, "string", "data") 144 | 145 | def data(self): 146 | start = self.offset() + 0x10 147 | end = start + self.header().key_length() 148 | return self._buf[start:end] 149 | 150 | @staticmethod 151 | def structure_size(buf, offset, parent): 152 | return BinaryParser.read_word(buf, offset + 0x8) 153 | 154 | def __len__(self): 155 | return self.header().length() 156 | 157 | def is_valid(self): 158 | return True 159 | 160 | 161 | class MFT_INDEX_ENTRY(Block, Nestable): 162 | """ 163 | Index entry for the MFT directory index $I30, attribute type 0x90. 164 | """ 165 | def __init__(self, buf, offset, parent): 166 | super(MFT_INDEX_ENTRY, self).__init__(buf, offset) 167 | self.declare_field(MFT_INDEX_ENTRY_HEADER, "header", 0x0) 168 | self.declare_field(FilenameAttribute, "filename_information") 169 | 170 | @staticmethod 171 | def structure_size(buf, offset, parent): 172 | return BinaryParser.read_word(buf, offset + 0x8) 173 | 174 | def __len__(self): 175 | return self.header().length() 176 | 177 | def is_valid(self): 178 | # this is a bit of a mess, but it should work 179 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 180 | future_date = datetime(2025, 1, 1, 0, 0, 0) 181 | try: 182 | fn = self.filename_information() 183 | except: 184 | return False 185 | if not fn: 186 | return False 187 | try: 188 | return fn.modified_time() > recent_date and \ 189 | fn.accessed_time() > recent_date and \ 190 | fn.changed_time() > recent_date and \ 191 | fn.created_time() > recent_date and \ 192 | fn.modified_time() < future_date and \ 193 | fn.accessed_time() < future_date and \ 194 | fn.changed_time() < future_date and \ 195 | fn.created_time() < future_date 196 | except ValueError: 197 | return False 198 | 199 | 200 | class SII_INDEX_ENTRY(Block, Nestable): 201 | """ 202 | Index entry for the $SECURE:$SII index. 203 | """ 204 | def __init__(self, buf, offset, parent): 205 | super(SII_INDEX_ENTRY, self).__init__(buf, offset) 206 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 207 | self.declare_field("dword", "security_id") 208 | 209 | @staticmethod 210 | def structure_size(buf, offset, parent): 211 | return BinaryParser.read_word(buf, offset + 0x8) 212 | 213 | def __len__(self): 214 | return self.header().length() 215 | 216 | def is_valid(self): 217 | # TODO(wb): test 218 | return 1 < self.header().length() < 0x30 and \ 219 | 1 < self.header().key_lenght() < 0x20 220 | 221 | 222 | class SDH_INDEX_ENTRY(Block, Nestable): 223 | """ 224 | Index entry for the $SECURE:$SDH index. 
225 | """ 226 | def __init__(self, buf, offset, parent): 227 | super(SDH_INDEX_ENTRY, self).__init__(buf, offset) 228 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 229 | self.declare_field("dword", "hash") 230 | self.declare_field("dword", "security_id") 231 | 232 | @staticmethod 233 | def structure_size(buf, offset, parent): 234 | return BinaryParser.read_word(buf, offset + 0x8) 235 | 236 | def __len__(self): 237 | return self.header().length() 238 | 239 | def is_valid(self): 240 | # TODO(wb): test 241 | return 1 < self.header().length() < 0x30 and \ 242 | 1 < self.header().key_lenght() < 0x20 243 | 244 | 245 | class INDEX_HEADER_FLAGS: 246 | SMALL_INDEX = 0x0 # MFT: INDX_ROOT only 247 | LARGE_INDEX = 0x1 # MFT: requires INDX_ALLOCATION 248 | LEAF_NODE = 0x1 249 | INDEX_NODE = 0x2 250 | NODE_MASK = 0x1 251 | 252 | 253 | class INDEX_HEADER(Block, Nestable): 254 | def __init__(self, buf, offset, parent): 255 | super(INDEX_HEADER, self).__init__(buf, offset) 256 | self.declare_field("dword", "entries_offset", 0x0) 257 | self.declare_field("dword", "index_length") 258 | self.declare_field("dword", "allocated_size") 259 | self.declare_field("byte", "index_header_flags") # see INDEX_HEADER_FLAGS 260 | # then 3 bytes padding/reserved 261 | 262 | @staticmethod 263 | def structure_size(buf, offset, parent): 264 | return 0x1C 265 | 266 | def __len__(self): 267 | return 0x1C 268 | 269 | def is_small_index(self): 270 | return self.index_header_flags() & INDEX_HEADER_FLAGS.SMALL_INDEX 271 | 272 | def is_large_index(self): 273 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LARGE_INDEX 274 | 275 | def is_leaf_node(self): 276 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LEAF_NODE 277 | 278 | def is_index_node(self): 279 | return self.index_header_flags() & INDEX_HEADER_FLAGS.INDEX_NODE 280 | 281 | def is_NODE_MASK(self): 282 | return self.index_header_flags() & INDEX_HEADER_FLAGS.NODE_MASK 283 | 284 | 285 | class INDEX(Block, Nestable): 286 | def __init__(self, buf, offset, parent, index_entry_class): 287 | self._INDEX_ENTRY = index_entry_class 288 | super(INDEX, self).__init__(buf, offset) 289 | self.declare_field(INDEX_HEADER, "header", 0x0) 290 | self.add_explicit_field(self.header().entries_offset(), 291 | INDEX_ENTRY, "entries") 292 | slack_start = self.header().entries_offset() + self.header().index_length() 293 | # TODO: reenable 294 | #self.add_explicit_field(slack_start, INDEX_ENTRY, "slack_entries") 295 | 296 | @staticmethod 297 | def structure_size(buf, offset, parent): 298 | return BinaryParser.read_dword(buf, offset + 0x8) 299 | 300 | def __len__(self): 301 | return self.header().allocated_size() 302 | 303 | def entries(self): 304 | """ 305 | A generator that returns each INDEX_ENTRY associated with this node. 306 | """ 307 | offset = self.header().entries_offset() 308 | if offset == 0: 309 | return 310 | while offset <= self.header().index_length() - 0x52: 311 | e = self._INDEX_ENTRY(self._buf, self.offset() + offset, self) 312 | offset += len(e) 313 | yield e 314 | 315 | def slack_entries(self): 316 | """ 317 | A generator that yields INDEX_ENTRYs found in the slack space 318 | associated with this header. 
319 | """ 320 | offset = self.header().index_length() 321 | try: 322 | while offset <= self.header().allocated_size() - 0x52: 323 | try: 324 | g_logger.debug("Trying to find slack entry at %s.", hex(offset)) 325 | e = self._INDEX_ENTRY(self._buf, offset, self) 326 | if e.is_valid(): 327 | g_logger.debug("Slack entry is valid.") 328 | offset += len(e) or 1 329 | yield e 330 | else: 331 | g_logger.debug("Slack entry is invalid.") 332 | # TODO(wb): raise a custom exception 333 | raise BinaryParser.ParseException("Not a deleted entry") 334 | except BinaryParser.ParseException: 335 | g_logger.debug("Scanning one byte forward.") 336 | offset += 1 337 | except struct.error: 338 | logging.debug("Slack entry parsing overran buffer.") 339 | pass 340 | 341 | 342 | class INDEX_ROOT(Block, Nestable): 343 | def __init__(self, buf, offset, parent=None): 344 | super(INDEX_ROOT, self).__init__(buf, offset) 345 | self.declare_field("dword", "type", 0x0) 346 | self.declare_field("dword", "collation_rule") 347 | self.declare_field("dword", "index_record_size_bytes") 348 | self.declare_field("byte", "index_record_size_clusters") 349 | self.declare_field("byte", "unused1") 350 | self.declare_field("byte", "unused2") 351 | self.declare_field("byte", "unused3") 352 | self._index_offset = self.current_field_offset() 353 | self.add_explicit_field(self._index_offset, INDEX, "index") 354 | 355 | def index(self): 356 | return INDEX(self._buf, self._offset + self._index_offset, 357 | self, MFT_INDEX_ENTRY) 358 | 359 | @staticmethod 360 | def structure_size(buf, offset, parent): 361 | return 0x10 + INDEX.structure_size(buf, offset + 0x10, parent) 362 | 363 | def __len__(self): 364 | return 0x10 + len(self.index()) 365 | 366 | 367 | class NTATTR_STANDARD_INDEX_HEADER(Block): 368 | def __init__(self, buf, offset, parent): 369 | super(NTATTR_STANDARD_INDEX_HEADER, self).__init__(buf, offset) 370 | self.declare_field("dword", "entry_list_start", 0x0) 371 | self.declare_field("dword", "entry_list_end") 372 | self.declare_field("dword", "entry_list_allocation_end") 373 | self.declare_field("dword", "flags") 374 | self.declare_field("binary", "list_buffer", \ 375 | self.entry_list_start(), 376 | self.entry_list_allocation_end() - self.entry_list_start()) 377 | 378 | def entries(self): 379 | """ 380 | A generator that returns each INDX entry associated with this node. 381 | """ 382 | offset = self.entry_list_start() 383 | if offset == 0: 384 | return 385 | 386 | # 0x52 is an approximate size of a small index entry 387 | while offset <= self.entry_list_end() - 0x52: 388 | e = IndexEntry(self._buf, self.offset() + offset, self) 389 | offset += e.length() 390 | yield e 391 | 392 | def slack_entries(self): 393 | """ 394 | A generator that yields INDX entries found in the slack space 395 | associated with this header. 
396 | """ 397 | offset = self.entry_list_end() 398 | try: 399 | # 0x52 is an approximate size of a small index entry 400 | while offset <= self.entry_list_allocation_end() - 0x52: 401 | try: 402 | e = SlackIndexEntry(self._buf, offset, self) 403 | if e.is_valid(): 404 | offset += e.length() or 1 405 | yield e 406 | else: 407 | # TODO(wb): raise a custom exception 408 | raise BinaryParser.ParseException("Not a deleted entry") 409 | except BinaryParser.ParseException: 410 | # ensure we're always moving forward 411 | offset += 1 412 | except struct.error: 413 | pass 414 | 415 | 416 | class IndexRootHeader(Block): 417 | def __init__(self, buf, offset, parent): 418 | super(IndexRootHeader, self).__init__(buf, offset) 419 | self.declare_field("dword", "type", 0x0) 420 | self.declare_field("dword", "collation_rule") 421 | self.declare_field("dword", "index_record_size_bytes") 422 | self.declare_field("byte", "index_record_size_clusters") 423 | self.declare_field("byte", "unused1") 424 | self.declare_field("byte", "unused2") 425 | self.declare_field("byte", "unused3") 426 | self._node_header_offset = self.current_field_offset() 427 | 428 | def node_header(self): 429 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 430 | self.offset() + self._node_header_offset, 431 | self) 432 | 433 | 434 | class IndexRecordHeader(FixupBlock): 435 | def __init__(self, buf, offset, parent): 436 | super(IndexRecordHeader, self).__init__(buf, offset, parent) 437 | self.declare_field("dword", "magic", 0x0) 438 | self.declare_field("word", "usa_offset") 439 | self.declare_field("word", "usa_count") 440 | self.declare_field("qword", "lsn") 441 | self.declare_field("qword", "vcn") 442 | self._node_header_offset = self.current_field_offset() 443 | self.fixup(self.usa_count(), self.usa_offset()) 444 | 445 | def node_header(self): 446 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 447 | self.offset() + self._node_header_offset, 448 | self) 449 | 450 | 451 | class INDEX_BLOCK(FixupBlock): 452 | def __init__(self, buf, offset, parent=None): 453 | super(INDEX_BLOCK, self).__init__(buf, offset, parent) 454 | self.declare_field("dword", "magic", 0x0) 455 | self.declare_field("word", "usa_offset") 456 | self.declare_field("word", "usa_count") 457 | self.declare_field("qword", "lsn") 458 | self.declare_field("qword", "vcn") 459 | self._index_offset = self.current_field_offset() 460 | self.add_explicit_field(self._index_offset, INDEX, "index") 461 | self.fixup(self.usa_count(), self.usa_offset()) 462 | 463 | def index(self): 464 | return INDEX(self._buf, self._offset + self._index_offset, 465 | self, MFT_INDEX_ENTRY) 466 | 467 | @staticmethod 468 | def structure_size(buf, offset, parent): 469 | return 0x30 + INDEX.structure_size(buf, offset + 0x10, parent) 470 | 471 | def __len__(self): 472 | return 0x1000 473 | 474 | 475 | class INDEX_ALLOCATION(FixupBlock): 476 | def __init__(self, buf, offset, parent=None): 477 | super(INDEX_ALLOCATION, self).__init__(buf, offset, parent) 478 | self.add_explicit_field(0, INDEX_BLOCK, "blocks") 479 | 480 | @staticmethod 481 | def guess_num_blocks(buf, offset): 482 | count = 0 483 | # TODO: don't hardcode things 484 | BLOCK_SIZE = 0x1000 485 | try: 486 | while BinaryParser.read_dword(buf, offset) == 0x58444e49: # "INDX" 487 | offset += BLOCK_SIZE 488 | count += 1 489 | except (IndexError, BinaryParser.OverrunBufferException): 490 | return count 491 | return count 492 | 493 | def blocks(self): 494 | for i in xrange(INDEX_ALLOCATION.guess_num_blocks(self._buf, self.offset())): 495 | # TODO: don't 
hardcode things 496 | yield INDEX_BLOCK(self._buf, self._offset + 0x1000 * i) 497 | 498 | @staticmethod 499 | def structure_size(buf, offset, parent): 500 | # TODO: don't hardcode things 501 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(buf, offset) 502 | 503 | def __len__(self): 504 | # TODO: don't hardcode things 505 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(self._buf, self._offset) 506 | 507 | 508 | class IndexEntry(Block): 509 | def __init__(self, buf, offset, parent): 510 | super(IndexEntry, self).__init__(buf, offset) 511 | self.declare_field("qword", "mft_reference", 0x0) 512 | self.declare_field("word", "length") 513 | self.declare_field("word", "filename_information_length") 514 | self.declare_field("dword", "flags") 515 | self.declare_field("binary", "filename_information_buffer", \ 516 | self.current_field_offset(), 517 | self.filename_information_length()) 518 | self.declare_field("qword", "child_vcn", 519 | BinaryParser.align(self.current_field_offset(), 0x8)) 520 | 521 | def filename_information(self): 522 | return FilenameAttribute(self._buf, 523 | self.offset() + self._off_filename_information_buffer, 524 | self) 525 | 526 | 527 | class StandardInformationFieldDoesNotExist(Exception): 528 | def __init__(self, msg): 529 | self._msg = msg 530 | 531 | def __str__(self): 532 | return "Standard Information attribute field does not exist: %s" % (self._msg) 533 | 534 | 535 | class StandardInformation(Block): 536 | # TODO(wb): implement sizing so we can make this nestable 537 | def __init__(self, buf, offset, parent): 538 | super(StandardInformation, self).__init__(buf, offset) 539 | self.declare_field("filetime", "created_time", 0x0) 540 | self.declare_field("filetime", "modified_time") 541 | self.declare_field("filetime", "changed_time") 542 | self.declare_field("filetime", "accessed_time") 543 | self.declare_field("dword", "attributes") 544 | self.declare_field("binary", "reserved", self.current_field_offset(), 0xC) 545 | # self.declare_field("dword", "owner_id", 0x30) # Win2k+, NTFS 3.x 546 | # self.declare_field("dword", "security_id") # Win2k+, NTFS 3.x 547 | # self.declare_field("qword", "quota_charged") # Win2k+, NTFS 3.x 548 | # self.declare_field("qword", "usn") # Win2k+, NTFS 3.x 549 | 550 | # Can't implement this unless we know the NTFS version in use 551 | #@staticmethod 552 | #def structure_size(buf, offset, parent): 553 | # return 0x42 + (read_byte(buf, offset + 0x40) * 2) 554 | 555 | # Can't implement this unless we know the NTFS version in use 556 | #def __len__(self): 557 | # return 0x42 + (self.filename_length() * 2) 558 | 559 | def owner_id(self): 560 | """ 561 | This is an explicit method because it may not exist in OSes under Win2k 562 | 563 | @raises StandardInformationFieldDoesNotExist 564 | """ 565 | try: 566 | return self.unpack_dword(0x30) 567 | except BinaryParser.OverrunBufferException: 568 | raise StandardInformationFieldDoesNotExist("Owner ID") 569 | 570 | def security_id(self): 571 | """ 572 | This is an explicit method because it may not exist in OSes under Win2k 573 | 574 | @raises StandardInformationFieldDoesNotExist 575 | """ 576 | try: 577 | return self.unpack_dword(0x34) 578 | except BinaryParser.OverrunBufferException: 579 | raise StandardInformationFieldDoesNotExist("Security ID") 580 | 581 | def quota_charged(self): 582 | """ 583 | This is an explicit method because it may not exist in OSes under Win2k 584 | 585 | @raises StandardInformationFieldDoesNotExist 586 | """ 587 | try: 588 | return self.unpack_dword(0x38) 589 | 
except BinaryParser.OverrunBufferException: 590 | raise StandardInformationFieldDoesNotExist("Quota Charged") 591 | 592 | def usn(self): 593 | """ 594 | This is an explicit method because the field does not exist on OSes older than Win2k. 595 | 596 | @raises StandardInformationFieldDoesNotExist 597 | """ 598 | try: 599 | return self.unpack_qword(0x40) # the USN is a qword field; a dword read would truncate it 600 | except BinaryParser.OverrunBufferException: 601 | raise StandardInformationFieldDoesNotExist("USN") 602 | 603 | 604 | class FilenameAttribute(Block, Nestable): 605 | def __init__(self, buf, offset, parent): 606 | super(FilenameAttribute, self).__init__(buf, offset) 607 | self.declare_field("qword", "mft_parent_reference", 0x0) 608 | self.declare_field("filetime", "created_time") 609 | self.declare_field("filetime", "modified_time") 610 | self.declare_field("filetime", "changed_time") 611 | self.declare_field("filetime", "accessed_time") 612 | self.declare_field("qword", "physical_size") 613 | self.declare_field("qword", "logical_size") 614 | self.declare_field("dword", "flags") 615 | self.declare_field("dword", "reparse_value") 616 | self.declare_field("byte", "filename_length") 617 | self.declare_field("byte", "filename_type") 618 | self.declare_field("wstring", "filename", 0x42, self.filename_length()) 619 | 620 | @staticmethod 621 | def structure_size(buf, offset, parent): 622 | return 0x42 + (BinaryParser.read_byte(buf, offset + 0x40) * 2) 623 | 624 | def __len__(self): 625 | return 0x42 + (self.filename_length() * 2) 626 | 627 | 628 | class SlackIndexEntry(IndexEntry): 629 | def __init__(self, buf, offset, parent): 630 | """ 631 | Constructor. 632 | Arguments: 633 | - `buf`: Byte string containing NTFS INDX file 634 | - `offset`: The offset into the buffer at which the block starts. 635 | - `parent`: The parent NTATTR_STANDARD_INDEX_HEADER block, 636 | which links to this block. 
637 | """ 638 | super(SlackIndexEntry, self).__init__(buf, offset, parent) 639 | 640 | def is_valid(self): 641 | # this is a bit of a mess, but it should work 642 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 643 | future_date = datetime(2025, 1, 1, 0, 0, 0) 644 | try: 645 | fn = self.filename_information() 646 | except Exception: 647 | return False 648 | if not fn: 649 | return False 650 | try: 651 | return fn.modified_time() > recent_date and \ 652 | fn.accessed_time() > recent_date and \ 653 | fn.changed_time() > recent_date and \ 654 | fn.created_time() > recent_date and \ 655 | fn.modified_time() < future_date and \ 656 | fn.accessed_time() < future_date and \ 657 | fn.changed_time() < future_date and \ 658 | fn.created_time() < future_date 659 | except ValueError: 660 | return False 661 | 662 | 663 | class Runentry(Block, Nestable): 664 | def __init__(self, buf, offset, parent): 665 | super(Runentry, self).__init__(buf, offset) 666 | self.declare_field("byte", "header") 667 | self._offset_length = self.header() >> 4 668 | self._length_length = self.header() & 0x0F 669 | self.declare_field("binary", 670 | "length_binary", 671 | self.current_field_offset(), self._length_length) 672 | self.declare_field("binary", 673 | "offset_binary", 674 | self.current_field_offset(), self._offset_length) 675 | 676 | @staticmethod 677 | def structure_size(buf, offset, parent): 678 | b = BinaryParser.read_byte(buf, offset) 679 | return (b >> 4) + (b & 0x0F) + 1 680 | 681 | def __len__(self): 682 | return 0x1 + (self._length_length + self._offset_length) 683 | 684 | def is_valid(self): 685 | return self._offset_length > 0 and self._length_length > 0 686 | 687 | def lsb2num(self, binary): 688 | count = 0 689 | ret = 0 690 | for b in binary: 691 | ret += ord(b) << (8 * count) 692 | count += 1 693 | return ret 694 | 695 | def lsb2signednum(self, binary): 696 | count = 0 697 | ret = 0 698 | working = [] 699 | 700 | is_negative = (ord(binary[-1]) & (1 << 7) != 0) 701 | if is_negative: 702 | working = [ord(b) ^ 0xFF for b in binary] 703 | else: 704 | working = [ord(b) for b in binary] 705 | for b in working: 706 | ret += b << (8 * count) 707 | count += 1 708 | if is_negative: 709 | ret += 1 710 | ret *= -1 711 | return ret 712 | 713 | def offset(self): 714 | # TODO(wb): make this run_offset 715 | return self.lsb2signednum(self.offset_binary()) 716 | 717 | def length(self): 718 | # TODO(wb): make this run_length 719 | return self.lsb2num(self.length_binary()) 720 | 721 | 722 | class Runlist(Block): 723 | def __init__(self, buf, offset, parent): 724 | super(Runlist, self).__init__(buf, offset) 725 | 726 | @staticmethod 727 | def structure_size(buf, offset, parent): 728 | length = 0 729 | while True: 730 | b = BinaryParser.read_byte(buf, offset + length) 731 | length += 1 732 | if b == 0: 733 | return length 734 | 735 | length += (b >> 4) + (b & 0x0F) 736 | 737 | def __len__(self): 738 | return sum(map(len, self._entries())) 739 | 740 | def _entries(self, length=None): 741 | ret = [] 742 | offset = self.offset() 743 | entry = Runentry(self._buf, offset, self) 744 | while entry.header() != 0 and \ 745 | (not length or offset < self.offset() + length) and \ 746 | entry.is_valid(): 747 | ret.append(entry) 748 | offset += len(entry) 749 | entry = Runentry(self._buf, offset, self) 750 | return ret 751 | 752 | def runs(self, length=None): 753 | """ 754 | Yields tuples (volume offset, length). 
755 | Recall that the entries are relative to one another. 756 | """ 757 | last_offset = 0 758 | for e in self._entries(length=length): 759 | current_offset = last_offset + e.offset() 760 | current_length = e.length() 761 | last_offset = current_offset 762 | yield (current_offset, current_length) 763 | 764 | 765 | class ATTR_TYPE: 766 | STANDARD_INFORMATION = 0x10 767 | FILENAME_INFORMATION = 0x30 768 | DATA = 0x80 769 | INDEX_ROOT = 0x90 770 | INDEX_ALLOCATION = 0xA0 771 | 772 | 773 | class Attribute(Block, Nestable): 774 | TYPES = { 775 | 16: "$STANDARD INFORMATION", 776 | 32: "$ATTRIBUTE LIST", 777 | 48: "$FILENAME INFORMATION", 778 | 64: "$OBJECT ID/$VOLUME VERSION", 779 | 80: "$SECURITY DESCRIPTOR", 780 | 96: "$VOLUME NAME", 781 | 112: "$VOLUME INFORMATION", 782 | 128: "$DATA", 783 | 144: "$INDEX ROOT", 784 | 160: "$INDEX ALLOCATION", 785 | 176: "$BITMAP", 786 | 192: "$SYMBOLIC LINK", 787 | 208: "$REPARSE POINT/$EA INFORMATION", 788 | 224: "$EA", 789 | 256: "$LOGGED UTILITY STREAM", 790 | } 791 | 792 | FLAGS = { 793 | 0x01: "readonly", 794 | 0x02: "hidden", 795 | 0x04: "system", 796 | 0x08: "unused-dos", 797 | 0x10: "directory-dos", 798 | 0x20: "archive", 799 | 0x40: "device", 800 | 0x80: "normal", 801 | 0x100: "temporary", 802 | 0x200: "sparse", 803 | 0x400: "reparse-point", 804 | 0x800: "compressed", 805 | 0x1000: "offline", 806 | 0x2000: "not-indexed", 807 | 0x4000: "encrypted", 808 | 0x10000000: "has-indx", 809 | 0x20000000: "has-view-index", 810 | } 811 | 812 | def __init__(self, buf, offset, parent): 813 | super(Attribute, self).__init__(buf, offset) 814 | self.declare_field("dword", "type") 815 | self.declare_field("dword", "size") # this value must be rounded up to 0x8 byte alignment 816 | self.declare_field("byte", "non_resident") 817 | self.declare_field("byte", "name_length") 818 | self.declare_field("word", "name_offset") 819 | self.declare_field("word", "flags") 820 | self.declare_field("word", "instance") 821 | if self.non_resident() > 0: 822 | self.declare_field("qword", "lowest_vcn", 0x10) 823 | self.declare_field("qword", "highest_vcn") 824 | self.declare_field("word", "runlist_offset") 825 | self.declare_field("byte", "compression_unit") 826 | self.declare_field("byte", "reserved1") 827 | self.declare_field("byte", "reserved2") 828 | self.declare_field("byte", "reserved3") 829 | self.declare_field("byte", "reserved4") 830 | self.declare_field("byte", "reserved5") 831 | self.declare_field("qword", "allocated_size") 832 | self.declare_field("qword", "data_size") 833 | self.declare_field("qword", "initialized_size") 834 | self.declare_field("qword", "compressed_size") 835 | else: 836 | self.declare_field("dword", "value_length", 0x10) 837 | self.declare_field("word", "value_offset") 838 | self.declare_field("byte", "value_flags") 839 | self.declare_field("byte", "reserved") 840 | self.declare_field("binary", "value", 841 | self.value_offset(), self.value_length()) 842 | 843 | @staticmethod 844 | def structure_size(buf, offset, parent): 845 | s = BinaryParser.read_dword(buf, offset + 0x4) 846 | return s + (8 - (s % 8)) 847 | 848 | def __len__(self): 849 | return self.size() 850 | 851 | def __str__(self): 852 | return "%s" % (Attribute.TYPES[self.type()]) 853 | 854 | def runlist(self): 855 | return Runlist(self._buf, self.offset() + self.runlist_offset(), self) 856 | 857 | def size(self): 858 | s = self.unpack_dword(self._off_size) 859 | return s + (8 - (s % 8)) 860 | 861 | def name(self): 862 | return self.unpack_wstring(self.name_offset(), self.name_length()) 863 | 864 | 
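# --- Editor's illustrative aside (not part of the original MFT.py) ---------
# Runentry/Runlist above decode NTFS "mapping pairs": each run entry opens
# with a header byte whose low nibble gives the size of the run-length field
# and whose high nibble gives the size of the run-offset field; the offset is
# a signed, little-endian value relative to the previous run's starting LCN
# (which is what runs() accumulates). The standalone sketch below mirrors
# that algorithm without depending on BinaryParser; the buffer bytes are
# hypothetical, chosen only to exercise one positive and one negative offset.

def _example_decode_runs(raw):
    raw = bytearray(raw)
    pos = 0
    last_lcn = 0
    runs = []
    while raw[pos] != 0:  # a 0x00 header byte terminates the runlist
        header = raw[pos]
        length_size = header & 0x0F  # like Runentry._length_length
        offset_size = header >> 4    # like Runentry._offset_length
        pos += 1
        length = 0
        for i in range(length_size):  # little-endian accumulation, like lsb2num
            length |= raw[pos + i] << (8 * i)
        pos += length_size
        offset = 0
        for i in range(offset_size):
            offset |= raw[pos + i] << (8 * i)
        if offset_size and (raw[pos + offset_size - 1] & 0x80):
            offset -= 1 << (8 * offset_size)  # sign-extend, like lsb2signednum
        pos += offset_size
        last_lcn += offset  # run offsets are relative to the previous run
        runs.append((last_lcn, length))
    return runs

# For example (header 0x21 means a 2-byte offset and a 1-byte length):
#   _example_decode_runs(b"\x21\x18\x34\x56\x21\x10\x00\xff\x00")
#   -> [(0x5634, 0x18), (0x5534, 0x10)]
# i.e. 0x18 clusters at LCN 0x5634, then 0x10 clusters starting 0x100
# clusters earlier, matching what Runlist.runs() would yield.
# ---------------------------------------------------------------------------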
865 | class MFT_RECORD_FLAGS: 866 | MFT_RECORD_IN_USE = 0x1 867 | MFT_RECORD_IS_DIRECTORY = 0x2 868 | 869 | 870 | def MREF(mft_reference): 871 | """ 872 | Given a MREF/mft_reference, return the record number part. 873 | """ 874 | return mft_reference & 0xFFFFFFFFFFFF 875 | 876 | 877 | def MSEQNO(mft_reference): 878 | """ 879 | Given a MREF/mft_reference, return the sequence number part. 880 | """ 881 | return (mft_reference >> 48) & 0xFFFF 882 | 883 | 884 | class AttributeNotFoundError(Exception): 885 | pass 886 | 887 | 888 | class MFTRecord(FixupBlock): 889 | def __init__(self, buf, offset, parent, inode=None): 890 | super(MFTRecord, self).__init__(buf, offset, parent) 891 | 892 | # 0x00 Magic: "FILE" or "BAAD" 893 | self.declare_field("dword", "magic") 894 | # 0x04 Offset to fixup array 895 | self.declare_field("word", "usa_offset") 896 | # 0x06 Number of entries in fixup array 897 | self.declare_field("word", "usa_count") 898 | # 0x08 $LogFile sequence number 899 | self.declare_field("qword", "lsn") 900 | # 0x10 Sequence value 901 | self.declare_field("word", "sequence_number") 902 | # 0x12 Link Count 903 | self.declare_field("word", "link_count") 904 | # 0x14 Offset of first attribute 905 | self.declare_field("word", "attrs_offset") 906 | # 0x16 Flags: 907 | # 0x00 - not in use 908 | # 0x01 - in use 909 | # 0x02 - directory 910 | # 0x03 - directory in use 911 | self.declare_field("word", "flags") 912 | 913 | # 0x18 Used size of MFT entry 914 | self.declare_field("dword", "bytes_in_use") 915 | # 0x1c Allocated size of MFT entry 916 | self.declare_field("dword", "bytes_allocated") 917 | # 0x20 File reference to base record 918 | self.declare_field("qword", "base_mft_record") 919 | # 0x28 Next attribute identifier 920 | self.declare_field("word", "next_attr_instance") 921 | 922 | # Attributes and fixup values 923 | # 0x2a 924 | self.declare_field("word", "reserved") 925 | # 0x2c 926 | self.declare_field("dword", "mft_record_number") 927 | 928 | self.inode = inode or self.mft_record_number() 929 | self.fixup(self.usa_count(), self.usa_offset()) 930 | 931 | def attributes(self): 932 | offset = self.attrs_offset() 933 | right_border = self.offset() + self.bytes_in_use() 934 | 935 | while (self.unpack_dword(offset) != 0 and 936 | self.unpack_dword(offset) != 0xFFFFFFFF and 937 | offset + self.unpack_dword(offset + 4) <= right_border): 938 | a = Attribute(self._buf, offset, self) 939 | offset += len(a) 940 | yield a 941 | 942 | def attribute(self, attr_type): 943 | for a in self.attributes(): 944 | if a.type() == attr_type: 945 | return a 946 | raise AttributeNotFoundError() 947 | 948 | def is_directory(self): 949 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IS_DIRECTORY 950 | 951 | def is_active(self): 952 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IN_USE 953 | 954 | # this is a required resident attribute 955 | def filename_informations(self): 956 | """ 957 | MFT Records may have more than one FN info attribute, 958 | each with a different type of filename (8.3, POSIX, etc.) 959 | 960 | This function returns all of these attributes. 
961 | """ 962 | ret = [] 963 | for a in self.attributes(): 964 | if a.type() == ATTR_TYPE.FILENAME_INFORMATION: 965 | try: 966 | value = a.value() 967 | check = FilenameAttribute(value, 0, self) 968 | ret.append(check) 969 | except Exception: 970 | pass 971 | return ret 972 | 973 | # this is a required resident attribute 974 | def filename_information(self): 975 | """ 976 | MFT Records may have more than one FN info attribute, 977 | each with a different type of filename (8.3, POSIX, etc.) 978 | 979 | This function returns the attribute with the most complete name, 980 | that is, it tends towards Win32, then POSIX, and then 8.3. 981 | """ 982 | fn = None 983 | for check in self.filename_informations(): 984 | try: 985 | if check.filename_type() == 0x0001 or \ 986 | check.filename_type() == 0x0003: 987 | return check 988 | fn = check 989 | except Exception: 990 | pass 991 | return fn 992 | 993 | # this is a required resident attribute 994 | def standard_information(self): 995 | try: 996 | attr = self.attribute(ATTR_TYPE.STANDARD_INFORMATION) 997 | return StandardInformation(attr.value(), 0, self) 998 | except AttributeNotFoundError: # attribute() raises AttributeNotFoundError, not AttributeError 999 | return None 1000 | 1001 | def data_attribute(self): 1002 | """ 1003 | Returns None if the default $DATA attribute does not exist 1004 | """ 1005 | for attr in self.attributes(): 1006 | if attr.type() == ATTR_TYPE.DATA and attr.name() == "": 1007 | return attr 1008 | 1009 | def slack_data(self): 1010 | """ 1011 | Returns a binary string containing the MFT record slack. 1012 | """ 1013 | return self._buf[self.offset()+self.bytes_in_use():self.offset() + 1024].tostring() 1014 | 1015 | def active_data(self): 1016 | """ 1017 | Returns a binary string containing the active MFT record data. 1018 | """ 1019 | return self._buf[self.offset():self.offset() + self.bytes_in_use()].tostring() 1020 | 1021 | 1022 | class InvalidAttributeException(INDXException): 1023 | def __init__(self, value): 1024 | super(InvalidAttributeException, self).__init__(value) 1025 | 1026 | def __str__(self): 1027 | return "Invalid attribute Exception(%s)" % (self._value) 1028 | 1029 | 1030 | class InvalidMFTRecordNumber(Exception): 1031 | def __init__(self, value): 1032 | self.value = value 1033 | 1034 | 1035 | class MFTOperationNotImplementedError(Exception): 1036 | def __init__(self, msg): 1037 | super(MFTOperationNotImplementedError, self).__init__(msg) 1038 | self._msg = msg 1039 | 1040 | def __str__(self): 1041 | return "MFTOperationNotImplemented(%s)" % (self._msg) 1042 | 1043 | 1044 | class InvalidRecordException(Exception): 1045 | def __init__(self, msg): 1046 | super(InvalidRecordException, self).__init__(msg) 1047 | self._msg = msg 1048 | 1049 | def __str__(self): 1050 | return "InvalidRecordException(%s)" % (self._msg) 1051 | 1052 | 1053 | class Cache(object): 1054 | def __init__(self, size_limit): 1055 | super(Cache, self).__init__() 1056 | self._c = OrderedDict() 1057 | self._size_limit = size_limit 1058 | 1059 | def insert(self, k, v): 1060 | """ 1061 | add a key and value to the front 1062 | """ 1063 | self._c[k] = v 1064 | if len(self._c) > self._size_limit: 1065 | self._c.popitem(last=False) 1066 | 1067 | def exists(self, k): 1068 | return k in self._c 1069 | 1070 | def touch(self, k): 1071 | """ 1072 | bring a key to the front 1073 | """ 1074 | v = self._c[k] 1075 | del self._c[k] 1076 | self._c[k] = v 1077 | 1078 | def get(self, k): 1079 | return self._c[k] 1080 | 1081 | 1082 | MFT_RECORD_SIZE = 1024 1083 | FILE_SEP = "\\" 1084 | UNKNOWN_ENTRY = "??" 
1085 | ORPHAN_ENTRY = "$ORPHAN" 1086 | CYCLE_ENTRY = "<CYCLE>" 1087 | 1088 | 1089 | class MFTEnumerator(object): 1090 | def __init__(self, buf, record_cache=None, path_cache=None): 1091 | DEFAULT_CACHE_SIZE = 102400 1092 | if record_cache is None: 1093 | record_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1094 | if path_cache is None: 1095 | path_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1096 | 1097 | self._buf = buf 1098 | self._record_cache = record_cache 1099 | self._path_cache = path_cache 1100 | 1101 | def len(self): 1102 | return len(self._buf) / MFT_RECORD_SIZE 1103 | 1104 | def get_record_buf(self, record_num): 1105 | """ 1106 | @raises OverrunBufferException: if the record_num is beyond the end of the MFT 1107 | """ 1108 | start = record_num * MFT_RECORD_SIZE 1109 | end = start + MFT_RECORD_SIZE 1110 | g_logger.debug("get_record_buf: start: %s len: %s bufsize: %s", hex(start), hex(end - start), hex(len(self._buf))) 1111 | if end > len(self._buf): 1112 | raise BinaryParser.OverrunBufferException(end, len(self._buf)) 1113 | 1114 | buf = self._buf[start:end] 1115 | return buf 1116 | 1117 | def get_record(self, record_num): 1118 | """ 1119 | @raises OverrunBufferException: if the record_num is beyond the end of the MFT. 1120 | @raises InvalidRecordException: if the record appears invalid (incorrect magic header). 1121 | """ 1122 | if self._record_cache.exists(record_num): 1123 | self._record_cache.touch(record_num) 1124 | return self._record_cache.get(record_num) 1125 | 1126 | record_buf = self.get_record_buf(record_num) 1127 | if BinaryParser.read_dword(record_buf, 0x0) != 0x454C4946: 1128 | raise InvalidRecordException("record_num: %d" % record_num) 1129 | 1130 | record = MFTRecord(record_buf, 0, False, inode=record_num) 1131 | self._record_cache.insert(record_num, record) 1132 | return record 1133 | 1134 | def enumerate_records(self): 1135 | index = 0 1136 | while True: 1137 | if index == 12: # reserved records are 12-15 1138 | index = 16 1139 | try: 1140 | record = self.get_record(index) 1141 | yield record 1142 | index += 1 1143 | except InvalidRecordException: 1144 | index += 1 1145 | continue 1146 | except BinaryParser.OverrunBufferException: 1147 | return 1148 | 1149 | def enumerate_paths(self): 1150 | for record in self.enumerate_records(): 1151 | path = self.get_path(record) 1152 | yield record, path 1153 | 1154 | def get_path(self, record): 1155 | """ 1156 | @type record: MFTRecord 1157 | @rtype: str 1158 | @return: A string containing the path of the given record. 1159 | It will begin with the first path component, that is, 1160 | something like "Documents and Settings\Administrator\bad.exe". 1161 | In the event that a path component cannot be determined, it is 1162 | replaced by "??". If the parent of an entry cannot be verified, 1163 | then it is added to the $ORPHAN directory. If a cycle is detected 1164 | during the path resolution, then the offending entry is 1165 | replaced with "<CYCLE>". This occasionally happens at the root 1166 | directory. 1167 | """ 1168 | r = self._get_path_impl(record, set()) 1169 | if r == "": 1170 | return FILE_SEP 1171 | else: 1172 | return r 1173 | 1174 | 1175 | def _get_path_impl(self, record, cycledetector): 1176 | """ 1177 | @type cycledetector: set of int 1178 | @param cycledetector: A set of numbers that describe which records have been processed 1179 | in the building of the path. 
1180 | """ 1181 | key = "%d-%d-%d-%d-%d" % (record.magic(), record.lsn(), 1182 | record.link_count(), record.mft_record_number(), 1183 | record.flags()) 1184 | if self._path_cache.exists(key): 1185 | self._path_cache.touch(key) 1186 | return self._path_cache.get(key) 1187 | 1188 | record_num = record.mft_record_number() 1189 | if record_num == 5: 1190 | return "" 1191 | 1192 | if record_num in cycledetector: 1193 | return CYCLE_ENTRY 1194 | cycledetector.add(record_num) 1195 | 1196 | fn = record.filename_information() 1197 | 1198 | if not fn: 1199 | return UNKNOWN_ENTRY 1200 | else: 1201 | record_filename = fn.filename() 1202 | 1203 | parent_record_num = MREF(fn.mft_parent_reference()) 1204 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 1205 | 1206 | try: 1207 | parent_record = self.get_record(parent_record_num) 1208 | except (BinaryParser.OverrunBufferException, InvalidRecordException): 1209 | return ORPHAN_ENTRY + FILE_SEP + record_filename 1210 | 1211 | if parent_record.sequence_number() != parent_seq_num: 1212 | return ORPHAN_ENTRY + FILE_SEP + record_filename 1213 | 1214 | path = self._get_path_impl(parent_record, cycledetector) + FILE_SEP + record_filename 1215 | self._path_cache.insert(key, path) 1216 | return path 1217 | 1218 | def get_record_by_path(self, path): 1219 | lower_path = path.lower() 1220 | for record, record_path in self.enumerate_paths(): 1221 | if lower_path == record_path.lower(): 1222 | return record 1223 | raise KeyError("Path not found: %s" % path) 1224 | 1225 | 1226 | class MFTTreeNode(object): 1227 | def __init__(self, nodes, record_number, filename, parent_record_number): 1228 | super(MFTTreeNode, self).__init__() 1229 | self._nodes = nodes 1230 | self._record_number = record_number 1231 | self._filename = filename 1232 | self._parent_record_number = parent_record_number 1233 | self._children_record_numbers = [] 1234 | 1235 | def get_record_number(self): 1236 | return self._record_number 1237 | 1238 | def get_filename(self): 1239 | return self._filename 1240 | 1241 | def get_parent(self): 1242 | return self._nodes[self._parent_record_number] 1243 | 1244 | def add_child_record_number(self, child_record_number): 1245 | self._children_record_numbers.append(child_record_number) 1246 | 1247 | def get_children_nodes(self): 1248 | return map(lambda n: self._nodes[n], self._children_record_numbers) 1249 | 1250 | def get_child_node(self, filename): 1251 | for child in self.get_children_nodes(): 1252 | if child.get_filename() == filename: 1253 | return child 1254 | raise KeyError("Failed to find filename: " + filename) 1255 | 1256 | 1257 | ROOT_INDEX = 5 1258 | class MFTTree(object): 1259 | ORPHAN_INDEX = 12 1260 | 1261 | def __init__(self, buf): 1262 | super(MFTTree, self).__init__() 1263 | self._buf = buf 1264 | self._nodes = {} # dict of record number -> MFTTreeNode 1265 | 1266 | def _add_record(self, mft_enumerator, record): 1267 | record_num = record.mft_record_number() 1268 | 1269 | if record_num in self._nodes: 1270 | return 1271 | 1272 | if record_num == ROOT_INDEX: 1273 | self._nodes[ROOT_INDEX] = MFTTreeNode(self._nodes, ROOT_INDEX, "\.", ROOT_INDEX) 1274 | return 1275 | 1276 | fn = record.filename_information() 1277 | if not fn: 1278 | # then there's no filename, or parent reference 1279 | # there could be some standard information (timestamps), 1280 | # or named streams 1281 | # but still no parent link. 
1282 | # ...so let's bail 1283 | return 1284 | 1285 | parent_record_num = MREF(fn.mft_parent_reference()) 1286 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 1287 | 1288 | try: 1289 | parent_record = mft_enumerator.get_record(parent_record_num) 1290 | except (BinaryParser.OverrunBufferException, InvalidRecordException): 1291 | parent_record_num = MFTTree.ORPHAN_INDEX 1292 | parent_record = None 1293 | 1294 | if not parent_record: 1295 | parent_record_num = MFTTree.ORPHAN_INDEX 1296 | elif parent_record.sequence_number() != parent_seq_num: 1297 | parent_record_num = MFTTree.ORPHAN_INDEX 1298 | 1299 | if parent_record_num != MFTTree.ORPHAN_INDEX and parent_record: 1300 | self._add_record(mft_enumerator, parent_record) 1301 | 1302 | # self._nodes is a dict, so a missing parent raises KeyError, not IndexError; 1303 | # fall back to the orphan node, which build() creates before enumerating 1304 | if parent_record_num not in self._nodes: 1305 | parent_record_num = MFTTree.ORPHAN_INDEX 1306 | parent_node = self._nodes[parent_record_num] 1307 | record_node = MFTTreeNode(self._nodes, record_num, fn.filename(), parent_record_num) 1308 | self._nodes[record_num] = record_node 1309 | if parent_node: 1310 | parent_node.add_child_record_number(record_num) 1311 | 1312 | def build(self, record_cache=None, 1313 | path_cache=None, progress_class=Progress.NullProgress): 1314 | DEFAULT_CACHE_SIZE = 1024 1315 | if record_cache is None: 1316 | record_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1317 | if path_cache is None: 1318 | path_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1319 | 1320 | enum = MFTEnumerator(self._buf, record_cache=record_cache, path_cache=path_cache) 1321 | 1322 | self._nodes[MFTTree.ORPHAN_INDEX] = MFTTreeNode(self._nodes, MFTTree.ORPHAN_INDEX, 1323 | ORPHAN_ENTRY, ROOT_INDEX) 1324 | 1325 | count = 0 1326 | progress = progress_class(len(self._buf) / 1024) 1327 | for record in enum.enumerate_records(): 1328 | self._add_record(enum, record) 1329 | count += 1 1330 | progress.set_current(count) 1331 | progress.set_complete() 1332 | 1333 | def get_root(self): 1334 | return self._nodes[ROOT_INDEX] 1335 | -------------------------------------------------------------------------------- /ntfs/mft/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __all__ = [ 3 | "MFT", 4 | ] 5 | -------------------------------------------------------------------------------- /ntfs/secure/SDS.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # This file is part of INDXParse. 4 | # 5 | # Copyright 2011-13 Will Ballenthin 6 | # while at Mandiant 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | # Version v.1.2 21 | 22 | from .. 
import BinaryParser 23 | from ..BinaryParser import Block 24 | from ..BinaryParser import Nestable 25 | 26 | 27 | class NULL_OBJECT(object): 28 | def __init__(self): 29 | super(NULL_OBJECT, self).__init__() 30 | 31 | @staticmethod 32 | def structure_size(buf, offset, parent): 33 | return 0 34 | 35 | def __len__(self): 36 | return 0 37 | 38 | null_object = NULL_OBJECT() 39 | 40 | 41 | class SECURITY_DESCRIPTOR_CONTROL: 42 | SE_OWNER_DEFAULTED = 1 << 0 43 | SE_GROUP_DEFAULTED = 1 << 1 44 | SE_DACL_PRESENT = 1 << 2 45 | SE_DACL_DEFAULTED = 1 << 3 46 | SE_SACL_PRESENT = 1 << 4 47 | SE_SACL_DEFAULTED = 1 << 5 48 | SE_SACL_UNUSED0 = 1 << 6 49 | SE_SACL_UNUSED1 = 1 << 7 50 | SE_DACL_AUTO_INHERIT_REQ = 1 << 8 51 | SE_SACL_AUTO_INHERIT_REQ = 1 << 9 52 | SE_DACL_AUTO_INHERITED = 1 << 10 53 | SE_SACL_AUTO_INHERITED = 1 << 11 54 | SE_DACL_PROTECTED = 1 << 12 55 | SE_SACL_PROTECTED = 1 << 13 56 | SE_RM_CONTROL_VALID = 1 << 14 57 | SE_SELF_RELATIVE = 1 << 15 58 | 59 | 60 | class SID_IDENTIFIER_AUTHORITY(Block, Nestable): 61 | def __init__(self, buf, offset, parent): 62 | super(SID_IDENTIFIER_AUTHORITY, self).__init__(buf, offset) 63 | self.declare_field("word_be", "high_part", 0x0) 64 | self.declare_field("dword_be", "low_part") 65 | 66 | @staticmethod 67 | def structure_size(buf, offset, parent): 68 | return 6 69 | 70 | def __len__(self): 71 | return SID_IDENTIFIER_AUTHORITY.structure_size(self._buf, self.absolute_offset(0x0), None) 72 | 73 | def __str__(self): 74 | return "%s" % ((self.high_part() << 32) + self.low_part()) # parens required: "+" binds tighter than "<<" 75 | 76 | 77 | class SID(Block, Nestable): 78 | def __init__(self, buf, offset, parent): 79 | super(SID, self).__init__(buf, offset) 80 | self.declare_field("byte", "revision", 0x0) 81 | self.declare_field("byte", "sub_authority_count") 82 | self.declare_field(SID_IDENTIFIER_AUTHORITY, "identifier_authority") 83 | self.declare_field("dword", "sub_authorities", count=self.sub_authority_count()) 84 | 85 | @staticmethod 86 | def structure_size(buf, offset, parent): 87 | sub_auth_count = BinaryParser.read_byte(buf, offset + 1) 88 | auth_size = SID_IDENTIFIER_AUTHORITY.structure_size(buf, offset + 2, parent) 89 | return 2 + auth_size + (sub_auth_count * 4) 90 | 91 | def __len__(self): 92 | return self._off_sub_authorities + (self.sub_authority_count() * 4) 93 | 94 | def string(self): 95 | ret = "S-%d-%s" % (self.revision(), self.identifier_authority()) 96 | for sub_auth in self.sub_authorities(): 97 | ret += "-%s" % (str(sub_auth)) 98 | return ret 99 | 100 | 101 | class ACE_TYPES: 102 | """ 103 | One byte. 104 | """ 105 | ACCESS_MIN_MS_ACE_TYPE = 0 106 | ACCESS_ALLOWED_ACE_TYPE = 0 107 | ACCESS_DENIED_ACE_TYPE = 1 108 | SYSTEM_AUDIT_ACE_TYPE = 2 109 | SYSTEM_ALARM_ACE_TYPE = 3 # Not implemented as of Win2k. 110 | ACCESS_MAX_MS_V2_ACE_TYPE = 3 111 | 112 | ACCESS_ALLOWED_COMPOUND_ACE_TYPE = 4 113 | ACCESS_MAX_MS_V3_ACE_TYPE = 4 114 | 115 | # The following are Win2k only. 116 | ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5 117 | ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5 118 | ACCESS_DENIED_OBJECT_ACE_TYPE = 6 119 | SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7 120 | SYSTEM_ALARM_OBJECT_ACE_TYPE = 8 121 | ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8 122 | ACCESS_MAX_MS_V4_ACE_TYPE = 8 123 | 124 | # This one is for WinNT/2k. 125 | ACCESS_MAX_MS_ACE_TYPE = 8 126 | 127 | 128 | class ACE_FLAGS: 129 | """ 130 | One byte. 131 | """ 132 | OBJECT_INHERIT_ACE = 0x01 133 | CONTAINER_INHERIT_ACE = 0x02 134 | NO_PROPAGATE_INHERIT_ACE = 0x04 135 | INHERIT_ONLY_ACE = 0x08 136 | INHERITED_ACE = 0x10 # Win2k only. 
137 | VALID_INHERIT_FLAGS = 0x1f 138 | 139 | # The audit flags. 140 | SUCCESSFUL_ACCESS_ACE_FLAG = 0x40 141 | FAILED_ACCESS_ACE_FLAG = 0x80 142 | 143 | 144 | class ACCESS_MASK: 145 | """ 146 | DWORD. 147 | """ 148 | FILE_READ_DATA = 0x00000001 149 | FILE_LIST_DIRECTORY = 0x00000001 150 | FILE_WRITE_DATA = 0x00000002 151 | FILE_ADD_FILE = 0x00000002 152 | FILE_APPEND_DATA = 0x00000004 153 | FILE_ADD_SUBDIRECTORY = 0x00000004 154 | FILE_READ_EA = 0x00000008 155 | FILE_WRITE_EA = 0x00000010 156 | FILE_EXECUTE = 0x00000020 157 | FILE_TRAVERSE = 0x00000020 158 | FILE_DELETE_CHILD = 0x00000040 159 | FILE_READ_ATTRIBUTES = 0x00000080 160 | FILE_WRITE_ATTRIBUTES = 0x00000100 161 | DELETE = 0x00010000 162 | READ_CONTROL = 0x00020000 163 | WRITE_DAC = 0x00040000 164 | WRITE_OWNER = 0x00080000 165 | SYNCHRONIZE = 0x00100000 166 | STANDARD_RIGHTS_READ = 0x00020000 167 | STANDARD_RIGHTS_WRITE = 0x00020000 168 | STANDARD_RIGHTS_EXECUTE = 0x00020000 169 | STANDARD_RIGHTS_REQUIRED = 0x000f0000 170 | STANDARD_RIGHTS_ALL = 0x001f0000 171 | ACCESS_SYSTEM_SECURITY = 0x01000000 172 | MAXIMUM_ALLOWED = 0x02000000 173 | GENERIC_ALL = 0x10000000 174 | GENERIC_EXECUTE = 0x20000000 175 | GENERIC_WRITE = 0x40000000 176 | GENERIC_READ = 0x80000000 177 | 178 | 179 | class ACE(Block): 180 | def __init__(self, buf, offset, parent): 181 | super(ACE, self).__init__(buf, offset) 182 | self.declare_field("byte", "ace_type", 0x0) 183 | self.declare_field("byte", "ace_flags") 184 | 185 | @staticmethod 186 | def get_ace(buf, offset, parent): 187 | header = ACE(buf, offset, parent) 188 | if header.ace_type() == ACE_TYPES.ACCESS_ALLOWED_ACE_TYPE: 189 | return ACCESS_ALLOWED_ACE(buf, offset, parent) 190 | elif header.ace_type() == ACE_TYPES.ACCESS_DENIED_ACE_TYPE: 191 | return ACCESS_DENIED_ACE(buf, offset, parent) 192 | elif header.ace_type() == ACE_TYPES.SYSTEM_AUDIT_ACE_TYPE: 193 | return SYSTEM_AUDIT_ACE(buf, offset, parent) 194 | elif header.ace_type() == ACE_TYPES.SYSTEM_ALARM_ACE_TYPE: 195 | return SYSTEM_ALARM_ACE(buf, offset, parent) 196 | elif header.ace_type() == ACE_TYPES.ACCESS_ALLOWED_OBJECT_ACE_TYPE: 197 | return ACCESS_ALLOWED_OBJECT_ACE(buf, offset, parent) 198 | elif header.ace_type() == ACE_TYPES.ACCESS_DENIED_OBJECT_ACE_TYPE: 199 | return ACCESS_DENIED_OBJECT_ACE(buf, offset, parent) 200 | elif header.ace_type() == ACE_TYPES.SYSTEM_AUDIT_OBJECT_ACE_TYPE: 201 | return SYSTEM_AUDIT_OBJECT_ACE(buf, offset, parent) 202 | elif header.ace_type() == ACE_TYPES.SYSTEM_ALARM_OBJECT_ACE_TYPE: 203 | return SYSTEM_ALARM_OBJECT_ACE(buf, offset, parent) 204 | else: 205 | # TODO(wb): raise a custom exception type 206 | raise BinaryParser.ParseException("unknown ACE type") 207 | 208 | 209 | class StandardACE(ACE, Nestable): 210 | def __init__(self, buf, offset, parent): 211 | super(StandardACE, self).__init__(buf, offset, parent) 212 | self.declare_field("word", "size", 0x2) 213 | self.declare_field("dword", "access_mask") 214 | self.declare_field(SID, "sid") 215 | 216 | @staticmethod 217 | def structure_size(buf, offset, parent): 218 | return BinaryParser.read_word(buf, offset + 0x2) 219 | 220 | def __len__(self): 221 | return self.size() 222 | 223 | 224 | class ACCESS_ALLOWED_ACE(StandardACE): 225 | def __init__(self, buf, offset, parent): 226 | super(ACCESS_ALLOWED_ACE, self).__init__(buf, offset, parent) 227 | 228 | 229 | class ACCESS_DENIED_ACE(StandardACE): 230 | def __init__(self, buf, offset, parent): 231 | super(ACCESS_DENIED_ACE, self).__init__(buf, offset, parent) 232 | 233 | 234 | class 
SYSTEM_AUDIT_ACE(StandardACE): 235 | def __init__(self, buf, offset, parent): 236 | super(SYSTEM_AUDIT_ACE, self).__init__(buf, offset, parent) 237 | 238 | 239 | class SYSTEM_ALARM_ACE(StandardACE): 240 | def __init__(self, buf, offset, parent): 241 | super(SYSTEM_ALARM_ACE, self).__init__(buf, offset, parent) 242 | 243 | 244 | class OBJECT_ACE_FLAGS: 245 | """ 246 | DWORD. 247 | """ 248 | ACE_OBJECT_TYPE_PRESENT = 1 249 | ACE_INHERITED_OBJECT_TYPE_PRESENT = 2 250 | 251 | 252 | class ObjectACE(ACE, Nestable): 253 | def __init__(self, buf, offset, parent): 254 | super(ObjectACE, self).__init__(buf, offset, parent) 255 | self.declare_field("word", "size", 0x2) 256 | self.declare_field("dword", "access_mask") 257 | self.declare_field("dword", "object_flags") 258 | self.declare_field("guid", "object_type") 259 | self.declare_field("guid", "inherited_object_type") 260 | 261 | @staticmethod 262 | def structure_size(buf, offset, parent): 263 | return BinaryParser.read_word(buf, offset + 0x2) 264 | 265 | def __len__(self): 266 | return self.size() 267 | 268 | 269 | class ACCESS_ALLOWED_OBJECT_ACE(ObjectACE): 270 | def __init__(self, buf, offset, parent): 271 | super(ACCESS_ALLOWED_OBJECT_ACE, self).__init__(buf, offset, parent) 272 | 273 | 274 | class ACCESS_DENIED_OBJECT_ACE(ObjectACE): 275 | def __init__(self, buf, offset, parent): 276 | super(ACCESS_DENIED_OBJECT_ACE, self).__init__(buf, offset, parent) 277 | 278 | 279 | class SYSTEM_AUDIT_OBJECT_ACE(ObjectACE): 280 | def __init__(self, buf, offset, parent): 281 | super(SYSTEM_AUDIT_OBJECT_ACE, self).__init__(buf, offset, parent) 282 | 283 | 284 | class SYSTEM_ALARM_OBJECT_ACE(ObjectACE): 285 | def __init__(self, buf, offset, parent): 286 | super(SYSTEM_ALARM_OBJECT_ACE, self).__init__(buf, offset, parent) 287 | 288 | 289 | class ACL(Block, Nestable): 290 | def __init__(self, buf, offset, parent): 291 | super(ACL, self).__init__(buf, offset) 292 | self.declare_field("byte", "revision", 0x0) 293 | self.declare_field("byte", "alignment1") 294 | self.declare_field("word", "size") 295 | self.declare_field("word", "ace_count") 296 | self.declare_field("word", "alignment2") 297 | self._off_ACEs = self.current_field_offset() 298 | self.add_explicit_field(self._off_ACEs, ACE, "ACEs") 299 | 300 | @staticmethod 301 | def structure_size(buf, offset, parent): 302 | return BinaryParser.read_word(buf, offset + 0x2) 303 | 304 | def __len__(self): 305 | return self.size() 306 | 307 | def ACEs(self): 308 | ofs = self._off_ACEs 309 | for _ in range(self.ace_count()): 310 | a = ACE.get_ace(self._buf, self.offset() + ofs, self) 311 | yield a 312 | ofs += a.size() 313 | ofs = BinaryParser.align(ofs, 4) 314 | 315 | 316 | class NULL_ACL(object): 317 | """ 318 | TODO(wb): Not actually sure what the NULL ACL is... 319 | just guessing at the values here. 
320 | """ 321 | def __init__(self): 322 | super(NULL_ACL, self).__init__() 323 | 324 | def revision(self): 325 | return 1 326 | 327 | def alignment1(self): 328 | return 0 329 | 330 | def size(self): 331 | return 0 332 | 333 | def ace_count(self): 334 | return 0 335 | 336 | def ACEs(self): 337 | return 338 | 339 | @staticmethod 340 | def structure_size(buf, offset, parent): 341 | return 0 342 | 343 | def __len__(self): 344 | return 0 345 | 346 | 347 | class SECURITY_DESCRIPTOR_RELATIVE(Block, Nestable): 348 | def __init__(self, buf, offset, parent): 349 | super(SECURITY_DESCRIPTOR_RELATIVE, self).__init__(buf, offset) 350 | self.declare_field("byte", "revision", 0x0) 351 | self.declare_field("byte", "alignment") 352 | self.declare_field("word", "control") 353 | self.declare_field("dword", "owner_offset") 354 | self.declare_field("dword", "group_offset") 355 | self.declare_field("dword", "sacl_offset") 356 | self.declare_field("dword", "dacl_offset") 357 | 358 | self.add_explicit_field(self.owner_offset(), "SID", "owner") 359 | self.add_explicit_field(self.group_offset(), "SID", "group") 360 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_SACL_PRESENT: 361 | self.add_explicit_field(self.sacl_offset(), "ACL", "sacl") 362 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_DACL_PRESENT: 363 | self.add_explicit_field(self.dacl_offset(), "ACL", "dacl") 364 | 365 | @staticmethod 366 | def structure_size(buf, offset, parent): 367 | return len(SECURITY_DESCRIPTOR_RELATIVE(buf, offset, parent)) 368 | 369 | def __len__(self): 370 | ret = 20 371 | ret += len((self.owner() or null_object)) 372 | ret += len((self.group() or null_object)) 373 | ret += len((self.sacl() or null_object)) 374 | ret += len((self.dacl() or null_object)) 375 | return ret 376 | 377 | def owner(self): 378 | if self.owner_offset() != 0: 379 | return SID(self._buf, self.absolute_offset(self.owner_offset()), self) 380 | else: 381 | return None 382 | 383 | def group(self): 384 | if self.group_offset() != 0: 385 | return SID(self._buf, self.absolute_offset(self.group_offset()), self) 386 | else: 387 | return None 388 | 389 | def sacl(self): 390 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_SACL_PRESENT: 391 | if self.sacl_offset() > 0: 392 | return ACL(self._buf, self.absolute_offset(self.sacl_offset()), self) 393 | else: 394 | return NULL_ACL() 395 | else: 396 | return None 397 | 398 | def dacl(self): 399 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_DACL_PRESENT: 400 | if self.dacl_offset() > 0: 401 | return ACL(self._buf, self.absolute_offset(self.dacl_offset()), self) 402 | else: 403 | return NULL_ACL() 404 | else: 405 | return None 406 | 407 | 408 | class SDS_ENTRY(Block, Nestable): 409 | def __init__(self, buf, offset, parent): 410 | super(SDS_ENTRY, self).__init__(buf, offset) 411 | self.declare_field("dword", "hash", 0x0) 412 | self.declare_field("dword", "security_id") 413 | self.declare_field("qword", "offset") 414 | self.declare_field("dword", "length") 415 | self.declare_field(SECURITY_DESCRIPTOR_RELATIVE, "sid") 416 | 417 | @staticmethod 418 | def structure_size(buf, offset, parent): 419 | return BinaryParser.read_dword(buf, offset + 0x10) 420 | 421 | def __len__(self): 422 | return self.length() 423 | 424 | 425 | class SDS(Block): 426 | def __init__(self, buf, offset, parent): 427 | super(SDS, self).__init__(buf, offset) 428 | self.add_explicit_field(0, SDS_ENTRY, "sds_entries") # the entries are SDS_ENTRY structures, not nested SDS blocks 429 | 430 | def sds_entries(self): 431 | ofs = 0 432 | while len(self._buf) > self.offset() + ofs + 0x14: 433 | s = 
SDS_ENTRY(self._buf, self.offset() + ofs, self) 434 | if len(s) != 0: 435 | yield s 436 | ofs += len(s) 437 | ofs = BinaryParser.align(ofs, 0x10) 438 | else: 439 | if ofs % 0x10000 == 0: 440 | return 441 | else: 442 | ofs = BinaryParser.align(ofs, 0x10000) 443 | 444 | 445 | def main(): 446 | import sys 447 | import mmap 448 | import contextlib 449 | 450 | with open(sys.argv[1], 'rb') as f: 451 | with contextlib.closing(mmap.mmap(f.fileno(), 0, 452 | access=mmap.ACCESS_READ)) as buf: 453 | s = SDS(buf, 0, None) 454 | print("SDS") 455 | for e in s.sds_entries(): 456 | print(" SDS_ENTRY") 457 | print(e.get_all_string(indent=2)) 458 | 459 | if __name__ == "__main__": 460 | main() 461 | -------------------------------------------------------------------------------- /ntfs/secure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/secure/__init__.py -------------------------------------------------------------------------------- /ntfs/usnjrnl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/usnjrnl/__init__.py -------------------------------------------------------------------------------- /ntfs/volume/__init__.py: -------------------------------------------------------------------------------- 1 | from ntfs.BinaryParser import Block 2 | from ntfs.BinaryParser import Mmap 3 | from ntfs.FileMap import FileMap 4 | 5 | 6 | class Volume(Block): 7 | """ 8 | A volume is a logically contiguous run of bytes over which a FS is found. 9 | 10 | Use FlatVolume over this. 11 | """ 12 | __unpackable__ = True 13 | def __init__(self, buf, offset, sector_size=512): 14 | super(Volume, self).__init__(buf, offset) 15 | self._sector_size = sector_size 16 | 17 | def __getitem__(self, index): 18 | return self._buf[index + self._offset] 19 | 20 | def __getslice__(self, start, end): 21 | return self._buf[start + self._offset:end + self._offset] 22 | 23 | def __len__(self): 24 | return len(self._buf) - self._offset 25 | 26 | 27 | class FlatVolume(Volume): 28 | """ 29 | A volume found in a physically contiguous run of bytes. 
30 | """ 31 | def __init__(self, buf, offset, sector_size=512): 32 | super(FlatVolume, self).__init__(buf, offset, sector_size=sector_size) 33 | 34 | 35 | def main(): 36 | import sys 37 | 38 | # two methods 39 | with open(sys.argv[1], "rb") as f: 40 | buf = FileMap(f) 41 | v = FlatVolume(buf, int(sys.argv[2])) 42 | print(list(v[3:3+4])) 43 | 44 | # probably prefer this one 45 | with Mmap(sys.argv[1]) as buf: 46 | v = FlatVolume(buf, int(sys.argv[2])) 47 | print(list(v[3:3+4])) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | from ntfs import __version__ 5 | 6 | 7 | setup(name="python-ntfs", 8 | version=__version__, 9 | description="Open source Python library for NTFS analysis", 10 | author="Willi Ballenthin", 11 | author_email="willi.ballenthin@gmail.com", 12 | url="http://www.williballenthin.com/forensics/ntfs", 13 | license="Apache License (2.0)", 14 | packages=[ 15 | "ntfs", 16 | "ntfs.mft", 17 | "ntfs.volume", 18 | "ntfs.filesystem", 19 | #"ntfs.secure", 20 | #"ntfs.logfile", 21 | #"ntfs.usnjrnl", 22 | ], 23 | classifiers=["Programming Language :: Python", 24 | "Operating System :: OS Independent", 25 | "License :: OSI Approved :: Apache Software License"], 26 | install_requires=["enum34"]) 27 | --------------------------------------------------------------------------------