├── .gitignore ├── CHANGELOG ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── examples ├── indxparse │ └── indxparse.py ├── inspect_directory │ └── inspect_directory.py ├── inspect_file │ └── inspect_file.py ├── inspect_record │ └── inspect_record.py ├── inspect_vbr │ └── inspect_vbr.py ├── mount │ ├── mount.py │ ├── readme.md │ └── requirements.txt └── parse_usnjrnl │ └── parse_usnjrnl.py ├── ntfs ├── BinaryParser.py ├── FileMap.py ├── Progress.py ├── SortedCollection.py ├── __init__.py ├── filesystem │ └── __init__.py ├── logfile │ └── __init__.py ├── mft │ ├── .MFT.py.swp │ ├── MFT.py │ └── __init__.py ├── secure │ ├── SDS.py │ └── __init__.py ├── usnjrnl │ └── __init__.py └── volume │ └── __init__.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-ntfs 2 | =========== 3 | 4 | Open source Python library for NTFS analysis 5 | -------------------------------------------------------------------------------- /examples/indxparse/indxparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clone of INDXParse.py that processes an entire file system. 3 | """ 4 | import argparse 5 | from datetime import datetime 6 | import logging 7 | 8 | from ntfs.volume import FlatVolume 9 | from ntfs.BinaryParser import Mmap 10 | from ntfs.filesystem import NTFSFilesystem 11 | from ntfs.mft.MFT import AttributeNotFoundError 12 | from ntfs.mft.MFT import ATTR_TYPE 13 | from ntfs.mft.MFT import INDEX_ALLOCATION 14 | from ntfs.mft.MFT import INDEX_ROOT 15 | 16 | 17 | g_logger = logging.getLogger("ntfs.examples.indxparse") 18 | 19 | 20 | class InvalidArgumentError(Exception): 21 | pass 22 | 23 | 24 | def get_directory_index_active_entries(fs, directory): 25 | """ 26 | get the active MFT_INDEX_ENTRYs from a directory's 27 | INDEX_ROOT and INDEX_ALLOCATION attributes 28 | """ 29 | if not directory.is_directory(): 30 | raise InvalidArgumentError() 31 | 32 | # sorry, reaching 33 | record = directory._record 34 | 35 | ret = [] 36 | 37 | try: 38 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 39 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 40 | for block in indx_alloc.blocks(): 41 | for entry in block.index().entries(): 42 | ret.append(entry) 43 | except AttributeNotFoundError: 44 | pass 45 | 46 | try: 47 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 48 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 49 | for entry in indx_root.index().entries(): 50 | ret.append(entry) 51 | except AttributeNotFoundError: 52 | pass 53 | 54 | return ret 55 | 56 | 57 | def get_directory_index_inactive_entries(fs, directory): 58 | """ 59 | get the inactive (slack) MFT_INDEX_ENTRYs from a directory's 60 | INDEX_ROOT and INDEX_ALLOCATION attributes 61 | """ 62 | if not directory.is_directory(): 63 | raise InvalidArgumentError() 64 | 65 | # sorry, reaching 66 | record = directory._record 67 | 68 | ret = [] 69 | 70 | try: 71 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 72 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 73 | for block in indx_alloc.blocks(): 74 | for entry in block.index().slack_entries(): 75 | ret.append(entry) 76 | except AttributeNotFoundError: 77 | pass 78 | 79 | try: 80 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 81 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 82 | for entry in indx_root.index().slack_entries(): 83 | ret.append(entry) 84 | except AttributeNotFoundError: 85 | pass 86 | 87 | return ret 88 | 89 | 90 | def make_dump_directory_indices_visitor(formatter): 91 | """ 92 | `formatter` is a function that accepts a dict, and returns a string. 93 | the string is dumped via print(). 94 | the schema for the dict is: 95 | active: bool 96 | path: str 97 | entry: MFT_INDEX_ENTRY 98 | 99 | this function returns a function that applies the format to the 100 | given FileSystem and Directory and dumps it out. 
101 | """ 102 | def dump_directory_indices_visitor(fs, directory): 103 | for e in get_directory_index_active_entries(fs, directory): 104 | try: 105 | print(formatter({ 106 | "active": True, 107 | "path": directory.get_full_path(), 108 | "entry": e})) 109 | except Exception as e: 110 | g_logger.warning("Failed to output entry: %s", e) 111 | for e in get_directory_index_inactive_entries(fs, directory): 112 | try: 113 | print(formatter({ 114 | "active": False, 115 | "path": directory.get_full_path(), 116 | "entry": e})) 117 | except Exception as e: 118 | g_logger.warning("Failed to output entry: %s", e) 119 | return dump_directory_indices_visitor 120 | 121 | 122 | def walk_directories(fs, directory, visitor): 123 | """ 124 | `visitor` is a function that accepts two parameters: a FileSystem 125 | and a Directory 126 | 127 | this function applies the function `visitor` to each directory 128 | in the file system recursively. 129 | """ 130 | visitor(fs, directory) 131 | for d in directory.get_directories(): 132 | walk_directories(fs, d, visitor) 133 | 134 | 135 | def safe_date(f): 136 | try: 137 | return f() 138 | except ValueError: 139 | return datetime(1970, 1, 1, 0, 0, 0) 140 | 141 | 142 | def csv_directory_index_formatter(e): 143 | entry = e["entry"].filename_information() 144 | fn = entry.filename() 145 | f = (u"{status},{path},{filename},{physical_size},{logical_size},{mtime}," 146 | u"{atime},{ctime},{crtime}") 147 | if e["active"]: 148 | status = "active" 149 | else: 150 | status = "slack" 151 | 152 | return f.format( 153 | status=status, 154 | path=e["path"], 155 | filename=entry.filename(), 156 | physical_size=entry.physical_size(), 157 | logical_size=entry.logical_size(), 158 | mtime=safe_date(entry.modified_time), 159 | atime=safe_date(entry.accessed_time), 160 | ctime=safe_date(entry.changed_time), 161 | crtime=safe_date(entry.created_time)) 162 | 163 | 164 | def bodyfile_directory_index_formatter(e): 165 | # TODO 166 | pass 167 | 168 | 169 | def main(image_filename, volume_offset, path): 170 | 171 | with Mmap(image_filename) as buf: 172 | v = FlatVolume(buf, volume_offset) 173 | fs = NTFSFilesystem(v) 174 | root = fs.get_root_directory() 175 | 176 | if path == "/": 177 | entry = root 178 | else: 179 | entry = root.get_path_entry(path) 180 | 181 | v = make_dump_directory_indices_visitor(csv_directory_index_formatter) 182 | walk_directories(fs, entry, v) 183 | 184 | if __name__ == '__main__': 185 | 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument('img_file', help='Path to image file') 188 | parser.add_argument('volume_offset', help='Offset in bytes ' 189 | 'to Boot Sector Section', 190 | type=int) 191 | parser.add_argument('path', help='Path') 192 | parser.add_argument('-d', '--debug', default=False, action='store_true') 193 | args = parser.parse_args() 194 | 195 | if args.debug: 196 | logging.basicConfig(level=logging.DEBUG) 197 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 198 | 199 | main(args.img_file, args.volume_offset, args.path) 200 | -------------------------------------------------------------------------------- /examples/inspect_directory/inspect_directory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump the directory index for a directory. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSFilesystem 9 | from ntfs.mft.MFT import AttributeNotFoundError 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import MREF 12 | from ntfs.mft.MFT import INDEX_ALLOCATION 13 | from ntfs.mft.MFT import INDEX_ROOT 14 | 15 | 16 | g_logger = logging.getLogger("ntfs.examples.inspect_directory") 17 | 18 | 19 | def main(image_filename, volume_offset, path): 20 | logging.basicConfig(level=logging.DEBUG) 21 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 22 | 23 | with Mmap(image_filename) as buf: 24 | v = FlatVolume(buf, volume_offset) 25 | fs = NTFSFilesystem(v) 26 | root = fs.get_root_directory() 27 | 28 | if path == "/": 29 | entry = root 30 | else: 31 | entry = root.get_path_entry(path) 32 | 33 | if not entry.is_directory(): 34 | g_logger.error("not a directory") 35 | return 36 | 37 | # sorry, reaching 38 | record = entry._record 39 | 40 | entries = {} 41 | try: 42 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 43 | indx_alloc = INDEX_ALLOCATION(fs.get_attribute_data(indx_alloc_attr), 0) 44 | g_logger.debug("INDEX_ALLOCATION len: %s", hex(len(indx_alloc))) 45 | g_logger.debug("alloc:\n%s", indx_alloc.get_all_string(indent=2)) 46 | indx = indx_alloc 47 | 48 | g_logger.info("found:") 49 | for block in indx.blocks(): 50 | for entry in block.index().entries(): 51 | ref = MREF(entry.header().mft_reference()) 52 | entries[ref] = entry.filename_information().filename() 53 | 54 | except AttributeNotFoundError: 55 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 56 | indx_root = INDEX_ROOT(fs.get_attribute_data(indx_root_attr), 0) 57 | g_logger.debug("INDEX_ROOT len: %s", hex(len(indx_root))) 58 | g_logger.debug("root:\n%s", indx_root.get_all_string(indent=2)) 59 | indx = indx_root 60 | 61 | g_logger.info("found:") 62 | for entry in indx.index().entries(): 63 | ref = MREF(entry.header().mft_reference()) 64 | entries[ref] = entry.filename_information().filename() 65 | 66 | for k, v in entries.iteritems(): 67 | g_logger.info(" - %s", v) 68 | 69 | 70 | if __name__ == '__main__': 71 | import sys 72 | main(sys.argv[1], int(sys.argv[2]), sys.argv[3]) 73 | 74 | -------------------------------------------------------------------------------- /examples/inspect_file/inspect_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump stuff related to a single record. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSFilesystem 9 | from ntfs.mft.MFT import AttributeNotFoundError 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import MREF 12 | from ntfs.mft.MFT import INDEX_ALLOCATION 13 | from ntfs.mft.MFT import INDEX_ROOT 14 | 15 | 16 | g_logger = logging.getLogger("ntfs.examples.inspect_record") 17 | 18 | 19 | def main(image_filename, volume_offset, record_number): 20 | logging.basicConfig(level=logging.DEBUG) 21 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 22 | 23 | with Mmap(image_filename) as buf: 24 | v = FlatVolume(buf, volume_offset) 25 | fs = NTFSFilesystem(v) 26 | record = fs.get_record(record_number) 27 | print(record.get_all_string()) 28 | 29 | 30 | if __name__ == '__main__': 31 | import sys 32 | main(sys.argv[1], int(sys.argv[2]), int(sys.argv[3])) 33 | 34 | -------------------------------------------------------------------------------- /examples/inspect_record/inspect_record.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump stuff related to a single record. 3 | """ 4 | import logging 5 | 6 | from ntfs.BinaryParser import Mmap 7 | from ntfs.mft.MFT import MFTRecord 8 | from ntfs.mft.MFT import Attribute 9 | from ntfs.mft.MFT import ATTR_TYPE 10 | from ntfs.mft.MFT import StandardInformation 11 | from ntfs.mft.MFT import FilenameAttribute 12 | 13 | 14 | g_logger = logging.getLogger("ntfs.examples.inspect_record") 15 | 16 | 17 | def main(record_filename): 18 | logging.basicConfig(level=logging.DEBUG) 19 | #logging.getLogger("ntfs.mft").setLevel(logging.INFO) 20 | 21 | with Mmap(record_filename) as buf: 22 | record = MFTRecord(buf, 0, None) 23 | print("=== MFT Record Header") 24 | print(record.get_all_string()) 25 | 26 | for attribute in record.attributes(): 27 | print("=== Attribute Header (type: {:s}) at offset {:s}".format( 28 | Attribute.TYPES[attribute.type()], 29 | hex(attribute.offset()))) 30 | print(attribute.get_all_string()) 31 | 32 | if attribute.type() == ATTR_TYPE.STANDARD_INFORMATION: 33 | print("=== STANDARD INFORMATION value") 34 | si = StandardInformation(attribute.value(), 0, None) 35 | print(si.get_all_string()) 36 | 37 | elif attribute.type() == ATTR_TYPE.FILENAME_INFORMATION: 38 | print("=== FILENAME INFORMATION value") 39 | fn = FilenameAttribute(attribute.value(), 0, None) 40 | print(fn.get_all_string()) 41 | 42 | if __name__ == '__main__': 43 | import sys 44 | main(sys.argv[1]) 45 | 46 | -------------------------------------------------------------------------------- /examples/inspect_vbr/inspect_vbr.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dump the NTFS VBR for a volume. 
3 | """ 4 | import logging 5 | 6 | from ntfs.volume import FlatVolume 7 | from ntfs.BinaryParser import Mmap 8 | from ntfs.filesystem import NTFSVBR 9 | 10 | 11 | g_logger = logging.getLogger("ntfs.examples.inspect_vbr") 12 | 13 | 14 | def main(image_filename, volume_offset): 15 | logging.basicConfig(level=logging.DEBUG) 16 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 17 | 18 | with Mmap(image_filename) as buf: 19 | v = FlatVolume(buf, volume_offset) 20 | vbr = NTFSVBR(v) 21 | print(vbr.get_all_string()) 22 | 23 | 24 | if __name__ == '__main__': 25 | import sys 26 | main(sys.argv[1], int(sys.argv[2])) 27 | 28 | -------------------------------------------------------------------------------- /examples/mount/mount.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | 3 | import os 4 | import sys 5 | import stat 6 | import errno 7 | import inspect 8 | import logging 9 | import calendar 10 | 11 | from fuse import FUSE, FuseOSError, Operations, fuse_get_context 12 | 13 | from ntfs.filesystem import NTFSFilesystem 14 | from ntfs.filesystem import ChildNotFoundError 15 | 16 | PERMISSION_ALL_READ = int("444", 8) 17 | 18 | g_logger = logging.getLogger("ntfs.examples.mount") 19 | 20 | 21 | def unixtimestamp(ts): 22 | """ 23 | unixtimestamp converts a datetime.datetime to a UNIX timestamp. 24 | @type ts: datetime.datetime 25 | @rtype: int 26 | """ 27 | return calendar.timegm(ts.utctimetuple()) 28 | 29 | 30 | def log(func): 31 | """ 32 | log is a decorator that logs the a function call with its 33 | parameters and return value. 34 | """ 35 | def inner(*args, **kwargs): 36 | func_name = inspect.stack()[3][3] 37 | if func_name == "_wrapper": 38 | func_name = inspect.stack()[2][3] 39 | (uid, gid, pid) = fuse_get_context() 40 | pre = "(%s: UID=%d GID=%d PID=%d ARGS=(%s) KWARGS=(%s))" % ( 41 | func_name, uid, gid, pid, 42 | ", ".join(map(str, list(args)[1:])), str(**kwargs)) 43 | try: 44 | g_logger.debug("log: call: %s", pre) 45 | ret = func(*args, **kwargs) 46 | g_logger.debug("log: result: %s", ret) 47 | return ret 48 | except Exception as e: 49 | g_logger.warning("log: exception: %s", str(e)) 50 | raise e 51 | return inner 52 | 53 | 54 | class NTFSFuseOperations(Operations): 55 | def __init__(self, filesystem): 56 | self._fs = filesystem 57 | self._opened_files = {} 58 | 59 | def _get_path_entry(self, path): 60 | root = self._fs.get_root_directory() 61 | if path == "/": 62 | g_logger.debug("asking for root") 63 | entry = root 64 | else: 65 | _, __, rest = path.partition("/") 66 | g_logger.debug("asking for: %s", rest) 67 | try: 68 | entry = root.get_path_entry(rest) 69 | except ChildNotFoundError: 70 | raise FuseOSError(errno.ENOENT) 71 | return entry 72 | 73 | # Filesystem methods 74 | # ================== 75 | @log 76 | def getattr(self, path, fh=None): 77 | (uid, gid, pid) = fuse_get_context() 78 | entry = self._get_path_entry(path) 79 | 80 | if entry.is_directory(): 81 | mode = (stat.S_IFDIR | PERMISSION_ALL_READ) 82 | nlink = 2 83 | else: 84 | mode = (stat.S_IFREG | PERMISSION_ALL_READ) 85 | nlink = 1 86 | 87 | return { 88 | "st_atime": unixtimestamp(entry.get_si_accessed_timestamp()), 89 | "st_ctime": unixtimestamp(entry.get_si_changed_timestamp()), 90 | "st_crtime": unixtimestamp(entry.get_si_created_timestamp()), 91 | "st_mtime": unixtimestamp(entry.get_si_modified_timestamp()), 92 | "st_size": entry.get_size(), 93 | "st_uid": uid, 94 | "st_gid": gid, 95 | "st_mode": mode, 96 | "st_nlink": nlink, 97 | } 98 | 
99 | @log 100 | def readdir(self, path, fh): 101 | dirents = ['.', '..'] 102 | entry = self._get_path_entry(path) 103 | 104 | dirents.extend(map(lambda r: r.get_name(), entry.get_children())) 105 | return dirents 106 | 107 | @log 108 | def readlink(self, path): 109 | return path 110 | 111 | @log 112 | def statfs(self, path): 113 | return dict((key, 0) for key in ('f_bavail', 'f_bfree', 114 | 'f_blocks', 'f_bsize', 'f_favail', 115 | 'f_ffree', 'f_files', 'f_flag', 116 | 'f_frsize', 'f_namemax')) 117 | 118 | @log 119 | def chmod(self, path, mode): 120 | return errno.EROFS 121 | 122 | @log 123 | def chown(self, path, uid, gid): 124 | return errno.EROFS 125 | 126 | @log 127 | def mknod(self, path, mode, dev): 128 | return errno.EROFS 129 | 130 | @log 131 | def rmdir(self, path): 132 | return errno.EROFS 133 | 134 | @log 135 | def mkdir(self, path, mode): 136 | return errno.EROFS 137 | 138 | @log 139 | def unlink(self, path): 140 | return errno.EROFS 141 | 142 | @log 143 | def symlink(self, target, name): 144 | return errno.EROFS 145 | 146 | @log 147 | def rename(self, old, new): 148 | return errno.EROFS 149 | 150 | @log 151 | def link(self, target, name): 152 | return errno.EROFS 153 | 154 | @log 155 | def utimens(self, path, times=None): 156 | return errno.EROFS 157 | 158 | # File methods 159 | # ============ 160 | 161 | def _get_available_fh(self): 162 | """ 163 | _get_available_fh returns an unused fh 164 | The caller must be careful to handle race conditions. 165 | @rtype: int 166 | """ 167 | for i in xrange(65534): 168 | if i not in self._opened_files: 169 | return i 170 | 171 | @log 172 | def open(self, path, flags): 173 | if flags & os.O_WRONLY > 0: 174 | return errno.EROFS 175 | if flags & os.O_RDWR > 0: 176 | return errno.EROFS 177 | 178 | entry = self._get_path_entry(path) 179 | 180 | # TODO(wb): race here on fh used/unused 181 | fh = self._get_available_fh() 182 | self._opened_files[fh] = entry 183 | 184 | return fh 185 | 186 | @log 187 | def read(self, path, length, offset, fh): 188 | entry = self._opened_files[fh] 189 | return entry.read(offset, length) 190 | 191 | @log 192 | def flush(self, path, fh): 193 | return "" 194 | 195 | @log 196 | def release(self, path, fh): 197 | del self._opened_files[fh] 198 | 199 | @log 200 | def create(self, path, mode, fi=None): 201 | return errno.EROFS 202 | 203 | @log 204 | def write(self, path, buf, offset, fh): 205 | return errno.EROFS 206 | 207 | @log 208 | def truncate(self, path, length, fh=None): 209 | return errno.EROFS 210 | 211 | @log 212 | def fsync(self, path, fdatasync, fh): 213 | return errno.EPERM 214 | 215 | 216 | def main(image_filename, volume_offset, mountpoint): 217 | from ntfs.volume import FlatVolume 218 | from ntfs.BinaryParser import Mmap 219 | 220 | logging.basicConfig(level=logging.DEBUG) 221 | logging.getLogger("ntfs.mft").setLevel(logging.INFO) 222 | 223 | with Mmap(image_filename) as buf: 224 | v = FlatVolume(buf, volume_offset) 225 | fs = NTFSFilesystem(v) 226 | handler = NTFSFuseOperations(fs) 227 | FUSE(handler, mountpoint, foreground=True) 228 | 229 | 230 | if __name__ == '__main__': 231 | import sys 232 | main(sys.argv[1], int(sys.argv[2]), sys.argv[3]) 233 | 234 | -------------------------------------------------------------------------------- /examples/mount/readme.md: -------------------------------------------------------------------------------- 1 | This is a read-only NTFS FUSE driver written in pure Python 2 | -------------------------------------------------------------------------------- 
/examples/mount/requirements.txt: -------------------------------------------------------------------------------- 1 | fusepy 2 | -------------------------------------------------------------------------------- /examples/parse_usnjrnl/parse_usnjrnl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Parse a UsnJrnl:$J object into a CSV file. 3 | 4 | inspired by parser-usnjrnl by Seth Nazarro (http://code.google.com/p/parser-usnjrnl/) 5 | """ 6 | import logging 7 | import sys, struct, datetime 8 | 9 | class BadRecordException(Exception): 10 | def __init__(self): 11 | pass 12 | 13 | 14 | flag_def = { 15 | 0x00: " ", 16 | 0x01: "The data in the file or directory was overwritten.", 17 | 0x02: "The file or directory was added to.", 18 | 0x04: "The file or directory was truncated.", 19 | 0x10: "Data in one or more named data streams for the file was overwritten.", 20 | 0x20: "One or more named data streams for the file were added to.", 21 | 0x40: "One or more named data streams for the file were truncated.", 22 | 0x100: "The file or directory was created for the first time.", 23 | 0x200: "The file or directory was deleted.", 24 | 0x400: "The user made a change to the file's or directory's extended attributes.", 25 | 0x800: "A change was made in the access rights to the file or directory.", 26 | 0x1000: "The file or directory was renamed and the file name in this structure is the previous name.", 27 | 0x2000: "The file or directory was renamed and the file name in this structure is the new name.", 28 | 0x4000: "A user toggled the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED attribute.", 29 | 0x8000: "A user has either changed one or more file or directory attributes or one or more time stamps.", 30 | 0x10000: "An NTFS hard link was added to or removed from the file or directory.", 31 | 0x20000: "The compression state of the file or directory was changed from or to compressed.", 32 | 0x40000: "The file or directory was encrypted or decrypted.", 33 | 0x80000: "The object identifier of the file or directory was changed.", 34 | 0x100000: "The reparse point contained in the file or directory was changed, or a reparse point was added to or deleted from the file or directory.", 35 | 0x200000: "A named stream has been added to or removed from the file or a named stream has been renamed.", 36 | 0x80000000: "The file or directory was closed." 37 | } 38 | 39 | 40 | attrs_def = { 41 | 1: 'READONLY', 42 | 2: 'HIDDEN', 43 | 4: 'SYSTEM', 44 | 8: '???', 45 | 16: 'DIRECTORY', 46 | 32: 'ARCHIVE', 47 | 64: 'DEVICE', 48 | 128: 'NORMAL', 49 | 256: 'TEMPORARY', 50 | 512: 'SPARSE_FILE', 51 | 1024: 'REPARSE_POINT', 52 | 2048: 'COMPRESSED', 53 | 4096: 'OFFLINE', 54 | 8192: 'NOT_CONTENT_INDEXED', 55 | 16384:'ENCRYPTED', 56 | 65536:'VIRTUAL' 57 | } 58 | 59 | 60 | def parse_windows_timestamp(qword): 61 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 62 | return datetime.datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 63 | 64 | 65 | def MREF(mft_reference): 66 | """ 67 | Given a MREF/mft_reference, return the record number part. 68 | """ 69 | return mft_reference & 0xFFFFFFFFFFFF 70 | 71 | 72 | def MSEQNO(mft_reference): 73 | """ 74 | Given a MREF/mft_reference, return the sequence number part.
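    For example, given the 64-bit reference 0x0005000000000010, MSEQNO() returns 0x5 (the high 16 bits) and MREF() returns 0x10 (the low 48 bits, the MFT record number).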
75 | """ 76 | return (mft_reference >> 48) & 0xFFFF 77 | 78 | 79 | def process_record(buf): 80 | offset = 0 81 | while True: 82 | record_size = struct.unpack_from(" 655360: 113 | f_offset -= 655360 114 | 115 | f.seek(f_offset) 116 | 117 | # data starts within the next 655360 118 | buf = f.read(655360) 119 | for i in range(len(buf)): 120 | if buf[i] != "\x00": 121 | f_offset += i 122 | f.seek(f_offset) 123 | break 124 | i += 1 125 | 126 | # we are at the main records now 127 | print '"size", "major", "minor", "file_ref", "file_ref_seq", "file_ref_mft_record_num", "parent_ref", "parent_ref_seq", "parent_ref_mft_record_num", "usn", "timestamp", "flags", "source", "sid", "attrs", "name_length", "unknown", "name"' 128 | while True: 129 | buf = f.read(min((f_length - f_offset), 800)) 130 | f.seek(f_offset) 131 | 132 | (gap, size, major, minor, file_ref, parent_ref, usn, timestamp, flags, source, sid, attrs, name_length, unknown, name) = process_record(buf) 133 | print '"{size:d}", "{major:d}", "{minor:d}", "{file_ref:d}", "{file_ref_seq:d}", "{file_ref_mft_record_num:d}", "{parent_ref:d}", "{parent_ref_seq:d}", "{parent_ref_mft_record_num:d}", "{usn:d}", "{timestamp:s}", "{flags:s}", "{source:d}", "{sid:d}", "{attrs:s}", "{name_length:d}", "{unknown:d}", "{name:s}"'.format( 134 | size=size, 135 | major=major, 136 | minor=minor, 137 | file_ref=file_ref, 138 | file_ref_seq=MSEQNO(file_ref), 139 | file_ref_mft_record_num=MREF(file_ref), 140 | parent_ref=parent_ref, 141 | parent_ref_seq=MSEQNO(parent_ref), 142 | parent_ref_mft_record_num=MREF(parent_ref), 143 | usn=usn, 144 | timestamp=parse_windows_timestamp(timestamp).isoformat("T") + "Z", 145 | flags=" ".join([v for (k, v) in flag_def.items() if flags & k]), 146 | source=source, 147 | sid=sid, 148 | attrs=" ".join([v for (k, v) in attrs_def.items() if attrs & k]), 149 | name_length=name_length, 150 | unknown=unknown, 151 | name=name) 152 | 153 | f_offset += gap + size 154 | if f_offset == f_length: 155 | break 156 | f.seek(f_offset) 157 | 158 | if __name__ == '__main__': 159 | offset = 0 160 | 161 | if len(sys.argv) > 2: 162 | offset = int(sys.argv[2]) 163 | main(sys.argv[1], offset) 164 | -------------------------------------------------------------------------------- /ntfs/BinaryParser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # This file is part of python-evtx. 3 | # 4 | # Copyright 2012, 2013 Willi Ballenthin 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | # Version v.0.1 20 | import mmap 21 | import sys 22 | import types 23 | import struct 24 | import logging 25 | import cPickle 26 | from datetime import datetime 27 | 28 | g_logger = logging.getLogger("ntfs.BinaryParser") 29 | 30 | 31 | def unpack_from(fmt, buf, off=0): 32 | """ 33 | Shim struct.unpack_from and divert unpacking of __unpackable__ things. 
34 | 35 | Otherwise, you'd get an exception like: 36 | TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap 37 | 38 | So, we extract a true sub-buffer from the FileMap, and feed this 39 | back into the old unpack function. 40 | There's an extra allocation and copy, but there's no getting 41 | around that. 42 | """ 43 | if isinstance(buf, basestring): 44 | return struct.unpack_from(fmt, buf, off) 45 | elif not hasattr(buf, "__unpackable__"): 46 | return struct.unpack_from(fmt, buf, off) 47 | else: 48 | size = struct.calcsize(fmt) 49 | buf = buf[off:off + size] 50 | return struct.unpack_from(fmt, buf, 0x0) 51 | 52 | 53 | def unpack(fmt, string): 54 | """ 55 | Like the shimmed unpack_from, but for struct.unpack. 56 | """ 57 | if isinstance(string, basestring): 58 | return struct.unpack(fmt, string) 59 | elif not hasattr(string, "__unpackable__"): 60 | return struct.unpack(fmt, string) 61 | else: 62 | size = struct.calcsize(fmt) 63 | buf = string[:size] 64 | return struct.unpack(fmt, buf) 65 | 66 | 67 | class Mmap(object): 68 | """ 69 | Convenience class for opening a read-only memory map for a file path. 70 | """ 71 | def __init__(self, filename): 72 | super(Mmap, self).__init__() 73 | self._filename = filename 74 | self._f = None 75 | self._mmap = None 76 | 77 | def __enter__(self): 78 | self._f = open(self._filename, "rb") 79 | self._mmap = mmap.mmap(self._f.fileno(), 0, access=mmap.ACCESS_READ) 80 | return self._mmap 81 | 82 | def __exit__(self, type, value, traceback): 83 | self._mmap.close() 84 | self._f.close() 85 | 86 | 87 | def hex_dump(src, start_addr=0): 88 | """ 89 | see: 90 | http://code.activestate.com/recipes/142812-hex-dumper/ 91 | @param src A bytestring containing the data to dump. 92 | @param start_addr An integer representing the start 93 | address of the data in whatever context it comes from. 94 | @return A string containing a classic hex dump with 16 95 | bytes per line. If start_addr is provided, then the 96 | data is interpreted as starting at this offset, and 97 | the offset column is updated accordingly. 98 | """ 99 | FILTER = ''.join([(len(repr(chr(x))) == 3) and 100 | chr(x) or 101 | '.'
for x in range(256)]) 102 | length = 16 103 | result = [] 104 | 105 | remainder_start_addr = start_addr 106 | 107 | if start_addr % length != 0: 108 | base_addr = start_addr - (start_addr % length) 109 | num_spaces = (start_addr % length) 110 | num_chars = length - (start_addr % length) 111 | 112 | spaces = " ".join([" " for i in xrange(num_spaces)]) 113 | s = src[0:num_chars] 114 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 115 | printable = s.translate(FILTER) 116 | 117 | result.append("%04X %s %s %s%s\n" % 118 | (base_addr, spaces, hexa, 119 | " " * (num_spaces + 1), printable)) 120 | 121 | src = src[num_chars:] 122 | remainder_start_addr = base_addr + length 123 | 124 | for i in xrange(0, len(src), length): 125 | s = src[i:i + length] 126 | hexa = ' '.join(["%02X" % ord(x) for x in s]) 127 | printable = s.translate(FILTER) 128 | result.append("%04X %-*s %s\n" % 129 | (remainder_start_addr + i, length * 3, 130 | hexa, printable)) 131 | 132 | return ''.join(result) 133 | 134 | 135 | class decoratorargs(object): 136 | def __new__(typ, *attr_args, **attr_kwargs): 137 | def decorator(orig_func): 138 | self = object.__new__(typ) 139 | self.__init__(orig_func, *attr_args, **attr_kwargs) 140 | return self 141 | return decorator 142 | 143 | 144 | class memoize(decoratorargs): 145 | class Node: 146 | __slots__ = ['key', 'value', 'older', 'newer'] 147 | 148 | def __init__(self, key, value, older=None, newer=None): 149 | self.key = key 150 | self.value = value 151 | self.older = older 152 | self.newer = newer 153 | 154 | def __init__(self, func, capacity=1000, 155 | keyfunc=lambda *args, **kwargs: cPickle.dumps((args, 156 | kwargs))): 157 | if not isinstance(func, property): 158 | self.func = func 159 | self.name = func.__name__ 160 | self.is_property = False 161 | else: 162 | self.func = func.fget 163 | self.name = func.fget.__name__ 164 | self.is_property = True 165 | self.capacity = capacity 166 | self.keyfunc = keyfunc 167 | self.reset() 168 | 169 | def reset(self): 170 | self.mru = self.Node(None, None) 171 | self.mru.older = self.mru.newer = self.mru 172 | self.nodes = {self.mru.key: self.mru} 173 | self.count = 1 174 | self.hits = 0 175 | self.misses = 0 176 | 177 | def __get__(self, inst, clas): 178 | self.obj = inst 179 | if self.is_property: 180 | return self.__call__() 181 | else: 182 | return self 183 | 184 | def __call__(self, *args, **kwargs): 185 | key = self.keyfunc(*args, **kwargs) 186 | try: 187 | node = self.nodes[key] 188 | except KeyError: 189 | # We have an entry not in the cache 190 | self.misses += 1 191 | func = types.MethodType(self.func, self.obj, self.name) 192 | value = func(*args, **kwargs) 193 | lru = self.mru.newer # Always true 194 | # If we haven't reached capacity 195 | if self.count < self.capacity: 196 | # Put it between the MRU and LRU - it'll be the new MRU 197 | node = self.Node(key, value, self.mru, lru) 198 | self.mru.newer = node 199 | 200 | lru.older = node 201 | self.mru = node 202 | self.count += 1 203 | else: 204 | # It's FULL! 
We'll make the LRU be the new MRU, but replace its 205 | # value first 206 | try: 207 | del self.nodes[lru.key] # This mapping is now invalid 208 | except KeyError: # HACK TODO: this may not work/leak 209 | pass 210 | lru.key = key 211 | lru.value = value 212 | self.mru = lru 213 | 214 | # Add the new mapping 215 | self.nodes[key] = self.mru 216 | return value 217 | 218 | # We have an entry in the cache 219 | self.hits += 1 220 | 221 | # If it's already the MRU, do nothing 222 | if node is self.mru: 223 | return node.value 224 | 225 | lru = self.mru.newer # Always true 226 | 227 | # If it's the LRU, update the MRU to be it 228 | if node is lru: 229 | self.mru = lru 230 | return node.value 231 | 232 | # Remove the node from the list 233 | node.older.newer = node.newer 234 | node.newer.older = node.older 235 | 236 | # Put it between MRU and LRU 237 | node.older = self.mru 238 | self.mru.newer = node 239 | 240 | node.newer = lru 241 | lru.older = node 242 | 243 | self.mru = node 244 | return node.value 245 | 246 | 247 | def align(offset, alignment): 248 | """ 249 | Return the offset rounded up to the nearest multiple of the given alignment. 250 | Arguments: 251 | - `offset`: An integer 252 | - `alignment`: An integer 253 | """ 254 | if offset % alignment == 0: 255 | return offset 256 | return offset + (alignment - (offset % alignment)) 257 | 258 | 259 | def dosdate(dosdate, dostime): 260 | """ 261 | `dosdate`: 2 bytes, little endian. 262 | `dostime`: 2 bytes, little endian. 263 | returns: datetime.datetime, or datetime.min on error 264 | """ 265 | try: 266 | t = ord(dosdate[1]) << 8 267 | t |= ord(dosdate[0]) 268 | day = t & 0b0000000000011111 269 | month = (t & 0b0000000111100000) >> 5 270 | year = (t & 0b1111111000000000) >> 9 271 | year += 1980 272 | 273 | t = ord(dostime[1]) << 8 274 | t |= ord(dostime[0]) 275 | sec = t & 0b0000000000011111 276 | sec *= 2 277 | minute = (t & 0b0000011111100000) >> 5 278 | hour = (t & 0b1111100000000000) >> 11 279 | 280 | return datetime(year, month, day, hour, minute, sec) 281 | except: 282 | return datetime.min 283 | 284 | 285 | def parse_filetime(qword): 286 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 287 | return datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600) 288 | 289 | 290 | class BinaryParserException(Exception): 291 | """ 292 | Base Exception class for binary parsing. 293 | """ 294 | def __init__(self, value): 295 | """ 296 | Constructor. 297 | Arguments: 298 | - `value`: A string description. 299 | """ 300 | super(BinaryParserException, self).__init__() 301 | self._value = value 302 | 303 | def __repr__(self): 304 | return "BinaryParserException(%r)" % (self._value) 305 | 306 | def __str__(self): 307 | return "Binary Parser Exception: %s" % (self._value) 308 | 309 | 310 | class ParseException(BinaryParserException): 311 | """ 312 | An exception to be thrown during binary parsing, such as 313 | when an invalid header is encountered. 314 | """ 315 | def __init__(self, value): 316 | """ 317 | Constructor. 318 | Arguments: 319 | - `value`: A string description.
320 | """ 321 | super(ParseException, self).__init__(value) 322 | 323 | def __repr__(self): 324 | return "ParseException(%r)" % (self._value) 325 | 326 | def __str__(self): 327 | return "Parse Exception(%s)" % (self._value) 328 | 329 | 330 | class OverrunBufferException(ParseException): 331 | def __init__(self, readOffs, bufLen): 332 | tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen)) 333 | super(ParseException, self).__init__(tvalue) 334 | 335 | def __repr__(self): 336 | return "OverrunBufferException(%r)" % (self._value) 337 | 338 | def __str__(self): 339 | return "Tried to parse beyond the end of the file (%s)" % \ 340 | (self._value) 341 | 342 | 343 | def read_byte(buf, offset): 344 | """ 345 | Returns a little-endian unsigned byte from the relative offset of the given buffer. 346 | Arguments: 347 | - `buf`: The buffer from which to read the value. 348 | - `offset`: The relative offset from the start of the block. 349 | Throws: 350 | - `OverrunBufferException` 351 | """ 352 | try: 353 | return unpack_from(" 1: 438 | raise "Cannot specify both `length` and `count`." 439 | 440 | if offset is None: 441 | offset = self._implicit_offset 442 | 443 | basic_sizes = { 444 | "byte": 1, 445 | "int8": 1, 446 | "word": 2, 447 | "word_be": 2, 448 | "int16": 2, 449 | "dword": 4, 450 | "dword_be": 4, 451 | "int32": 4, 452 | "qword": 8, 453 | "int64": 8, 454 | "float": 4, 455 | "double": 8, 456 | "dosdate": 4, 457 | "filetime": 8, 458 | "systemtime": 8, 459 | "guid": 16, 460 | } 461 | 462 | handler = None 463 | 464 | if isinstance(type_, type): 465 | if not issubclass(type_, Nestable): 466 | raise TypeError("Invalid nested structure") 467 | 468 | typename = type_.__name__ 469 | 470 | if count == 0: 471 | def no_class_handler(): 472 | return 473 | handler = no_class_handler 474 | elif is_generator: 475 | def many_class_handler(): 476 | ofs = offset 477 | for _ in range(count): 478 | r = type_(self._buf, self.absolute_offset(ofs), self) 479 | ofs += len(r) 480 | yield r 481 | handler = many_class_handler 482 | 483 | if hasattr(type_, "structure_size"): 484 | ofs = offset 485 | for _ in range(count): 486 | ofs += type_.structure_size(self._buf, self.absolute_offset(ofs), self) 487 | self._implicit_offset = ofs 488 | else: 489 | ofs = offset 490 | for _ in range(count): 491 | r = type_(self._buf, self.absolute_offset(ofs), self) 492 | ofs += len(r) 493 | self._implicit_offset = ofs 494 | else: 495 | # TODO(wb): this needs to cache/memoize 496 | def class_handler(): 497 | return type_(self._buf, self.absolute_offset(offset), self) 498 | handler = class_handler 499 | 500 | if hasattr(type_, "structure_size"): 501 | size = type_.structure_size(self._buf, self.absolute_offset(offset), self) 502 | self._implicit_offset = offset + size 503 | else: 504 | temp = type_(self._buf, self.absolute_offset(offset), self) 505 | 506 | self._implicit_offset = offset + len(temp) 507 | elif isinstance(type_, basestring): 508 | typename = type_ 509 | 510 | if count == 0: 511 | def no_basic_handler(): 512 | return 513 | handler = no_basic_handler 514 | elif is_generator: 515 | # length must be in basic_sizes 516 | def many_basic_handler(): 517 | ofs = offset 518 | f = getattr(self, "unpack_" + type_) 519 | for _ in range(count): 520 | yield f(ofs) 521 | ofs += basic_sizes[type_] 522 | handler = many_basic_handler 523 | 524 | self._implicit_offset = offset + count * basic_sizes[type_] 525 | else: 526 | if length is None: 527 | def basic_no_length_handler(): 528 | f = getattr(self, "unpack_" + type_) 529 | 
return f(offset) 530 | handler = basic_no_length_handler 531 | 532 | if type_ in basic_sizes: 533 | self._implicit_offset = offset + basic_sizes[type_] 534 | elif type_ == "binary": 535 | self._implicit_offset = offset + length 536 | elif type_ == "string" and length is not None: 537 | self._implicit_offset = offset + length 538 | elif type_ == "wstring" and length is not None: 539 | self._implicit_offset = offset + (2 * length) 540 | elif "string" in type_ and length is None: 541 | raise ParseException("Implicit offset not supported for dynamic length strings") 542 | else: 543 | raise ParseException("Implicit offset not supported for type: " + type_) 544 | else: 545 | def basic_length_handler(): 546 | f = getattr(self, "unpack_" + type_) 547 | return f(offset, length) 548 | handler = basic_length_handler 549 | 550 | if type_ == "wstring": 551 | self._implicit_offset = offset + (2 * length) 552 | else: 553 | self._implicit_offset = offset + length 554 | 555 | setattr(self, name, handler) 556 | setattr(self, "_off_" + name, offset) 557 | self.add_explicit_field(offset, typename, name, length, count) 558 | 559 | def add_explicit_field(self, offset, typename, name, length=None, count=1): 560 | """ 561 | The `Block` class tracks the fields that have been added so that you can 562 | pretty print the structure. If there are other fields a subclass 563 | parses, use `add_explicit_field` to include them in the pretty printing. 564 | @type offset: int 565 | @param offset: The offset at which the field begins. 566 | @type typename: str or Block subclass 567 | @param typename: The type of the value of the field. 568 | @type name: str 569 | @param name: The name of the field. 570 | @type length: int 571 | @param length: An explicit length for the field. 572 | @type count: int 573 | @param count: The number of repetitions for the field. 574 | @rtype: None 575 | @return: None 576 | """ 577 | if type(typename) == type: 578 | typename = typename.__name__ 579 | self._declared_fields.append({ 580 | "offset": offset, 581 | "type": typename, 582 | "name": name, 583 | "length": length, 584 | "count": count, 585 | }) 586 | 587 | def get_all_string(self, indent=0): 588 | """ 589 | Get a nicely formatted, nested string of the contents of this structure 590 | and any sub-structures. If a sub-structure has a method `.string()`, then 591 | this method will use it to represent its value. 592 | Implementation note, can't look for `__str__`, because everything has this. 593 | @type indent: int 594 | @param indent: The level of nesting this objects has. 595 | @rtype: str 596 | @return A nicely formatted string that describes this structure. 
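        A sketch of one line of the output, per the format strings below (field name and value are illustrative): 0x10 (dword)logical_size    0x1000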
597 | """ 598 | ret = "" 599 | for field in self._declared_fields: 600 | v = getattr(self, field["name"])() 601 | if isinstance(v, Block): 602 | if hasattr(v, "string"): 603 | ret += "%s%s (%s)%s\t%s\n" % \ 604 | (" " * indent, hex(field["offset"]), field["type"], 605 | field["name"], v.string()) 606 | else: 607 | ret += "%s%s (%s)%s\n" % \ 608 | (" " * indent, hex(field["offset"]), field["type"], 609 | field["name"]) 610 | ret += v.get_all_string(indent + 1) 611 | elif isinstance(v, types.GeneratorType): 612 | ret += "%s%s (%s[])%s\n" % (" " * indent, hex(field["offset"]), field["type"], field["name"],) 613 | for i, j in enumerate(v): 614 | ret += "%s[%d] (%s) " % (" " * (indent + 1), i, field["type"]) 615 | if hasattr(j, "get_all_string"): 616 | ret += "\n" + j.get_all_string(indent + 2) 617 | else: 618 | ret += str(j) + "\n" 619 | else: 620 | if isinstance(v, int): 621 | v = hex(v) 622 | ret += "%s%s (%s)%s\t%s\n" % \ 623 | (" " * indent, hex(field["offset"]), field["type"], 624 | field["name"], str(v)) 625 | return ret 626 | 627 | def current_field_offset(self): 628 | return self._implicit_offset 629 | 630 | def unpack_byte(self, offset): 631 | """ 632 | Returns a little-endian unsigned byte from the relative offset. 633 | Arguments: 634 | - `offset`: The relative offset from the start of the block. 635 | Throws: 636 | - `OverrunBufferException` 637 | """ 638 | return read_byte(self._buf, self._offset + offset) 639 | 640 | def unpack_int8(self, offset): 641 | """ 642 | Returns a little-endian signed byte from the relative offset. 643 | Arguments: 644 | - `offset`: The relative offset from the start of the block. 645 | Throws: 646 | - `OverrunBufferException` 647 | """ 648 | o = self._offset + offset 649 | try: 650 | return unpack_from("H", self._buf, o)[0] 677 | except struct.error: 678 | raise OverrunBufferException(o, len(self._buf)) 679 | 680 | def unpack_int16(self, offset): 681 | """ 682 | Returns a little-endian signed WORD (2 bytes) from the 683 | relative offset. 684 | Arguments: 685 | - `offset`: The relative offset from the start of the block. 686 | Throws: 687 | - `OverrunBufferException` 688 | """ 689 | o = self._offset + offset 690 | try: 691 | return unpack_from("I", self._buf, o)[0] 727 | except struct.error: 728 | raise OverrunBufferException(o, len(self._buf)) 729 | 730 | def unpack_int32(self, offset): 731 | """ 732 | Returns a little-endian signed integer (4 bytes) from the 733 | relative offset. 734 | Arguments: 735 | - `offset`: The relative offset from the start of the block. 
736 | Throws: 737 | - `OverrunBufferException` 738 | """ 739 | o = self._offset + offset 740 | try: 741 | return unpack_from(" self._capacity: 115 | return self._q.pop() 116 | 117 | def pop(self): 118 | return self._q.pop() 119 | 120 | def touch(self, v): 121 | self._q.touch(v) 122 | 123 | def size(self): 124 | return len(self._q) 125 | 126 | def __len__(self): 127 | return self.size() 128 | 129 | @staticmethod 130 | def test(): 131 | q = BoundedLRUQueue(5) 132 | assert q.size() == 0 133 | assert len(q) == 0 134 | 135 | q.push(0) 136 | assert q.size() == 1 137 | assert len(q) == 1 138 | 139 | assert q.pop() == 0 140 | assert q.size() == 0 141 | assert len(q) == 0 142 | 143 | q.push(0) 144 | q.push(1) 145 | assert q.pop() == 0 146 | assert q.pop() == 1 147 | 148 | q.push(0) 149 | q.push(1) 150 | q.touch(0) 151 | assert q.pop() == 1 152 | assert q.pop() == 0 153 | 154 | q = BoundedLRUQueue(5, key=lambda n: n[0]) 155 | q.push([0]) 156 | assert q.pop() == [0] 157 | 158 | q.push([0]) 159 | q.push([1]) 160 | assert q.pop() == [0] 161 | assert q.pop() == [1] 162 | 163 | q = BoundedLRUQueue(2) 164 | assert q.push(0) is None 165 | assert q.push(1) is None 166 | assert q.push(2) == 0 167 | assert q.pop() == 1 168 | assert q.pop() == 2 169 | return True 170 | 171 | 172 | class RangeCache(object): 173 | """ 174 | RangeCache is a data structure that tracks a finite set of 175 | ranges (a range is a 2-tuple consisting of a numeric start 176 | and numeric length). New ranges can be added via the `push` 177 | method, and if such a call causes the capacity to be exceeded, 178 | then the "oldest" range is removed. The `get` method implements 179 | an efficient lookup for a single value that may be found within 180 | one of the ranges. 181 | """ 182 | def __init__(self, capacity, 183 | start_key=lambda o: o[0], 184 | length_key=lambda o: o[1]): 185 | """ 186 | @param key: A function that fetches the range start from an item. 187 | """ 188 | super(RangeCache, self).__init__() 189 | self._ranges = SortedCollection(key=start_key) 190 | self._lru = BoundedLRUQueue(capacity, key=start_key) 191 | self._start_key = start_key 192 | self._length_key = length_key 193 | 194 | def push(self, o): 195 | """ 196 | Add a range to the cache. 197 | 198 | If `key` is not provided to the constructor, then 199 | `o` should be a 3-tuple: 200 | - range start (numeric) 201 | - range length (numeric) 202 | - range item (object) 203 | """ 204 | self._ranges.insert(o) 205 | popped = self._lru.push(o) 206 | if popped is not None: 207 | self._ranges.remove(popped) 208 | 209 | def touch(self, o): 210 | self._lru.touch(o) 211 | 212 | def get(self, value): 213 | """ 214 | Search for the numeric `value` within the ranges 215 | tracked by this cache. 216 | @raise ValueError: if the value is not found in the range cache. 
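        For example, after push((10, 5, buf)), get(12) returns the (10, 5, buf) tuple, while get(15) raises ValueError: each range covers [start, start + length).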
217 | """ 218 | hit = self._ranges.find_le(value) 219 | if value < self._start_key(hit) + self._length_key(hit): 220 | return hit 221 | raise ValueError("%s not found in range cache" % value) 222 | 223 | @staticmethod 224 | def test(): 225 | q = RangeCache(2) 226 | 227 | x = None 228 | try: x = q.get(0) 229 | except ValueError: pass 230 | assert x is None 231 | 232 | x = None 233 | try: x = q.get(1) 234 | except ValueError: pass 235 | assert x is None 236 | 237 | q.push((1, 1, [0])) 238 | 239 | x = None 240 | try: x = q.get(0) 241 | except ValueError: pass 242 | assert x is None 243 | 244 | assert q.get(1) == (1, 1, [0]) 245 | assert q.get(1.99) == (1, 1, [0]) 246 | x = None 247 | try: x = q.get(2.01) 248 | except ValueError: pass 249 | assert x is None 250 | 251 | q.push((3, 1, [1])) 252 | assert q.get(1) == (1, 1, [0]) 253 | assert q.get(3) == (3, 1, [1]) 254 | 255 | q.push((5, 1, [2])) 256 | x = None 257 | try: x = q.get(1) 258 | except ValueError: pass 259 | assert x is None 260 | 261 | assert q.get(3) == (3, 1, [1]) 262 | assert q.get(5) == (5, 1, [2]) 263 | 264 | q.touch((3, 1, [1])) 265 | q.push((7, 1, [3])) 266 | 267 | assert q.get(3) == (3, 1, [1]) 268 | assert q.get(7) == (7, 1, [3]) 269 | x = None 270 | try: x = q.get(5) 271 | except ValueError: pass 272 | assert x is None 273 | 274 | return True 275 | 276 | 277 | class FileMap(object): 278 | """ 279 | FileMap is a wrapper for a file-like object that satisfies the 280 | buffer interface. This is essentially the inverse of StringIO. 281 | It implements a caching layer over the calls to the OS seek/read 282 | functions for improved performance. 283 | 284 | Q: Why might you want this over mmap? 285 | A: 1) Its pure Python 286 | 2) You can stack this over any Python file-like objects. 287 | eg. FileMap over ZipFile gives you a random access buffer 288 | thats backed by a compressed image on the file system. 289 | """ 290 | __unpackable__ = True 291 | def __init__(self, filelike, block_size=MEGABYTE, 292 | cache_size=10, size=None): 293 | """ 294 | If `size` is not provided, then `filelike` must have the 295 | `seek` and `tell` methods implemented. 296 | """ 297 | super(FileMap, self).__init__() 298 | if size is None: 299 | import os 300 | filelike.seek(0, os.SEEK_END) 301 | size = filelike.tell() 302 | self._f = filelike 303 | self._block_size = block_size 304 | self._size = size 305 | self._block_cache = RangeCache(cache_size) 306 | 307 | def __getitem__(self, index): 308 | if index < 0: 309 | index = self._size + index 310 | block_index = index % self._block_size 311 | block_start = index - block_index 312 | 313 | try: 314 | hit = self._block_cache.get(index) 315 | buf = hit[2] 316 | self._block_cache.touch(hit) 317 | return buf[block_index] 318 | except ValueError: 319 | self._f.seek(block_start) 320 | buf = self._f.read(self._block_size) 321 | self._block_cache.push((block_start, self._block_size, buf)) 322 | return buf[block_index] 323 | 324 | def _get_containing_block(self, index): 325 | """ 326 | Given an index, return block-aligned block that contains it, 327 | updating the appropriate caches. 
328 | """ 329 | block_index = index % self._block_size 330 | block_start = index - block_index 331 | 332 | try: 333 | hit = self._block_cache.get(block_start) 334 | buf = hit[2] 335 | self._block_cache.touch(hit) 336 | return buf 337 | except ValueError: 338 | self._f.seek(block_start) 339 | buf = self._f.read(self._block_size) 340 | self._block_cache.push((block_start, self._block_size, buf)) 341 | return buf 342 | 343 | def __getslice__(self, start, end): 344 | if end == sys.maxint: 345 | end = self._size 346 | 347 | start_block_index = start % self._block_size 348 | start_block_start = start - start_block_index 349 | 350 | end_block_index = end % self._block_size 351 | end_block_start = end - end_block_index 352 | 353 | if start_block_start == end_block_start: 354 | # easy case, everything falls within the same block 355 | buf = self._get_containing_block(start) 356 | return buf[start_block_index:end_block_index] 357 | else: 358 | # hard case, slice goes over one or more block boundaries 359 | ret = "" 360 | 361 | # phase 1, start to block boundary 362 | buf = self._get_containing_block(start_block_start) 363 | s = start_block_index 364 | e = start_block_start + self._block_size 365 | ret += buf[s:e] 366 | 367 | # phase 2, any complete blocks 368 | cur_block_start = start_block_start + self._block_size 369 | while cur_block_start + self._block_size < end_block_start: 370 | buf = self._get_containing_block(cur_block_start) 371 | ret += buf 372 | cur_block_start += self._block_size 373 | 374 | # phase 3, block boundary to end 375 | buf = self._get_containing_block(cur_block_start) 376 | s = 0 377 | e = end_block_index or self._block_size 378 | ret += buf[0:e] 379 | return ret 380 | 381 | def __len__(self): 382 | return self._size 383 | 384 | @staticmethod 385 | def test(): 386 | from cStringIO import StringIO 387 | f = StringIO("0123abcd4567efgh") 388 | buf = FileMap(f, block_size=4, cache_size=2) 389 | 390 | assert len(buf) == 16 391 | 392 | assert buf[0] == "0" 393 | assert buf[1] == "1" 394 | assert buf[0:2] == "01" 395 | 396 | assert buf[4] == "a" 397 | assert buf[5] == "b" 398 | assert buf[4:6] == "ab" 399 | 400 | assert buf[2:6] == "23ab" 401 | assert buf[0:8] == "0123abcd" 402 | 403 | assert buf[0:12] == "0123abcd4567" 404 | assert buf[0:16] == "0123abcd4567efgh" 405 | assert buf[:] == "0123abcd4567efgh" 406 | 407 | assert buf[-1] == "h" 408 | assert buf[-2:] == "gh" 409 | assert buf[-4:] == "efgh" 410 | assert buf[-8:] == "4567efgh" 411 | 412 | return True 413 | 414 | 415 | def unpack_from(fmt, buffer, off=0): 416 | """ 417 | Shim struct.unpack_from and divert unpacking of FileMaps. 418 | 419 | Otherwise, you'd get an exception like: 420 | TypeError: unpack_from() argument 1 must be convertible to a buffer, not FileMap 421 | 422 | So, we extract a true sub-buffer from the FileMap, and feed this 423 | back into the old unpack function. 424 | Theres an extra allocation and copy, but there's no getting 425 | around that. 426 | """ 427 | if not isinstance(buffer, FileMap): 428 | return old_unpack_from(fmt, buffer, off) 429 | size = calcsize(fmt) 430 | buf = buffer[off:off + size] 431 | return old_unpack_from(fmt, buf, 0x0) 432 | 433 | 434 | def unpack(fmt, string): 435 | """ 436 | Like the shimmed unpack_from, but for struct.unpack. 
437 | """ 438 | if not isinstance(buffer, FileMap): 439 | return old_unpack(fmt, string) 440 | size = calcsize(fmt) 441 | buf = string[:size] 442 | return old_unpack(fmt, buf, 0x0) 443 | 444 | 445 | def struct_test(): 446 | from cStringIO import StringIO 447 | f = StringIO("\x04\x03\x02\x01") 448 | buf = FileMap(f) 449 | assert unpack_from(" 5 | # while at Mandiant 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | 19 | 20 | class Progress(object): 21 | """ 22 | An interface to things that track the progress of a long running task. 23 | """ 24 | def __init__(self, max_): 25 | super(Progress, self).__init__() 26 | self._max = max_ 27 | self._current = 0 28 | 29 | def set_current(self, current): 30 | """ 31 | Set the number of steps that this task has completed. 32 | 33 | @type current: int 34 | """ 35 | self._current = current 36 | 37 | def set_complete(self): 38 | """ 39 | Convenience method to set the task as having completed all steps. 40 | """ 41 | self._current = self._max 42 | 43 | 44 | class NullProgress(Progress): 45 | """ 46 | A Progress class that ignores any updates. 47 | """ 48 | def __init__(self, max_): 49 | super(NullProgress, self).__init__(max_) 50 | 51 | def set_current(self, current): 52 | pass 53 | 54 | 55 | class ProgressBarProgress(Progress): 56 | def __init__(self, max_): 57 | from progressbar import Bar 58 | from progressbar import ETA 59 | from progressbar import ProgressBar 60 | super(ProgressBarProgress, self).__init__(max_) 61 | 62 | widgets = ["Progress: ", 63 | Bar(marker="=", left="[", right="]"), " ", 64 | ETA(), " ", ] 65 | self._pbar = ProgressBar(widgets=widgets, maxval=self._max) 66 | self._has_notified_started = False 67 | 68 | def set_current(self, current): 69 | if not self._has_notified_started: 70 | self._pbar.start() 71 | self._has_notified_started = True 72 | 73 | self._pbar.update(current) 74 | 75 | def set_complete(self): 76 | self._pbar.finish() 77 | -------------------------------------------------------------------------------- /ntfs/SortedCollection.py: -------------------------------------------------------------------------------- 1 | """ 2 | From http://code.activestate.com/recipes/577197-sortedcollection/ 3 | """ 4 | from bisect import bisect_left, bisect_right 5 | 6 | class SortedCollection(object): 7 | '''Sequence sorted by a key function. 8 | 9 | SortedCollection() is much easier to work with than using bisect() directly. 10 | It supports key functions like those use in sorted(), min(), and max(). 11 | The result of the key function call is saved so that keys can be searched 12 | efficiently. 13 | 14 | Instead of returning an insertion-point which can be hard to interpret, the 15 | five find-methods return a specific item in the sequence. They can scan for 16 | exact matches, the last item less-than-or-equal to a key, or the first item 17 | greater-than-or-equal to a key. 18 | 19 | Once found, an item's ordinal position can be located with the index() method. 
20 | New items can be added with the insert() and insert_right() methods. 21 | Old items can be deleted with the remove() method. 22 | 23 | The usual sequence methods are provided to support indexing, slicing, 24 | length lookup, clearing, copying, forward and reverse iteration, contains 25 | checking, item counts, item removal, and a nice looking repr. 26 | 27 | Finding and indexing are O(log n) operations while iteration and insertion 28 | are O(n). The initial sort is O(n log n). 29 | 30 | The key function is stored in the 'key' attibute for easy introspection or 31 | so that you can assign a new key function (triggering an automatic re-sort). 32 | 33 | In short, the class was designed to handle all of the common use cases for 34 | bisect but with a simpler API and support for key functions. 35 | 36 | >>> from pprint import pprint 37 | >>> from operator import itemgetter 38 | 39 | >>> s = SortedCollection(key=itemgetter(2)) 40 | >>> for record in [ 41 | ... ('roger', 'young', 30), 42 | ... ('angela', 'jones', 28), 43 | ... ('bill', 'smith', 22), 44 | ... ('david', 'thomas', 32)]: 45 | ... s.insert(record) 46 | 47 | >>> pprint(list(s)) # show records sorted by age 48 | [('bill', 'smith', 22), 49 | ('angela', 'jones', 28), 50 | ('roger', 'young', 30), 51 | ('david', 'thomas', 32)] 52 | 53 | >>> s.find_le(29) # find oldest person aged 29 or younger 54 | ('angela', 'jones', 28) 55 | >>> s.find_lt(28) # find oldest person under 28 56 | ('bill', 'smith', 22) 57 | >>> s.find_gt(28) # find youngest person over 28 58 | ('roger', 'young', 30) 59 | 60 | >>> r = s.find_ge(32) # find youngest person aged 32 or older 61 | >>> s.index(r) # get the index of their record 62 | 3 63 | >>> s[3] # fetch the record at that index 64 | ('david', 'thomas', 32) 65 | 66 | >>> s.key = itemgetter(0) # now sort by first name 67 | >>> pprint(list(s)) 68 | [('angela', 'jones', 28), 69 | ('bill', 'smith', 22), 70 | ('david', 'thomas', 32), 71 | ('roger', 'young', 30)] 72 | 73 | ''' 74 | 75 | def __init__(self, iterable=(), key=None): 76 | self._given_key = key 77 | key = (lambda x: x) if key is None else key 78 | decorated = sorted((key(item), item) for item in iterable) 79 | self._keys = [k for k, item in decorated] 80 | self._items = [item for k, item in decorated] 81 | self._key = key 82 | 83 | def _getkey(self): 84 | return self._key 85 | 86 | def _setkey(self, key): 87 | if key is not self._key: 88 | self.__init__(self._items, key=key) 89 | 90 | def _delkey(self): 91 | self._setkey(None) 92 | 93 | key = property(_getkey, _setkey, _delkey, 'key function') 94 | 95 | def clear(self): 96 | self.__init__([], self._key) 97 | 98 | def copy(self): 99 | return self.__class__(self, self._key) 100 | 101 | def __len__(self): 102 | return len(self._items) 103 | 104 | def __getitem__(self, i): 105 | return self._items[i] 106 | 107 | def __iter__(self): 108 | return iter(self._items) 109 | 110 | def __reversed__(self): 111 | return reversed(self._items) 112 | 113 | def __repr__(self): 114 | return '%s(%r, key=%s)' % ( 115 | self.__class__.__name__, 116 | self._items, 117 | getattr(self._given_key, '__name__', repr(self._given_key)) 118 | ) 119 | 120 | def __reduce__(self): 121 | return self.__class__, (self._items, self._given_key) 122 | 123 | def __contains__(self, item): 124 | k = self._key(item) 125 | i = bisect_left(self._keys, k) 126 | j = bisect_right(self._keys, k) 127 | return item in self._items[i:j] 128 | 129 | def index(self, item): 130 | 'Find the position of an item. Raise ValueError if not found.' 
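# sketch of the approach: bisect the precomputed keys to find the window
# [i, j) of items sharing this item's key, then search only that window;
# e.g. for key 28 in keys [22, 28, 28, 30], the window is [1, 3).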
131 | k = self._key(item) 132 | i = bisect_left(self._keys, k) 133 | j = bisect_right(self._keys, k) 134 | return self._items[i:j].index(item) + i 135 | 136 | def count(self, item): 137 | 'Return number of occurrences of item' 138 | k = self._key(item) 139 | i = bisect_left(self._keys, k) 140 | j = bisect_right(self._keys, k) 141 | return self._items[i:j].count(item) 142 | 143 | def insert(self, item): 144 | 'Insert a new item. If equal keys are found, add to the left' 145 | k = self._key(item) 146 | i = bisect_left(self._keys, k) 147 | self._keys.insert(i, k) 148 | self._items.insert(i, item) 149 | 150 | def insert_right(self, item): 151 | 'Insert a new item. If equal keys are found, add to the right' 152 | k = self._key(item) 153 | i = bisect_right(self._keys, k) 154 | self._keys.insert(i, k) 155 | self._items.insert(i, item) 156 | 157 | def remove(self, item): 158 | 'Remove first occurrence of item. Raise ValueError if not found' 159 | i = self.index(item) 160 | del self._keys[i] 161 | del self._items[i] 162 | 163 | def find(self, k): 164 | 'Return first item with a key == k. Raise ValueError if not found.' 165 | i = bisect_left(self._keys, k) 166 | if i != len(self) and self._keys[i] == k: 167 | return self._items[i] 168 | raise ValueError('No item found with key equal to: %r' % (k,)) 169 | 170 | def find_le(self, k): 171 | 'Return last item with a key <= k. Raise ValueError if not found.' 172 | i = bisect_right(self._keys, k) 173 | if i: 174 | return self._items[i-1] 175 | raise ValueError('No item found with key at or below: %r' % (k,)) 176 | 177 | def find_lt(self, k): 178 | 'Return last item with a key < k. Raise ValueError if not found.' 179 | i = bisect_left(self._keys, k) 180 | if i: 181 | return self._items[i-1] 182 | raise ValueError('No item found with key below: %r' % (k,)) 183 | 184 | def find_ge(self, k): 185 | 'Return first item with a key >= k. Raise ValueError if not found.' 186 | i = bisect_left(self._keys, k) 187 | if i != len(self): 188 | return self._items[i] 189 | raise ValueError('No item found with key at or above: %r' % (k,)) 190 | 191 | def find_gt(self, k): 192 | 'Return first item with a key > k.
Raise ValueError if not found' 193 | i = bisect_right(self._keys, k) 194 | if i != len(self): 195 | return self._items[i] 196 | raise ValueError('No item found with key above: %r' % (k,)) 197 | -------------------------------------------------------------------------------- /ntfs/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = 0.1 2 | 3 | __all__ = [ 4 | "secure", 5 | "mft", 6 | "logfile", 7 | "usnjrnl", 8 | ] 9 | -------------------------------------------------------------------------------- /ntfs/filesystem/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | 4 | from ntfs.BinaryParser import Block 5 | from ntfs.BinaryParser import OverrunBufferException 6 | from ntfs.mft.MFT import InvalidRecordException 7 | from ntfs.mft.MFT import MREF 8 | from ntfs.mft.MFT import MSEQNO 9 | from ntfs.mft.MFT import MFTRecord 10 | from ntfs.mft.MFT import ATTR_TYPE 11 | from ntfs.mft.MFT import INDEX_ROOT 12 | from ntfs.mft.MFT import MFTEnumerator 13 | from ntfs.mft.MFT import MFT_RECORD_SIZE 14 | from ntfs.mft.MFT import INDEX_ALLOCATION 15 | from ntfs.mft.MFT import AttributeNotFoundError 16 | 17 | 18 | g_logger = logging.getLogger("ntfs.filesystem") 19 | 20 | 21 | class FileSystemError(Exception): 22 | def __init__(self, msg="no details"): 23 | super(FileSystemError, self).__init__(self) 24 | self._msg = msg 25 | 26 | def __str__(self): 27 | return "%s(%s)" % (self.__class__.__name__, self._msg) 28 | 29 | 30 | class CorruptNTFSFilesystemError(FileSystemError): 31 | pass 32 | 33 | 34 | class NoParentError(FileSystemError): 35 | pass 36 | 37 | 38 | class UnsupportedPathError(FileSystemError): 39 | pass 40 | 41 | 42 | class File(object): 43 | """ 44 | interface 45 | """ 46 | def get_name(self): 47 | raise NotImplementedError() 48 | 49 | def get_parent_directory(self): 50 | """ 51 | @raise NoParentError: 52 | """ 53 | raise NotImplementedError() 54 | 55 | def read(self, offset, length): 56 | raise NotImplementedError() 57 | 58 | def get_full_path(self): 59 | raise NotImplementedError() 60 | 61 | 62 | class NTFSFileMetadataMixin(object): 63 | def __init__(self, record): 64 | self._record = record 65 | 66 | def get_filenames(self): 67 | ret = [] 68 | for fn in self._record.filename_informations(): 69 | ret.append(fn.filename()) 70 | return ret 71 | 72 | def get_si_created_timestamp(self): 73 | return self._record.standard_information().created_time() 74 | 75 | def get_si_accessed_timestamp(self): 76 | return self._record.standard_information().accessed_time() 77 | 78 | def get_si_changed_timestamp(self): 79 | return self._record.standard_information().changed_time() 80 | 81 | def get_si_modified_timestamp(self): 82 | return self._record.standard_information().modified_time() 83 | 84 | def get_fn_created_timestamp(self): 85 | return self._record.filename_information().created_time() 86 | 87 | def get_fn_accessed_timestamp(self): 88 | return self._record.filename_information().accessed_time() 89 | 90 | def get_fn_changed_timestamp(self): 91 | return self._record.filename_information().changed_time() 92 | 93 | def get_fn_modified_timestamp(self): 94 | return self._record.filename_information().modified_time() 95 | 96 | def is_file(self): 97 | return self._record.is_file() 98 | 99 | def is_directory(self): 100 | return self._record.is_directory() 101 | 102 | def get_size(self): 103 | if self.is_directory(): 104 | return 0 105 | else: 106 | data_attribute = 
self._record.data_attribute() 107 | if data_attribute is not None: 108 | if data_attribute.non_resident() == 0: 109 | size = len(data_attribute.value()) 110 | else: 111 | size = data_attribute.data_size() 112 | else: 113 | size = self._record.filename_information().logical_size() 114 | return size 115 | 116 | 117 | class NTFSFile(File, NTFSFileMetadataMixin): 118 | def __init__(self, filesystem, mft_record): 119 | File.__init__(self) 120 | NTFSFileMetadataMixin.__init__(self, mft_record) 121 | self._fs = filesystem 122 | self._record = mft_record 123 | 124 | def get_name(self): 125 | return self._record.filename_information().filename() 126 | 127 | def get_parent_directory(self): 128 | return self._fs.get_record_parent(self._record) 129 | 130 | def __str__(self): 131 | return "File(name: %s)" % (self.get_name()) 132 | 133 | def read(self, offset, length): 134 | data_attribute = self._record.data_attribute() 135 | data = self._fs.get_attribute_data(data_attribute) 136 | return data[offset:offset+length] 137 | 138 | def get_full_path(self): 139 | return self._fs.get_record_path(self._record) 140 | 141 | 142 | class ChildNotFoundError(Exception): 143 | pass 144 | 145 | 146 | class Directory(object): 147 | """ 148 | interface 149 | """ 150 | def get_name(self): 151 | raise NotImplementedError() 152 | 153 | def get_children(self): 154 | raise NotImplementedError() 155 | 156 | def get_files(self): 157 | raise NotImplementedError() 158 | 159 | def get_directories(self): 160 | raise NotImplementedError() 161 | 162 | def get_parent_directory(self): 163 | """ 164 | @raise NoParentError: 165 | """ 166 | raise NotImplementedError() 167 | 168 | def get_child(self, name): 169 | """ 170 | @raise ChildNotFoundError: if the given filename is not found. 171 | """ 172 | raise NotImplementedError() 173 | 174 | def get_full_path(self): 175 | raise NotImplementedError() 176 | 177 | 178 | class PathDoesNotExistError(Exception): 179 | pass 180 | 181 | 182 | class DirectoryDoesNotExistError(PathDoesNotExistError): 183 | pass 184 | 185 | 186 | class NTFSDirectory(Directory, NTFSFileMetadataMixin): 187 | def __init__(self, filesystem, mft_record): 188 | Directory.__init__(self) 189 | NTFSFileMetadataMixin.__init__(self, mft_record) 190 | self._fs = filesystem 191 | self._record = mft_record 192 | 193 | def get_name(self): 194 | return self._record.filename_information().filename() 195 | 196 | def get_children(self): 197 | ret = [] 198 | for child in self._fs.get_record_children(self._record): 199 | if child.is_directory(): 200 | ret.append(NTFSDirectory(self._fs, child)) 201 | else: 202 | ret.append(NTFSFile(self._fs, child)) 203 | return ret 204 | 205 | def get_files(self): 206 | return filter(lambda c: isinstance(c, NTFSFile), 207 | self.get_children()) 208 | 209 | def get_directories(self): 210 | return filter(lambda c: isinstance(c, NTFSDirectory), 211 | self.get_children()) 212 | 213 | def get_parent_directory(self): 214 | return self._fs.get_record_parent(self._record) 215 | 216 | def __str__(self): 217 | return "Directory(name: %s)" % (self.get_name()) 218 | 219 | def get_child(self, name): 220 | name_lower = name.lower() 221 | for child in self.get_children(): 222 | if len(child.get_filenames()) > 1: 223 | g_logger.debug("file names: %s -> %s", 224 | child.get_name(), child.get_filenames()) 225 | for fn in child.get_filenames(): 226 | if name_lower == fn.lower(): 227 | return child 228 | raise ChildNotFoundError() 229 | 230 | def _split_path(self, path): 231 | """ 232 | Hack to try to support both types of 
file system paths: 233 | - forward slash, /etc 234 | - backslash, C:\windows\system32 235 | 236 | Linux uses forward slashes, so we'd like that when working with FUSE. 237 | The original file system used backslashes, so we'd also like that. 238 | 239 | This is a poor attempt at doing both: 240 | - detect which slash type is in use 241 | - don't support both at the same time 242 | 243 | This works like string.partition(PATH_SEPARATOR) 244 | """ 245 | if "\\" in path: 246 | if "/" in path: 247 | raise UnsupportedPathError(path) 248 | return path.partition("\\") 249 | 250 | elif "/" in path: 251 | if "\\" in path: 252 | raise UnsupportedPathError(path) 253 | return path.partition("/") 254 | else: 255 | return path, "", "" 256 | 257 | def get_path_entry(self, path): 258 | g_logger.debug("get_path_entry: path: %s", path) 259 | imm, slash, rest = self._split_path(path) 260 | if slash == "": 261 | return self.get_child(path) 262 | else: 263 | if rest == "": 264 | return self 265 | 266 | child = self.get_child(imm) 267 | if not isinstance(child, NTFSDirectory): 268 | raise DirectoryDoesNotExistError() 269 | 270 | return child.get_path_entry(rest) 271 | 272 | def get_full_path(self): 273 | return self._fs.get_record_path(self._record) 274 | 275 | 276 | class Filesystem(object): 277 | """ 278 | interface 279 | """ 280 | def get_root_directory(self): 281 | raise NotImplementedError() 282 | 283 | 284 | class NTFSVBR(Block): 285 | """ 286 | NTFS Volume Boot Record 287 | """ 288 | def __init__(self, volume): 289 | super(NTFSVBR, self).__init__(volume, 0) 290 | # 0x0 291 | self.declare_field("byte", "jump", offset=0x0, count=3) 292 | # 0x3 OEM ID 293 | self.declare_field("qword", "oem_id") 294 | 295 | # The BIOS parameter block (BPB) 296 | # 0x0b Bytes Per Sector 297 | self.declare_field("word", "bytes_per_sector") 298 | # 0x0d Sectors Per Cluster. The number of sectors in a cluster 299 | self.declare_field("byte", "sectors_per_cluster") 300 | # Must be 0 301 | # 0x0e 302 | self.declare_field("word", "reserved_sectors") 303 | # 0x10 304 | self.declare_field("byte", "zero0", count=3) 305 | # 0x13 306 | self.declare_field("word", "unused0") 307 | # 0x15 Media Descriptor. Legacy 308 | self.declare_field("byte", "media_descriptor") 309 | # 0x16 310 | self.declare_field("word", "zero1") 311 | # 0x18 312 | self.declare_field("word", "sectors_per_track") 313 | # 0x1a 314 | self.declare_field("word", "number_of_heads") 315 | # 0x1c 316 | self.declare_field("dword", "hidden_sectors") 317 | # 0x20 Unused 318 | self.declare_field("dword", "unused1") 319 | 320 | # 0x24 Extended BPB 321 | self.declare_field("dword", "unused2") 322 | # 0x28 Total Sectors. 
The total number of sectors on the hard disk 323 | self.declare_field("qword", "total_sectors") 324 | # 0x30 Logical Cluster Number for the File $MFT 325 | self.declare_field("qword", "mft_lcn") 326 | # 0x38 Logical Cluster Number for the File $MFTMirr 327 | self.declare_field("qword", "mftmirr_lcn") 328 | # 0x40 Cluster Per MFT Record 329 | # The Number of Clusters for each MFT record, 330 | # which can be a negative number when the cluster size is larger 331 | # than the MFT File record 332 | # if the value is negative number, 333 | # the MFT record size in bytes equals 2**value 334 | self.declare_field("byte", "clusters_per_file_record_segment") 335 | # 0x41 Unused 336 | self.declare_field("byte", "unused3", count=3) 337 | # 0x44 Cluster Per Index Buffer.` 338 | self.declare_field("byte", "clusters_per_index_buffer") 339 | # 0x45 Unused 340 | self.declare_field("byte", "unused4", count=3) 341 | # 0x48 Volume Serial Number 342 | self.declare_field("qword", "volume_serial_number") 343 | # 0x50 Checksum. Not used by NTFS. 344 | self.declare_field("dword", "checksum") 345 | 346 | # 0x54 Bootstrap code 347 | self.declare_field("byte", "bootstrap_code", count=426) 348 | # 0x01fe End of sector 349 | self.declare_field("word", "end_of_sector") 350 | 351 | 352 | class ClusterAccessor(object): 353 | """ 354 | index volume data using `cluster_size` units 355 | """ 356 | def __init__(self, volume, cluster_size): 357 | super(ClusterAccessor, self).__init__() 358 | self._volume = volume 359 | self._cluster_size = cluster_size 360 | 361 | def __getitem__(self, index): 362 | size = self._cluster_size 363 | start, end = index * size, (index + 1) * size 364 | g_logger.debug('Get clusters %s:%s', start, end) 365 | return self._volume[start:end] 366 | 367 | def __getslice__(self, start, end): 368 | size = self._cluster_size 369 | start, end = start * size, end * size 370 | g_logger.debug('Get clusters %s:%s', start, end) 371 | return self._volume[start:end] 372 | 373 | def __len__(self): 374 | return len(self._volume) / self._cluster_size 375 | 376 | def get_cluster_size(self): 377 | return self._cluster_size 378 | 379 | 380 | INODE_MFT = 0 381 | INODE_MFTMIRR = 1 382 | INODE_LOGFILE = 2 383 | INODE_VOLUME = 3 384 | INODE_ATTR_DEF = 4 385 | INODE_ROOT = 5 386 | INODE_BITMAP = 6 387 | INODE_BOOT = 7 388 | INODE_BADCLUS = 8 389 | INODE_SECURE = 9 390 | INODE_UPCASE = 10 391 | INODE_EXTEND = 11 392 | INODE_RESERVED0 = 12 393 | INODE_RESERVED1 = 13 394 | INODE_RESERVED2 = 14 395 | INODE_RESERVED3 = 15 396 | INODE_FIRST_USER = 16 397 | 398 | 399 | class NonResidentAttributeData(object): 400 | """ 401 | expose a potentially non-continuous set of data runs as a single 402 | logical buffer 403 | 404 | once constructed, use this like a bytestring. 405 | you can unpack from it, slice it, etc. 
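for example (hypothetical run layout: two runs of two 1024-byte
clusters each):

    data = NonResidentAttributeData(clusters, runlist)
    len(data)      # 4096, summed across both runs
    data[0:4096]   # reads transparently across the run boundary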
406 | 407 | implementation note: this is likely a good place to optimize 408 | """ 409 | __unpackable__ = True 410 | def __init__(self, clusters, runlist): 411 | self._clusters = clusters 412 | self._runlist = runlist 413 | self._runentries = list(self._runlist.runs()) 414 | self._len = None 415 | 416 | def __getitem__(self, index): 417 | # TODO: clarify variable names and their units 418 | # units: bytes 419 | current_run_start_offset = 0 420 | 421 | if index < 0: 422 | index = len(self) + index 423 | 424 | clusters = self._clusters 425 | csize = clusters.get_cluster_size() 426 | 427 | # units: clusters 428 | for cluster_offset, num_clusters in self._runentries: 429 | # units: bytes 430 | run_length = num_clusters * csize 431 | right_border = current_run_start_offset + run_length 432 | 433 | # Check if the target byte in the run entry 434 | if current_run_start_offset <= index < right_border: 435 | # units: bytes 436 | target_idx = index - current_run_start_offset 437 | # The index of the cluster that contains the target byte 438 | target_cluster_idx = int(target_idx/csize) 439 | # The index of the target byte relative to the cluster 440 | byte_relative_idx = (target_idx - target_cluster_idx * csize) 441 | cluster = clusters[cluster_offset+target_cluster_idx] 442 | return cluster[byte_relative_idx] 443 | # else looking at next run entry 444 | current_run_start_offset += run_length 445 | raise IndexError("%d is greater than the non resident " 446 | "attribute data length %s", index, len(self)) 447 | 448 | def __getslice__(self, start, stop): 449 | """ 450 | 451 | :param start: start byte 452 | :param stop: stop byte 453 | :return: 454 | """ 455 | # TODO: there are some pretty bad inefficiencies here, i believe 456 | # TODO: clarify variable names and their units 457 | ret = bytearray() 458 | virt_byte_offset = 0 459 | have_found_start = False 460 | 461 | g_logger.debug("NonResidentAttributeData: getslice: " 462 | "start: %x end: %x", start, stop) 463 | _len = len(self) 464 | if stop == sys.maxint: 465 | stop = _len 466 | 467 | if stop < 0: 468 | stop = _len + stop 469 | 470 | if start < 0: 471 | start = _len + start 472 | 473 | if max(start, stop) > _len: 474 | raise IndexError("(%d, %d) is greater " 475 | "than the non resident attribute data length %s", 476 | start, stop, _len) 477 | clusters = self._clusters 478 | csize = clusters.get_cluster_size() 479 | for cluster_offset, num_clusters in self._runentries: 480 | g_logger.debug("NonResidentAttributeData: " 481 | "getslice: runentry: start: %x len: %x", 482 | cluster_offset * csize, num_clusters * csize) 483 | run_byte_len = num_clusters * csize 484 | # check if start byte in the data run 485 | virt_byte_stop = virt_byte_offset + run_byte_len 486 | is_start_in_run = (virt_byte_offset <= start < virt_byte_stop) 487 | 488 | if not have_found_start: 489 | if is_start_in_run: 490 | have_found_start = True 491 | else: 492 | virt_byte_offset += run_byte_len 493 | continue 494 | 495 | cluster_stop = cluster_offset + num_clusters 496 | _bytes = clusters[cluster_offset:cluster_stop] 497 | 498 | is_stop_in_run = stop <= virt_byte_stop 499 | # This is the situation when we have only one data run 500 | # everything is in this run 501 | if is_start_in_run and is_stop_in_run: 502 | return _bytes[start:stop] 503 | 504 | _start = _stop = None 505 | if is_start_in_run: 506 | _start = start - virt_byte_offset 507 | if is_stop_in_run: 508 | _stop = stop - virt_byte_offset 509 | # if start and stop are not in the data run, 510 | # then copy all bytes from 
the data run's clusters 511 | # _bytes[None:None] === _bytes[:] 512 | ret.extend(_bytes[_start:_stop]) 513 | virt_byte_offset += run_byte_len 514 | 515 | return ret 516 | 517 | def __len__(self): 518 | if self._len is not None: 519 | return self._len 520 | ret = 0 521 | for cluster_start, num_clusters in self._runentries: 522 | g_logger.debug("NonResidentAttributeData: len: run: " 523 | "cluster: %x len: %x", cluster_start, num_clusters) 524 | ret += num_clusters * self._clusters.get_cluster_size() 525 | self._len = ret 526 | return ret 527 | 528 | 529 | class NTFSFilesystem(object): 530 | def __init__(self, volume, cluster_size=None): 531 | oem_id = volume[3:7] 532 | assert oem_id == 'NTFS', 'Wrong OEM signature' 533 | 534 | super(NTFSFilesystem, self).__init__() 535 | self._volume = volume 536 | self._cluster_size = cluster_size 537 | vbr = self._vbr = NTFSVBR(volume) 538 | self._cluster_size = cluster_size = (cluster_size or 539 | vbr.bytes_per_sector() * 540 | vbr.sectors_per_cluster()) 541 | 542 | self._clusters = ClusterAccessor(volume, cluster_size) 543 | self._logger = logging.getLogger("NTFSFilesystem") 544 | 545 | # balance memory usage with performance 546 | try: 547 | b = self.get_mft_buffer() 548 | 549 | # test we can access last MFT byte, demonstrating we can 550 | # reach all runs 551 | _ = b[-1] 552 | except OverrunBufferException as e: 553 | g_logger.warning("failed to read MFT from image, will fall back to MFTMirr: %s", e) 554 | try: 555 | b = self.get_mftmirr_buffer() 556 | 557 | # test we can access last MFTMirr byte, demonstrating 558 | # we can reach all runs 559 | _ = b[-1] 560 | except OverrunBufferException as e: 561 | g_logger.error("failed to read MFTMirr from image: %s", e) 562 | raise CorruptNTFSFilesystemError("failed to read MFT or MFTMirr from image") 563 | 564 | if len(b) > 1024 * 1024 * 500: 565 | self._mft_data = b 566 | else: 567 | # note optimization: copy entire mft buffer from NonResidentNTFSAttribute 568 | # to avoid getslice lookups 569 | self._mft_data = b[:] 570 | self._enumerator = MFTEnumerator(self._mft_data) 571 | 572 | # test there's at least some user content (aside from root), or we'll 573 | # assume something's up 574 | try: 575 | _ = self.get_record(INODE_FIRST_USER) 576 | except OverrunBufferException: 577 | g_logger.error("overrun reading first user MFT record") 578 | raise CorruptNTFSFilesystemError("failed to read first user record (MFT not large enough)") 579 | 580 | def get_attribute_data(self, attribute): 581 | if attribute.non_resident() == 0: 582 | return attribute.value() 583 | else: 584 | return NonResidentAttributeData(self._clusters, attribute.runlist()) 585 | 586 | def get_mft_record(self): 587 | mft_lcn = self._vbr.mft_lcn() 588 | g_logger.debug("mft: %x", mft_lcn * 4096) 589 | mft_chunk = self._clusters[mft_lcn] 590 | mft_record = MFTRecord(mft_chunk, 0, None, inode=INODE_MFT) 591 | return mft_record 592 | 593 | def get_mft_buffer(self): 594 | mft_lcn = self._vbr.mft_lcn() 595 | g_logger.debug("mft: %x", mft_lcn * 4096) 596 | mft_chunk = self._clusters[mft_lcn] 597 | mft_record = MFTRecord(mft_chunk, 0, None, inode=INODE_MFT) 598 | mft_data_attribute = mft_record.data_attribute() 599 | return self.get_attribute_data(mft_data_attribute) 600 | 601 | def get_mftmirr_buffer(self): 602 | g_logger.debug("mft mirr: %s", hex(self._vbr.mftmirr_lcn() * 4096)) 603 | mftmirr_chunk = self._clusters[self._vbr.mftmirr_lcn()] 604 | mftmirr_mft_record = MFTRecord(mftmirr_chunk, INODE_MFTMIRR * MFT_RECORD_SIZE, None, inode=INODE_MFTMIRR) 
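# note: the mirror chunk opens with the copy of record 0 ($MFT), so the
# record describing $MFTMirr itself sits one record further in; hence the
# INODE_MFTMIRR * MFT_RECORD_SIZE offset above.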
605 | mftmirr_data_attribute = mftmirr_mft_record.data_attribute() 606 | return self.get_attribute_data(mftmirr_data_attribute) 607 | 608 | def get_root_directory(self): 609 | return NTFSDirectory(self, self._enumerator.get_record(INODE_ROOT)) 610 | 611 | def get_record(self, record_number): 612 | g_logger.debug("get_record: %d", record_number) 613 | return self._enumerator.get_record(record_number) 614 | 615 | def get_record_path(self, record): 616 | return self._enumerator.get_path(record) 617 | 618 | def get_record_parent(self, record): 619 | """ 620 | @raises NoParentError: on various error conditions 621 | """ 622 | if record.mft_record_number() == 5: 623 | raise NoParentError("Root directory has no parent") 624 | 625 | fn = record.filename_information() 626 | if not fn: 627 | raise NoParentError("File has no filename attribute") 628 | 629 | parent_record_num = MREF(fn.mft_parent_reference()) 630 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 631 | 632 | try: 633 | parent_record = self._enumerator.get_record(parent_record_num) 634 | except (OverrunBufferException, InvalidRecordException): 635 | raise NoParentError("Invalid parent MFT record") 636 | 637 | if parent_record.sequence_number() != parent_seq_num: 638 | raise NoParentError("Invalid parent MFT record (bad sequence number)") 639 | 640 | return NTFSDirectory(self, parent_record) 641 | 642 | def get_record_children(self, record): 643 | # we use a map here to de-dup entries with different filename types 644 | # such as 8.3, POSIX, or Windows, but the same ultimate MFT reference 645 | ret = {} # type: dict(int, MFTRecord) 646 | if not record.is_directory(): 647 | return ret.values() 648 | 649 | # TODO: cleanup the duplication here 650 | try: 651 | indx_alloc_attr = record.attribute(ATTR_TYPE.INDEX_ALLOCATION) 652 | indx_alloc = INDEX_ALLOCATION(self.get_attribute_data(indx_alloc_attr), 0) 653 | #g_logger.debug("INDEX_ALLOCATION len: %s", hex(len(indx_alloc))) 654 | #g_logger.debug("alloc:\n%s", indx_alloc.get_all_string(indent=2)) 655 | indx = indx_alloc 656 | 657 | for block in indx.blocks(): 658 | for entry in block.index().entries(): 659 | ref = MREF(entry.header().mft_reference()) 660 | if ref == INODE_ROOT and \ 661 | entry.filename_information().filename() == ".": 662 | continue 663 | ret[ref] = self._enumerator.get_record(ref) 664 | 665 | except AttributeNotFoundError: 666 | indx_root_attr = record.attribute(ATTR_TYPE.INDEX_ROOT) 667 | indx_root = INDEX_ROOT(self.get_attribute_data(indx_root_attr), 0) 668 | indx = indx_root 669 | 670 | for entry in indx.index().entries(): 671 | ref = MREF(entry.header().mft_reference()) 672 | if ref == INODE_ROOT and \ 673 | entry.filename_information().filename() == ".": 674 | continue 675 | ret[ref] = self._enumerator.get_record(ref) 676 | 677 | return ret.values() 678 | 679 | 680 | def main(): 681 | import sys 682 | from ntfs.volume import FlatVolume 683 | from ntfs.BinaryParser import Mmap 684 | from ntfs.mft.MFT import MFTEnumerator 685 | logging.basicConfig(level=logging.DEBUG) 686 | 687 | with Mmap(sys.argv[1]) as buf: 688 | v = FlatVolume(buf, int(sys.argv[2])) 689 | fs = NTFSFilesystem(v) 690 | root = fs.get_root_directory() 691 | g_logger.info("root dir: %s", root) 692 | for c in root.get_children(): 693 | g_logger.info(" - %s", c.get_name()) 694 | 695 | sys32 = root.get_path_entry("windows\\system32") 696 | g_logger.info("sys32: %s", sys32) 697 | 698 | 699 | if __name__ == "__main__": 700 | main() 701 | 
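As a quick orientation to the filesystem API above, here is a minimal sketch (not part of the package; the image path "image.dd" and the volume offset are hypothetical) that walks the directory tree of an NTFS image, much as main() does:

import logging

from ntfs.volume import FlatVolume
from ntfs.BinaryParser import Mmap
from ntfs.filesystem import NTFSFilesystem, NTFSDirectory


def walk(entry, depth=0):
    # print each entry indented by its depth, recursing into directories
    print("  " * depth + entry.get_name())
    if isinstance(entry, NTFSDirectory):
        for child in entry.get_children():
            walk(child, depth + 1)


logging.basicConfig(level=logging.INFO)
with Mmap("image.dd") as buf:   # hypothetical image path
    v = FlatVolume(buf, 0)      # 0 is the byte offset of the volume in the image
    fs = NTFSFilesystem(v)
    walk(fs.get_root_directory())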
-------------------------------------------------------------------------------- /ntfs/logfile/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/logfile/__init__.py -------------------------------------------------------------------------------- /ntfs/mft/.MFT.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/mft/.MFT.py.swp -------------------------------------------------------------------------------- /ntfs/mft/MFT.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import array 4 | import os 5 | import sys 6 | import struct 7 | import logging 8 | from datetime import datetime 9 | from collections import OrderedDict # python 2.7 only 10 | 11 | from .. import Progress 12 | from .. import BinaryParser 13 | from ..BinaryParser import Block 14 | from ..BinaryParser import Nestable 15 | 16 | 17 | g_logger = logging.getLogger("ntfs.mft") 18 | 19 | 20 | class INDXException(Exception): 21 | """ 22 | Base Exception class for INDX parsing. 23 | """ 24 | def __init__(self, value): 25 | """ 26 | Constructor. 27 | Arguments: 28 | - `value`: A string description. 29 | """ 30 | super(INDXException, self).__init__() 31 | self._value = value 32 | 33 | def __str__(self): 34 | return "INDX Exception: %s" % (self._value) 35 | 36 | 37 | class FixupBlock(Block): 38 | """ 39 | a fixup block requires modification to the underlying buffer. 40 | - we don't want to do it to the underlying buffer 41 | - if its mmapped, we'd change the source file 42 | - if its a string, then this would raise an exception 43 | - we can keep a shadow file/buffer for writes to the underlying storage 44 | - this is most complete 45 | - also most complex to implement 46 | - we can make a copy of the buffer, and work with that 47 | we take the third option for ease of implementation 48 | 49 | some notes: 50 | - we change the buffer for this object from whats passed to the constructor 51 | - we change the offset for this object from whats passed to the constructor 52 | - we assume the total object size is no greater than the size of the fixups! 
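a concrete example of the scheme (values hypothetical): with
num_fixups = 3, the update sequence array holds one check word followed
by two replacement words; the last word of each 512-byte sector must
match the check word, and fixup() patches it back to the corresponding
replacement before parsing continues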
53 | """ 54 | def __init__(self, buf, offset, parent): 55 | super(FixupBlock, self).__init__(buf, offset) 56 | 57 | def fixup(self, num_fixups, fixup_value_offset): 58 | fixup_buffer = array.array("b", self.unpack_binary(0, length=(num_fixups - 1) * 512)) 59 | self._buf = fixup_buffer 60 | self._offset = 0 61 | 62 | fixup_value = self.unpack_word(fixup_value_offset) 63 | 64 | for i in range(0, num_fixups - 1): 65 | fixup_offset = 512 * (i + 1) - 2 66 | check_value = self.unpack_word(fixup_offset) 67 | 68 | if check_value != fixup_value: 69 | logging.warning("Bad fixup at %s", hex(self.offset() + fixup_offset)) 70 | continue 71 | 72 | new_value = self.unpack_word(fixup_value_offset + 2 + 2 * i) 73 | self.pack_word(fixup_offset, new_value) 74 | 75 | check_value = self.unpack_word(fixup_offset) 76 | g_logger.debug("Fixup verified at %s and patched from %s to %s.", 77 | hex(self.offset() + fixup_offset), 78 | hex(fixup_value), hex(check_value)) 79 | 80 | 81 | class INDEX_ENTRY_FLAGS: 82 | """ 83 | sizeof() == WORD 84 | """ 85 | INDEX_ENTRY_NODE = 0x1 86 | INDEX_ENTRY_END = 0x2 87 | INDEX_ENTRY_SPACE_FILLER = 0xFFFF 88 | 89 | 90 | class INDEX_ENTRY_HEADER(Block, Nestable): 91 | def __init__(self, buf, offset, parent): 92 | super(INDEX_ENTRY_HEADER, self).__init__(buf, offset) 93 | self.declare_field("word", "length", 0x8) 94 | self.declare_field("word", "key_length") 95 | self.declare_field("word", "index_entry_flags") # see INDEX_ENTRY_FLAGS 96 | self.declare_field("word", "reserved") 97 | 98 | @staticmethod 99 | def structure_size(buf, offset, parent): 100 | return 0x10 101 | 102 | def __len__(self): 103 | return 0x10 104 | 105 | def is_index_entry_node(self): 106 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_NODE 107 | 108 | def is_index_entry_end(self): 109 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_END 110 | 111 | def is_index_entry_space_filler(self): 112 | return self.index_entry_flags() & INDEX_ENTRY_FLAGS.INDEX_ENTRY_SPACE_FILLER 113 | 114 | 115 | class MFT_INDEX_ENTRY_HEADER(INDEX_ENTRY_HEADER): 116 | """ 117 | Index used by the MFT for INDX attributes. 118 | """ 119 | def __init__(self, buf, offset, parent): 120 | super(MFT_INDEX_ENTRY_HEADER, self).__init__(buf, offset, parent) 121 | self.declare_field("qword", "mft_reference", 0x0) 122 | 123 | 124 | class SECURE_INDEX_ENTRY_HEADER(INDEX_ENTRY_HEADER): 125 | """ 126 | Index used by the $SECURE file indices SII and SDH 127 | """ 128 | def __init__(self, buf, offset, parent): 129 | super(SECURE_INDEX_ENTRY_HEADER, self).__init__(buf, offset, parent) 130 | self.declare_field("word", "data_offset", 0x0) 131 | self.declare_field("word", "data_length") 132 | self.declare_field("dword", "reserved") 133 | 134 | 135 | class INDEX_ENTRY(Block, Nestable): 136 | """ 137 | NOTE: example structure. See the more specific classes below. 138 | Probably do not instantiate. 
139 | """ 140 | def __init__(self, buf, offset, parent): 141 | super(INDEX_ENTRY, self).__init__(buf, offset) 142 | self.declare_field(INDEX_ENTRY_HEADER, "header", 0x0) 143 | self.add_explicit_field(0x10, "string", "data") 144 | 145 | def data(self): 146 | start = self.offset() + 0x10 147 | end = start + self.header().key_length() 148 | return self._buf[start:end] 149 | 150 | @staticmethod 151 | def structure_size(buf, offset, parent): 152 | return BinaryParser.read_word(buf, offset + 0x8) 153 | 154 | def __len__(self): 155 | return self.header().length() 156 | 157 | def is_valid(self): 158 | return True 159 | 160 | 161 | class MFT_INDEX_ENTRY(Block, Nestable): 162 | """ 163 | Index entry for the MFT directory index $I30, attribute type 0x90. 164 | """ 165 | def __init__(self, buf, offset, parent): 166 | super(MFT_INDEX_ENTRY, self).__init__(buf, offset) 167 | self.declare_field(MFT_INDEX_ENTRY_HEADER, "header", 0x0) 168 | self.declare_field(FilenameAttribute, "filename_information") 169 | 170 | @staticmethod 171 | def structure_size(buf, offset, parent): 172 | return BinaryParser.read_word(buf, offset + 0x8) 173 | 174 | def __len__(self): 175 | return self.header().length() 176 | 177 | def is_valid(self): 178 | # this is a bit of a mess, but it should work 179 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 180 | future_date = datetime(2025, 1, 1, 0, 0, 0) 181 | try: 182 | fn = self.filename_information() 183 | except: 184 | return False 185 | if not fn: 186 | return False 187 | try: 188 | return fn.modified_time() > recent_date and \ 189 | fn.accessed_time() > recent_date and \ 190 | fn.changed_time() > recent_date and \ 191 | fn.created_time() > recent_date and \ 192 | fn.modified_time() < future_date and \ 193 | fn.accessed_time() < future_date and \ 194 | fn.changed_time() < future_date and \ 195 | fn.created_time() < future_date 196 | except ValueError: 197 | return False 198 | 199 | 200 | class SII_INDEX_ENTRY(Block, Nestable): 201 | """ 202 | Index entry for the $SECURE:$SII index. 203 | """ 204 | def __init__(self, buf, offset, parent): 205 | super(SII_INDEX_ENTRY, self).__init__(buf, offset) 206 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 207 | self.declare_field("dword", "security_id") 208 | 209 | @staticmethod 210 | def structure_size(buf, offset, parent): 211 | return BinaryParser.read_word(buf, offset + 0x8) 212 | 213 | def __len__(self): 214 | return self.header().length() 215 | 216 | def is_valid(self): 217 | # TODO(wb): test 218 | return 1 < self.header().length() < 0x30 and \ 219 | 1 < self.header().key_lenght() < 0x20 220 | 221 | 222 | class SDH_INDEX_ENTRY(Block, Nestable): 223 | """ 224 | Index entry for the $SECURE:$SDH index. 
225 | """ 226 | def __init__(self, buf, offset, parent): 227 | super(SDH_INDEX_ENTRY, self).__init__(buf, offset) 228 | self.declare_field(SECURE_INDEX_ENTRY_HEADER, "header", 0x0) 229 | self.declare_field("dword", "hash") 230 | self.declare_field("dword", "security_id") 231 | 232 | @staticmethod 233 | def structure_size(buf, offset, parent): 234 | return BinaryParser.read_word(buf, offset + 0x8) 235 | 236 | def __len__(self): 237 | return self.header().length() 238 | 239 | def is_valid(self): 240 | # TODO(wb): test 241 | return 1 < self.header().length() < 0x30 and \ 242 | 1 < self.header().key_lenght() < 0x20 243 | 244 | 245 | class INDEX_HEADER_FLAGS: 246 | SMALL_INDEX = 0x0 # MFT: INDX_ROOT only 247 | LARGE_INDEX = 0x1 # MFT: requires INDX_ALLOCATION 248 | LEAF_NODE = 0x1 249 | INDEX_NODE = 0x2 250 | NODE_MASK = 0x1 251 | 252 | 253 | class INDEX_HEADER(Block, Nestable): 254 | def __init__(self, buf, offset, parent): 255 | super(INDEX_HEADER, self).__init__(buf, offset) 256 | self.declare_field("dword", "entries_offset", 0x0) 257 | self.declare_field("dword", "index_length") 258 | self.declare_field("dword", "allocated_size") 259 | self.declare_field("byte", "index_header_flags") # see INDEX_HEADER_FLAGS 260 | # then 3 bytes padding/reserved 261 | 262 | @staticmethod 263 | def structure_size(buf, offset, parent): 264 | return 0x1C 265 | 266 | def __len__(self): 267 | return 0x1C 268 | 269 | def is_small_index(self): 270 | return self.index_header_flags() & INDEX_HEADER_FLAGS.SMALL_INDEX 271 | 272 | def is_large_index(self): 273 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LARGE_INDEX 274 | 275 | def is_leaf_node(self): 276 | return self.index_header_flags() & INDEX_HEADER_FLAGS.LEAF_NODE 277 | 278 | def is_index_node(self): 279 | return self.index_header_flags() & INDEX_HEADER_FLAGS.INDEX_NODE 280 | 281 | def is_NODE_MASK(self): 282 | return self.index_header_flags() & INDEX_HEADER_FLAGS.NODE_MASK 283 | 284 | 285 | class INDEX(Block, Nestable): 286 | def __init__(self, buf, offset, parent, index_entry_class): 287 | self._INDEX_ENTRY = index_entry_class 288 | super(INDEX, self).__init__(buf, offset) 289 | self.declare_field(INDEX_HEADER, "header", 0x0) 290 | self.add_explicit_field(self.header().entries_offset(), 291 | INDEX_ENTRY, "entries") 292 | slack_start = self.header().entries_offset() + self.header().index_length() 293 | # TODO: reenable 294 | #self.add_explicit_field(slack_start, INDEX_ENTRY, "slack_entries") 295 | 296 | @staticmethod 297 | def structure_size(buf, offset, parent): 298 | return BinaryParser.read_dword(buf, offset + 0x8) 299 | 300 | def __len__(self): 301 | return self.header().allocated_size() 302 | 303 | def entries(self): 304 | """ 305 | A generator that returns each INDEX_ENTRY associated with this node. 306 | """ 307 | offset = self.header().entries_offset() 308 | if offset == 0: 309 | return 310 | while offset <= self.header().index_length() - 0x52: 311 | e = self._INDEX_ENTRY(self._buf, self.offset() + offset, self) 312 | offset += len(e) 313 | yield e 314 | 315 | def slack_entries(self): 316 | """ 317 | A generator that yields INDEX_ENTRYs found in the slack space 318 | associated with this header. 
319 | """ 320 | offset = self.header().index_length() 321 | try: 322 | while offset <= self.header().allocated_size() - 0x52: 323 | try: 324 | g_logger.debug("Trying to find slack entry at %s.", hex(offset)) 325 | e = self._INDEX_ENTRY(self._buf, offset, self) 326 | if e.is_valid(): 327 | g_logger.debug("Slack entry is valid.") 328 | offset += len(e) or 1 329 | yield e 330 | else: 331 | g_logger.debug("Slack entry is invalid.") 332 | # TODO(wb): raise a custom exception 333 | raise BinaryParser.ParseException("Not a deleted entry") 334 | except BinaryParser.ParseException: 335 | g_logger.debug("Scanning one byte forward.") 336 | offset += 1 337 | except struct.error: 338 | logging.debug("Slack entry parsing overran buffer.") 339 | pass 340 | 341 | 342 | class INDEX_ROOT(Block, Nestable): 343 | def __init__(self, buf, offset, parent=None): 344 | super(INDEX_ROOT, self).__init__(buf, offset) 345 | self.declare_field("dword", "type", 0x0) 346 | self.declare_field("dword", "collation_rule") 347 | self.declare_field("dword", "index_record_size_bytes") 348 | self.declare_field("byte", "index_record_size_clusters") 349 | self.declare_field("byte", "unused1") 350 | self.declare_field("byte", "unused2") 351 | self.declare_field("byte", "unused3") 352 | self._index_offset = self.current_field_offset() 353 | self.add_explicit_field(self._index_offset, INDEX, "index") 354 | 355 | def index(self): 356 | return INDEX(self._buf, self._offset + self._index_offset, 357 | self, MFT_INDEX_ENTRY) 358 | 359 | @staticmethod 360 | def structure_size(buf, offset, parent): 361 | return 0x10 + INDEX.structure_size(buf, offset + 0x10, parent) 362 | 363 | def __len__(self): 364 | return 0x10 + len(self.index()) 365 | 366 | 367 | class NTATTR_STANDARD_INDEX_HEADER(Block): 368 | def __init__(self, buf, offset, parent): 369 | super(NTATTR_STANDARD_INDEX_HEADER, self).__init__(buf, offset) 370 | self.declare_field("dword", "entry_list_start", 0x0) 371 | self.declare_field("dword", "entry_list_end") 372 | self.declare_field("dword", "entry_list_allocation_end") 373 | self.declare_field("dword", "flags") 374 | self.declare_field("binary", "list_buffer", \ 375 | self.entry_list_start(), 376 | self.entry_list_allocation_end() - self.entry_list_start()) 377 | 378 | def entries(self): 379 | """ 380 | A generator that returns each INDX entry associated with this node. 381 | """ 382 | offset = self.entry_list_start() 383 | if offset == 0: 384 | return 385 | 386 | # 0x52 is an approximate size of a small index entry 387 | while offset <= self.entry_list_end() - 0x52: 388 | e = IndexEntry(self._buf, self.offset() + offset, self) 389 | offset += e.length() 390 | yield e 391 | 392 | def slack_entries(self): 393 | """ 394 | A generator that yields INDX entries found in the slack space 395 | associated with this header. 
396 | """ 397 | offset = self.entry_list_end() 398 | try: 399 | # 0x52 is an approximate size of a small index entry 400 | while offset <= self.entry_list_allocation_end() - 0x52: 401 | try: 402 | e = SlackIndexEntry(self._buf, offset, self) 403 | if e.is_valid(): 404 | offset += e.length() or 1 405 | yield e 406 | else: 407 | # TODO(wb): raise a custom exception 408 | raise BinaryParser.ParseException("Not a deleted entry") 409 | except BinaryParser.ParseException: 410 | # ensure we're always moving forward 411 | offset += 1 412 | except struct.error: 413 | pass 414 | 415 | 416 | class IndexRootHeader(Block): 417 | def __init__(self, buf, offset, parent): 418 | super(IndexRootHeader, self).__init__(buf, offset) 419 | self.declare_field("dword", "type", 0x0) 420 | self.declare_field("dword", "collation_rule") 421 | self.declare_field("dword", "index_record_size_bytes") 422 | self.declare_field("byte", "index_record_size_clusters") 423 | self.declare_field("byte", "unused1") 424 | self.declare_field("byte", "unused2") 425 | self.declare_field("byte", "unused3") 426 | self._node_header_offset = self.current_field_offset() 427 | 428 | def node_header(self): 429 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 430 | self.offset() + self._node_header_offset, 431 | self) 432 | 433 | 434 | class IndexRecordHeader(FixupBlock): 435 | def __init__(self, buf, offset, parent): 436 | super(IndexRecordHeader, self).__init__(buf, offset, parent) 437 | self.declare_field("dword", "magic", 0x0) 438 | self.declare_field("word", "usa_offset") 439 | self.declare_field("word", "usa_count") 440 | self.declare_field("qword", "lsn") 441 | self.declare_field("qword", "vcn") 442 | self._node_header_offset = self.current_field_offset() 443 | self.fixup(self.usa_count(), self.usa_offset()) 444 | 445 | def node_header(self): 446 | return NTATTR_STANDARD_INDEX_HEADER(self._buf, 447 | self.offset() + self._node_header_offset, 448 | self) 449 | 450 | 451 | class INDEX_BLOCK(FixupBlock): 452 | def __init__(self, buf, offset, parent=None): 453 | super(INDEX_BLOCK, self).__init__(buf, offset, parent) 454 | self.declare_field("dword", "magic", 0x0) 455 | self.declare_field("word", "usa_offset") 456 | self.declare_field("word", "usa_count") 457 | self.declare_field("qword", "lsn") 458 | self.declare_field("qword", "vcn") 459 | self._index_offset = self.current_field_offset() 460 | self.add_explicit_field(self._index_offset, INDEX, "index") 461 | self.fixup(self.usa_count(), self.usa_offset()) 462 | 463 | def index(self): 464 | return INDEX(self._buf, self._offset + self._index_offset, 465 | self, MFT_INDEX_ENTRY) 466 | 467 | @staticmethod 468 | def structure_size(buf, offset, parent): 469 | return 0x30 + INDEX.structure_size(buf, offset + 0x10, parent) 470 | 471 | def __len__(self): 472 | return 0x1000 473 | 474 | 475 | class INDEX_ALLOCATION(FixupBlock): 476 | def __init__(self, buf, offset, parent=None): 477 | super(INDEX_ALLOCATION, self).__init__(buf, offset, parent) 478 | self.add_explicit_field(0, INDEX_BLOCK, "blocks") 479 | 480 | @staticmethod 481 | def guess_num_blocks(buf, offset): 482 | count = 0 483 | # TODO: don't hardcode things 484 | BLOCK_SIZE = 0x1000 485 | try: 486 | while BinaryParser.read_dword(buf, offset) == 0x58444e49: # "INDX" 487 | offset += BLOCK_SIZE 488 | count += 1 489 | except (IndexError, BinaryParser.OverrunBufferException): 490 | return count 491 | return count 492 | 493 | def blocks(self): 494 | for i in xrange(INDEX_ALLOCATION.guess_num_blocks(self._buf, self.offset())): 495 | # TODO: don't 
hardcode things 496 | yield INDEX_BLOCK(self._buf, self._offset + 0x1000 * i) 497 | 498 | @staticmethod 499 | def structure_size(buf, offset, parent): 500 | # TODO: don't hardcode things 501 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(buf, offset) 502 | 503 | def __len__(self): 504 | # TODO: don't hardcode things 505 | return 0x1000 * INDEX_ALLOCATION.guess_num_blocks(self._buf, self._offset) 506 | 507 | 508 | class IndexEntry(Block): 509 | def __init__(self, buf, offset, parent): 510 | super(IndexEntry, self).__init__(buf, offset) 511 | self.declare_field("qword", "mft_reference", 0x0) 512 | self.declare_field("word", "length") 513 | self.declare_field("word", "filename_information_length") 514 | self.declare_field("dword", "flags") 515 | self.declare_field("binary", "filename_information_buffer", \ 516 | self.current_field_offset(), 517 | self.filename_information_length()) 518 | self.declare_field("qword", "child_vcn", 519 | BinaryParser.align(self.current_field_offset(), 0x8)) 520 | 521 | def filename_information(self): 522 | return FilenameAttribute(self._buf, 523 | self.offset() + self._off_filename_information_buffer, 524 | self) 525 | 526 | 527 | class StandardInformationFieldDoesNotExist(Exception): 528 | def __init__(self, msg): 529 | self._msg = msg 530 | 531 | def __str__(self): 532 | return "Standard Information attribute field does not exist: %s" % (self._msg) 533 | 534 | 535 | class StandardInformation(Block): 536 | # TODO(wb): implement sizing so we can make this nestable 537 | def __init__(self, buf, offset, parent): 538 | super(StandardInformation, self).__init__(buf, offset) 539 | self.declare_field("filetime", "created_time", 0x0) 540 | self.declare_field("filetime", "modified_time") 541 | self.declare_field("filetime", "changed_time") 542 | self.declare_field("filetime", "accessed_time") 543 | self.declare_field("dword", "attributes") 544 | self.declare_field("binary", "reserved", self.current_field_offset(), 0xC) 545 | # self.declare_field("dword", "owner_id", 0x30) # Win2k+, NTFS 3.x 546 | # self.declare_field("dword", "security_id") # Win2k+, NTFS 3.x 547 | # self.declare_field("qword", "quota_charged") # Win2k+, NTFS 3.x 548 | # self.declare_field("qword", "usn") # Win2k+, NTFS 3.x 549 | 550 | # Can't implement this unless we know the NTFS version in use 551 | #@staticmethod 552 | #def structure_size(buf, offset, parent): 553 | # return 0x42 + (read_byte(buf, offset + 0x40) * 2) 554 | 555 | # Can't implement this unless we know the NTFS version in use 556 | #def __len__(self): 557 | # return 0x42 + (self.filename_length() * 2) 558 | 559 | def owner_id(self): 560 | """ 561 | This is an explicit method because it may not exist in OSes under Win2k 562 | 563 | @raises StandardInformationFieldDoesNotExist 564 | """ 565 | try: 566 | return self.unpack_dword(0x30) 567 | except BinaryParser.OverrunBufferException: 568 | raise StandardInformationFieldDoesNotExist("Owner ID") 569 | 570 | def security_id(self): 571 | """ 572 | This is an explicit method because it may not exist in OSes under Win2k 573 | 574 | @raises StandardInformationFieldDoesNotExist 575 | """ 576 | try: 577 | return self.unpack_dword(0x34) 578 | except BinaryParser.OverrunBufferException: 579 | raise StandardInformationFieldDoesNotExist("Security ID") 580 | 581 | def quota_charged(self): 582 | """ 583 | This is an explicit method because it may not exist in OSes under Win2k 584 | 585 | @raises StandardInformationFieldDoesNotExist 586 | """ 587 | try: 588 | return self.unpack_dword(0x38) 589 | 
except BinaryParser.OverrunBufferException: 590 | raise StandardInformationFieldDoesNotExist("Quota Charged") 591 | 592 | def usn(self): 593 | """ 594 | This is an explicit method because the field does not exist on OSes older than Win2k. 595 | 596 | @raises StandardInformationFieldDoesNotExist 597 | """ 598 | try: 599 | return self.unpack_qword(0x40) # the USN is a qword field; a dword read would truncate it 600 | except BinaryParser.OverrunBufferException: 601 | raise StandardInformationFieldDoesNotExist("USN") 602 | 603 | 604 | class FilenameAttribute(Block, Nestable): 605 | def __init__(self, buf, offset, parent): 606 | super(FilenameAttribute, self).__init__(buf, offset) 607 | self.declare_field("qword", "mft_parent_reference", 0x0) 608 | self.declare_field("filetime", "created_time") 609 | self.declare_field("filetime", "modified_time") 610 | self.declare_field("filetime", "changed_time") 611 | self.declare_field("filetime", "accessed_time") 612 | self.declare_field("qword", "physical_size") 613 | self.declare_field("qword", "logical_size") 614 | self.declare_field("dword", "flags") 615 | self.declare_field("dword", "reparse_value") 616 | self.declare_field("byte", "filename_length") 617 | self.declare_field("byte", "filename_type") 618 | self.declare_field("wstring", "filename", 0x42, self.filename_length()) 619 | 620 | @staticmethod 621 | def structure_size(buf, offset, parent): 622 | return 0x42 + (BinaryParser.read_byte(buf, offset + 0x40) * 2) 623 | 624 | def __len__(self): 625 | return 0x42 + (self.filename_length() * 2) 626 | 627 | 628 | class SlackIndexEntry(IndexEntry): 629 | def __init__(self, buf, offset, parent): 630 | """ 631 | Constructor. 632 | Arguments: 633 | - `buf`: Byte string containing NTFS INDX file 634 | - `offset`: The offset into the buffer at which the block starts. 635 | - `parent`: The parent NTATTR_STANDARD_INDEX_HEADER block, 636 | which links to this block. 
637 | """ 638 | super(SlackIndexEntry, self).__init__(buf, offset, parent) 639 | 640 | def is_valid(self): 641 | # this is a bit of a mess, but it should work 642 | recent_date = datetime(1990, 1, 1, 0, 0, 0) 643 | future_date = datetime(2025, 1, 1, 0, 0, 0) 644 | try: 645 | fn = self.filename_information() 646 | except Exception: 647 | return False 648 | if not fn: 649 | return False 650 | try: 651 | return fn.modified_time() > recent_date and \ 652 | fn.accessed_time() > recent_date and \ 653 | fn.changed_time() > recent_date and \ 654 | fn.created_time() > recent_date and \ 655 | fn.modified_time() < future_date and \ 656 | fn.accessed_time() < future_date and \ 657 | fn.changed_time() < future_date and \ 658 | fn.created_time() < future_date 659 | except ValueError: 660 | return False 661 | 662 | 663 | class Runentry(Block, Nestable): 664 | def __init__(self, buf, offset, parent): 665 | super(Runentry, self).__init__(buf, offset) 666 | self.declare_field("byte", "header") 667 | self._offset_length = self.header() >> 4 668 | self._length_length = self.header() & 0x0F 669 | self.declare_field("binary", 670 | "length_binary", 671 | self.current_field_offset(), self._length_length) 672 | self.declare_field("binary", 673 | "offset_binary", 674 | self.current_field_offset(), self._offset_length) 675 | 676 | @staticmethod 677 | def structure_size(buf, offset, parent): 678 | b = BinaryParser.read_byte(buf, offset) 679 | return (b >> 4) + (b & 0x0F) + 1 680 | 681 | def __len__(self): 682 | return 0x1 + (self._length_length + self._offset_length) 683 | 684 | def is_valid(self): 685 | return self._offset_length > 0 and self._length_length > 0 686 | 687 | def lsb2num(self, binary): 688 | count = 0 689 | ret = 0 690 | for b in binary: 691 | ret += ord(b) << (8 * count) 692 | count += 1 693 | return ret 694 | 695 | def lsb2signednum(self, binary): 696 | count = 0 697 | ret = 0 698 | working = [] 699 | 700 | is_negative = (ord(binary[-1]) & (1 << 7) != 0) 701 | if is_negative: 702 | working = [ord(b) ^ 0xFF for b in binary] 703 | else: 704 | working = [ord(b) for b in binary] 705 | for b in working: 706 | ret += b << (8 * count) 707 | count += 1 708 | if is_negative: 709 | ret += 1 710 | ret *= -1 711 | return ret 712 | 713 | def offset(self): 714 | # TODO(wb): make this run_offset 715 | return self.lsb2signednum(self.offset_binary()) 716 | 717 | def length(self): 718 | # TODO(wb): make this run_length 719 | return self.lsb2num(self.length_binary()) 720 | 721 | 722 | class Runlist(Block): 723 | def __init__(self, buf, offset, parent): 724 | super(Runlist, self).__init__(buf, offset) 725 | 726 | @staticmethod 727 | def structure_size(buf, offset, parent): 728 | length = 0 729 | while True: 730 | b = BinaryParser.read_byte(buf, offset + length) 731 | length += 1 732 | if b == 0: 733 | return length 734 | 735 | length += (b >> 4) + (b & 0x0F) 736 | 737 | def __len__(self): 738 | return sum(map(len, self._entries())) 739 | 740 | def _entries(self, length=None): 741 | ret = [] 742 | offset = self.offset() 743 | entry = Runentry(self._buf, offset, self) 744 | while entry.header() != 0 and \ 745 | (not length or offset < self.offset() + length) and \ 746 | entry.is_valid(): 747 | ret.append(entry) 748 | offset += len(entry) 749 | entry = Runentry(self._buf, offset, self) 750 | return ret 751 | 752 | def runs(self, length=None): 753 | """ 754 | Yields tuples (volume offset, length). 
755 | Recall that the entries are relative to one another. 756 | """ 757 | last_offset = 0 758 | for e in self._entries(length=length): 759 | current_offset = last_offset + e.offset() 760 | current_length = e.length() 761 | last_offset = current_offset 762 | yield (current_offset, current_length) 763 | 764 | 765 | class ATTR_TYPE: 766 | STANDARD_INFORMATION = 0x10 767 | FILENAME_INFORMATION = 0x30 768 | DATA = 0x80 769 | INDEX_ROOT = 0x90 770 | INDEX_ALLOCATION = 0xA0 771 | 772 | 773 | class Attribute(Block, Nestable): 774 | TYPES = { 775 | 16: "$STANDARD INFORMATION", 776 | 32: "$ATTRIBUTE LIST", 777 | 48: "$FILENAME INFORMATION", 778 | 64: "$OBJECT ID/$VOLUME VERSION", 779 | 80: "$SECURITY DESCRIPTOR", 780 | 96: "$VOLUME NAME", 781 | 112: "$VOLUME INFORMATION", 782 | 128: "$DATA", 783 | 144: "$INDEX ROOT", 784 | 160: "$INDEX ALLOCATION", 785 | 176: "$BITMAP", 786 | 192: "$SYMBOLIC LINK", 787 | 208: "$REPARSE POINT/$EA INFORMATION", 788 | 224: "$EA", 789 | 256: "$LOGGED UTILITY STREAM", 790 | } 791 | 792 | FLAGS = { 793 | 0x01: "readonly", 794 | 0x02: "hidden", 795 | 0x04: "system", 796 | 0x08: "unused-dos", 797 | 0x10: "directory-dos", 798 | 0x20: "archive", 799 | 0x40: "device", 800 | 0x80: "normal", 801 | 0x100: "temporary", 802 | 0x200: "sparse", 803 | 0x400: "reparse-point", 804 | 0x800: "compressed", 805 | 0x1000: "offline", 806 | 0x2000: "not-indexed", 807 | 0x4000: "encrypted", 808 | 0x10000000: "has-indx", 809 | 0x20000000: "has-view-index", 810 | } 811 | 812 | def __init__(self, buf, offset, parent): 813 | super(Attribute, self).__init__(buf, offset) 814 | self.declare_field("dword", "type") 815 | self.declare_field("dword", "size") # this value must be rounded up to 0x8 byte alignment 816 | self.declare_field("byte", "non_resident") 817 | self.declare_field("byte", "name_length") 818 | self.declare_field("word", "name_offset") 819 | self.declare_field("word", "flags") 820 | self.declare_field("word", "instance") 821 | if self.non_resident() > 0: 822 | self.declare_field("qword", "lowest_vcn", 0x10) 823 | self.declare_field("qword", "highest_vcn") 824 | self.declare_field("word", "runlist_offset") 825 | self.declare_field("byte", "compression_unit") 826 | self.declare_field("byte", "reserved1") 827 | self.declare_field("byte", "reserved2") 828 | self.declare_field("byte", "reserved3") 829 | self.declare_field("byte", "reserved4") 830 | self.declare_field("byte", "reserved5") 831 | self.declare_field("qword", "allocated_size") 832 | self.declare_field("qword", "data_size") 833 | self.declare_field("qword", "initialized_size") 834 | self.declare_field("qword", "compressed_size") 835 | else: 836 | self.declare_field("dword", "value_length", 0x10) 837 | self.declare_field("word", "value_offset") 838 | self.declare_field("byte", "value_flags") 839 | self.declare_field("byte", "reserved") 840 | self.declare_field("binary", "value", 841 | self.value_offset(), self.value_length()) 842 | 843 | @staticmethod 844 | def structure_size(buf, offset, parent): 845 | s = BinaryParser.read_dword(buf, offset + 0x4) 846 | return s + (8 - (s % 8)) 847 | 848 | def __len__(self): 849 | return self.size() 850 | 851 | def __str__(self): 852 | return "%s" % (Attribute.TYPES[self.type()]) 853 | 854 | def runlist(self): 855 | return Runlist(self._buf, self.offset() + self.runlist_offset(), self) 856 | 857 | def size(self): 858 | s = self.unpack_dword(self._off_size) 859 | return s + (8 - (s % 8)) 860 | 861 | def name(self): 862 | return self.unpack_wstring(self.name_offset(), self.name_length()) 863 | 864 | 
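# --- Editor's illustrative aside (not part of the original MFT.py) ---------
# Runentry/Runlist above decode NTFS "mapping pairs": each run entry opens
# with a header byte whose low nibble gives the size of the run-length field
# and whose high nibble gives the size of the run-offset field; the offset is
# a signed, little-endian value relative to the previous run's starting LCN
# (which is what runs() accumulates). The standalone sketch below mirrors
# that algorithm without depending on BinaryParser; the buffer bytes are
# hypothetical, chosen only to exercise one positive and one negative offset.

def _example_decode_runs(raw):
    raw = bytearray(raw)
    pos = 0
    last_lcn = 0
    runs = []
    while raw[pos] != 0:  # a 0x00 header byte terminates the runlist
        header = raw[pos]
        length_size = header & 0x0F  # like Runentry._length_length
        offset_size = header >> 4    # like Runentry._offset_length
        pos += 1
        length = 0
        for i in range(length_size):  # little-endian accumulation, like lsb2num
            length |= raw[pos + i] << (8 * i)
        pos += length_size
        offset = 0
        for i in range(offset_size):
            offset |= raw[pos + i] << (8 * i)
        if offset_size and (raw[pos + offset_size - 1] & 0x80):
            offset -= 1 << (8 * offset_size)  # sign-extend, like lsb2signednum
        pos += offset_size
        last_lcn += offset  # run offsets are relative to the previous run
        runs.append((last_lcn, length))
    return runs

# For example (header 0x21 means a 2-byte offset and a 1-byte length):
#   _example_decode_runs(b"\x21\x18\x34\x56\x21\x10\x00\xff\x00")
#   -> [(0x5634, 0x18), (0x5534, 0x10)]
# i.e. 0x18 clusters at LCN 0x5634, then 0x10 clusters starting 0x100
# clusters earlier, matching what Runlist.runs() would yield.
# ---------------------------------------------------------------------------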
865 | class MFT_RECORD_FLAGS: 866 | MFT_RECORD_IN_USE = 0x1 867 | MFT_RECORD_IS_DIRECTORY = 0x2 868 | 869 | 870 | def MREF(mft_reference): 871 | """ 872 | Given a MREF/mft_reference, return the record number part. 873 | """ 874 | return mft_reference & 0xFFFFFFFFFFFF 875 | 876 | 877 | def MSEQNO(mft_reference): 878 | """ 879 | Given a MREF/mft_reference, return the sequence number part. 880 | """ 881 | return (mft_reference >> 48) & 0xFFFF 882 | 883 | 884 | class AttributeNotFoundError(Exception): 885 | pass 886 | 887 | 888 | class MFTRecord(FixupBlock): 889 | def __init__(self, buf, offset, parent, inode=None): 890 | super(MFTRecord, self).__init__(buf, offset, parent) 891 | 892 | # 0x00 Magic: "FILE" or "BAAD" 893 | self.declare_field("dword", "magic") 894 | # 0x04 Offset to fixup array 895 | self.declare_field("word", "usa_offset") 896 | # 0x06 Number of entries in fixup array 897 | self.declare_field("word", "usa_count") 898 | # 0x08 $LogFile sequence number 899 | self.declare_field("qword", "lsn") 900 | # 0x10 Sequence value 901 | self.declare_field("word", "sequence_number") 902 | # 0x12 Link Count 903 | self.declare_field("word", "link_count") 904 | # 0x14 Offset of first attribute 905 | self.declare_field("word", "attrs_offset") 906 | # 0x16 Flags: 907 | # 0x00 - not in use 908 | # 0x01 - in use 909 | # 0x02 - directory 910 | # 0x03 - directory in use 911 | self.declare_field("word", "flags") 912 | 913 | # 0x18 Used size of MFT entry 914 | self.declare_field("dword", "bytes_in_use") 915 | # 0x1c Allocated size of MFT entry 916 | self.declare_field("dword", "bytes_allocated") 917 | # 0x20 File reference to base record 918 | self.declare_field("qword", "base_mft_record") 919 | # 0x28 Next attribute identifier 920 | self.declare_field("word", "next_attr_instance") 921 | 922 | # Attributes and fixup values 923 | # 0x2a 924 | self.declare_field("word", "reserved") 925 | # 0x2c 926 | self.declare_field("dword", "mft_record_number") 927 | 928 | self.inode = inode or self.mft_record_number() 929 | self.fixup(self.usa_count(), self.usa_offset()) 930 | 931 | def attributes(self): 932 | offset = self.attrs_offset() 933 | right_border = self.offset() + self.bytes_in_use() 934 | 935 | while (self.unpack_dword(offset) != 0 and 936 | self.unpack_dword(offset) != 0xFFFFFFFF and 937 | offset + self.unpack_dword(offset + 4) <= right_border): 938 | a = Attribute(self._buf, offset, self) 939 | offset += len(a) 940 | yield a 941 | 942 | def attribute(self, attr_type): 943 | for a in self.attributes(): 944 | if a.type() == attr_type: 945 | return a 946 | raise AttributeNotFoundError() 947 | 948 | def is_directory(self): 949 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IS_DIRECTORY 950 | 951 | def is_active(self): 952 | return self.flags() & MFT_RECORD_FLAGS.MFT_RECORD_IN_USE 953 | 954 | # this is a required resident attribute 955 | def filename_informations(self): 956 | """ 957 | MFT Records may have more than one FN info attribute, 958 | each with a different type of filename (8.3, POSIX, etc.) 959 | 960 | This function returns all of these attributes. 
961 | """ 962 | ret = [] 963 | for a in self.attributes(): 964 | if a.type() == ATTR_TYPE.FILENAME_INFORMATION: 965 | try: 966 | value = a.value() 967 | check = FilenameAttribute(value, 0, self) 968 | ret.append(check) 969 | except Exception: 970 | pass 971 | return ret 972 | 973 | # this is a required resident attribute 974 | def filename_information(self): 975 | """ 976 | MFT Records may have more than one FN info attribute, 977 | each with a different type of filename (8.3, POSIX, etc.) 978 | 979 | This function returns the attribute with the most complete name, 980 | that is, it tends towards Win32, then POSIX, and then 8.3. 981 | """ 982 | fn = None 983 | for check in self.filename_informations(): 984 | try: 985 | if check.filename_type() == 0x0001 or \ 986 | check.filename_type() == 0x0003: 987 | return check 988 | fn = check 989 | except Exception: 990 | pass 991 | return fn 992 | 993 | # this is a required resident attribute 994 | def standard_information(self): 995 | try: 996 | attr = self.attribute(ATTR_TYPE.STANDARD_INFORMATION) 997 | return StandardInformation(attr.value(), 0, self) 998 | except AttributeNotFoundError: # attribute() raises AttributeNotFoundError, not AttributeError 999 | return None 1000 | 1001 | def data_attribute(self): 1002 | """ 1003 | Returns None if the default $DATA attribute does not exist 1004 | """ 1005 | for attr in self.attributes(): 1006 | if attr.type() == ATTR_TYPE.DATA and attr.name() == "": 1007 | return attr 1008 | 1009 | def slack_data(self): 1010 | """ 1011 | Returns a binary string containing the MFT record slack. 1012 | """ 1013 | return self._buf[self.offset()+self.bytes_in_use():self.offset() + 1024].tostring() 1014 | 1015 | def active_data(self): 1016 | """ 1017 | Returns a binary string containing the active MFT record data. 1018 | """ 1019 | return self._buf[self.offset():self.offset() + self.bytes_in_use()].tostring() 1020 | 1021 | 1022 | class InvalidAttributeException(INDXException): 1023 | def __init__(self, value): 1024 | super(InvalidAttributeException, self).__init__(value) 1025 | 1026 | def __str__(self): 1027 | return "Invalid attribute Exception(%s)" % (self._value) 1028 | 1029 | 1030 | class InvalidMFTRecordNumber(Exception): 1031 | def __init__(self, value): 1032 | self.value = value 1033 | 1034 | 1035 | class MFTOperationNotImplementedError(Exception): 1036 | def __init__(self, msg): 1037 | super(MFTOperationNotImplementedError, self).__init__(msg) 1038 | self._msg = msg 1039 | 1040 | def __str__(self): 1041 | return "MFTOperationNotImplemented(%s)" % (self._msg) 1042 | 1043 | 1044 | class InvalidRecordException(Exception): 1045 | def __init__(self, msg): 1046 | super(InvalidRecordException, self).__init__(msg) 1047 | self._msg = msg 1048 | 1049 | def __str__(self): 1050 | return "InvalidRecordException(%s)" % (self._msg) 1051 | 1052 | 1053 | class Cache(object): 1054 | def __init__(self, size_limit): 1055 | super(Cache, self).__init__() 1056 | self._c = OrderedDict() 1057 | self._size_limit = size_limit 1058 | 1059 | def insert(self, k, v): 1060 | """ 1061 | add a key and value to the front 1062 | """ 1063 | self._c[k] = v 1064 | if len(self._c) > self._size_limit: 1065 | self._c.popitem(last=False) 1066 | 1067 | def exists(self, k): 1068 | return k in self._c 1069 | 1070 | def touch(self, k): 1071 | """ 1072 | bring a key to the front 1073 | """ 1074 | v = self._c[k] 1075 | del self._c[k] 1076 | self._c[k] = v 1077 | 1078 | def get(self, k): 1079 | return self._c[k] 1080 | 1081 | 1082 | MFT_RECORD_SIZE = 1024 1083 | FILE_SEP = "\\" 1084 | UNKNOWN_ENTRY = "??" 
1085 | ORPHAN_ENTRY = "$ORPHAN" 1086 | CYCLE_ENTRY = "<CYCLE>" 1087 | 1088 | 1089 | class MFTEnumerator(object): 1090 | def __init__(self, buf, record_cache=None, path_cache=None): 1091 | DEFAULT_CACHE_SIZE = 102400 1092 | if record_cache is None: 1093 | record_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1094 | if path_cache is None: 1095 | path_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1096 | 1097 | self._buf = buf 1098 | self._record_cache = record_cache 1099 | self._path_cache = path_cache 1100 | 1101 | def len(self): 1102 | return len(self._buf) / MFT_RECORD_SIZE 1103 | 1104 | def get_record_buf(self, record_num): 1105 | """ 1106 | @raises OverrunBufferException: if the record_num is beyond the end of the MFT 1107 | """ 1108 | start = record_num * MFT_RECORD_SIZE 1109 | end = start + MFT_RECORD_SIZE 1110 | g_logger.debug("get_record_buf: start: %s len: %s bufsize: %s", hex(start), hex(end - start), hex(len(self._buf))) 1111 | if end > len(self._buf): 1112 | raise BinaryParser.OverrunBufferException(end, len(self._buf)) 1113 | 1114 | buf = self._buf[start:end] 1115 | return buf 1116 | 1117 | def get_record(self, record_num): 1118 | """ 1119 | @raises OverrunBufferException: if the record_num is beyond the end of the MFT. 1120 | @raises InvalidRecordException: if the record appears invalid (incorrect magic header). 1121 | """ 1122 | if self._record_cache.exists(record_num): 1123 | self._record_cache.touch(record_num) 1124 | return self._record_cache.get(record_num) 1125 | 1126 | record_buf = self.get_record_buf(record_num) 1127 | if BinaryParser.read_dword(record_buf, 0x0) != 0x454C4946: 1128 | raise InvalidRecordException("record_num: %d" % record_num) 1129 | 1130 | record = MFTRecord(record_buf, 0, False, inode=record_num) 1131 | self._record_cache.insert(record_num, record) 1132 | return record 1133 | 1134 | def enumerate_records(self): 1135 | index = 0 1136 | while True: 1137 | if index == 12: # reserved records are 12-15 1138 | index = 16 1139 | try: 1140 | record = self.get_record(index) 1141 | yield record 1142 | index += 1 1143 | except InvalidRecordException: 1144 | index += 1 1145 | continue 1146 | except BinaryParser.OverrunBufferException: 1147 | return 1148 | 1149 | def enumerate_paths(self): 1150 | for record in self.enumerate_records(): 1151 | path = self.get_path(record) 1152 | yield record, path 1153 | 1154 | def get_path(self, record): 1155 | """ 1156 | @type record: MFTRecord 1157 | @rtype: str 1158 | @return: A string containing the path of the given record. 1159 | It will begin with the first path component, that is, 1160 | something like "Documents and Settings\Administrator\bad.exe". 1161 | In the event that a path component cannot be determined, it is 1162 | replaced by "??". If the parent of an entry cannot be verified, 1163 | then it is added to the $ORPHAN directory. If a cycle is detected 1164 | during the path resolution, then the offending entry is 1165 | replaced with "<CYCLE>". This occasionally happens at the root 1166 | directory. 1167 | """ 1168 | r = self._get_path_impl(record, set()) 1169 | if r == "": 1170 | return FILE_SEP 1171 | else: 1172 | return r 1173 | 1174 | 1175 | def _get_path_impl(self, record, cycledetector): 1176 | """ 1177 | @type cycledetector: set of int 1178 | @param cycledetector: A set of numbers that describe which records have been processed 1179 | in the building of the path. 
1180 | """ 1181 | key = "%d-%d-%d-%d-%d" % (record.magic(), record.lsn(), 1182 | record.link_count(), record.mft_record_number(), 1183 | record.flags()) 1184 | if self._path_cache.exists(key): 1185 | self._path_cache.touch(key) 1186 | return self._path_cache.get(key) 1187 | 1188 | record_num = record.mft_record_number() 1189 | if record_num == 5: 1190 | return "" 1191 | 1192 | if record_num in cycledetector: 1193 | return CYCLE_ENTRY 1194 | cycledetector.add(record_num) 1195 | 1196 | fn = record.filename_information() 1197 | 1198 | if not fn: 1199 | return UNKNOWN_ENTRY 1200 | else: 1201 | record_filename = fn.filename() 1202 | 1203 | parent_record_num = MREF(fn.mft_parent_reference()) 1204 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 1205 | 1206 | try: 1207 | parent_record = self.get_record(parent_record_num) 1208 | except (BinaryParser.OverrunBufferException, InvalidRecordException): 1209 | return ORPHAN_ENTRY + FILE_SEP + record_filename 1210 | 1211 | if parent_record.sequence_number() != parent_seq_num: 1212 | return ORPHAN_ENTRY + FILE_SEP + record_filename 1213 | 1214 | path = self._get_path_impl(parent_record, cycledetector) + FILE_SEP + record_filename 1215 | self._path_cache.insert(key, path) 1216 | return path 1217 | 1218 | def get_record_by_path(self, path): 1219 | lower_path = path.lower() 1220 | for record, record_path in self.enumerate_paths(): 1221 | if lower_path == record_path.lower(): 1222 | return record 1223 | raise KeyError("Path not found: %s" % path) 1224 | 1225 | 1226 | class MFTTreeNode(object): 1227 | def __init__(self, nodes, record_number, filename, parent_record_number): 1228 | super(MFTTreeNode, self).__init__() 1229 | self._nodes = nodes 1230 | self._record_number = record_number 1231 | self._filename = filename 1232 | self._parent_record_number = parent_record_number 1233 | self._children_record_numbers = [] 1234 | 1235 | def get_record_number(self): 1236 | return self._record_number 1237 | 1238 | def get_filename(self): 1239 | return self._filename 1240 | 1241 | def get_parent(self): 1242 | return self._nodes[self._parent_record_number] 1243 | 1244 | def add_child_record_number(self, child_record_number): 1245 | self._children_record_numbers.append(child_record_number) 1246 | 1247 | def get_children_nodes(self): 1248 | return map(lambda n: self._nodes[n], self._children_record_numbers) 1249 | 1250 | def get_child_node(self, filename): 1251 | for child in self.get_children_nodes(): 1252 | if child.get_filename() == filename: 1253 | return child 1254 | raise KeyError("Failed to find filename: " + filename) 1255 | 1256 | 1257 | ROOT_INDEX = 5 1258 | class MFTTree(object): 1259 | ORPHAN_INDEX = 12 1260 | 1261 | def __init__(self, buf): 1262 | super(MFTTree, self).__init__() 1263 | self._buf = buf 1264 | self._nodes = {} # dict of record number -> MFTTreeNode 1265 | 1266 | def _add_record(self, mft_enumerator, record): 1267 | record_num = record.mft_record_number() 1268 | 1269 | if record_num in self._nodes: 1270 | return 1271 | 1272 | if record_num == ROOT_INDEX: 1273 | self._nodes[ROOT_INDEX] = MFTTreeNode(self._nodes, ROOT_INDEX, "\.", ROOT_INDEX) 1274 | return 1275 | 1276 | fn = record.filename_information() 1277 | if not fn: 1278 | # then there's no filename, or parent reference 1279 | # there could be some standard information (timestamps), 1280 | # or named streams 1281 | # but still no parent link. 
1282 | # ...so let's bail 1283 | return 1284 | 1285 | parent_record_num = MREF(fn.mft_parent_reference()) 1286 | parent_seq_num = MSEQNO(fn.mft_parent_reference()) 1287 | 1288 | try: 1289 | parent_record = mft_enumerator.get_record(parent_record_num) 1290 | except (BinaryParser.OverrunBufferException, InvalidRecordException): 1291 | parent_record_num = MFTTree.ORPHAN_INDEX 1292 | parent_record = None 1293 | 1294 | if not parent_record: 1295 | parent_record_num = MFTTree.ORPHAN_INDEX 1296 | elif parent_record.sequence_number() != parent_seq_num: 1297 | parent_record_num = MFTTree.ORPHAN_INDEX 1298 | 1299 | if parent_record_num != MFTTree.ORPHAN_INDEX and parent_record: 1300 | self._add_record(mft_enumerator, parent_record) 1301 | 1302 | # self._nodes is a dict, so a missing parent raises KeyError, not IndexError; 1303 | # fall back to the orphan node, which build() creates before enumerating 1304 | if parent_record_num not in self._nodes: 1305 | parent_record_num = MFTTree.ORPHAN_INDEX 1306 | parent_node = self._nodes[parent_record_num] 1307 | record_node = MFTTreeNode(self._nodes, record_num, fn.filename(), parent_record_num) 1308 | self._nodes[record_num] = record_node 1309 | if parent_node: 1310 | parent_node.add_child_record_number(record_num) 1311 | 1312 | def build(self, record_cache=None, 1313 | path_cache=None, progress_class=Progress.NullProgress): 1314 | DEFAULT_CACHE_SIZE = 1024 1315 | if record_cache is None: 1316 | record_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1317 | if path_cache is None: 1318 | path_cache = Cache(size_limit=DEFAULT_CACHE_SIZE) 1319 | 1320 | enum = MFTEnumerator(self._buf, record_cache=record_cache, path_cache=path_cache) 1321 | 1322 | self._nodes[MFTTree.ORPHAN_INDEX] = MFTTreeNode(self._nodes, MFTTree.ORPHAN_INDEX, 1323 | ORPHAN_ENTRY, ROOT_INDEX) 1324 | 1325 | count = 0 1326 | progress = progress_class(len(self._buf) / 1024) 1327 | for record in enum.enumerate_records(): 1328 | self._add_record(enum, record) 1329 | count += 1 1330 | progress.set_current(count) 1331 | progress.set_complete() 1332 | 1333 | def get_root(self): 1334 | return self._nodes[ROOT_INDEX] 1335 | -------------------------------------------------------------------------------- /ntfs/mft/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __all__ = [ 3 | "MFT", 4 | ] 5 | -------------------------------------------------------------------------------- /ntfs/secure/SDS.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # This file is part of INDXParse. 4 | # 5 | # Copyright 2011-13 Will Ballenthin 6 | # while at Mandiant 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | # Version v.1.2 21 | 22 | from .. 
import BinaryParser 23 | from ..BinaryParser import Block 24 | from ..BinaryParser import Nestable 25 | 26 | 27 | class NULL_OBJECT(object): 28 | def __init__(self): 29 | super(NULL_OBJECT, self).__init__() 30 | 31 | @staticmethod 32 | def structure_size(buf, offset, parent): 33 | return 0 34 | 35 | def __len__(self): 36 | return 0 37 | 38 | null_object = NULL_OBJECT() 39 | 40 | 41 | class SECURITY_DESCRIPTOR_CONTROL: 42 | SE_OWNER_DEFAULTED = 1 << 0 43 | SE_GROUP_DEFAULTED = 1 << 1 44 | SE_DACL_PRESENT = 1 << 2 45 | SE_DACL_DEFAULTED = 1 << 3 46 | SE_SACL_PRESENT = 1 << 4 47 | SE_SACL_DEFAULTED = 1 << 5 48 | SE_SACL_UNUSED0 = 1 << 6 49 | SE_SACL_UNUSED1 = 1 << 7 50 | SE_DACL_AUTO_INHERIT_REQ = 1 << 8 51 | SE_SACL_AUTO_INHERIT_REQ = 1 << 9 52 | SE_DACL_AUTO_INHERITED = 1 << 10 53 | SE_SACL_AUTO_INHERITED = 1 << 11 54 | SE_DACL_PROTECTED = 1 << 12 55 | SE_SACL_PROTECTED = 1 << 13 56 | SE_RM_CONTROL_VALID = 1 << 14 57 | SE_SELF_RELATIVE = 1 << 15 58 | 59 | 60 | class SID_IDENTIFIER_AUTHORITY(Block, Nestable): 61 | def __init__(self, buf, offset, parent): 62 | super(SID_IDENTIFIER_AUTHORITY, self).__init__(buf, offset) 63 | self.declare_field("word_be", "high_part", 0x0) 64 | self.declare_field("dword_be", "low_part") 65 | 66 | @staticmethod 67 | def structure_size(buf, offset, parent): 68 | return 6 69 | 70 | def __len__(self): 71 | return SID_IDENTIFIER_AUTHORITY.structure_size(self._buf, self.absolute_offset(0x0), None) 72 | 73 | def __str__(self): 74 | return "%s" % ((self.high_part() << 32) + self.low_part()) # parens required: "+" binds tighter than "<<" 75 | 76 | 77 | class SID(Block, Nestable): 78 | def __init__(self, buf, offset, parent): 79 | super(SID, self).__init__(buf, offset) 80 | self.declare_field("byte", "revision", 0x0) 81 | self.declare_field("byte", "sub_authority_count") 82 | self.declare_field(SID_IDENTIFIER_AUTHORITY, "identifier_authority") 83 | self.declare_field("dword", "sub_authorities", count=self.sub_authority_count()) 84 | 85 | @staticmethod 86 | def structure_size(buf, offset, parent): 87 | sub_auth_count = BinaryParser.read_byte(buf, offset + 1) 88 | auth_size = SID_IDENTIFIER_AUTHORITY.structure_size(buf, offset + 2, parent) 89 | return 2 + auth_size + (sub_auth_count * 4) 90 | 91 | def __len__(self): 92 | return self._off_sub_authorities + (self.sub_authority_count() * 4) 93 | 94 | def string(self): 95 | ret = "S-%d-%s" % (self.revision(), self.identifier_authority()) 96 | for sub_auth in self.sub_authorities(): 97 | ret += "-%s" % (str(sub_auth)) 98 | return ret 99 | 100 | 101 | class ACE_TYPES: 102 | """ 103 | One byte. 104 | """ 105 | ACCESS_MIN_MS_ACE_TYPE = 0 106 | ACCESS_ALLOWED_ACE_TYPE = 0 107 | ACCESS_DENIED_ACE_TYPE = 1 108 | SYSTEM_AUDIT_ACE_TYPE = 2 109 | SYSTEM_ALARM_ACE_TYPE = 3 # Not implemented as of Win2k. 110 | ACCESS_MAX_MS_V2_ACE_TYPE = 3 111 | 112 | ACCESS_ALLOWED_COMPOUND_ACE_TYPE = 4 113 | ACCESS_MAX_MS_V3_ACE_TYPE = 4 114 | 115 | # The following are Win2k only. 116 | ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5 117 | ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5 118 | ACCESS_DENIED_OBJECT_ACE_TYPE = 6 119 | SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7 120 | SYSTEM_ALARM_OBJECT_ACE_TYPE = 8 121 | ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8 122 | ACCESS_MAX_MS_V4_ACE_TYPE = 8 123 | 124 | # This one is for WinNT/2k. 125 | ACCESS_MAX_MS_ACE_TYPE = 8 126 | 127 | 128 | class ACE_FLAGS: 129 | """ 130 | One byte. 131 | """ 132 | OBJECT_INHERIT_ACE = 0x01 133 | CONTAINER_INHERIT_ACE = 0x02 134 | NO_PROPAGATE_INHERIT_ACE = 0x04 135 | INHERIT_ONLY_ACE = 0x08 136 | INHERITED_ACE = 0x10 # Win2k only. 
137 | VALID_INHERIT_FLAGS = 0x1f 138 | 139 | # The audit flags. 140 | SUCCESSFUL_ACCESS_ACE_FLAG = 0x40 141 | FAILED_ACCESS_ACE_FLAG = 0x80 142 | 143 | 144 | class ACCESS_MASK: 145 | """ 146 | DWORD. 147 | """ 148 | FILE_READ_DATA = 0x00000001 149 | FILE_LIST_DIRECTORY = 0x00000001 150 | FILE_WRITE_DATA = 0x00000002 151 | FILE_ADD_FILE = 0x00000002 152 | FILE_APPEND_DATA = 0x00000004 153 | FILE_ADD_SUBDIRECTORY = 0x00000004 154 | FILE_READ_EA = 0x00000008 155 | FILE_WRITE_EA = 0x00000010 156 | FILE_EXECUTE = 0x00000020 157 | FILE_TRAVERSE = 0x00000020 158 | FILE_DELETE_CHILD = 0x00000040 159 | FILE_READ_ATTRIBUTES = 0x00000080 160 | FILE_WRITE_ATTRIBUTES = 0x00000100 161 | DELETE = 0x00010000 162 | READ_CONTROL = 0x00020000 163 | WRITE_DAC = 0x00040000 164 | WRITE_OWNER = 0x00080000 165 | SYNCHRONIZE = 0x00100000 166 | STANDARD_RIGHTS_READ = 0x00020000 167 | STANDARD_RIGHTS_WRITE = 0x00020000 168 | STANDARD_RIGHTS_EXECUTE = 0x00020000 169 | STANDARD_RIGHTS_REQUIRED = 0x000f0000 170 | STANDARD_RIGHTS_ALL = 0x001f0000 171 | ACCESS_SYSTEM_SECURITY = 0x01000000 172 | MAXIMUM_ALLOWED = 0x02000000 173 | GENERIC_ALL = 0x10000000 174 | GENERIC_EXECUTE = 0x20000000 175 | GENERIC_WRITE = 0x40000000 176 | GENERIC_READ = 0x80000000 177 | 178 | 179 | class ACE(Block): 180 | def __init__(self, buf, offset, parent): 181 | super(ACE, self).__init__(buf, offset) 182 | self.declare_field("byte", "ace_type", 0x0) 183 | self.declare_field("byte", "ace_flags") 184 | 185 | @staticmethod 186 | def get_ace(buf, offset, parent): 187 | header = ACE(buf, offset, parent) 188 | if header.ace_type() == ACE_TYPES.ACCESS_ALLOWED_ACE_TYPE: 189 | return ACCESS_ALLOWED_ACE(buf, offset, parent) 190 | elif header.ace_type() == ACE_TYPES.ACCESS_DENIED_ACE_TYPE: 191 | return ACCESS_DENIED_ACE(buf, offset, parent) 192 | elif header.ace_type() == ACE_TYPES.SYSTEM_AUDIT_ACE_TYPE: 193 | return SYSTEM_AUDIT_ACE(buf, offset, parent) 194 | elif header.ace_type() == ACE_TYPES.SYSTEM_ALARM_ACE_TYPE: 195 | return SYSTEM_ALARM_ACE(buf, offset, parent) 196 | elif header.ace_type() == ACE_TYPES.ACCESS_ALLOWED_OBJECT_ACE_TYPE: 197 | return ACCESS_ALLOWED_OBJECT_ACE(buf, offset, parent) 198 | elif header.ace_type() == ACE_TYPES.ACCESS_DENIED_OBJECT_ACE_TYPE: 199 | return ACCESS_DENIED_OBJECT_ACE(buf, offset, parent) 200 | elif header.ace_type() == ACE_TYPES.SYSTEM_AUDIT_OBJECT_ACE_TYPE: 201 | return SYSTEM_AUDIT_OBJECT_ACE(buf, offset, parent) 202 | elif header.ace_type() == ACE_TYPES.SYSTEM_ALARM_OBJECT_ACE_TYPE: 203 | return SYSTEM_ALARM_OBJECT_ACE(buf, offset, parent) 204 | else: 205 | # TODO(wb): raise a custom exception type 206 | raise BinaryParser.ParseException("unknown ACE type") 207 | 208 | 209 | class StandardACE(ACE, Nestable): 210 | def __init__(self, buf, offset, parent): 211 | super(StandardACE, self).__init__(buf, offset, parent) 212 | self.declare_field("word", "size", 0x2) 213 | self.declare_field("dword", "access_mask") 214 | self.declare_field(SID, "sid") 215 | 216 | @staticmethod 217 | def structure_size(buf, offset, parent): 218 | return BinaryParser.read_word(buf, offset + 0x2) 219 | 220 | def __len__(self): 221 | return self.size() 222 | 223 | 224 | class ACCESS_ALLOWED_ACE(StandardACE): 225 | def __init__(self, buf, offset, parent): 226 | super(ACCESS_ALLOWED_ACE, self).__init__(buf, offset, parent) 227 | 228 | 229 | class ACCESS_DENIED_ACE(StandardACE): 230 | def __init__(self, buf, offset, parent): 231 | super(ACCESS_DENIED_ACE, self).__init__(buf, offset, parent) 232 | 233 | 234 | class 
SYSTEM_AUDIT_ACE(StandardACE): 235 | def __init__(self, buf, offset, parent): 236 | super(SYSTEM_AUDIT_ACE, self).__init__(buf, offset, parent) 237 | 238 | 239 | class SYSTEM_ALARM_ACE(StandardACE): 240 | def __init__(self, buf, offset, parent): 241 | super(SYSTEM_ALARM_ACE, self).__init__(buf, offset, parent) 242 | 243 | 244 | class OBJECT_ACE_FLAGS: 245 | """ 246 | DWORD. 247 | """ 248 | ACE_OBJECT_TYPE_PRESENT = 1 249 | ACE_INHERITED_OBJECT_TYPE_PRESENT = 2 250 | 251 | 252 | class ObjectACE(ACE, Nestable): 253 | def __init__(self, buf, offset, parent): 254 | super(ObjectACE, self).__init__(buf, offset, parent) 255 | self.declare_field("word", "size", 0x2) 256 | self.declare_field("dword", "access_mask") 257 | self.declare_field("dword", "object_flags") 258 | self.declare_field("guid", "object_type") 259 | self.declare_field("guid", "inherited_object_type") 260 | 261 | @staticmethod 262 | def structure_size(buf, offset, parent): 263 | return BinaryParser.read_word(buf, offset + 0x2) 264 | 265 | def __len__(self): 266 | return self.size() 267 | 268 | 269 | class ACCESS_ALLOWED_OBJECT_ACE(ObjectACE): 270 | def __init__(self, buf, offset, parent): 271 | super(ACCESS_ALLOWED_OBJECT_ACE, self).__init__(buf, offset, parent) 272 | 273 | 274 | class ACCESS_DENIED_OBJECT_ACE(ObjectACE): 275 | def __init__(self, buf, offset, parent): 276 | super(ACCESS_DENIED_OBJECT_ACE, self).__init__(buf, offset, parent) 277 | 278 | 279 | class SYSTEM_AUDIT_OBJECT_ACE(ObjectACE): 280 | def __init__(self, buf, offset, parent): 281 | super(SYSTEM_AUDIT_OBJECT_ACE, self).__init__(buf, offset, parent) 282 | 283 | 284 | class SYSTEM_ALARM_OBJECT_ACE(ObjectACE): 285 | def __init__(self, buf, offset, parent): 286 | super(SYSTEM_ALARM_OBJECT_ACE, self).__init__(buf, offset, parent) 287 | 288 | 289 | class ACL(Block, Nestable): 290 | def __init__(self, buf, offset, parent): 291 | super(ACL, self).__init__(buf, offset) 292 | self.declare_field("byte", "revision", 0x0) 293 | self.declare_field("byte", "alignment1") 294 | self.declare_field("word", "size") 295 | self.declare_field("word", "ace_count") 296 | self.declare_field("word", "alignment2") 297 | self._off_ACEs = self.current_field_offset() 298 | self.add_explicit_field(self._off_ACEs, ACE, "ACEs") 299 | 300 | @staticmethod 301 | def structure_size(buf, offset, parent): 302 | return BinaryParser.read_word(buf, offset + 0x2) 303 | 304 | def __len__(self): 305 | return self.size() 306 | 307 | def ACEs(self): 308 | ofs = self._off_ACEs 309 | for _ in range(self.ace_count()): 310 | a = ACE.get_ace(self._buf, self.offset() + ofs, self) 311 | yield a 312 | ofs += a.size() 313 | ofs = BinaryParser.align(ofs, 4) 314 | 315 | 316 | class NULL_ACL(object): 317 | """ 318 | TODO(wb): Not actually sure what the NULL ACL is... 319 | just guessing at the values here. 
320 | """ 321 | def __init__(self): 322 | super(NULL_ACL, self).__init__() 323 | 324 | def revision(self): 325 | return 1 326 | 327 | def alignment1(self): 328 | return 0 329 | 330 | def size(self): 331 | return 0 332 | 333 | def ace_count(self): 334 | return 0 335 | 336 | def ACEs(self): 337 | return 338 | 339 | @staticmethod 340 | def structure_size(buf, offset, parent): 341 | return 0 342 | 343 | def __len__(self): 344 | return 0 345 | 346 | 347 | class SECURITY_DESCRIPTOR_RELATIVE(Block, Nestable): 348 | def __init__(self, buf, offset, parent): 349 | super(SECURITY_DESCRIPTOR_RELATIVE, self).__init__(buf, offset) 350 | self.declare_field("byte", "revision", 0x0) 351 | self.declare_field("byte", "alignment") 352 | self.declare_field("word", "control") 353 | self.declare_field("dword", "owner_offset") 354 | self.declare_field("dword", "group_offset") 355 | self.declare_field("dword", "sacl_offset") 356 | self.declare_field("dword", "dacl_offset") 357 | 358 | self.add_explicit_field(self.owner_offset(), "SID", "owner") 359 | self.add_explicit_field(self.group_offset(), "SID", "group") 360 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_SACL_PRESENT: 361 | self.add_explicit_field(self.sacl_offset(), "ACL", "sacl") 362 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_DACL_PRESENT: 363 | self.add_explicit_field(self.dacl_offset(), "ACL", "dacl") 364 | 365 | @staticmethod 366 | def structure_size(buf, offset, parent): 367 | return len(SECURITY_DESCRIPTOR_RELATIVE(buf, offset, parent)) 368 | 369 | def __len__(self): 370 | ret = 20 371 | ret += len((self.owner() or null_object)) 372 | ret += len((self.group() or null_object)) 373 | ret += len((self.sacl() or null_object)) 374 | ret += len((self.dacl() or null_object)) 375 | return ret 376 | 377 | def owner(self): 378 | if self.owner_offset() != 0: 379 | return SID(self._buf, self.absolute_offset(self.owner_offset()), self) 380 | else: 381 | return None 382 | 383 | def group(self): 384 | if self.group_offset() != 0: 385 | return SID(self._buf, self.absolute_offset(self.group_offset()), self) 386 | else: 387 | return None 388 | 389 | def sacl(self): 390 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_SACL_PRESENT: 391 | if self.sacl_offset() > 0: 392 | return ACL(self._buf, self.absolute_offset(self.sacl_offset()), self) 393 | else: 394 | return NULL_ACL() 395 | else: 396 | return None 397 | 398 | def dacl(self): 399 | if self.control() & SECURITY_DESCRIPTOR_CONTROL.SE_DACL_PRESENT: 400 | if self.dacl_offset() > 0: 401 | return ACL(self._buf, self.absolute_offset(self.dacl_offset()), self) 402 | else: 403 | return NULL_ACL() 404 | else: 405 | return None 406 | 407 | 408 | class SDS_ENTRY(Block, Nestable): 409 | def __init__(self, buf, offset, parent): 410 | super(SDS_ENTRY, self).__init__(buf, offset) 411 | self.declare_field("dword", "hash", 0x0) 412 | self.declare_field("dword", "security_id") 413 | self.declare_field("qword", "offset") 414 | self.declare_field("dword", "length") 415 | self.declare_field(SECURITY_DESCRIPTOR_RELATIVE, "sid") 416 | 417 | @staticmethod 418 | def structure_size(buf, offset, parent): 419 | return BinaryParser.read_dword(buf, offset + 0x10) 420 | 421 | def __len__(self): 422 | return self.length() 423 | 424 | 425 | class SDS(Block): 426 | def __init__(self, buf, offset, parent): 427 | super(SDS, self).__init__(buf, offset) 428 | self.add_explicit_field(0, SDS_ENTRY, "sds_entries") # the entries are SDS_ENTRY structures, not nested SDS blocks 429 | 430 | def sds_entries(self): 431 | ofs = 0 432 | while len(self._buf) > self.offset() + ofs + 0x14: 433 | s = 
SDS_ENTRY(self._buf, self.offset() + ofs, self) 434 | if len(s) != 0: 435 | yield s 436 | ofs += len(s) 437 | ofs = BinaryParser.align(ofs, 0x10) 438 | else: 439 | if ofs % 0x10000 == 0: 440 | return 441 | else: 442 | ofs = BinaryParser.align(ofs, 0x10000) 443 | 444 | 445 | def main(): 446 | import sys 447 | import mmap 448 | import contextlib 449 | 450 | with open(sys.argv[1], 'rb') as f: 451 | with contextlib.closing(mmap.mmap(f.fileno(), 0, 452 | access=mmap.ACCESS_READ)) as buf: 453 | s = SDS(buf, 0, None) 454 | print("SDS") 455 | for e in s.sds_entries(): 456 | print(" SDS_ENTRY") 457 | print(e.get_all_string(indent=2)) 458 | 459 | if __name__ == "__main__": 460 | main() 461 | -------------------------------------------------------------------------------- /ntfs/secure/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/secure/__init__.py -------------------------------------------------------------------------------- /ntfs/usnjrnl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/williballenthin/python-ntfs/080275e7e78285c68f5a0b5ca895b5123d754acc/ntfs/usnjrnl/__init__.py -------------------------------------------------------------------------------- /ntfs/volume/__init__.py: -------------------------------------------------------------------------------- 1 | from ntfs.BinaryParser import Block 2 | from ntfs.BinaryParser import Mmap 3 | from ntfs.FileMap import FileMap 4 | 5 | 6 | class Volume(Block): 7 | """ 8 | A volume is a logically contiguous run of bytes over which a FS is found. 9 | 10 | Use FlatVolume over this. 11 | """ 12 | __unpackable__ = True 13 | def __init__(self, buf, offset, sector_size=512): 14 | super(Volume, self).__init__(buf, offset) 15 | self._sector_size = sector_size 16 | 17 | def __getitem__(self, index): 18 | return self._buf[index + self._offset] 19 | 20 | def __getslice__(self, start, end): 21 | return self._buf[start + self._offset:end + self._offset] 22 | 23 | def __len__(self): 24 | return len(self._buf) - self._offset 25 | 26 | 27 | class FlatVolume(Volume): 28 | """ 29 | A volume found in a physically contiguous run of bytes. 
30 | """ 31 | def __init__(self, buf, offset, sector_size=512): 32 | super(FlatVolume, self).__init__(buf, offset, sector_size=sector_size) 33 | 34 | 35 | def main(): 36 | import sys 37 | 38 | # two methods 39 | with open(sys.argv[1], "rb") as f: 40 | buf = FileMap(f) 41 | v = FlatVolume(buf, int(sys.argv[2])) 42 | print(list(v[3:3+4])) 43 | 44 | # probably prefer this one 45 | with Mmap(sys.argv[1]) as buf: 46 | v = FlatVolume(buf, int(sys.argv[2])) 47 | print(list(v[3:3+4])) 48 | 49 | 50 | if __name__ == "__main__": 51 | main() 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | from ntfs import __version__ 5 | 6 | 7 | setup(name="python-ntfs", 8 | version=__version__, 9 | description="Open source Python library for NTFS analysis", 10 | author="Willi Ballenthin", 11 | author_email="willi.ballenthin@gmail.com", 12 | url="http://www.williballenthin.com/forensics/ntfs", 13 | license="Apache License (2.0)", 14 | packages=[ 15 | "ntfs", 16 | "ntfs.mft", 17 | "ntfs.volume", 18 | "ntfs.filesystem", 19 | #"ntfs.secure", 20 | #"ntfs.logfile", 21 | #"ntfs.usnjrnl", 22 | ], 23 | classifiers=["Programming Language :: Python", 24 | "Operating System :: OS Independent", 25 | "License :: OSI Approved :: Apache Software License"], 26 | install_requires=["enum34"]) 27 | --------------------------------------------------------------------------------