├── Registry ├── __init__.py ├── Registry.py ├── LICENSE └── RegistryParse.py ├── README ├── LICENSE └── ShimCacheParser.py /Registry/__init__.py: -------------------------------------------------------------------------------- 1 | # This file is part of python-registry. 2 | # 3 | # Copyright 2011 Will Ballenthin 4 | # while at Mandiant 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | __all__ = [ 19 | 'Registry', 20 | 'RegistryParse', 21 | ] 22 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ShimCacheParser.py v1.0 2 | ==================== 3 | 4 | ShimCacheParser is a proof-of-concept tool for reading the Application Compatibility Shim Cache stored in the Windows registry. Metadata of files that are executed on a Windows system are placed within this data structure on the running system. Upon system shutdown, this data structure is serialized to the registry in one of two registry paths depending on the operating system version (HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\AppCompatibility\AppCompatCache or HKLM\SYSTEM\CurrentControlSet\Control\Session Manager\AppCompatCache\AppCompatCache) . 
The format of this data, as well as the types of information stored, also varies between operating systems, as summarized below: 5 | -Windows XP 32-bit: File Path, $STANDARD_INFORMATION Last Modified Time, File Size, and Last Update Time 6 | -Windows 2003 and XP 64-bit: File Path, $STANDARD_INFORMATION Last Modified Time, and File Size 7 | -Windows Vista and later: File Path, $STANDARD_INFORMATION Last Modified Time, Shim Flags 8 | 9 | More information about this cache and how it's implemented can be found here: http://www.mandiant.com/library/Whitepaper_ShimCacheParser.pdf 10 | 11 | The script will find these registry paths, automatically determine their format, and return the data in an optional CSV format. During testing it was discovered that on Windows Vista and later, files may be added to this cache if they were browsed to by explorer.exe and never actually executed. When these same files were executed, the 2nd least significant bit in the flags field was set by the CSRSS process while checking SXS information. During testing it was possible to identify if processes were executed based on this flag being set. This flag's true purpose is currently unknown and is still being tested for consistency, so it should not currently be used to definitively conclude that a file may or may not have executed. 12 | 13 | Usage 14 | ==================== 15 | ShimCacheParser.py requires Python 2.x (2.6 or later) which can be obtained from http://www.python.org/download/. Parsing of exported registry hives requires Willi Ballenthin's python-registry library which is currently included in this project or can be downloaded here: https://github.com/williballenthin/python-registry. 
16 | 17 | Several types of inputs are currently supported: 18 | -Extracted Registry Hives (-i, --hive) 19 | -Exported .reg registry files (-r, --reg) 20 | -MIR XML (-m, --mir) 21 | -Mass MIR registry acquisitions ZIP archives (-z, --zip) 22 | -The current Windows system (-l, --local) 23 | -Exported AppCompatCache data from binary file (-b, --bin) 24 | 25 | The output CSV file is set with the (-o, --output) argument. If no output file is specified, the data will be printed to STDOUT. ShimCacheParser will search each ControlSet and will only return unique entries by default. If you want to display duplicates as well as the full registry path from which the data was taken, use the verbose (-v, --verbose) option. 26 | -------------------------------------------------------------------------------- /Registry/Registry.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # This file is part of python-registry. 4 | # 5 | # Copyright 2011 Will Ballenthin 6 | # while at Mandiant 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | 20 | import sys 21 | import RegistryParse 22 | 23 | RegSZ = 0x0001 24 | RegExpandSZ = 0x0002 25 | RegBin = 0x0003 26 | RegDWord = 0x0004 27 | RegMultiSZ = 0x0007 28 | RegQWord = 0x000B 29 | RegNone = 0x0000 30 | RegBigEndian = 0x0005 31 | RegLink = 0x0006 32 | RegResourceList = 0x0008 33 | RegFullResourceDescriptor = 0x0009 34 | RegResourceRequirementsList = 0x000A 35 | 36 | 37 | class RegistryKeyHasNoParentException(RegistryParse.RegistryStructureDoesNotExist): 38 | """ 39 | """ 40 | def __init__(self, value): 41 | """ 42 | Constructor. 43 | Arguments: 44 | - `value`: A string description. 45 | """ 46 | super(RegistryKeyHasNoParentException, self).__init__(value) 47 | 48 | def __str__(self): 49 | return "Registry key has no parent key: %s" % (self._value) 50 | 51 | 52 | class RegistryKeyNotFoundException(RegistryParse.RegistryStructureDoesNotExist): 53 | """ 54 | """ 55 | def __init__(self, value): 56 | """ 57 | 58 | Arguments: 59 | - `value`: 60 | """ 61 | super(RegistryKeyNotFoundException, self).__init__(value) 62 | 63 | def __str__(self): 64 | return "Registry key not found: %s" % (self._value) 65 | 66 | class RegistryValueNotFoundException(RegistryParse.RegistryStructureDoesNotExist): 67 | """ 68 | """ 69 | def __init__(self, value): 70 | """ 71 | 72 | Arguments: 73 | - `value`: 74 | """ 75 | super(RegistryValueNotFoundException, self).__init__(value) 76 | 77 | def __str__(self): 78 | return "Registry value not found: %s" % (self._value) 79 | 80 | class RegistryValue(object): 81 | """ 82 | This is a high level structure for working with the Windows Registry. 83 | It represents the 3-tuple of (name, type, value) associated with a registry value. 84 | """ 85 | def __init__(self, vkrecord): 86 | self._vkrecord = vkrecord 87 | 88 | def name(self): 89 | """ 90 | Get the name of the value as a string. 91 | The name of the default value is returned as "(default)". 
92 | """ 93 | if self._vkrecord.has_name(): 94 | return self._vkrecord.name() 95 | else: 96 | return "(default)" 97 | 98 | def value_type(self): 99 | """ 100 | Get the type of the value as an integer constant. 101 | 102 | One of: 103 | - RegSZ = 0x0001 104 | - RegExpandSZ = 0x0002 105 | - RegBin = 0x0003 106 | - RegDWord = 0x0004 107 | - RegMultiSZ = 0x0007 108 | - RegQWord = 0x000B 109 | - RegNone = 0x0000 110 | - RegBigEndian = 0x0005 111 | - RegLink = 0x0006 112 | - RegResourceList = 0x0008 113 | - RegFullResourceDescriptor = 0x0009 114 | - RegResourceRequirementsList = 0x000A 115 | """ 116 | return self._vkrecord.data_type() 117 | 118 | def value_type_str(self): 119 | """ 120 | Get the type of the value as a string. 121 | 122 | One of: 123 | - RegSZ 124 | - RegExpandSZ 125 | - RegBin 126 | - RegDWord 127 | - RegMultiSZ 128 | - RegQWord 129 | - RegNone 130 | - RegBigEndian 131 | - RegLink 132 | - RegResourceList 133 | - RegFullResourceDescriptor 134 | - RegResourceRequirementsList 135 | """ 136 | return self._vkrecord.data_type_str() 137 | 138 | def value(self): 139 | return self._vkrecord.data() 140 | 141 | class RegistryKey(object): 142 | """ 143 | A high level structure for use in traversing the Windows Registry. 144 | A RegistryKey is a node in a tree-like structure. 145 | A RegistryKey may have a set of values associated with it, as well as a last modified timestamp. 146 | """ 147 | def __init__(self, nkrecord): 148 | """ 149 | 150 | Arguments: 151 | - `NKRecord`: 152 | """ 153 | self._nkrecord = nkrecord 154 | 155 | def __str__(self): 156 | return "Registry Key %s with %d values and %d subkeys" % (self.path(), len(self.values()), len(self.subkeys())) 157 | 158 | def __getitem__(self, key): 159 | return self.value(key) 160 | 161 | def timestamp(self): 162 | """ 163 | Get the last modified timestamp as a Python datetime. 164 | """ 165 | return self._nkrecord.timestamp() 166 | 167 | def name(self): 168 | """ 169 | Get the name of the key as a string. 
170 | 171 | For example, "Windows" if the key path were /{hive name}/SOFTWARE/Microsoft/Windows 172 | See RegistryKey.path() to get the complete key name. 173 | """ 174 | return self._nkrecord.name() 175 | 176 | 177 | def path(self): 178 | """ 179 | Get the full path of the RegistryKey as a string. 180 | For example, "/{hive name}/SOFTWARE/Microsoft/Windows" 181 | """ 182 | return self._nkrecord.path() 183 | 184 | def parent(self): 185 | """ 186 | Get the parent RegistryKey of this key, or raise 187 | RegistryKeyHasNoParentException if it does not exist (for example, 188 | the root key has no parent). 189 | """ 190 | # there may be a memory inefficiency here, since we create 191 | # a new RegistryKey from the NKRecord parent key, rather 192 | # than using the parent of this instance, if it exists. 193 | try: 194 | return RegistryKey(self._nkrecord.parent_key()) 195 | except RegistryParse.ParseException: 196 | raise RegistryKeyHasNoParentException(self.name()) 197 | 198 | def subkeys(self): 199 | """ 200 | Return a list of all subkeys. Each element in the list is a RegistryKey. 201 | If the key has no subkeys, the empty list is returned. 202 | """ 203 | if self._nkrecord.subkey_number() == 0: 204 | return [] 205 | 206 | l = self._nkrecord.subkey_list() 207 | return [RegistryKey(k) for k in l.keys()] 208 | 209 | def subkey(self, name): 210 | """ 211 | Return the subkey with a given name as a RegistryKey. 212 | Raises RegistryKeyNotFoundException if the subkey with the given name does not exist. 213 | """ 214 | #print name 215 | if self._nkrecord.subkey_number() == 0: 216 | raise RegistryKeyNotFoundException(self.path() + "\\" + name) 217 | 218 | for k in self._nkrecord.subkey_list().keys(): 219 | if k.name() == name: 220 | return RegistryKey(k) 221 | raise RegistryKeyNotFoundException(self.path() + "\\" + name) 222 | 223 | def values(self): 224 | """ 225 | Return a list containing the values associated with this RegistryKey. 
226 | Each element of the list will be a RegistryValue. 227 | If there are no values associated with this RegistryKey, then the 228 | empty list is returned. 229 | """ 230 | try: 231 | return [RegistryValue(v) for v in self._nkrecord.values_list().values()] 232 | except RegistryParse.RegistryStructureDoesNotExist: 233 | return [] 234 | 235 | def value(self, name): 236 | """ 237 | Return the value with the given name as a RegistryValue. 238 | Raises RegistryValueNotFoundExceptiono if the value with the given name does not exist. 239 | """ 240 | if name == "(default)": 241 | name = "" 242 | for v in self._nkrecord.values_list().values(): 243 | if v.name() == name: 244 | return RegistryValue(v) 245 | raise RegistryValueNotFoundException(self.path() + " : " + name) 246 | 247 | def find_key(self, path): 248 | """ 249 | Perform a search for a RegistryKey with a specific path. 250 | """ 251 | if len(path) == 0: 252 | return self 253 | 254 | (immediate, _, future) = path.partition("\\") 255 | return self.subkey(immediate).find_key(future) 256 | 257 | class Registry(object): 258 | """ 259 | A class for parsing and reading from a Windows Registry file. 260 | """ 261 | def __init__(self, filelikeobject): 262 | """ 263 | Constructor. 264 | Arguments: 265 | - `filelikeobject`: A file-like object with a .read() method. 266 | If a Python string is passed, it is interpreted as a filename, 267 | and the corresponding file is opened. 268 | """ 269 | try: 270 | self._buf = filelikeobject.read() 271 | except AttributeError: 272 | with open(filelikeobject, "rb") as f: 273 | self._buf = f.read() 274 | self._regf = RegistryParse.REGFBlock(self._buf, 0, False) 275 | 276 | def root(self): 277 | """ 278 | Return the first RegistryKey in the hive. 279 | """ 280 | return RegistryKey(self._regf.first_key()) 281 | 282 | def open(self, path): 283 | """ 284 | Return a RegistryKey by full path. 285 | Subkeys are separated by the backslash character ('\'). 
A trailing backslash may or may 286 | not be present. 287 | The hive name should not be included. 288 | """ 289 | # is the first registry key always the root? are there any other keys at this 290 | # level? is this the name of the hive? 291 | return RegistryKey(self._regf.first_key()).find_key(path) 292 | 293 | def print_all(key): 294 | if len(key.subkeys()) == 0: 295 | print key.path() 296 | else: 297 | for k in key.subkeys(): 298 | print_all(k) 299 | 300 | if __name__ == '__main__': 301 | r = Registry(sys.argv[1]) 302 | print_all(r.root()) 303 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /Registry/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /ShimCacheParser.py: -------------------------------------------------------------------------------- 1 | # ShimCacheParser.py 2 | # 3 | # Andrew Davis, andrew.davis@mandiant.com 4 | # Copyright 2012 Mandiant 5 | # 6 | # Mandiant licenses this file to you under the Apache License, Version 7 | # 2.0 (the "License"); you may not use this file except in compliance with the 8 | # License. You may obtain a copy of the License at: 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 15 | # implied. See the License for the specific language governing 16 | # permissions and limitations under the License. 17 | # 18 | # Identifies and parses Application Compatibility Shim Cache entries for forensic data. 
19 | 20 | import sys 21 | import struct 22 | import zipfile 23 | import argparse 24 | import binascii 25 | import datetime 26 | import cStringIO as sio 27 | import xml.etree.cElementTree as et 28 | from os import path 29 | from csv import writer 30 | 31 | # Values used by Windows 5.2 and 6.0 (Server 2003 through Vista/Server 2008) 32 | CACHE_MAGIC_NT5_2 = 0xbadc0ffe 33 | CACHE_HEADER_SIZE_NT5_2 = 0x8 34 | NT5_2_ENTRY_SIZE32 = 0x18 35 | NT5_2_ENTRY_SIZE64 = 0x20 36 | 37 | # Values used by Windows 6.1 (Win7 and Server 2008 R2) 38 | CACHE_MAGIC_NT6_1 = 0xbadc0fee 39 | CACHE_HEADER_SIZE_NT6_1 = 0x80 40 | NT6_1_ENTRY_SIZE32 = 0x20 41 | NT6_1_ENTRY_SIZE64 = 0x30 42 | CSRSS_FLAG = 0x2 43 | 44 | # Values used by Windows 5.1 (WinXP 32-bit) 45 | WINXP_MAGIC32 = 0xdeadbeef 46 | WINXP_HEADER_SIZE32 = 0x190 47 | WINXP_ENTRY_SIZE32 = 0x228 48 | MAX_PATH = 520 49 | 50 | # Values used by Windows 8 51 | WIN8_STATS_SIZE = 0x80 52 | WIN8_MAGIC = '00ts' 53 | 54 | # Magic value used by Windows 8.1 55 | WIN81_MAGIC = '10ts' 56 | 57 | bad_entry_data = 'N/A' 58 | g_verbose = False 59 | output_header = ["Last Modified", "Last Update", "Path", "File Size", "Exec Flag"] 60 | 61 | # Shim Cache format used by Windows 5.2 and 6.0 (Server 2003 through Vista/Server 2008) 62 | class CacheEntryNt5(object): 63 | 64 | def __init__(self, is32bit, data=None): 65 | 66 | self.is32bit = is32bit 67 | if data != None: 68 | self.update(data) 69 | 70 | def update(self, data): 71 | 72 | if self.is32bit: 73 | entry = struct.unpack('<2H 3L 2L', data) 74 | else: 75 | entry = struct.unpack('<2H 4x Q 2L 2L', data) 76 | self.wLength = entry[0] 77 | self.wMaximumLength = entry[1] 78 | self.Offset = entry[2] 79 | self.dwLowDateTime = entry[3] 80 | self.dwHighDateTime = entry[4] 81 | self.dwFileSizeLow = entry[5] 82 | self.dwFileSizeHigh = entry[6] 83 | 84 | def size(self): 85 | 86 | if self.is32bit: 87 | return NT5_2_ENTRY_SIZE32 88 | else: 89 | return NT5_2_ENTRY_SIZE64 90 | 91 | # Shim Cache format used by Windows 
6.1 (Win7 through Server 2008 R2) 92 | class CacheEntryNt6(object): 93 | 94 | def __init__(self, is32bit, data=None): 95 | 96 | self.is32bit = is32bit 97 | if data != None: 98 | self.update(data) 99 | 100 | def update(self, data): 101 | 102 | if self.is32bit: 103 | entry = struct.unpack('<2H 7L', data) 104 | else: 105 | entry = struct.unpack('<2H 4x Q 4L 2Q', data) 106 | self.wLength = entry[0] 107 | self.wMaximumLength = entry[1] 108 | self.Offset = entry[2] 109 | self.dwLowDateTime = entry[3] 110 | self.dwHighDateTime = entry[4] 111 | self.FileFlags = entry[5] 112 | self.Flags = entry[6] 113 | self.BlobSize = entry[7] 114 | self.BlobOffset = entry[8] 115 | 116 | def size(self): 117 | 118 | if self.is32bit: 119 | return NT6_1_ENTRY_SIZE32 120 | else: 121 | return NT6_1_ENTRY_SIZE64 122 | 123 | # Convert FILETIME to datetime. 124 | # Based on http://code.activestate.com/recipes/511425-filetime-to-datetime/ 125 | def convert_filetime(dwLowDateTime, dwHighDateTime): 126 | 127 | try: 128 | date = datetime.datetime(1601, 1, 1, 0, 0, 0) 129 | temp_time = dwHighDateTime 130 | temp_time <<= 32 131 | temp_time |= dwLowDateTime 132 | return date + datetime.timedelta(microseconds=temp_time/10) 133 | except OverflowError, err: 134 | return None 135 | 136 | # Return a unique list while preserving ordering. 137 | def unique_list(li): 138 | 139 | ret_list = [] 140 | for entry in li: 141 | if entry not in ret_list: 142 | ret_list.append(entry) 143 | return ret_list 144 | 145 | # Write the Log. 146 | def write_it(rows, outfile=None): 147 | 148 | try: 149 | 150 | if not rows: 151 | print "[-] No data to write..." 
152 | return 153 | 154 | if not outfile: 155 | for row in rows: 156 | print " ".join(["%s"%x for x in row]) 157 | else: 158 | print "[+] Writing output to %s..."%outfile 159 | try: 160 | csv_writer = writer(file(outfile, 'wb'), delimiter=',') 161 | csv_writer.writerows(rows) 162 | except IOError, err: 163 | print "[-] Error writing output file: %s" % str(err) 164 | return 165 | 166 | except UnicodeEncodeError, err: 167 | print "[-] Error writing output file: %s" % str(err) 168 | return 169 | 170 | # Read the Shim Cache format, return a list of last modified dates/paths. 171 | def read_cache(cachebin, quiet=False): 172 | 173 | if len(cachebin) < 16: 174 | # Data size less than minimum header size. 175 | return None 176 | 177 | try: 178 | # Get the format type 179 | magic = struct.unpack(" WIN8_STATS_SIZE and cachebin[WIN8_STATS_SIZE:WIN8_STATS_SIZE+4] == WIN8_MAGIC: 237 | if not quiet: 238 | print "[+] Found Windows 8/2k12 Apphelp Cache data..." 239 | return read_win8_entries(cachebin, WIN8_MAGIC) 240 | 241 | # Windows 8.1 will use a different magic dword, check for it 242 | elif len(cachebin) > WIN8_STATS_SIZE and cachebin[WIN8_STATS_SIZE:WIN8_STATS_SIZE+4] == WIN81_MAGIC: 243 | if not quiet: 244 | print "[+] Found Windows 8.1 Apphelp Cache data..." 245 | return read_win8_entries(cachebin, WIN81_MAGIC) 246 | 247 | else: 248 | print "[-] Got an unrecognized magic value of 0x%x... bailing" % magic 249 | return None 250 | 251 | except (RuntimeError, TypeError, NameError), err: 252 | print "[-] Error reading Shim Cache data: %s" % err 253 | return None 254 | 255 | # Read Windows 8/2k12/8.1 Apphelp Cache entry formats. 
256 | def read_win8_entries(bin_data, ver_magic): 257 | offset = 0 258 | entry_meta_len = 12 259 | entry_list = [] 260 | 261 | # Skip past the stats in the header 262 | cache_data = bin_data[WIN8_STATS_SIZE:] 263 | 264 | data = sio.StringIO(cache_data) 265 | while data.tell() < len(cache_data): 266 | header = data.read(entry_meta_len) 267 | # Read in the entry metadata 268 | # Note: the crc32 hash is of the cache entry data 269 | magic, crc32_hash, entry_len = struct.unpack('<4sLL', header) 270 | 271 | # Check the magic tag 272 | if magic != ver_magic: 273 | raise Exception("Invalid version magic tag found: 0x%x" % struct.unpack(" 0: 287 | # Just skip past the package data if present (for now) 288 | entry_data.seek(package_len, 1) 289 | 290 | # Read the remaining entry data 291 | flags, unk_1, low_datetime, high_datetime, unk_2 = struct.unpack(' 3: 332 | contains_file_size = True 333 | break 334 | 335 | # Now grab all the data in the value. 336 | for offset in xrange(CACHE_HEADER_SIZE_NT5_2, (num_entries * entry_size), 337 | entry_size): 338 | 339 | entry.update(bin_data[offset:offset+entry_size]) 340 | 341 | last_mod_date = convert_filetime(entry.dwLowDateTime, entry.dwHighDateTime) 342 | try: 343 | last_mod_date = last_mod_date.strftime("%m/%d/%y %H:%M:%S") 344 | except ValueError: 345 | last_mod_date = bad_entry_data 346 | path = bin_data[entry.Offset:entry.Offset + entry.wLength].decode('utf-16le', 'replace').encode('utf-8') 347 | path = path.replace("\\??\\", "") 348 | 349 | # It contains file size data. 350 | if contains_file_size: 351 | hit = [last_mod_date, 'N/A', path, str(entry.dwFileSizeLow), 'N/A'] 352 | if hit not in entry_list: 353 | entry_list.append(hit) 354 | 355 | # It contains flags. 
356 | else: 357 | # Check the flag set in CSRSS 358 | if (entry.dwFileSizeLow & CSRSS_FLAG): 359 | exec_flag = 'True' 360 | else: 361 | exec_flag = 'False' 362 | 363 | hit = [last_mod_date, 'N/A', path, 'N/A', exec_flag] 364 | if hit not in entry_list: 365 | entry_list.append(hit) 366 | 367 | return entry_list 368 | 369 | except (RuntimeError, ValueError, NameError), err: 370 | print "[-] Error reading Shim Cache data: %s..." % err 371 | return None 372 | 373 | # Read the Shim Cache Windows 7/2k8-R2 entry format, 374 | # return a list of last modifed dates/paths. 375 | def read_nt6_entries(bin_data, entry): 376 | 377 | try: 378 | entry_list = [] 379 | exec_flag = "" 380 | entry_size = entry.size() 381 | num_entries = struct.unpack(' 0: 732 | filename = filename.pop() 733 | else: 734 | continue 735 | # Get the hostname from the MIR xml filename. 736 | hostname = '-'.join(filename.split('-')[:-3]) 737 | xml_file = archive.open(item) 738 | 739 | # Catch possibly corrupt MIR XML data. 740 | try: 741 | out_list = read_mir(xml_file, quiet=True) 742 | except(struct.error, et.ParseError), err: 743 | print "[-] Error reading XML data from host: %s, data looks corrupt. Continuing..." % hostname 744 | continue 745 | 746 | # Add the hostname to the entry list. 747 | if not out_list or len(out_list) == 0: 748 | continue 749 | else: 750 | for li in out_list: 751 | if "Last Modified" not in li[0]: 752 | li.insert(0, hostname) 753 | final_list.append(li) 754 | 755 | except IOError, err: 756 | print "[-] Error opening file: %s in MIR archive: %s" % (item, err) 757 | continue 758 | # Add the final header. 759 | final_list.insert(0, ("Hostname", "Last Modified", "Last Execution", 760 | "Path", "File Size", "File Executed", "Key Path")) 761 | return final_list 762 | 763 | except (IOError, zipfile.BadZipfile, struct.error), err: 764 | print "[-] Error reading zip archive: %s" % zip_name 765 | return None 766 | 767 | # Do the work. 
768 | def main(): 769 | 770 | global g_verbose 771 | 772 | parser = argparse.ArgumentParser(description="Parses Application Compatibilty Shim Cache data") 773 | parser.add_argument("-v", "--verbose", action="store_true", 774 | help="Toggles verbose output") 775 | 776 | group = parser.add_argument_group() 777 | group.add_argument("-o", "--out", metavar="FILE", help="Writes to CSV data to FILE (default is STDOUT)") 778 | 779 | group = parser.add_mutually_exclusive_group() 780 | group.add_argument("-l", "--local", action="store_true", help="Reads data from local system") 781 | group.add_argument("-b", "--bin", metavar="BIN", help="Reads data from a binary BIN file") 782 | group.add_argument("-m", "--mir", metavar="XML", help="Reads data from a MIR XML file") 783 | group.add_argument("-z", "--zip", metavar="ZIP", help="Reads ZIP file containing MIR registry acquisitions") 784 | group.add_argument("-i", "--hive", metavar="HIVE", help="Reads data from a registry reg HIVE") 785 | group.add_argument("-r", "--reg", metavar="REG", help="Reads data from a .reg registry export file") 786 | 787 | args = parser.parse_args() 788 | 789 | if args.verbose: 790 | g_verbose = True 791 | 792 | # Pull Shim Cache MIR XML. 793 | if args.mir: 794 | print "[+] Reading MIR output XML file: %s..." % args.mir 795 | try: 796 | with file(args.mir, 'rb') as xml_data: 797 | entries = read_mir(xml_data) 798 | if not entries: 799 | print "[-] No Shim Cache entries found..." 800 | return 801 | else: 802 | write_it(entries, args.out) 803 | except IOError, err: 804 | print "[-] Error opening binary file: %s" % str(err) 805 | return 806 | 807 | # Process a MIR XML ZIP archive 808 | elif args.zip: 809 | print "[+] Reading MIR XML zip archive: %s..." % args.zip 810 | entries = read_zip(args.zip) 811 | if not entries: 812 | print "[-] No Shim Cache entries found..." 813 | else: 814 | write_it(entries, args.out) 815 | 816 | # Read the binary file. 
817 | elif args.bin: 818 | print "[+] Reading binary file: %s..." % args.bin 819 | try: 820 | with file(args.bin, 'rb') as bin_data: 821 | bin_data = bin_data.read() 822 | except IOError, err: 823 | print "[-] Error opening binary file: %s" % str(err) 824 | return 825 | entries = read_cache(bin_data) 826 | if not entries: 827 | print "[-] No Shim Cache entries found..." 828 | else: 829 | write_it(entries, args.out) 830 | 831 | # Read the key data from a registry hive. 832 | elif args.reg: 833 | print "[+] Reading .reg file: %s..." % args.reg 834 | entries = read_from_reg(args.reg) 835 | if not entries: 836 | print "[-] No Shim Cache entries found..." 837 | else: 838 | write_it(entries, args.out) 839 | 840 | elif args.hive: 841 | print "[+] Reading registry hive: %s..." % args.hive 842 | try: 843 | entries = read_from_hive(args.hive) 844 | if not entries: 845 | print "[-] No Shim Cache entries found..." 846 | else: 847 | write_it(entries, args.out) 848 | except IOError, err: 849 | print "[-] Error opening hive file: %s" % str(err) 850 | return 851 | 852 | # Read the local Shim Cache data from the current system 853 | elif args.local: 854 | print "[+] Dumping Shim Cache data from the current system..." 855 | entries = get_local_data() 856 | if not entries: 857 | print "[-] No Shim Cache entries found..." 858 | else: 859 | write_it(entries, args.out) 860 | 861 | if __name__ == '__main__': 862 | main() -------------------------------------------------------------------------------- /Registry/RegistryParse.py: -------------------------------------------------------------------------------- 1 | #!/bin/python 2 | 3 | # This file is part of python-registry. 4 | # 5 | # Copyright 2011 Will Ballenthin 6 | # while at Mandiant 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 
10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | 20 | import struct 21 | from datetime import datetime 22 | 23 | # Constants 24 | RegSZ = 0x0001 25 | RegExpandSZ = 0x0002 26 | RegBin = 0x0003 27 | RegDWord = 0x0004 28 | RegMultiSZ = 0x0007 29 | RegQWord = 0x000B 30 | RegNone = 0x0000 31 | RegBigEndian = 0x0005 32 | RegLink = 0x0006 33 | RegResourceList = 0x0008 34 | RegFullResourceDescriptor = 0x0009 35 | RegResourceRequirementsList = 0x000A 36 | 37 | _global_warning_messages = [] 38 | def warn(msg): 39 | if msg not in _global_warning_messages: 40 | _global_warning_messages.append(msg) 41 | print "Warning: %s" % (msg) 42 | 43 | def parse_windows_timestamp(qword): 44 | # see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/ 45 | return datetime.utcfromtimestamp(float(qword) * 1e-7 - 11644473600 ) 46 | 47 | class RegistryException(Exception): 48 | """ 49 | Base Exception class for Windows Registry access. 50 | """ 51 | 52 | def __init__(self, value): 53 | """ 54 | Constructor. 55 | Arguments: 56 | - `value`: A string description. 57 | """ 58 | super(RegistryException, self).__init__() 59 | self._value = value 60 | 61 | def __str__(self): 62 | return "Registry Exception: %s" % (self._value) 63 | 64 | class RegistryStructureDoesNotExist(RegistryException): 65 | """ 66 | Exception to be raised when a structure or block is requested which does not exist. 67 | For example, asking for the ValuesList structure of an NKRecord that has no values 68 | (and therefore no ValuesList) should result in this exception. 
69 | """ 70 | def __init__(self, value): 71 | """ 72 | Constructor. 73 | Arguments: 74 | - `value`: A string description. 75 | """ 76 | super(RegistryStructureDoesNotExist, self).__init__(value) 77 | 78 | def __str__(self): 79 | return "Registry Structure Does Not Exist Exception: %s" % (self._value) 80 | 81 | class ParseException(RegistryException): 82 | """ 83 | An exception to be thrown during Windows Registry parsing, such as 84 | when an invalid header is encountered. 85 | """ 86 | def __init__(self, value): 87 | """ 88 | Constructor. 89 | Arguments: 90 | - `value`: A string description. 91 | """ 92 | super(ParseException, self).__init__(value) 93 | 94 | def __str__(self): 95 | return "Registry Parse Exception(%s)" % (self._value) 96 | 97 | class UnknownTypeException(RegistryException): 98 | """ 99 | An exception to be raised when an unknown data type is encountered. 100 | Supported data types current consist of 101 | - RegSZ 102 | - RegExpandSZ 103 | - RegBin 104 | - RegDWord 105 | - RegMultiSZ 106 | - RegQWord 107 | - RegNone 108 | - RegBigEndian 109 | - RegLink 110 | - RegResourceList 111 | - RegFullResourceDescriptor 112 | - RegResourceRequirementsList 113 | """ 114 | def __init__(self, value): 115 | """ 116 | Constructor. 117 | Arguments: 118 | - `value`: A string description. 119 | """ 120 | super(UnknownTypeException, self).__init__(value) 121 | 122 | def __str__(self): 123 | return "Unknown Type Exception(%s)" % (self._value) 124 | 125 | class RegistryBlock(object): 126 | """ 127 | Base class for structure blocks in the Windows Registry. 128 | A block is associated with a offset into a byte-string. 129 | 130 | All blocks (besides the root) also have a parent member, which refers to 131 | a RegistryBlock that contains a reference to this block, an is found at a 132 | hierarchically superior rank. Note, by following the parent links upwards, 133 | the root block should be accessible (aka. 
there should not be any loops) 134 | """ 135 | def __init__(self, buf, offset, parent): 136 | """ 137 | Constructor. 138 | Arguments: 139 | - `buf`: Byte string containing Windows Registry file. 140 | - `offset`: The offset into the buffer at which the block starts. 141 | - `parent`: The parent block, which links to this block. 142 | """ 143 | self._buf = buf 144 | self._offset = offset 145 | self._parent = parent 146 | 147 | def unpack_word(self, offset): 148 | """ 149 | Returns a little-endian WORD (2 bytes) from the relative offset. 150 | Arguments: 151 | - `offset`: The relative offset from the start of the block. 152 | """ 153 | return struct.unpack_from(" 0 306 | 307 | def size(self): 308 | """ 309 | Size of this cell, as an unsigned integer. 310 | """ 311 | if self.is_free(): 312 | return self._size 313 | else: 314 | return self._size * -1 315 | 316 | def next(self): 317 | """ 318 | Returns the next HBINCell, which is located immediately after this. 319 | Note: This will always return an HBINCell starting at the next location 320 | whether or not the buffer is large enough. The calling function should 321 | check the offset of the next HBINCell to ensure it does not overrun the 322 | HBIN buffer. 323 | """ 324 | try: 325 | return HBINCell(self._buf, self._offset + self.size(), self.parent()) 326 | except: 327 | raise RegistryStructureDoesNotExist("HBINCell does not exist at 0x%x" % (self._offset + self.size())) 328 | 329 | def offset(self): 330 | """ 331 | Accessor for absolute offset of this HBINCell. 332 | """ 333 | return self._offset 334 | 335 | def data_offset(self): 336 | """ 337 | Get the absolute offset of the data block of this HBINCell. 338 | """ 339 | return self._offset + 0x4 340 | 341 | def raw_data(self): 342 | """ 343 | Get the raw data from the buffer contained by this HBINCell. 
344 | """ 345 | return self._buf[self.data_offset():self.data_offset() + self.size()] 346 | 347 | def data_id(self): 348 | """ 349 | Get the ID string of the data block of this HBINCell. 350 | """ 351 | return self.unpack_string(0x4, 2) 352 | 353 | def abs_offset_from_hbin_offset(self, offset): 354 | """ 355 | Offsets contained in HBIN cells are relative to the beginning of the first HBIN. 356 | This converts the relative offset into an absolute offset. 357 | """ 358 | h = self.parent() 359 | while h.__class__.__name__ != "HBINBlock": 360 | h = h.parent() 361 | 362 | return h.first_hbin().offset() + offset 363 | 364 | def child(self): 365 | """ 366 | Make a _guess_ as to the contents of this structure and 367 | return an instance of that class, or just a DataRecord 368 | otherwise. 369 | """ 370 | if self.is_free(): 371 | raise RegistryStructureDoesNotExist("HBINCell is free at 0x%x" % (self.offset())) 372 | 373 | id_ = self.data_id() 374 | 375 | if id_ == "vk": 376 | return VKRecord(self._buf, self.data_offset(), self) 377 | elif id_ == "nk": 378 | return NKRecord(self._buf, self.data_offset(), self) 379 | elif id_ == "lf": 380 | return LFRecord(self._buf, self.data_offset(), self) 381 | elif id_ == "lh": 382 | return LHRecord(self._buf, self.data_offset(), self) 383 | elif id_ == "li": 384 | return LIRecord(self._buf, self.data_offset(), self) 385 | elif id_ == "ri": 386 | return RIRecord(self._buf, self.data_offset(), self) 387 | elif id_ == "sk": 388 | return SKRecord(self._buf, self.data_offset(), self) 389 | elif id_ == "db": 390 | return DBRecord(self._buf, self.data_offset(), self) 391 | else: 392 | return DataRecord(self._buf, self.data_offset(), self) 393 | 394 | class Record(RegistryBlock): 395 | """ 396 | Abstract class for Records contained by cells in HBINs 397 | """ 398 | def __init__(self, buf, offset, parent): 399 | """ 400 | Constructor. 401 | Arguments: 402 | - `buf`: Byte string containing Windows Registry file. 
403 | - `offset`: The offset into the buffer at which the block starts. 404 | - `parent`: The parent block, which links to this block. This SHOULD be an HBINCell. 405 | """ 406 | super(Record, self).__init__(buf, offset, parent) 407 | 408 | def abs_offset_from_hbin_offset(self, offset): 409 | # TODO This violates DRY as this is a redefinition, see HBINCell.abs_offset_from_hbin_offset() 410 | """ 411 | Offsets contained in HBIN cells are relative to the beginning of the first HBIN. 412 | This converts the relative offset into an absolute offset. 413 | """ 414 | h = self.parent() 415 | while h.__class__.__name__ != "HBINBlock": 416 | h = h.parent() 417 | 418 | return h.first_hbin().offset() + offset 419 | 420 | class DataRecord(Record): 421 | """ 422 | A DataRecord is a HBINCell that does not contain any further structural data, but 423 | may contain, for example, the values pointed to by a VKRecord. 424 | """ 425 | def __init__(self, buf, offset, parent): 426 | """ 427 | Constructor. 428 | 429 | Arguments: 430 | - `buf`: Byte string containing Windows Registry file. 431 | - `offset`: The offset into the buffer at which the block starts. 432 | - `parent`: The parent block, which links to this block. This should be an HBINCell. 433 | """ 434 | super(DataRecord, self).__init__(buf, offset, parent) 435 | 436 | def __str__(self): 437 | return "Data Record at 0x%x" % (self.offset()) 438 | 439 | class DBIndirectBlock(Record): 440 | """ 441 | The DBIndirect block is a list of offsets to DataRecords with data 442 | size up to 0x3fd8. 443 | """ 444 | def __init__(self, buf, offset, parent): 445 | """ 446 | Constructor. 447 | Arguments: 448 | - `buf`: Byte string containing Windows Registry file. 449 | - `offset`: The offset into the buffer at which the block starts. 450 | - `parent`: The parent block, which links to this block. This should be an HBINCell. 
451 | """ 452 | super(DBIndirectBlock, self).__init__(buf, offset, parent) 453 | 454 | def __str__(self): 455 | return "Large Data Block at 0x%x" % (self.offset()) 456 | 457 | def large_data(self, length): 458 | """ 459 | Get the data pointed to by the indirect block. It may be large. 460 | Return a byte array. 461 | """ 462 | b = bytearray() 463 | count = 0 464 | while length > 0: 465 | off = self.abs_offset_from_hbin_offset(self.unpack_dword(4 * count)) 466 | size = min(0x3fd8, length) 467 | b += HBINCell(self._buf, off, self).raw_data()[0:size] 468 | 469 | count += 1 470 | length -= size 471 | return b 472 | 473 | class DBRecord(Record): 474 | """ 475 | A DBRecord is a large data block, which is not thoroughly documented. 476 | Its similar to an inode in the Ext file systems. 477 | """ 478 | def __init__(self, buf, offset, parent): 479 | """ 480 | Constructor. 481 | Arguments: 482 | - `buf`: Byte string containing Windows Registry file. 483 | - `offset`: The offset into the buffer at which the block starts. 484 | - `parent`: The parent block, which links to this block. This should be an HBINCell. 485 | """ 486 | super(DBRecord, self).__init__(buf, offset, parent) 487 | 488 | _id = self.unpack_string(0x0, 2) 489 | if _id != "db": 490 | raise ParseException("Invalid DB Record ID") 491 | 492 | def __str__(self): 493 | return "Large Data Block at 0x%x" % (self.offset()) 494 | 495 | def large_data(self, length): 496 | """ 497 | Get the data described by the DBRecord. It may be large. 498 | Return a byte array. 499 | """ 500 | off = self.abs_offset_from_hbin_offset(self.unpack_dword(0x4)) 501 | cell = HBINCell(self._buf, off, self) 502 | dbi = DBIndirectBlock(self._buf, cell.data_offset(), cell) 503 | return dbi.large_data(length) 504 | 505 | class VKRecord(Record): 506 | """ 507 | The VKRecord holds one name-value pair. The data may be one many types, 508 | including strings, integers, and binary data. 
509 | """ 510 | def __init__(self, buf, offset, parent): 511 | """ 512 | Constructor. 513 | Arguments: 514 | - `buf`: Byte string containing Windows Registry file. 515 | - `offset`: The offset into the buffer at which the block starts. 516 | - `parent`: The parent block, which links to this block. 517 | This should be an HBINCell. 518 | """ 519 | super(VKRecord, self).__init__(buf, offset, parent) 520 | 521 | _id = self.unpack_string(0x0, 2) 522 | if _id != "vk": 523 | raise ParseException("Invalid VK Record ID") 524 | 525 | def data_type_str(self): 526 | """ 527 | Get the value data's type as a string 528 | """ 529 | data_type = self.data_type() 530 | if data_type == RegSZ: 531 | return "RegSZ" 532 | elif data_type == RegExpandSZ: 533 | return "RegExpandSZ" 534 | elif data_type == RegBin: 535 | return "RegBin" 536 | elif data_type == RegDWord: 537 | return "RegDWord" 538 | elif data_type == RegMultiSZ: 539 | return "RegMultiSZ" 540 | elif data_type == RegQWord: 541 | return "RegQWord" 542 | elif data_type == RegNone: 543 | return "RegNone" 544 | elif data_type == RegBigEndian: 545 | return "RegBigEndian" 546 | elif data_type == RegLink: 547 | return "RegLink" 548 | elif data_type == RegResourceList: 549 | return "RegResourceList" 550 | elif data_type == RegFullResourceDescriptor: 551 | return "RegFullResourceDescriptor" 552 | elif data_type == RegResourceRequirementsList: 553 | return "RegResourceRequirementsList" 554 | else: 555 | raise UnknownTypeException("Unknown VK Record type 0x%x at 0x%x" % (data_type, self.offset())) 556 | 557 | def __str__(self): 558 | if self.has_name(): 559 | name = self.name() 560 | else: 561 | name = "(default)" 562 | 563 | data = "" 564 | data_type = self.data_type() 565 | if data_type == RegSZ or data_type == RegExpandSZ: 566 | data = self.data()[0:16] + "..." 
567 | elif data_type == RegMultiSZ: 568 | data = str(len(self.data())) + " strings" 569 | elif data_type == RegDWord or data_type == RegQWord: 570 | data = str(hex(self.data())) 571 | elif data_type == RegNone: 572 | data = "(none)" 573 | elif data_type == RegBin: 574 | data = "(binary)" 575 | else: 576 | data = "(unsupported)" 577 | 578 | return "VKRecord(Name: %s, Type: %s, Data: %s) at 0x%x" % (name, 579 | self.data_type_str(), 580 | data, 581 | self.offset()) 582 | 583 | def has_name(self): 584 | """ 585 | Has a name? or perhaps we should use '(default)' 586 | """ 587 | return self.unpack_word(0x2) != 0 588 | 589 | def has_ascii_name(self): 590 | """ 591 | Is the name of this value in the ASCII charset? 592 | Note, this doesnt work, yet... TODO 593 | """ 594 | if self.unpack_word(0x10) & 1 == 1: 595 | print "ascii name" 596 | else: 597 | print "not ascii name" 598 | return self.unpack_word(0x10) & 1 == 1 599 | 600 | def name(self): 601 | """ 602 | Get the name, if it exists. If not, the empty string is returned. 603 | """ 604 | if not self.has_name(): 605 | return "" 606 | else: 607 | name_length = self.unpack_word(0x2) 608 | return self.unpack_string(0x14, name_length) 609 | 610 | def data_type(self): 611 | """ 612 | Get the data type of this value data as an unsigned integer. 613 | """ 614 | return self.unpack_dword(0xC) 615 | 616 | def data_length(self): 617 | """ 618 | Get the length of this value data. 619 | """ 620 | return self.unpack_dword(0x4) 621 | 622 | def data_offset(self): 623 | """ 624 | Get the offset to the raw data associated with this value. 625 | """ 626 | if self.data_length() < 5 or self.data_length() >= 0x80000000: 627 | return self.absolute_offset(0x8) 628 | else: 629 | return self.abs_offset_from_hbin_offset(self.unpack_dword(0x8)) 630 | 631 | def data(self): 632 | """ 633 | Get the data. This method will return various types based on the data type. 
634 | 635 | RegSZ: 636 | Return a string containing the data, doing the best we can to convert it 637 | to ASCII or UNICODE. 638 | RegExpandSZ: 639 | Return a string containing the data, doing the best we can to convert it 640 | to ASCII or UNICODE. The special variables are not expanded. 641 | RegMultiSZ: 642 | Return a list of strings. 643 | RegNone: 644 | See RegBin 645 | RegDword: 646 | Return an unsigned integer containing the data. 647 | RegQword: 648 | Return an unsigned integer containing the data. 649 | RegBin: 650 | Return a sequence of bytes containing the binary data. 651 | RegBigEndian: 652 | Not currently supported. TODO. 653 | RegLink: 654 | Not currently supported. TODO. 655 | RegResourceList: 656 | Not currently supported. TODO. 657 | RegFullResourceDescriptor: 658 | Not currently supported. TODO. 659 | RegResourceRequirementsList: 660 | Not currently supported. TODO. 661 | """ 662 | data_type = self.data_type() 663 | data_length = self.data_length() 664 | data_offset = self.data_offset() 665 | 666 | if data_type == RegSZ or data_type == RegExpandSZ: 667 | if data_length >= 0x80000000: 668 | # data is contained in the data_offset field 669 | s = struct.unpack_from("<%ds" % (4), self._buf, data_offset)[0] 670 | elif 0x3fd8 < data_length < 0x80000000: 671 | d = HBINCell(self._buf, data_offset, self) 672 | if d.data_id() == "db": 673 | # this should always be the case 674 | # but empirical testing does not confirm this 675 | s = d.child().large_data(data_length) 676 | else: 677 | s = d.raw_data()[:data_length] 678 | else: 679 | d = HBINCell(self._buf, data_offset, self) 680 | s = struct.unpack_from("<%ds" % (data_length), self._buf, d.data_offset())[0] 681 | 682 | try: 683 | s = s.decode("utf16").encode("utf8").decode("utf8") # iron out the kinks by 684 | except UnicodeDecodeError: # converting to and back to a Python str 685 | try: 686 | s = s.decode("utf8").encode("utf8").decode("utf8") 687 | except UnicodeDecodeError: 688 | try: 689 | s = 
s.decode("utf8", "replace").encode("utf8").decode("utf8") 690 | except: 691 | print "Well at this point you are screwed." 692 | raise 693 | s = s.partition('\x00')[0] 694 | return s 695 | elif data_type == RegBin or data_type == RegNone: 696 | if data_length >= 0x80000000: 697 | data_length -= 0x80000000 698 | return self._buf[data_offset:data_offset + data_length] 699 | elif 0x3fd8 < data_length < 0x80000000: 700 | d = HBINCell(self._buf, data_offset, self) 701 | if d.data_id() == "db": 702 | # this should always be the case 703 | # but empirical testing does not confirm this 704 | return d.child().large_data(data_length) 705 | else: 706 | return d.raw_data()[:data_length] 707 | return self._buf[data_offset + 4:data_offset + 4 + data_length] 708 | elif data_type == RegDWord: 709 | return self.unpack_dword(0x8) 710 | elif data_type == RegMultiSZ: 711 | if data_length >= 0x80000000: 712 | # this means data_length < 5, so it must be 4, and 713 | # be composed of completely \x00, so the strings are empty 714 | return [] 715 | elif 0x3fd8 < data_length < 0x80000000: 716 | d = HBINCell(self._buf, data_offset, self) 717 | if d.data_id() == "db": 718 | s = d.child().large_data(data_length) 719 | else: 720 | s = d.raw_data()[:data_length] 721 | else: 722 | s = self._buf[data_offset + 4:data_offset + 4 + data_length] 723 | s = s.decode("utf16") 724 | return s.split("\x00") 725 | elif data_type == RegQWord: 726 | d = HBINCell(self._buf, data_offset, self) 727 | return struct.unpack_from("