├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── main.py └── recuperabit ├── __init__.py ├── fs ├── __init__.py ├── constants.py ├── core_types.py ├── ntfs.py └── ntfs_fmt.py ├── logic.py └── utils.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: thelazza 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Code ### 2 | .vscode/* 3 | !.vscode/tasks.json 4 | !.vscode/launch.json 5 | *.code-workspace 6 | 7 | ### Python ### 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | pytestdebug.log 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | doc/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # pipenv 97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 100 | # install all needed dependencies. 101 | #Pipfile.lock 102 | 103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 104 | __pypackages__/ 105 | 106 | # Celery stuff 107 | celerybeat-schedule 108 | celerybeat.pid 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | pythonenv* 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # profiling data 145 | .prof 146 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # ![RecuperaBit](http://i.imgur.com/Q6mM385.jpg) 4 | 5 |

GPL-3.0 license 6 | Latest release 7 | Stars counter 8 | Stadium badge 9 | Donate on Ko-fi

10 | 11 | A software which attempts to reconstruct file system structures and recover 12 | files. Currently it supports only NTFS. 13 | 14 | RecuperaBit attempts reconstruction of the directory structure regardless of: 15 | 16 | - missing partition table 17 | - unknown partition boundaries 18 | - partially-overwritten metadata 19 | - quick format 20 | 21 | You can get more information about **the reconstruction algorithms** and the 22 | architecture used in RecuperaBit by reading 23 | [my MSc thesis](https://www.scribd.com/doc/309337813/) or checking out [the 24 | slides](http://www.slideshare.net/TheLazza/recuperabit-forensic-file-system-reconstruction-given-partially-corrupted-metadata). 25 | 26 | ## Usage 27 | 28 | usage: main.py [-h] [-s SAVEFILE] [-w] [-o OUTPUTDIR] path 29 | 30 | Reconstruct the directory structure of possibly damaged filesystems. 31 | 32 | positional arguments: 33 | path path to the disk image 34 | 35 | optional arguments: 36 | -h, --help show this help message and exit 37 | -s SAVEFILE, --savefile SAVEFILE 38 | path of the scan save file 39 | -w, --overwrite force overwrite of the save file 40 | -o OUTPUTDIR, --outputdir OUTPUTDIR 41 | directory for restored contents and output files 42 | 43 | The main argument is the `path` to a bitstream image of a disk or partition. 44 | RecuperaBit automatically determines the sectors from which partitions start. 45 | 46 | RecuperaBit does not modify the disk image, however it does read some parts of 47 | it multiple times through the execution. It should also work on real devices, 48 | such as `/dev/sda` but **this is not advised** for damaged drives. RecuperaBit 49 | might worsen the situation by "stressing" a damaged drive or it could crash due 50 | to an I/O error. 51 | 52 | Optionally, a save file can be specified with `-s`. The first time, after the 53 | scanning process, results are saved in the file. After the first run, the file 54 | is read to only analyze interesting sectors and speed up the loading phase. 55 | 56 | Overwriting the save file can be forced with `-w`. 57 | 58 | RecuperaBit includes a small command line that allows the user to recover files 59 | and export the contents of a partition in CSV or 60 | [body file](http://wiki.sleuthkit.org/index.php?title=Body_file) format. These 61 | are exported in the directory specified by `-o` (or `recuperabit_output`). 62 | 63 | ### Limitation 64 | 65 | Currently RecuperaBit does not work with compressed files on an NTFS filesystem. 66 | If you have deep knowledge of the inner workings of file compression on NTFS 67 | filesystem, your help would be much appreciated, as available documentation is 68 | quite sparse on the topic. 69 | 70 | ### Pypy 71 | 72 | RecuperaBit can be run with the standard cPython implementation, however speed 73 | can be increased by using it with the Pypy interpreter and JIT compiler: 74 | 75 | pypy3 main.py /path/to/disk.img 76 | 77 | ### Recovery of File Contents 78 | 79 | Files can be restored one at a time or recursively, starting from a directory. 80 | After the scanning process has completed, you can check the list of partitions 81 | that can be recovered by issuing the following command at the prompt: 82 | 83 | recoverable 84 | 85 | Each line shows information about a partition. Let's consider the following 86 | output example: 87 | 88 | Partition #0 -> Partition (NTFS, 15.00 MB, 11 files, Recoverable, Offset: 2048, Offset (b): 1048576, Sec/Clus: 8, MFT offset: 2080, MFT mirror offset: 17400) 89 | 90 | If you want to recover files starting from a specific directory, you can either 91 | print the tree on screen with the `tree` command (very verbose for large drives) 92 | or you can export a CSV list of files (see `help` for details). 93 | 94 | If you rather want to extract all files from the *Root* and the *Lost Files* 95 | nodes, you need to know the identifier for the root directory, depending on 96 | the file system type. The following are those of file systems supported by 97 | RecuperaBit: 98 | 99 | | File System Type | Root Id | 100 | |------------------|---------| 101 | | NTFS | 5 | 102 | 103 | The id for *Lost Files* is -1 **for every file system.** 104 | 105 | Therefore, to restore `Partition #0` in our example, you need to run: 106 | 107 | restore 0 5 108 | restore 0 -1 109 | 110 | The files will be saved inside the output directory specified by `-o`. 111 | 112 | ## License 113 | 114 | This software is released under the GNU GPLv3. See `LICENSE` for more details. 115 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lazza/RecuperaBit/e05079ef0f40a1198c7633fce9d1b9eaef9c5679/__init__.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Main RecuperaBit process.""" 3 | 4 | # RecuperaBit 5 | # Copyright 2014-2021 Andrea Lazzarotto 6 | # 7 | # This file is part of RecuperaBit. 8 | # 9 | # RecuperaBit is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # RecuperaBit is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with RecuperaBit. If not, see . 21 | 22 | 23 | import argparse 24 | import codecs 25 | import itertools 26 | import locale 27 | import logging 28 | import os.path 29 | import pickle 30 | import sys 31 | try: 32 | import readline 33 | readline # ignore unused import warning 34 | except ImportError: 35 | pass 36 | 37 | from recuperabit import logic, utils 38 | # scanners 39 | from recuperabit.fs.ntfs import NTFSScanner 40 | 41 | from typing import TYPE_CHECKING 42 | if TYPE_CHECKING: 43 | from recuperabit.fs.core_types import Partition 44 | 45 | __author__ = "Andrea Lazzarotto" 46 | __copyright__ = "(c) 2014-2021, Andrea Lazzarotto" 47 | __license__ = "GPLv3" 48 | __version__ = "1.1.6" 49 | __maintainer__ = "Andrea Lazzarotto" 50 | __email__ = "andrea.lazzarotto@gmail.com" 51 | 52 | 53 | # classes of available scanners 54 | plugins = ( 55 | NTFSScanner, 56 | ) 57 | 58 | commands = ( 59 | ('help', 'Print this help message'), 60 | ('recoverable', 'List recoverable partitions'), 61 | ('recoverable_size ', 'List recoverable partitions based on the minimum '), 62 | ('other', 'List unrecoverable partitions'), 63 | ('allparts', 'List all partitions'), 64 | ('tree ', 'Show contents of partition (tree)'), 65 | ('csv ', 'Save a CSV representation in a file'), 66 | ('bodyfile ', 'Save a body file representation in a file'), 67 | ('tikzplot []', 'Produce LaTeX code to draw a Tikz figure'), 68 | ('restore ', 'Recursively restore files from '), 69 | ('locate ', 'Print all file paths that match a string'), 70 | ('traceback ', 'Print ids and paths for all ancestors of '), 71 | ('merge ', 'Merge the two partitions into the first one'), 72 | ('quit', 'Close the program') 73 | ) 74 | 75 | rebuilt = set() 76 | 77 | 78 | def list_parts(parts, shorthands, test): 79 | """List partitions corresponding to test.""" 80 | for i, part in shorthands: 81 | if test(parts[part]): 82 | print('Partition #' + str(i), '->', parts[part]) 83 | 84 | 85 | def check_valid_part(num, parts, shorthands, rebuild=True): 86 | """Check if the required partition is valid.""" 87 | try: 88 | i = int(num) 89 | except ValueError: 90 | print('Value is not valid!') 91 | return None 92 | if i in range(len(shorthands)): 93 | i, par = shorthands[i] 94 | part = parts[par] 95 | if rebuild and par not in rebuilt: 96 | print('Rebuilding partition...') 97 | part.rebuild() 98 | rebuilt.add(par) 99 | print('Done') 100 | return part 101 | print('No partition with given ID!') 102 | return None 103 | 104 | 105 | def interpret(cmd, arguments, parts: dict[int, 'Partition'], shorthands, outdir): 106 | """Perform command required by user.""" 107 | if cmd == 'help': 108 | print('Available commands:') 109 | for name, desc in commands: 110 | print(' %s%s' % (name.ljust(28), desc)) 111 | elif cmd == 'tree': 112 | if len(arguments) != 1: 113 | print('Wrong number of parameters!') 114 | else: 115 | part = check_valid_part(arguments[0], parts, shorthands) 116 | if part is not None: 117 | print('-'*10) 118 | print(utils.tree_folder(part.root)) 119 | print(utils.tree_folder(part.lost)) 120 | print('-'*10) 121 | elif cmd == 'bodyfile': 122 | if len(arguments) != 2: 123 | print('Wrong number of parameters!') 124 | else: 125 | part = check_valid_part(arguments[0], parts, shorthands) 126 | if part is not None: 127 | contents = [ 128 | '# ---' + repr(part) + '---', 129 | '# Full paths' 130 | ] + utils.bodyfile_folder(part.root) + [ 131 | '# \n# Orphaned files' 132 | ] + utils.bodyfile_folder(part.lost) 133 | fname = os.path.join(outdir, arguments[1]) 134 | try: 135 | with codecs.open(fname, 'w', encoding='utf8') as outfile: 136 | outfile.write('\n'.join(contents)) 137 | print('Saved body file to %s' % fname) 138 | except IOError: 139 | print('Cannot open file %s for output!' % fname) 140 | elif cmd == 'csv': 141 | if len(arguments) != 2: 142 | print('Wrong number of parameters!') 143 | else: 144 | part = check_valid_part(arguments[0], parts, shorthands) 145 | if part is not None: 146 | contents = utils.csv_part(part) 147 | fname = os.path.join(outdir, arguments[1]) 148 | try: 149 | with codecs.open(fname, 'w', encoding='utf8') as outfile: 150 | outfile.write( 151 | '\n'.join(contents) 152 | ) 153 | print('Saved CSV file to %s' % fname) 154 | except IOError: 155 | print('Cannot open file %s for output!' % fname) 156 | elif cmd == 'tikzplot': 157 | if len(arguments) not in (1, 2): 158 | print('Wrong number of parameters!') 159 | else: 160 | part = check_valid_part(arguments[0], parts, shorthands) 161 | if part is not None: 162 | if len(arguments) > 1: 163 | fname = os.path.join(outdir, arguments[1]) 164 | try: 165 | with codecs.open(fname, 'w') as outfile: 166 | outfile.write(utils.tikz_part(part) + '\n') 167 | print('Saved Tikz code to %s' % fname) 168 | except IOError: 169 | print('Cannot open file %s for output!' % fname) 170 | else: 171 | print(utils.tikz_part(part)) 172 | elif cmd == 'restore': 173 | if len(arguments) != 2: 174 | print('Wrong number of parameters!') 175 | else: 176 | partid = arguments[0] 177 | part = check_valid_part(partid, parts, shorthands) 178 | if part is not None: 179 | index = arguments[1] 180 | partition_dir = os.path.join(outdir, 'Partition' + str(partid)) 181 | myfile = None 182 | try: 183 | indexi = int(index) 184 | except ValueError: 185 | indexi = index 186 | for i in [index, indexi]: 187 | myfile = part.get(i, myfile) 188 | if myfile is None: 189 | print('The index is not valid') 190 | else: 191 | logic.recursive_restore(myfile, part, partition_dir) 192 | elif cmd == 'locate': 193 | if len(arguments) != 2: 194 | print('Wrong number of parameters!') 195 | else: 196 | part = check_valid_part(arguments[0], parts, shorthands) 197 | if part is not None: 198 | text = arguments[1] 199 | results = utils.locate(part, text) 200 | for node, path in results: 201 | desc = ( 202 | ' [GHOST]' if node.is_ghost else 203 | ' [DELETED]' if node.is_deleted else '' 204 | ) 205 | print('[%s]: %s%s' % (node.index, path, desc)) 206 | elif cmd == 'traceback': 207 | if len(arguments) != 2: 208 | print('Wrong number of parameters!') 209 | else: 210 | partid = arguments[0] 211 | part = check_valid_part(partid, parts, shorthands) 212 | if part is not None: 213 | index = arguments[1] 214 | myfile = None 215 | try: 216 | indexi = int(index) 217 | except ValueError: 218 | indexi = index 219 | for i in [index, indexi]: 220 | myfile = part.get(i, myfile) 221 | if myfile is None: 222 | print('The index is not valid') 223 | else: 224 | while myfile is not None: 225 | print('[{}] {}'.format(myfile.index, myfile.full_path(part))) 226 | myfile = part.get(myfile.parent) 227 | elif cmd == 'merge': 228 | if len(arguments) != 2: 229 | print('Wrong number of parameters!') 230 | else: 231 | part1 = check_valid_part(arguments[0], parts, shorthands, rebuild=False) 232 | part2 = check_valid_part(arguments[1], parts, shorthands, rebuild=False) 233 | if None in (part1, part2): 234 | return 235 | if part1.fs_type != part2.fs_type: 236 | print('Cannot merge partitions with types (%s, %s)' % (part1.fs_type, part2.fs_type)) 237 | return 238 | print('Merging partitions...') 239 | utils.merge(part1, part2) 240 | source_position = int(arguments[1]) 241 | destination_position = int(arguments[0]) 242 | _, par_source = shorthands[source_position] 243 | _, par_destination = shorthands[destination_position] 244 | del shorthands[source_position] 245 | del parts[par_source] 246 | for par in (par_source, par_destination): 247 | try: 248 | rebuilt.remove(par) 249 | except: 250 | pass 251 | print('There are now %d partitions.' % (len(parts), )) 252 | elif cmd == 'recoverable': 253 | list_parts(parts, shorthands, lambda x: x.recoverable) 254 | elif cmd == 'recoverable_size': 255 | if len(arguments) != 1: 256 | print('Wrong number of parameters!') 257 | else: 258 | list_parts(parts, shorthands, lambda x: x.size is not None and x.size > int(arguments[0])) 259 | elif cmd == 'other': 260 | list_parts(parts, shorthands, lambda x: not x.recoverable) 261 | elif cmd == 'allparts': 262 | list_parts(parts, shorthands, lambda x: True) 263 | elif cmd == 'quit': 264 | exit(0) 265 | else: 266 | print('Unknown command.') 267 | 268 | 269 | def main(): 270 | """Wrap the program logic inside a function.""" 271 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 272 | 273 | print(r" ___ ___ _ _ ") 274 | print(r" | _ \___ __ _ _ _ __ ___ _ _ __ _| _ |_) |_ ") 275 | print(r" | / -_) _| || | '_ \/ -_) '_/ _` | _ \ | _|") 276 | print(r" |_|_\___\__|\_,_| .__/\___|_| \__,_|___/_|\__|") 277 | print(" |_| v{}".format(__version__)) 278 | print(' ', __copyright__, '<%s>' % __email__) 279 | print(' Released under the', __license__) 280 | print('') 281 | 282 | parser = argparse.ArgumentParser( 283 | description='Reconstruct the directory structure of possibly damaged ' 284 | 'filesystems.' 285 | ) 286 | parser.add_argument('path', type=str, help='path to the disk image') 287 | parser.add_argument( 288 | '-s', '--savefile', type=str, help='path of the scan save file' 289 | ) 290 | parser.add_argument( 291 | '-w', '--overwrite', action='store_true', 292 | help='force overwrite of the save file' 293 | ) 294 | parser.add_argument( 295 | '-o', '--outputdir', type=str, help='directory for restored contents' 296 | ' and output files' 297 | ) 298 | args = parser.parse_args() 299 | 300 | try: 301 | image = open(args.path, 'rb') 302 | except IOError: 303 | logging.error('Unable to open image file!') 304 | exit(1) 305 | 306 | read_results = False 307 | write_results = False 308 | 309 | # Set output directory 310 | if args.outputdir is None: 311 | logging.info('No output directory specified, defaulting to ' 312 | 'recuperabit_output') 313 | args.outputdir = 'recuperabit_output' 314 | 315 | # Try to reload information from the savefile 316 | if args.savefile is not None: 317 | if args.overwrite: 318 | logging.info('Results will be saved to %s', args.savefile) 319 | write_results = True 320 | else: 321 | logging.info('Checking if results already exist.') 322 | try: 323 | savefile = open(args.savefile, 'rb') 324 | logging.info('Results will be read from %s', args.savefile) 325 | read_results = True 326 | except IOError: 327 | logging.info('Unable to open save file.') 328 | logging.info('Results will be saved to %s', args.savefile) 329 | write_results = True 330 | 331 | if read_results: 332 | logging.info('The save file exists. Trying to read it...') 333 | try: 334 | indexes = pickle.load(savefile) 335 | savefile.close() 336 | except IndexError: 337 | logging.error('Malformed save file!') 338 | exit(1) 339 | else: 340 | indexes = itertools.count() 341 | 342 | # Ask for confirmation before beginning the process 343 | try: 344 | confirm = input('Type [Enter] to start the analysis or ' 345 | '"exit" / "quit" / "q" to quit: ') 346 | except EOFError: 347 | print('') 348 | exit(0) 349 | if confirm in ('exit', 'quit', 'q'): 350 | exit(0) 351 | 352 | # Create the output directory 353 | if not logic.makedirs(args.outputdir): 354 | logging.error('Cannot create output directory!') 355 | exit(1) 356 | 357 | scanners = [pl(image) for pl in plugins] 358 | 359 | logging.info('Analysis started! This is going to take time...') 360 | interesting = utils.feed_all(image, scanners, indexes) 361 | 362 | logging.info('First scan completed') 363 | 364 | if write_results: 365 | logging.info('Saving results to %s', args.savefile) 366 | with open(args.savefile, 'wb') as savefile: 367 | pickle.dump(interesting, savefile) 368 | 369 | # Ask for partitions 370 | parts: dict[int, 'Partition'] = {} 371 | for scanner in scanners: 372 | parts.update(scanner.get_partitions()) 373 | 374 | shorthands = list(enumerate(parts)) 375 | 376 | logging.info('%i partitions found.', len(parts)) 377 | while True: 378 | print('\nWrite command ("help" for details):') 379 | try: 380 | command = input('> ').split(' ') 381 | except (EOFError, KeyboardInterrupt): 382 | print('') 383 | exit(0) 384 | cmd = command[0] 385 | arguments = command[1:] 386 | interpret(cmd, arguments, parts, shorthands, args.outputdir) 387 | 388 | if __name__ == '__main__': 389 | main() 390 | -------------------------------------------------------------------------------- /recuperabit/__init__.py: -------------------------------------------------------------------------------- 1 | # RecuperaBit 2 | # Copyright 2014-2021 Andrea Lazzarotto 3 | # 4 | # This file is part of RecuperaBit. 5 | # 6 | # RecuperaBit is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # RecuperaBit is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with RecuperaBit. If not, see . 18 | -------------------------------------------------------------------------------- /recuperabit/fs/__init__.py: -------------------------------------------------------------------------------- 1 | # RecuperaBit 2 | # Copyright 2014-2021 Andrea Lazzarotto 3 | # 4 | # This file is part of RecuperaBit. 5 | # 6 | # RecuperaBit is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # RecuperaBit is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with RecuperaBit. If not, see . 18 | -------------------------------------------------------------------------------- /recuperabit/fs/constants.py: -------------------------------------------------------------------------------- 1 | """Information needed by multiple plugins.""" 2 | 3 | # RecuperaBit 4 | # Copyright 2014-2021 Andrea Lazzarotto 5 | # 6 | # This file is part of RecuperaBit. 7 | # 8 | # RecuperaBit is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # RecuperaBit is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with RecuperaBit. If not, see . 20 | 21 | 22 | sector_size: int = 512 23 | max_sectors: int = 256 # Maximum block size for recovery 24 | -------------------------------------------------------------------------------- /recuperabit/fs/core_types.py: -------------------------------------------------------------------------------- 1 | """Recuperabit Core Types. 2 | 3 | This module contains the class declarations of all objects which are used in 4 | the Recuperabit meta file system. Each plug-in is supposed to extend the File 5 | and DiskScanner classes with subclasses implementing the missing methods.""" 6 | 7 | # RecuperaBit 8 | # Copyright 2014-2021 Andrea Lazzarotto 9 | # 10 | # This file is part of RecuperaBit. 11 | # 12 | # RecuperaBit is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # RecuperaBit is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # GNU General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with RecuperaBit. If not, see . 24 | 25 | 26 | import logging 27 | import os.path 28 | from typing import Optional, Dict, Set, List, Tuple, Union, Any, Iterator 29 | from datetime import datetime 30 | 31 | from .constants import sector_size 32 | 33 | from ..utils import readable_bytes 34 | 35 | 36 | class File(object): 37 | """Filesystem-independent representation of a file. Aka Node.""" 38 | def __init__(self, index: Union[int, str], name: str, size: Optional[int], is_directory: bool = False, 39 | is_deleted: bool = False, is_ghost: bool = False) -> None: 40 | self.index: Union[int, str] = index 41 | self.name: str = name 42 | self.size: Optional[int] = size 43 | self.is_directory: bool = is_directory 44 | self.is_deleted: bool = is_deleted 45 | self.is_ghost: bool = is_ghost 46 | self.parent: Optional[Union[int, str]] = None 47 | self.mac: Dict[str, Optional[datetime]] = { 48 | 'modification': None, 49 | 'access': None, 50 | 'creation': None 51 | } 52 | self.children: Set['File'] = set() 53 | self.children_names: Set[str] = set() # Avoid name clashes breaking restore 54 | self.offset: Optional[int] = None # Offset from beginning of disk 55 | 56 | def set_parent(self, parent: Optional[Union[int, str]]) -> None: 57 | """Set a pointer to the parent directory.""" 58 | self.parent = parent 59 | 60 | def set_mac(self, modification: Optional[datetime], access: Optional[datetime], creation: Optional[datetime]) -> None: 61 | """Set the modification, access and creation times.""" 62 | self.mac['modification'] = modification 63 | self.mac['access'] = access 64 | self.mac['creation'] = creation 65 | 66 | def get_mac(self) -> List[Optional[datetime]]: 67 | """Get the modification, access and creation times.""" 68 | keys = ('modification', 'access', 'creation') 69 | return [self.mac[k] for k in keys] 70 | 71 | def set_offset(self, offset: Optional[int]) -> None: 72 | """Set the offset of the file record with respect to the disk image.""" 73 | self.offset = offset 74 | 75 | def get_offset(self) -> Optional[int]: 76 | """Get the offset of the file record with respect to the disk image.""" 77 | return self.offset 78 | 79 | def add_child(self, node: 'File') -> None: 80 | """Add a new child to this directory.""" 81 | original_name = node.name 82 | i = 0 83 | # Check for multiple rebuilds 84 | if node in self.children: 85 | return 86 | # Avoid name clashes 87 | while node.name in self.children_names: 88 | node.name = original_name + '_%03d' % i 89 | i += 1 90 | if node.name != original_name: 91 | logging.warning(u'Renamed {} from {}'.format(node, original_name)) 92 | self.children.add(node) 93 | self.children_names.add(node.name) 94 | 95 | def full_path(self, part: 'Partition') -> str: 96 | """Return the full path of this file.""" 97 | if self.parent is not None: 98 | parent = part[self.parent] 99 | return os.path.join(parent.full_path(part), self.name) 100 | else: 101 | return self.name 102 | 103 | def get_content(self, partition: 'Partition') -> Optional[Union[bytes, Iterator[bytes]]]: 104 | # pylint: disable=W0613 105 | """Extract the content of the file. 106 | 107 | This method is intentionally not implemented because it depends on each 108 | plug-in for a specific file system.""" 109 | if self.is_directory or self.is_ghost: 110 | return None 111 | raise NotImplementedError 112 | 113 | # pylint: disable=R0201 114 | def ignore(self) -> bool: 115 | """The following method is used by the restore procedure to check 116 | files that should not be recovered. For example, in NTFS file 117 | $BadClus:$Bad shall not be recovered because it creates an output 118 | with the same size as the partition (usually many GBs).""" 119 | return False 120 | 121 | def __repr__(self) -> str: 122 | return ( 123 | u'File(#%s, ^^%s^^, %s, offset = %s sectors)' % 124 | (self.index, self.parent, self.name, self.offset) 125 | ) 126 | 127 | 128 | class Partition(object): 129 | """Simplified representation of the contents of a partition. 130 | 131 | Parameter root_id represents the identifier assigned to the root directory 132 | of a partition. This can be file system dependent.""" 133 | def __init__(self, fs_type: str, root_id: Union[int, str], scanner: 'DiskScanner') -> None: 134 | self.fs_type: str = fs_type 135 | self.root_id: Union[int, str] = root_id 136 | self.size: Optional[int] = None 137 | self.offset: Optional[int] = None 138 | self.root: Optional[File] = None 139 | self.lost: File = File(-1, 'LostFiles', 0, is_directory=True, is_ghost=True) 140 | self.files: Dict[Union[int, str], File] = {} 141 | self.recoverable: bool = False 142 | self.scanner: 'DiskScanner' = scanner 143 | 144 | def add_file(self, node: File) -> None: 145 | """Insert a new file in the partition.""" 146 | index = node.index 147 | self.files[index] = node 148 | 149 | def set_root(self, node: File) -> None: 150 | """Set the root directory.""" 151 | if not node.is_directory: 152 | raise TypeError('Not a directory') 153 | self.root = node 154 | self.root.set_parent(None) 155 | 156 | def set_size(self, size: int) -> None: 157 | """Set the (estimated) size of the partition.""" 158 | self.size = size 159 | 160 | def set_offset(self, offset: int) -> None: 161 | """Set the offset from the beginning of the disk.""" 162 | self.offset = offset 163 | 164 | def set_recoverable(self, recoverable: bool) -> None: 165 | """State if the partition contents are also recoverable.""" 166 | self.recoverable = recoverable 167 | 168 | def rebuild(self) -> None: 169 | """Rebuild the partition structure. 170 | 171 | This method processes the contents of files and it rebuilds the 172 | directory tree as accurately as possible.""" 173 | root_id = self.root_id 174 | rootname = 'Root' 175 | 176 | if root_id not in self.files: 177 | self.files[root_id] = File( 178 | root_id, rootname, 0, is_directory=True, is_ghost=True 179 | ) 180 | 181 | # Convert keys to list to avoid RuntimeError 182 | for identifier in list(self.files): 183 | node = self.files[identifier] 184 | if node.index == root_id: 185 | self.set_root(node) 186 | node.name = rootname 187 | else: 188 | parent_id = node.parent 189 | exists = parent_id is not None 190 | valid = parent_id in self.files 191 | if exists and valid: 192 | parent_node = self.files[parent_id] 193 | elif exists and not valid: 194 | parent_node = File(parent_id, 'Dir_' + str(parent_id), 195 | 0, is_directory=True, is_ghost=True) 196 | parent_node.set_parent(-1) 197 | self.files[parent_id] = parent_node 198 | self.lost.add_child(parent_node) 199 | else: 200 | parent_node = self.lost 201 | node.set_parent(-1) 202 | parent_node.add_child(node) 203 | return 204 | 205 | # pylint: disable=R0201 206 | def additional_repr(self) -> List[Tuple[str, Any]]: 207 | """Return additional values to show in the string representation.""" 208 | return [] 209 | 210 | def __repr__(self) -> str: 211 | size = ( 212 | readable_bytes(self.size * sector_size) 213 | if self.size is not None else '??? b' 214 | ) 215 | data = [ 216 | ('Offset', self.offset), 217 | ( 218 | 'Offset (b)', 219 | self.offset * sector_size 220 | if self.offset is not None else None 221 | ), 222 | ] 223 | data += self.additional_repr() 224 | return u'Partition (%s, %s, %d files,%s %s)' % ( 225 | self.fs_type, 226 | size, 227 | len(self.files), 228 | ' Recoverable,' if self.recoverable else '', 229 | ', '.join(a+': '+str(b) for a, b in data) 230 | ) 231 | 232 | def __getitem__(self, index: Union[int, str]) -> File: 233 | if index in self.files: 234 | return self.files[index] 235 | if index == self.lost.index: 236 | return self.lost 237 | raise KeyError 238 | 239 | def get(self, index: Union[int, str], default: Optional[File] = None) -> Optional[File]: 240 | """Get a file or the special LostFiles directory.""" 241 | try: 242 | return self.__getitem__(index) 243 | except KeyError: 244 | return default 245 | 246 | 247 | class DiskScanner(object): 248 | """Abstract stub for the implementation of disk scanners.""" 249 | def __init__(self, pointer: Any) -> None: 250 | self.image: Any = pointer 251 | 252 | def get_image(self) -> Any: 253 | """Return the image reference.""" 254 | return self.image 255 | 256 | @staticmethod 257 | def get_image(scanner: 'DiskScanner') -> Any: 258 | """Static method to get image from scanner instance.""" 259 | return scanner.image 260 | 261 | def feed(self, index: int, sector: bytes) -> Optional[str]: 262 | """Feed a new sector.""" 263 | raise NotImplementedError 264 | 265 | def get_partitions(self) -> Dict[int, Partition]: 266 | """Get a list of the found partitions.""" 267 | raise NotImplementedError 268 | -------------------------------------------------------------------------------- /recuperabit/fs/ntfs.py: -------------------------------------------------------------------------------- 1 | """NTFS plug-in. 2 | 3 | This plug-in contains the necessary logic to parse traces of NTFS file systems, 4 | including MFT entries and directory indexes.""" 5 | 6 | # RecuperaBit 7 | # Copyright 2014-2021 Andrea Lazzarotto 8 | # 9 | # This file is part of RecuperaBit. 10 | # 11 | # RecuperaBit is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # RecuperaBit is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with RecuperaBit. If not, see . 23 | 24 | 25 | import logging 26 | from collections import Counter 27 | from typing import Any, Dict, List, Optional, Tuple, Union, Iterator, Set 28 | 29 | from .constants import max_sectors, sector_size 30 | from .core_types import DiskScanner, File, Partition 31 | from .ntfs_fmt import (attr_header_fmt, attr_names, attr_nonresident_fmt, 32 | attr_resident_fmt, attr_types_fmt, attribute_list_parser, 33 | boot_sector_fmt, entry_fmt, indx_dir_entry_fmt, indx_fmt, 34 | indx_header_fmt) 35 | 36 | from ..logic import SparseList, approximate_matching 37 | from ..utils import merge, sectors, unpack 38 | 39 | # Some attributes may appear multiple times 40 | multiple_attributes: Set[str] = set([ 41 | '$FILE_NAME', 42 | '$DATA', 43 | '$INDEX_ROOT', 44 | '$INDEX_ALLOCATION', 45 | '$BITMAP' 46 | ]) 47 | 48 | # Size of records in sectors 49 | FILE_size: int = 2 50 | INDX_size: int = 8 51 | 52 | 53 | def best_name(entries: List[Tuple[int, str]]) -> Optional[str]: 54 | """Return the best file name available. 55 | 56 | This function accepts a list of tuples formed by a namespace and a string. 57 | In case of more than one choice, it returns preferrably the one in the NTFS 58 | namespace (code == 3).""" 59 | if len(entries) == 0: 60 | return None 61 | 62 | entries.sort() 63 | if entries[-1][0] == 3: 64 | name = entries[-1][1] 65 | else: 66 | name = entries[0][1] 67 | return name if len(name) else None 68 | 69 | 70 | def parse_mft_attr(attr: bytes) -> Tuple[Dict[str, Any], Optional[str]]: 71 | """Parse the contents of a MFT attribute.""" 72 | header = unpack(attr, attr_header_fmt) 73 | attr_type = header['type'] 74 | 75 | if attr_type not in attr_names: 76 | return header, None 77 | 78 | if header['non_resident']: 79 | nonresident = unpack(attr, attr_nonresident_fmt) 80 | if nonresident['runlist'] is None: 81 | nonresident['runlist'] = list() 82 | header.update(nonresident) 83 | else: 84 | resident = unpack(attr, attr_resident_fmt) 85 | header.update(resident) 86 | offset = header['content_off'] 87 | content = attr[offset:] 88 | 89 | name = attr_names[attr_type] 90 | if not header['non_resident'] and name in attr_types_fmt: 91 | size = header['content_size'] 92 | data = unpack(content[:size], attr_types_fmt[name]) 93 | header['content'] = data 94 | 95 | return header, name 96 | 97 | 98 | def _apply_fixup_values(header: Dict[str, Any], entry: bytearray) -> None: 99 | """Apply the fixup values to FILE and INDX records.""" 100 | offset = header['off_fixup'] 101 | for i in range(1, header['n_entries']): 102 | pos = sector_size * i 103 | entry[pos-2:pos] = entry[offset + 2*i:offset + 2*(i+1)] 104 | 105 | 106 | def _attributes_reader(entry: bytes, offset: int) -> Dict[str, Any]: 107 | """Read every attribute.""" 108 | attributes = {} 109 | while offset < len(entry) - 16: 110 | try: 111 | attr, name = parse_mft_attr(entry[offset:]) 112 | except TypeError: 113 | # The attribute was broken, we need to terminate here 114 | return attributes 115 | attr['dump_offset'] = offset 116 | if attr['length'] == 0: 117 | # End of attribute list 118 | break 119 | else: 120 | offset = offset + attr['length'] 121 | if name is None: 122 | # Skip broken/unknown attribute 123 | continue 124 | if name not in attributes: 125 | if name not in multiple_attributes: 126 | attributes[name] = attr 127 | else: 128 | attributes[name] = [attr] 129 | else: 130 | if name in multiple_attributes: 131 | attributes[name].append(attr) 132 | else: 133 | logging.error('Cannot handle multiple attribute %s', name) 134 | return attributes 135 | 136 | 137 | def parse_file_record(entry: bytes) -> Dict[str, Any]: 138 | """Parse the contents of a FILE record (MFT entry).""" 139 | header = unpack(entry, entry_fmt) 140 | if (header['size_alloc'] is None or 141 | header['size_alloc'] > len(entry) or 142 | len(entry) < FILE_size*sector_size): 143 | header['valid'] = False 144 | return header 145 | 146 | # Old versions of NTFS don't have a MFT record number. 147 | if header['off_fixup'] < 48: 148 | header['record_n'] = None 149 | 150 | _apply_fixup_values(header, entry) 151 | 152 | attributes = _attributes_reader(entry, header['off_first']) 153 | header['valid'] = True 154 | header['attributes'] = attributes 155 | return header 156 | 157 | 158 | def parse_indx_record(entry: bytes) -> Dict[str, Any]: 159 | """Parse the contents of a INDX record (directory index).""" 160 | header = unpack(entry, indx_fmt) 161 | 162 | _apply_fixup_values(header, entry) 163 | 164 | node_data = unpack(entry[24:], indx_header_fmt) 165 | node_data['off_start_list'] += 24 166 | node_data['off_end_list'] += 24 167 | node_data['off_end_buffer'] += 24 168 | header.update(node_data) 169 | 170 | offset = header['off_start_list'] 171 | entries = [] 172 | while offset < header['off_end_list']: 173 | entry_data = unpack(entry[offset:], indx_dir_entry_fmt) 174 | if entry_data['content_length']: 175 | try: 176 | file_name = unpack( 177 | entry[offset + 16:], 178 | attr_types_fmt['$FILE_NAME'] 179 | ) 180 | except (UnicodeDecodeError, TypeError): # Invalid file name or invalid name length 181 | break 182 | # Perform checks to avoid false positives 183 | name_ok = file_name['name'] is not None 184 | namespace_ok = 0 <= file_name['namespace'] <= 3 185 | size_ok = file_name['real_size'] <= file_name['allocated_size'] 186 | features_ok = not ( 187 | file_name['flags'] == 0 and 188 | file_name['parent_seq'] > 1024 189 | ) 190 | if name_ok and namespace_ok and size_ok and features_ok: 191 | entry_data['file_info'] = file_name 192 | entries.append(entry_data) 193 | else: 194 | break 195 | if entry_data['entry_length']: 196 | offset += entry_data['entry_length'] 197 | else: 198 | break 199 | header['entries'] = entries 200 | header['valid'] = len(entries) > 0 201 | return header 202 | 203 | 204 | def _integrate_attribute_list(parsed: Dict[str, Any], part: 'NTFSPartition', image: Any) -> None: 205 | """Integrate missing attributes in the parsed MTF entry.""" 206 | base_record = parsed['record_n'] 207 | attrs = parsed['attributes'] 208 | attr = attrs['$ATTRIBUTE_LIST'] 209 | 210 | spc = part.sec_per_clus 211 | if 'runlist' in attr: 212 | clusters_pos = 0 213 | entries = [] 214 | size = attr['real_size'] 215 | for entry in attr['runlist']: 216 | clusters_pos += entry['offset'] 217 | length = min(entry['length'] * spc * sector_size, size) 218 | size -= length 219 | real_pos = clusters_pos * spc + part.offset 220 | dump = sectors(image, real_pos, length, 1) 221 | entries += attribute_list_parser(dump) 222 | attr['content'] = {'entries': entries} 223 | else: 224 | entries = attr['content']['entries'] 225 | 226 | # Divide entries by type 227 | types = set(e['type'] for e in entries) 228 | entries_by_type = { 229 | t: set( 230 | e['file_ref'] for e in entries 231 | if e['type'] == t and e['file_ref'] is not None 232 | ) 233 | for t in types 234 | } 235 | # Remove completely "local" types or empty lists 236 | for num in list(entries_by_type): 237 | files = entries_by_type[num] 238 | if ( 239 | len(files) == 0 or 240 | (len(files) == 1 and next(iter(files)) == base_record) 241 | ): 242 | del entries_by_type[num] 243 | 244 | mft_pos = part.mft_pos 245 | for num in entries_by_type: 246 | # Read contents of child entries 247 | for index in entries_by_type[num]: 248 | real_pos = mft_pos + index * FILE_size 249 | dump = sectors(image, real_pos, FILE_size) 250 | child_parsed = parse_file_record(dump) 251 | if 'attributes' not in child_parsed: 252 | continue 253 | # Update the main entry (parsed) 254 | if child_parsed['base_record'] == base_record: 255 | child_attrs = child_parsed['attributes'] 256 | for name in child_attrs: 257 | if name in multiple_attributes: 258 | try: 259 | attrs[name] += child_attrs[name] 260 | except KeyError: 261 | attrs[name] = child_attrs[name] 262 | else: 263 | attrs[name] = child_attrs[name] 264 | 265 | 266 | class NTFSFile(File): 267 | """NTFS File.""" 268 | def __init__(self, parsed: Dict[str, Any], offset: Optional[int], is_ghost: bool = False, ads: str = '') -> None: 269 | index = parsed['record_n'] 270 | ads_suffix = ':' + ads if ads != '' else ads 271 | if ads != '': 272 | index = str(index) + ads_suffix 273 | attrs = parsed['attributes'] 274 | filenames = attrs['$FILE_NAME'] 275 | datas = attrs.get('$DATA', []) 276 | 277 | size = None 278 | for attr in datas: 279 | if attr['name'] == ads: 280 | if 'real_size' in attr: 281 | size = attr['real_size'] 282 | elif not attr['non_resident']: 283 | size = attr['content_size'] 284 | break 285 | 286 | filtered = [ 287 | f for f in filenames if 'content' in f and 288 | f['content'] is not None and 289 | 'name_length' in f['content'] and 290 | f['content']['name_length'] > 0 and 291 | f['content']['name'] is not None 292 | ] 293 | name = best_name([ 294 | (f['content']['namespace'], f['content']['name'] + ads_suffix) 295 | for f in filtered 296 | ]) 297 | hasname = name is not None 298 | 299 | if not hasname: 300 | name = 'File_%s' % index 301 | 302 | std_info = attrs.get('$STANDARD_INFORMATION') 303 | 304 | is_dir = (parsed['flags'] & 0x02) > 0 and not len(ads) 305 | is_del = (parsed['flags'] & 0x01) == 0 306 | File.__init__(self, index, name, size, is_dir, is_del, is_ghost) 307 | 308 | time_attribute = None 309 | 310 | # Additional attributes 311 | if hasname: 312 | first = filtered[0]['content'] 313 | parent_id = first['parent_entry'] 314 | File.set_parent(self, parent_id) 315 | File.set_offset(self, offset) 316 | time_attribute = std_info or filtered[0] 317 | if time_attribute and 'content' in time_attribute: 318 | File.set_mac( 319 | self, time_attribute['content']['modification_time'], 320 | time_attribute['content']['access_time'], 321 | time_attribute['content']['creation_time'], 322 | ) 323 | self.ads = ads 324 | 325 | @staticmethod 326 | def _padded_bytes(image: Any, offset: int, size: int) -> bytes: 327 | dump = sectors(image, offset, size, 1) 328 | if len(dump) < size: 329 | logging.warning( 330 | 'Failed to read byte(s). Padding with 0x00. Offset: {} Size: ' 331 | '{}'.format(offset, size)) 332 | dump += bytearray(b'\x00' * (size - len(dump))) 333 | return dump 334 | 335 | def content_iterator(self, partition: 'NTFSPartition', image: Any, datas: List[Dict[str, Any]]) -> Iterator[bytes]: 336 | """Return an iterator for the contents of this file.""" 337 | vcn = 0 338 | spc = partition.sec_per_clus 339 | for attr in datas: 340 | diff = attr['start_VCN'] - vcn 341 | if diff > 0: 342 | # We do not try to fill with zeroes as this might produce huge useless files 343 | logging.warning( 344 | u'Missing part for {}, {} clusters skipped'.format(self, diff) 345 | ) 346 | vcn += diff 347 | yield b'' 348 | 349 | clusters_pos = 0 350 | size = attr['real_size'] 351 | 352 | if 'runlist' not in attr: 353 | logging.error( 354 | u'Cannot restore {}, missing runlist'.format(self) 355 | ) 356 | break 357 | 358 | for entry in attr['runlist']: 359 | length = min(entry['length'] * spc * sector_size, size) 360 | size -= length 361 | # Sparse runlist 362 | if entry['offset'] is None: 363 | while length > 0: 364 | amount = min(max_sectors*sector_size, length) 365 | length -= amount 366 | yield b'\x00' * amount 367 | continue 368 | # Normal runlists 369 | clusters_pos += entry['offset'] 370 | real_pos = clusters_pos * spc + partition.offset 371 | # Avoid to fill memory with huge blocks 372 | offset = 0 373 | while length > 0: 374 | amount = min(max_sectors*sector_size, length) 375 | position = real_pos*sector_size + offset 376 | partial = self._padded_bytes(image, position, amount) 377 | length -= amount 378 | offset += amount 379 | yield bytes(partial) 380 | vcn = attr['end_VCN'] + 1 381 | 382 | def get_content(self, partition: 'NTFSPartition') -> Optional[Union[bytes, Iterator[bytes]]]: 383 | """Extract the content of the file. 384 | 385 | This method works by extracting the $DATA attribute.""" 386 | if self.is_ghost: 387 | logging.error(u'Cannot restore ghost file {}'.format(self)) 388 | return None 389 | 390 | image = DiskScanner.get_image(partition.scanner) 391 | dump = sectors(image, File.get_offset(self), FILE_size) 392 | parsed = parse_file_record(dump) 393 | 394 | if not parsed['valid'] or 'attributes' not in parsed: 395 | logging.error(u'Invalid MFT entry for {}'.format(self)) 396 | return None 397 | attrs = parsed['attributes'] 398 | if ('$ATTRIBUTE_LIST' in attrs and 399 | partition.sec_per_clus is not None): 400 | _integrate_attribute_list(parsed, partition, image) 401 | if '$DATA' not in attrs: 402 | attrs['$DATA'] = [] 403 | datas = [d for d in attrs['$DATA'] if d['name'] == self.ads] 404 | if not len(datas): 405 | if not self.is_directory: 406 | logging.error(u'Cannot restore $DATA attribute(s) ' 407 | 'for {}'.format(self)) 408 | return None 409 | 410 | # TODO implemented compressed attributes 411 | for d in datas: 412 | if d['flags'] & 0x01: 413 | logging.error(u'Cannot restore compressed $DATA attribute(s) ' 414 | 'for {}'.format(self)) 415 | return None 416 | elif d['flags'] & 0x4000: 417 | logging.warning(u'Found encrypted $DATA attribute(s) ' 418 | 'for {}'.format(self)) 419 | 420 | # Handle resident file content 421 | if len(datas) == 1 and not datas[0]['non_resident']: 422 | single = datas[0] 423 | start = single['dump_offset'] + single['content_off'] 424 | end = start + single['content_size'] 425 | content = dump[start:end] 426 | return bytes(content) 427 | else: 428 | if partition.sec_per_clus is None: 429 | logging.error(u'Cannot restore non-resident $DATA ' 430 | 'attribute(s) for {}'.format(self)) 431 | return None 432 | non_resident = sorted( 433 | (d for d in attrs['$DATA'] if d['non_resident']), 434 | key=lambda x: x['start_VCN'] 435 | ) 436 | if len(non_resident) != len(datas): 437 | logging.warning( 438 | u'Found leftover resident $DATA attributes for ' 439 | '{}'.format(self) 440 | ) 441 | return self.content_iterator(partition, image, non_resident) 442 | 443 | def ignore(self) -> bool: 444 | """Determine which files should be ignored.""" 445 | return ( 446 | (self.index == '8:$Bad') or 447 | (self.parent == 11 and self.ads == '$J') # $UsnJrnl 448 | ) 449 | 450 | 451 | class NTFSPartition(Partition): 452 | """Partition with additional fields for NTFS recovery.""" 453 | def __init__(self, scanner: 'NTFSScanner', position: Optional[int] = None) -> None: 454 | Partition.__init__(self, 'NTFS', 5, scanner) 455 | self.sec_per_clus: Optional[int] = None 456 | self.mft_pos: Optional[int] = position 457 | self.mftmirr_pos: Optional[int] = None 458 | 459 | def additional_repr(self) -> List[Tuple[str, Any]]: 460 | """Return additional values to show in the string representation.""" 461 | return [ 462 | ('Sec/Clus', self.sec_per_clus), 463 | ('MFT offset', self.mft_pos), 464 | ('MFT mirror offset', self.mftmirr_pos) 465 | ] 466 | 467 | 468 | class NTFSScanner(DiskScanner): 469 | """NTFS Disk Scanner.""" 470 | def __init__(self, pointer: Any) -> None: 471 | DiskScanner.__init__(self, pointer) 472 | self.found_file: Set[int] = set() 473 | self.parsed_file_review: Dict[int, Dict[str, Any]] = {} 474 | self.found_indx: Set[int] = set() 475 | self.parsed_indx: Dict[int, Dict[str, Any]] = {} 476 | self.indx_list: Optional[SparseList[int]] = None 477 | self.found_boot: List[int] = [] 478 | self.found_spc: List[int] = [] 479 | 480 | def feed(self, index: int, sector: bytes) -> Optional[str]: 481 | """Feed a new sector.""" 482 | # check boot sector 483 | if sector.endswith(b'\x55\xAA') and b'NTFS' in sector[:8]: 484 | self.found_boot.append(index) 485 | return 'NTFS boot sector' 486 | 487 | # check file record 488 | if sector.startswith((b'FILE', b'BAAD')): 489 | self.found_file.add(index) 490 | return 'NTFS file record' 491 | 492 | # check index record 493 | if sector.startswith(b'INDX'): 494 | self.found_indx.add(index) 495 | return 'NTFS index record' 496 | 497 | @staticmethod 498 | def add_indx_entries(entries: List[Dict[str, Any]], part: NTFSPartition) -> None: 499 | """Insert new ghost files which were not already found.""" 500 | for rec in entries: 501 | if (rec['record_n'] not in part.files and 502 | rec['$FILE_NAME'] is not None): 503 | # Compatibility with the structure of a MFT entry 504 | rec['attributes'] = { 505 | '$FILE_NAME': [{'content': rec['$FILE_NAME']}] 506 | } 507 | """Although the structure of r is similar to that of a MFT 508 | entry, flags were about the index, not about the file. We 509 | don't know if the element is a directory or not, hence we 510 | mark it as a file. It can be deduced if it is a directory 511 | by looking at the number of children, after the 512 | reconstruction.""" 513 | rec['flags'] = 0x1 514 | part.add_file(NTFSFile(rec, None, is_ghost=True)) 515 | 516 | def add_from_indx_root(self, parsed: Dict[str, Any], part: NTFSPartition) -> None: 517 | """Add ghost entries to part from INDEX_ROOT attributes in parsed.""" 518 | for attribute in parsed['attributes']['$INDEX_ROOT']: 519 | if (attribute.get('content') is None or 520 | attribute['content'].get('records') is None): 521 | continue 522 | self.add_indx_entries(attribute['content']['records'], part) 523 | 524 | def most_likely_sec_per_clus(self) -> List[int]: 525 | """Determine the most likely value of sec_per_clus of each partition, 526 | to speed up the search.""" 527 | counter = Counter() 528 | counter.update(self.found_spc) 529 | counter.update(2**i for i in range(8)) 530 | return [i for i, _ in counter.most_common()] 531 | 532 | def find_boundary(self, part: NTFSPartition, mft_address: int, multipliers: List[int]) -> Tuple[Optional[int], Optional[int]]: 533 | """Determine the starting sector of a partition with INDX records.""" 534 | nodes = ( 535 | self.parsed_file_review[node.offset] 536 | for node in part.files.values() 537 | if node.offset in self.parsed_file_review and 538 | '$INDEX_ALLOCATION' in 539 | self.parsed_file_review[node.offset]['attributes'] 540 | ) 541 | 542 | text_list = self.indx_list 543 | width = text_list.__len__() 544 | 545 | base_pattern = {} 546 | for parsed in nodes: 547 | for attr in parsed['attributes']['$INDEX_ALLOCATION']: 548 | clusters_pos = 0 549 | if 'runlist' not in attr: 550 | continue 551 | runlist = attr['runlist'] 552 | for entry in runlist: 553 | clusters_pos += entry['offset'] 554 | base_pattern[clusters_pos] = parsed['record_n'] 555 | if not len(base_pattern): 556 | return (None, None) 557 | 558 | results = [] 559 | min_support = 2 560 | for sec_per_clus in multipliers: 561 | pattern = { 562 | i * sec_per_clus: base_pattern[i] 563 | for i in base_pattern 564 | } 565 | 566 | delta = min(pattern) 567 | normalized = { 568 | i-delta: pattern[i] 569 | for i in pattern if i-delta <= width 570 | # Avoid extremely long, useless patterns 571 | } 572 | if len(normalized) < min_support: 573 | continue 574 | 575 | pattern_list = SparseList(normalized) 576 | solution = approximate_matching( 577 | text_list, pattern_list, mft_address + delta, k=min_support 578 | ) 579 | if solution is not None: 580 | # Avoid negative offsets and ambiguous situations 581 | solution[0] = [i-delta for i in solution[0] if i-delta >= 0] 582 | if len(solution[0]) == 1: 583 | positions, amount, perc = solution 584 | results.append((positions, perc, sec_per_clus)) 585 | # Reasonably, this is a correct match 586 | if perc > 0.25 and amount > 256: 587 | break 588 | min_support = max(min_support, solution[1]) 589 | 590 | if len(results): 591 | results.sort(key=lambda r: r[1]) 592 | positions, _, spc = results[0] 593 | return (positions[0], spc) 594 | else: 595 | return (None, None) 596 | 597 | def add_from_indx_allocation(self, parsed: Dict[str, Any], part: NTFSPartition) -> None: 598 | """Add ghost entries to part from INDEX_ALLOCATION attributes in parsed. 599 | 600 | This procedure requires that the beginning of the partition has already 601 | been discovered.""" 602 | read_again = set() 603 | for attr in parsed['attributes']['$INDEX_ALLOCATION']: 604 | clusters_pos = 0 605 | if 'runlist' not in attr: 606 | continue 607 | runlist = attr['runlist'] 608 | for entry in runlist: 609 | clusters_pos += entry['offset'] 610 | real_pos = clusters_pos * part.sec_per_clus + part.offset 611 | if real_pos in self.parsed_indx: 612 | content = self.parsed_indx[real_pos] 613 | # Check if the entry matches 614 | if parsed['record_n'] == content['parent']: 615 | discovered = set( 616 | c for c in content['children'] 617 | if c not in part.files 618 | ) 619 | # If there are new files, read the INDX again 620 | if len(discovered): 621 | read_again.add(real_pos) 622 | 623 | img = DiskScanner.get_image(self) 624 | for position in read_again: 625 | dump = sectors(img, position, INDX_size) 626 | entries = parse_indx_record(dump)['entries'] 627 | self.add_indx_entries(entries, part) 628 | 629 | def add_from_attribute_list(self, parsed: Dict[str, Any], part: NTFSPartition, offset: int) -> None: 630 | """Add additional entries to part from attributes in ATTRIBUTE_LIST. 631 | 632 | Files with many attributes may have additional attributes not in the 633 | MFT entry. When this happens, it is necessary to find the other 634 | attributes. They may contain additional information, such as $DATA 635 | attributes for ADS. This procedure requires that the beginning of the 636 | partition has already been discovered.""" 637 | image = DiskScanner.get_image(self) 638 | _integrate_attribute_list(parsed, part, image) 639 | 640 | attrs = parsed['attributes'] 641 | if '$DATA' in attrs: 642 | for attribute in attrs['$DATA']: 643 | ads_name = attribute['name'] 644 | if ads_name and len(ads_name): 645 | part.add_file(NTFSFile(parsed, offset, ads=ads_name)) 646 | 647 | def add_from_mft_mirror(self, part: NTFSPartition) -> None: 648 | """Fix the first file records using the MFT mirror.""" 649 | img = DiskScanner.get_image(self) 650 | mirrpos = part.mftmirr_pos 651 | if mirrpos is None: 652 | return 653 | 654 | for i in range(4): 655 | node = part.get(i) 656 | if node is None or node.is_ghost: 657 | position = mirrpos + i * FILE_size 658 | dump = sectors(img, position, FILE_size) 659 | parsed = parse_file_record(dump) 660 | if parsed['valid'] and '$FILE_NAME' in parsed['attributes']: 661 | node = NTFSFile(parsed, position) 662 | part.add_file(node) 663 | logging.info( 664 | u'Repaired MFT entry #%s - %s in partition at offset ' 665 | '%s from backup', node.index, node.name, part.offset 666 | ) 667 | 668 | def finalize_reconstruction(self, part: NTFSPartition) -> None: 669 | """Finish information gathering from a file. 670 | 671 | This procedure requires that the beginning of the 672 | partition has already been discovered.""" 673 | logging.info('Adding extra attributes from $ATTRIBUTE_LIST') 674 | # Select elements with many attributes 675 | many_attributes_it = ( 676 | node for node in list(part.files.values()) 677 | if node.offset in self.parsed_file_review and 678 | '$ATTRIBUTE_LIST' in 679 | self.parsed_file_review[node.offset]['attributes'] 680 | ) 681 | for node in many_attributes_it: 682 | parsed = self.parsed_file_review[node.offset] 683 | self.add_from_attribute_list(parsed, part, node.offset) 684 | 685 | logging.info('Adding ghost entries from $INDEX_ALLOCATION') 686 | # Select only elements with $INDEX_ALLOCATION 687 | allocation_it = ( 688 | node for node in list(part.files.values()) 689 | if node.offset in self.parsed_file_review and 690 | '$INDEX_ALLOCATION' in 691 | self.parsed_file_review[node.offset]['attributes'] 692 | ) 693 | for node in allocation_it: 694 | parsed = self.parsed_file_review[node.offset] 695 | self.add_from_indx_allocation(parsed, part) 696 | 697 | def get_partitions(self) -> Dict[int, NTFSPartition]: 698 | """Get a list of the found partitions.""" 699 | partitioned_files: Dict[int, NTFSPartition] = {} 700 | img = DiskScanner.get_image(self) 701 | 702 | logging.info('Parsing MFT entries') 703 | for position in self.found_file: 704 | dump = sectors(img, position, FILE_size) 705 | parsed = parse_file_record(dump) 706 | attrs = parsed.get('attributes', {}) 707 | if not parsed['valid'] or '$FILE_NAME' not in attrs: 708 | continue 709 | 710 | # Partition files based on corresponding entry 0 711 | if parsed['record_n'] is not None: 712 | offset = position - parsed['record_n'] * FILE_size 713 | try: 714 | part = partitioned_files[offset] 715 | except KeyError: 716 | partitioned_files[offset] = NTFSPartition(self, offset) 717 | part = partitioned_files[offset] 718 | attributes = parsed['attributes'] 719 | if '$DATA' in attributes: 720 | for attribute in attributes['$DATA']: 721 | ads_name = attribute['name'] 722 | if ads_name: 723 | part.add_file(NTFSFile(parsed, position, ads=ads_name)) 724 | """Add the file again, just in case the $DATA attributes are 725 | missing.""" 726 | part.add_file(NTFSFile(parsed, position)) 727 | 728 | # Handle information deduced from INDX records 729 | if '$INDEX_ROOT' in attrs: 730 | self.add_from_indx_root(parsed, part) 731 | # Save for later use 732 | if '$INDEX_ALLOCATION' in attrs or '$ATTRIBUTE_LIST' in attrs: 733 | self.parsed_file_review[position] = parsed 734 | # TODO [Future] handle files for which there is no record_number 735 | 736 | # Parse INDX records 737 | logging.info('Parsing INDX records') 738 | for position in self.found_indx: 739 | dump = sectors(img, position, INDX_size) 740 | parsed = parse_indx_record(dump) 741 | if not parsed['valid']: 742 | continue 743 | 744 | entries = parsed['entries'] 745 | referred = (el['file_info']['parent_entry'] for el in entries) 746 | record_n = Counter(referred).most_common(1)[0][0] 747 | # Save references for future access 748 | self.parsed_indx[position] = { 749 | 'parent': record_n, 750 | 'children': set(el['record_n'] for el in entries) 751 | } 752 | 753 | indx_info = self.parsed_indx 754 | self.indx_list = SparseList({ 755 | pos: indx_info[pos]['parent'] for pos in indx_info 756 | }) 757 | 758 | # Extract boot record information 759 | logging.info('Reading boot sectors') 760 | for index in self.found_boot: 761 | dump = sectors(img, index, 1) 762 | parsed = unpack(dump, boot_sector_fmt) 763 | sec_per_clus = parsed['sectors_per_cluster'] 764 | self.found_spc.append(sec_per_clus) 765 | relative = parsed['MFT_addr'] * sec_per_clus 766 | mirr_relative = parsed['MFTmirr_addr'] * sec_per_clus 767 | part = None 768 | # Look for matching partition, either as boot sector or backup 769 | for delta in (0, parsed['sectors']): 770 | index = index - delta 771 | address = relative + index 772 | # Set partition as recoverable 773 | if address in partitioned_files: 774 | part = partitioned_files[address] 775 | part.set_recoverable(True) 776 | part.set_size(parsed['sectors']) 777 | part.offset = index 778 | part.sec_per_clus = sec_per_clus 779 | part.mftmirr_pos = mirr_relative + index 780 | break 781 | 782 | # Repair MFT if the mirror is available 783 | for address in list(partitioned_files): 784 | # This could have been deleted in a previous iteration 785 | if address not in partitioned_files: 786 | continue 787 | part = partitioned_files[address] 788 | mirrpos = part.mftmirr_pos 789 | if mirrpos is None: 790 | entry = part.get(1) # $MFTMirr 791 | if entry is None: 792 | continue 793 | else: 794 | # Infer MFT mirror position 795 | dump = sectors(img, entry.offset, FILE_size) 796 | mirror = parse_file_record(dump) 797 | if (mirror['valid'] and 'attributes' in mirror and 798 | '$DATA' in mirror['attributes']): 799 | datas = mirror['attributes']['$DATA'] 800 | if (len(datas) == 1 and datas[0]['non_resident'] and 801 | 'runlist' in datas[0] and 802 | len(datas[0]['runlist']) > 0 and 803 | 'offset' in datas[0]['runlist'][0]): 804 | relative = datas[0]['runlist'][0]['offset'] 805 | spc = part.sec_per_clus 806 | if spc is None: 807 | continue 808 | mirrpos = relative * spc + part.offset 809 | part.mftmirr_pos = mirrpos 810 | 811 | self.add_from_mft_mirror(part) 812 | 813 | # Remove bogus partitions generated by MFT mirrors 814 | if mirrpos in partitioned_files: 815 | bogus = partitioned_files[mirrpos] 816 | # Check if it looks like a MFT mirror 817 | if len(bogus.files) == 4 and max(bogus.files) < 4: 818 | logging.debug( 819 | 'Dropping bogus NTFS partition with MFT ' 820 | 'position %d generated by MFT mirror of ' 821 | 'partition at offset %d', 822 | bogus.mft_pos, part.offset 823 | ) 824 | partitioned_files.pop(mirrpos) 825 | 826 | # Acquire additional information from $INDEX_ALLOCATION 827 | logging.info('Finding partition geometry') 828 | most_likely = self.most_likely_sec_per_clus() 829 | for address in partitioned_files: 830 | part = partitioned_files[address] 831 | if part.offset is None: 832 | # Find geometry by approximate string matching 833 | offset, sec_per_clus = self.find_boundary( 834 | part, address, most_likely 835 | ) 836 | if offset is not None: 837 | part.set_recoverable(True) 838 | part.offset = offset 839 | part.sec_per_clus = sec_per_clus 840 | else: 841 | offset, sec_per_clus = part.offset, part.sec_per_clus 842 | if offset is not None: 843 | logging.info( 844 | 'Finalizing MFT reconstruction of partition at offset %i', 845 | offset 846 | ) 847 | self.finalize_reconstruction(part) 848 | 849 | # Merge pieces from fragmented MFT 850 | for address in list(partitioned_files): 851 | # This could have been deleted in a previous iteration 852 | if address not in partitioned_files: 853 | continue 854 | part = partitioned_files[address] 855 | entry = part.get(0) # $MFT 856 | if entry is None or part.sec_per_clus is None: 857 | continue 858 | dump = sectors(img, entry.offset, FILE_size) 859 | parsed = parse_file_record(dump) 860 | if not parsed['valid'] or 'attributes' not in parsed: 861 | continue 862 | 863 | if '$ATTRIBUTE_LIST' in parsed['attributes']: 864 | _integrate_attribute_list(parsed, part, img) 865 | attrs = parsed['attributes'] 866 | if '$DATA' not in attrs or len(attrs['$DATA']) < 1: 867 | continue 868 | 869 | if 'runlist' not in attrs['$DATA'][0]: 870 | continue 871 | runlist = attrs['$DATA'][0]['runlist'] 872 | if len(runlist) > 1: 873 | logging.info( 874 | 'MFT for partition at offset %d is fragmented. Trying to ' 875 | 'merge %d parts...', part.offset, len(runlist) 876 | ) 877 | clusters_pos = runlist[0]['offset'] 878 | spc = part.sec_per_clus 879 | size = runlist[0]['length'] 880 | for entry in runlist[1:]: 881 | clusters_pos += entry['offset'] 882 | real_pos = clusters_pos * part.sec_per_clus + part.offset 883 | position = real_pos - size*spc 884 | if position in partitioned_files: 885 | piece = partitioned_files[position] 886 | if piece.offset is None or piece.offset == part.offset: 887 | conflicts = [ 888 | i for i in piece.files if 889 | not piece.files[i].is_ghost and 890 | i in part.files and 891 | not part.files[i].is_ghost 892 | ] 893 | if not len(conflicts): 894 | logging.debug( 895 | 'Merging partition with MFT offset %d into' 896 | ' %s (fragmented MFT)', piece.mft_pos, part 897 | ) 898 | # Merge the partitions 899 | merge(part, piece) 900 | # Remove the fragment 901 | partitioned_files.pop(position) 902 | else: 903 | logging.debug( 904 | 'NOT merging partition with MFT offset %d into' 905 | ' %s (possible fragmented MFT) due to conflicts', piece.mft_pos, part 906 | ) 907 | size += entry['length'] 908 | 909 | return partitioned_files 910 | -------------------------------------------------------------------------------- /recuperabit/fs/ntfs_fmt.py: -------------------------------------------------------------------------------- 1 | """NTFS format descriptors.""" 2 | 3 | # RecuperaBit 4 | # Copyright 2014-2021 Andrea Lazzarotto 5 | # 6 | # This file is part of RecuperaBit. 7 | # 8 | # RecuperaBit is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # RecuperaBit is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with RecuperaBit. If not, see . 20 | 21 | 22 | from datetime import datetime, timezone, timedelta 23 | 24 | from ..utils import printable, unpack 25 | 26 | 27 | time_start = datetime(1601, 1, 1, tzinfo=timezone.utc) 28 | 29 | def printable_name(name): 30 | """Return a printable name decoded in UTF-16.""" 31 | decoded = [] 32 | parts = (name[i:i+2] for i in range(0, len(name), 2)) 33 | for part in parts: 34 | try: 35 | decoded.append(part.decode('utf-16')) 36 | except UnicodeDecodeError: 37 | decoded.append('\x00') 38 | joined = ''.join(decoded) 39 | # basic check for false positives 40 | if '\x00\x00\x00' in joined: 41 | return None 42 | return printable(joined, '#') 43 | 44 | 45 | def windows_time(timestamp): 46 | """Convert a date-time value from Microsoft filetime to UTC.""" 47 | try: 48 | value = int.from_bytes(timestamp, byteorder='little', signed=False) 49 | converted = time_start + timedelta(milliseconds = value//10000) 50 | return converted 51 | except (ValueError, OverflowError, OSError): 52 | return None 53 | 54 | 55 | def index_entries(dump): 56 | """Interpret the entries of an index.""" 57 | offset = 0 58 | entries = [] 59 | while offset < len(dump): 60 | parsed = unpack(dump[offset:], indx_dir_entry_fmt) 61 | filename = parsed['$FILE_NAME'] 62 | entry_length = parsed['entry_length'] 63 | valid_length = entry_length > 0 64 | has_name = 'name' in filename 65 | valid_name = has_name and len(filename['name']) > 0 66 | if valid_length and valid_name: 67 | if parsed['content_length']: 68 | entries.append(parsed) 69 | offset += entry_length 70 | else: 71 | break 72 | # Last entry 73 | if parsed['flags'] & 0x2: 74 | break 75 | # TODO handle carving of remnant entries in slack space 76 | return entries 77 | 78 | 79 | def index_root_parser(dump): 80 | """Parse the entries contained in a $INDEX_ROOT attribute.""" 81 | header = unpack(dump, indx_header_fmt) 82 | offset = header['off_start_list'] 83 | entries = index_entries(dump[offset:]) 84 | return entries 85 | 86 | 87 | def runlist_unpack(runlist): 88 | """Parse an attribute runlist.""" 89 | pieces = [] 90 | while len(runlist) and runlist[0] != 0: 91 | off_bytes, len_bytes = divmod(runlist[0], 2**4) 92 | end = len_bytes + off_bytes 93 | decoded = unpack(runlist, [ 94 | ('length', ('i', 1, len_bytes)), 95 | ('offset', ('+i', len_bytes + 1, end)) 96 | ]) 97 | if decoded['length'] is None or decoded['offset'] is None: 98 | break 99 | pieces.append(decoded) 100 | runlist = runlist[end+1:] 101 | return pieces 102 | 103 | 104 | def attribute_list_parser(dump): 105 | """Parse entries contained in a $ATTRIBUTE_LIST attribute.""" 106 | content = [] 107 | while len(dump): 108 | decoded = unpack(dump, [ 109 | ('type', ('i', 0, 3)), 110 | ('length', ('i', 4, 5)), 111 | ('name_length', ('i', 6, 6)), 112 | ('name_off', ('i', 7, 7)), 113 | ('start_VCN', ('i', 8, 15)), 114 | ('file_ref', ('i', 16, 19)), 115 | ('id', ('i', 24, 24)) 116 | ]) 117 | length = decoded['length'] 118 | # Check either if the length is 0 or if it is None 119 | if not length: 120 | break 121 | content.append(decoded) 122 | dump = dump[length:] 123 | return content 124 | 125 | 126 | def try_filename(dump): 127 | """Try to parse a $FILE_NAME attribute.""" 128 | try: 129 | unpack(dump, attr_types_fmt['$FILE_NAME']) 130 | except TypeError: # Broken attribute 131 | return {} 132 | 133 | entry_fmt = [ 134 | ('signature', ('s', 0, 3)), 135 | ('off_fixup', ('i', 4, 5)), 136 | ('n_entries', ('i', 6, 7)), 137 | ('LSN', ('i', 8, 15)), 138 | ('seq_val', ('i', 16, 17)), 139 | ('link_count', ('i', 18, 19)), 140 | ('off_first', ('i', 20, 21)), 141 | ('flags', ('i', 22, 23)), 142 | ('size_used', ('i', 24, 27)), 143 | ('size_alloc', ('i', 28, 31)), 144 | ('base_record', ('i', 32, 35)), 145 | ('record_n', ('i', 44, 47)) # Available only for NTFS >= 3.1 146 | ] 147 | 148 | boot_sector_fmt = [ 149 | ('OEM_name', ('s', 3, 10)), 150 | ('bytes_per_sector', ('i', 11, 12)), 151 | ('sectors_per_cluster', ('i', 13, 13)), 152 | ('sectors', ('i', 40, 47)), 153 | ('MFT_addr', ('i', 48, 55)), 154 | ('MFTmirr_addr', ('i', 56, 63)), 155 | ('MFT_entry_size', ('i', 64, 64)), 156 | ('idx_size', ('i', 68, 68)), 157 | ('signature', ('s', 510, 511)) 158 | ] 159 | 160 | indx_fmt = [ 161 | ('signature', ('s', 0, 3)), 162 | ('off_fixup', ('i', 4, 5)), 163 | ('n_entries', ('i', 6, 7)), 164 | ('LSN', ('i', 8, 15)), 165 | ('seq_val', ('i', 16, 17)) 166 | ] 167 | 168 | indx_header_fmt = [ 169 | ('off_start_list', ('i', 0, 3)), 170 | ('off_end_list', ('i', 4, 7)), 171 | ('off_end_buffer', ('i', 8, 11)), 172 | ('flags', ('i', 12, 15)) 173 | ] 174 | 175 | indx_dir_entry_fmt = [ 176 | ('record_n', ('i', 0, 3)), 177 | ('entry_length', ('i', 8, 9)), 178 | ('content_length', ('i', 10, 11)), 179 | ('flags', ('i', 12, 15)), 180 | ('$FILE_NAME', ( 181 | try_filename, 16, lambda r: 15 + ( 182 | r['content_length'] if r['content_length'] is not None else 0 183 | ) 184 | )) 185 | # The following is not very useful so it's not worth computing 186 | # 'VCN_child', ( 187 | # lambda s: int(str(s[::-1]).encode('hex'),16) if len(s) else None, 188 | # lambda r: r['entry_length'] - (8 if r['flags'] & 0x1 else 0), 189 | # lambda r: r['entry_length'] 190 | # ) 191 | ] 192 | 193 | attr_header_fmt = [ 194 | ('type', ('i', 0, 3)), 195 | ('length', ('i', 4, 7)), 196 | ('non_resident', ('i', 8, 8)), 197 | ('name_length', ('i', 9, 9)), 198 | ('name_off', ('i', 10, 11)), 199 | ('flags', ('i', 12, 13)), 200 | ('id', ('i', 14, 15)), 201 | ('name', ( 202 | printable_name, 203 | lambda r: r['name_off'], 204 | lambda r: r['name_off'] + r['name_length']*2 - 1 205 | )) 206 | ] 207 | 208 | attr_resident_fmt = [ 209 | ('content_size', ('i', 16, 19)), 210 | ('content_off', ('i', 20, 21)) 211 | ] 212 | 213 | attr_nonresident_fmt = [ 214 | ('start_VCN', ('i', 16, 23)), 215 | ('end_VCN', ('i', 24, 31)), 216 | ('runlist_offset', ('i', 32, 33)), 217 | ('compression_unit', ('i', 34, 35)), 218 | ('allocated_size', ('i', 40, 47)), 219 | ('real_size', ('i', 48, 55)), 220 | ('initialized_size', ('i', 56, 63)), 221 | ('runlist', ( 222 | runlist_unpack, 223 | lambda r: r['runlist_offset'], 224 | lambda r: r['allocated_size'] 225 | )) 226 | ] 227 | 228 | attr_names = { 229 | 16: '$STANDARD_INFORMATION', 230 | 32: '$ATTRIBUTE_LIST', 231 | 48: '$FILE_NAME', 232 | 80: '$SECURITY_DESCRIPTOR', 233 | 96: '$VOLUME_NAME', 234 | 112: '$VOLUME_INFORMATION', 235 | 128: '$DATA', 236 | 144: '$INDEX_ROOT', 237 | 160: '$INDEX_ALLOCATION', 238 | 176: '$BITMAP' 239 | } 240 | 241 | # This structure extracts only interesting attributes. 242 | attr_types_fmt = { 243 | '$STANDARD_INFORMATION': [ 244 | ('creation_time', (windows_time, 0, 7)), 245 | ('modification_time', (windows_time, 8, 15)), 246 | ('MFT_modification_time', (windows_time, 16, 23)), 247 | ('access_time', (windows_time, 24, 31)), 248 | ('flags', ('i', 32, 35)) 249 | ], 250 | '$ATTRIBUTE_LIST': [ 251 | ('entries', (attribute_list_parser, 0, 1024)) 252 | ], 253 | '$FILE_NAME': [ 254 | ('parent_entry', ('i', 0, 5)), 255 | ('parent_seq', ('i', 6, 7)), 256 | ('creation_time', (windows_time, 8, 15)), 257 | ('modification_time', (windows_time, 16, 23)), 258 | ('MFT_modification_time', (windows_time, 24, 31)), 259 | ('access_time', (windows_time, 32, 39)), 260 | ('allocated_size', ('i', 40, 47)), 261 | ('real_size', ('i', 48, 55)), 262 | ('flags', ('i', 56, 59)), 263 | ('name_length', ('i', 64, 64)), 264 | ('namespace', ('i', 65, 65)), 265 | ('name', (printable_name, 66, lambda r: r['name_length']*2 + 65)) 266 | ], 267 | '$INDEX_ROOT': [ 268 | ('attr_type', ('i', 0, 3)), 269 | ('sorting_rule', ('i', 4, 7)), 270 | ('record_bytes', ('i', 8, 11)), 271 | ('record_clusters', ('i', 12, 12)), 272 | ('records', (index_root_parser, 16, lambda r: r['record_bytes'])) 273 | ] 274 | } 275 | -------------------------------------------------------------------------------- /recuperabit/logic.py: -------------------------------------------------------------------------------- 1 | """Filesystem-independent algorithmic logic.""" 2 | 3 | # RecuperaBit 4 | # Copyright 2014-2021 Andrea Lazzarotto 5 | # 6 | # This file is part of RecuperaBit. 7 | # 8 | # RecuperaBit is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # RecuperaBit is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with RecuperaBit. If not, see . 20 | 21 | 22 | import bisect 23 | import codecs 24 | import logging 25 | import os 26 | import os.path 27 | import sys 28 | import time 29 | import types 30 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, Iterator, Set, Tuple, TypeVar, Generic 31 | 32 | T = TypeVar('T') 33 | 34 | if TYPE_CHECKING: 35 | from .fs.core_types import File, Partition 36 | 37 | 38 | class SparseList(Generic[T]): 39 | """List which only stores values at some places.""" 40 | def __init__(self, data: Optional[Dict[int, T]] = None, default: Optional[T] = None) -> None: 41 | self.keys: List[int] = [] # This is always kept in order 42 | self.elements: Dict[int, T] = {} 43 | self.default: Optional[T] = default 44 | if data is not None: 45 | self.keys = sorted(data) 46 | self.elements.update(data) 47 | 48 | def __len__(self) -> int: 49 | try: 50 | return self.keys[-1] + 1 51 | except IndexError: 52 | return 0 53 | 54 | def __getitem__(self, index: int) -> Optional[T]: 55 | return self.elements.get(index, self.default) 56 | 57 | def __setitem__(self, index: int, item: T) -> None: 58 | if item == self.default: 59 | if index in self.elements: 60 | del self.elements[index] 61 | del self.keys[bisect.bisect_left(self.keys, index)] 62 | else: 63 | if index not in self.elements: 64 | bisect.insort(self.keys, index) 65 | self.elements[index] = item 66 | 67 | def __contains__(self, element: T) -> bool: 68 | return element in self.elements.values() 69 | 70 | def __iter__(self) -> Iterator[int]: 71 | return self.keys.__iter__() 72 | 73 | def __repr__(self) -> str: 74 | elems = [] 75 | prevk = 0 76 | if len(self.elements) > 0: 77 | k = self.keys[0] 78 | elems.append(str(k) + ' -> ' + repr(self.elements[k])) 79 | prevk = self.keys[0] 80 | for i in range(1, len(self.elements)): 81 | nextk = self.keys[i] 82 | if nextk <= prevk + 2: 83 | while prevk < nextk - 1: 84 | elems.append('__') 85 | prevk += 1 86 | elems.append(repr(self.elements[nextk])) 87 | else: 88 | elems.append('\n... ' + str(nextk) + ' -> ' + 89 | repr(self.elements[nextk])) 90 | prevk = nextk 91 | 92 | return '[' + ', '.join(elems) + ']' 93 | 94 | def iterkeys(self) -> Iterator[int]: 95 | """An iterator over the keys of actual elements.""" 96 | return self.__iter__() 97 | 98 | def iterkeys_rev(self) -> Iterator[int]: 99 | """An iterator over the keys of actual elements (reversed).""" 100 | i = len(self.keys) 101 | while i > 0: 102 | i -= 1 103 | yield self.keys[i] 104 | 105 | def itervalues(self) -> Iterator[T]: 106 | """An iterator over the elements.""" 107 | for k in self.keys: 108 | yield self.elements[k] 109 | 110 | def wipe_interval(self, bottom: int, top: int) -> None: 111 | """Remove elements between bottom and top.""" 112 | new_keys = set() 113 | if bottom > top: 114 | for k in self.keys: 115 | if top <= k < bottom: 116 | new_keys.add(k) 117 | else: 118 | del self.elements[k] 119 | else: 120 | for k in self.keys: 121 | if bottom <= k < top: 122 | del self.elements[k] 123 | else: 124 | new_keys.add(k) 125 | self.keys = sorted(new_keys) 126 | 127 | 128 | def preprocess_pattern(pattern: SparseList[T]) -> Dict[T, List[int]]: 129 | """Preprocess a SparseList for approximate string matching. 130 | 131 | This function performs preprocessing for the Baeza-Yates--Perleberg 132 | fast and practical approximate string matching algorithm.""" 133 | result: Dict[T, List[int]] = {} 134 | length = pattern.__len__() 135 | for k in pattern: 136 | name = pattern[k] 137 | if name not in result: 138 | result[name] = [length-k-1] 139 | elif name != result[name][-1]: 140 | result[name].append(length-k-1) 141 | return result 142 | 143 | 144 | def approximate_matching(records: SparseList[T], pattern: SparseList[T], stop: int, k: int = 1) -> Optional[List[Union[Set[int], int, float]]]: 145 | """Find the best match for a given pattern. 146 | 147 | The Baeza-Yates--Perleberg algorithm requires a preprocessed pattern. This 148 | function takes as input a SparseList of records and pattern that will be 149 | preprocessed. The records in the SparseList should be formed by single 150 | elements. If they have another shape, e.g. tuples of the form 151 | (namespace, name), the get function can be used to tell the algorithm how 152 | to access them. k is the minimum value for support.""" 153 | 154 | msize = pattern.__len__() 155 | if records.__len__() == 0 or msize == 0: 156 | return None 157 | 158 | lookup = preprocess_pattern(pattern) 159 | count: SparseList[int] = SparseList(default=0) 160 | match_offsets: Set[int] = set() 161 | 162 | i = 0 163 | j = 0 # previous value of i 164 | 165 | # logging.debug('Starting approximate matching up to %i', stop) 166 | # Loop only on indexes where there are elements 167 | for i in records: 168 | if i > stop+msize-1: 169 | break 170 | 171 | # zero-out the parts that were skipped 172 | count.wipe_interval(j % msize, i % msize) 173 | j = i 174 | 175 | offsets = set(lookup.get(records[i], [])) 176 | for off in offsets: 177 | count[(i + off) % msize] += 1 178 | score = count[(i + off) % msize] 179 | if score == k: 180 | match_offsets.add(i+off-msize+1) 181 | if score > k: 182 | k = score 183 | match_offsets = set([i+off-msize+1]) 184 | 185 | if len(match_offsets): 186 | logging.debug( 187 | 'Found MATCH in positions {} ' 188 | 'with weight {} ({}%)'.format( 189 | match_offsets, k, 190 | k * 100.0 / len(pattern.keys) 191 | ) 192 | ) 193 | return [match_offsets, k, float(k) / len(pattern.keys)] 194 | else: 195 | # logging.debug('No match found') 196 | return None 197 | 198 | 199 | def makedirs(path: str) -> bool: 200 | """Make directories if they do not exist.""" 201 | try: 202 | os.makedirs(path) 203 | except OSError: 204 | _, value, _ = sys.exc_info() 205 | # The directory already exists = no problem 206 | if value.errno != 17: 207 | logging.error(value) 208 | return False 209 | return True 210 | 211 | 212 | def recursive_restore(node: 'File', part: 'Partition', outputdir: str, make_dirs: bool = True) -> None: 213 | """Restore a directory structure starting from a file node.""" 214 | parent_path = str( 215 | part[node.parent].full_path(part) if node.parent is not None 216 | else '' 217 | ) 218 | 219 | file_path = os.path.join(parent_path, node.name) 220 | restore_parent_path = os.path.join(outputdir, parent_path) 221 | restore_path = os.path.join(outputdir, file_path) 222 | 223 | try: 224 | content = node.get_content(part) 225 | except NotImplementedError: 226 | logging.error(u'Restore of #%s %s is not supported', node.index, 227 | file_path) 228 | content = None 229 | 230 | if make_dirs: 231 | if not makedirs(restore_parent_path): 232 | return 233 | 234 | is_directory = node.is_directory or len(node.children) > 0 235 | 236 | if is_directory: 237 | logging.info(u'Restoring #%s %s', node.index, file_path) 238 | if not makedirs(restore_path): 239 | return 240 | 241 | if is_directory and content is not None: 242 | logging.warning(u'Directory %s has data content!', file_path) 243 | restore_path += '_recuperabit_content' 244 | 245 | try: 246 | if content is not None: 247 | logging.info(u'Restoring #%s %s', node.index, file_path) 248 | with codecs.open(restore_path, 'wb') as outfile: 249 | if isinstance(content, types.GeneratorType): 250 | for piece in content: 251 | outfile.write(piece) 252 | else: 253 | outfile.write(content) 254 | else: 255 | if not is_directory: 256 | # Empty file 257 | open(restore_path, 'wb').close() 258 | except IOError: 259 | logging.error(u'IOError when trying to create %s', restore_path) 260 | 261 | try: 262 | # Restore Modification + Access time 263 | mtime, atime, _ = node.get_mac() 264 | if mtime is not None: 265 | atime = time.mktime(atime.astimezone().timetuple()) 266 | mtime = time.mktime(mtime.astimezone().timetuple()) 267 | os.utime(restore_path, (atime, mtime)) 268 | except IOError: 269 | logging.error(u'IOError while setting atime and mtime of %s', restore_path) 270 | 271 | if is_directory: 272 | for child in node.children: 273 | if not child.ignore(): 274 | recursive_restore(child, part, outputdir, make_dirs=False) 275 | else: 276 | logging.info(u'Skipping ignored file {}'.format(child)) 277 | -------------------------------------------------------------------------------- /recuperabit/utils.py: -------------------------------------------------------------------------------- 1 | """Collection of utility functions.""" 2 | 3 | # RecuperaBit 4 | # Copyright 2014-2021 Andrea Lazzarotto 5 | # 6 | # This file is part of RecuperaBit. 7 | # 8 | # RecuperaBit is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # RecuperaBit is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with RecuperaBit. If not, see . 20 | 21 | 22 | from datetime import datetime 23 | import logging 24 | import pprint 25 | import string 26 | import sys 27 | import time 28 | from typing import TYPE_CHECKING, Any, Iterable, Optional, List, Dict, Tuple, Union, Callable 29 | import unicodedata 30 | import io 31 | 32 | from .fs.constants import sector_size 33 | 34 | printer: pprint.PrettyPrinter = pprint.PrettyPrinter(indent=4) 35 | all_chars = (chr(i) for i in range(sys.maxunicode)) 36 | unicode_printable: set[str] = set( 37 | c for c in all_chars 38 | if not unicodedata.category(c)[0].startswith('C') 39 | ) 40 | ascii_printable: set[str] = set(string.printable[:-5]) 41 | 42 | if TYPE_CHECKING: 43 | from .fs.core_types import File, Partition 44 | 45 | 46 | def sectors(image: io.BufferedReader, offset: int, size: int, bsize: int = sector_size, fill: bool = True) -> Optional[bytearray]: 47 | """Read from a file descriptor.""" 48 | read = True 49 | try: 50 | image.seek(offset * bsize) 51 | except (IOError, OverflowError, ValueError): 52 | read = False 53 | if read: 54 | try: 55 | dump = image.read(size * bsize) 56 | except (IOError, MemoryError): 57 | logging.warning( 58 | "Cannot read sector(s). Filling with 0x00. Offset: {} Size: " 59 | "{} Bsize: {}".format(offset, size, bsize) 60 | ) 61 | read = False 62 | if not read: 63 | if fill: 64 | dump = size * bsize * b'\x00' 65 | else: 66 | return None 67 | return bytearray(dump) 68 | 69 | def unixtime(dtime: Optional[datetime]) -> float: 70 | """Convert datetime to UNIX epoch.""" 71 | if dtime is None: 72 | return 0.0 73 | try: 74 | return time.mktime(dtime.timetuple()) 75 | except ValueError: 76 | return 0.0 77 | 78 | 79 | # format: 80 | # [(label, (formatter, lower, higher)), ...] 81 | def unpack(data: bytes, fmt: List[Tuple[str, Tuple[Union[str, Callable[[bytes], Any]], Union[int, Callable[[Dict[str, Any]], Optional[int]]], Union[int, Callable[[Dict[str, Any]], Optional[int]]]]]]) -> Dict[str, Any]: 82 | """Extract formatted information from a string of bytes.""" 83 | result: Dict[str, Any] = {} 84 | for label, description in fmt: 85 | formatter, lower, higher = description 86 | # If lower is a function, then apply it 87 | low = lower(result) if callable(lower) else lower 88 | high = higher(result) if callable(higher) else higher 89 | 90 | if low is None or high is None: 91 | result[label] = None 92 | continue 93 | 94 | if callable(formatter): 95 | result[label] = formatter(data[low:high+1]) 96 | else: 97 | if formatter == 's': 98 | result[label] = str(data[low:high+1]) 99 | if formatter.startswith('utf'): 100 | result[label] = data[low:high+1].decode(formatter) 101 | if formatter.endswith('i') and len(formatter) < 4: 102 | # Use little-endian by default. Big-endian with >i. 103 | # Force sign-extension of first bit with >+i / +i. 104 | chunk = data[low:high+1] 105 | 106 | signed = False 107 | if '+' in formatter: 108 | signed = True 109 | 110 | byteorder = 'little' 111 | if formatter.startswith('>'): 112 | byteorder = 'big' 113 | 114 | if len(chunk): 115 | result[label] = int.from_bytes(chunk, byteorder=byteorder, signed=signed) 116 | else: 117 | result[label] = None 118 | return result 119 | 120 | 121 | def feed_all(image: io.BufferedReader, scanners: List[Any], indexes: Iterable[int]) -> List[int]: 122 | # Scan the disk image and feed the scanners 123 | interesting: List[int] = [] 124 | for index in indexes: 125 | sector = sectors(image, index, 1, fill=False) 126 | if not sector: 127 | break 128 | 129 | for instance in scanners: 130 | res = instance.feed(index, sector) 131 | if res is not None: 132 | logging.info('Found {} at sector {}'.format(res, index)) 133 | interesting.append(index) 134 | return interesting 135 | 136 | 137 | def printable(text: str, default: str = '.', alphabet: Optional[set[str]] = None) -> str: 138 | """Replace unprintable characters in a text with a default one.""" 139 | if alphabet is None: 140 | alphabet = unicode_printable 141 | return ''.join((i if i in alphabet else default) for i in text) 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | def readable_bytes(amount: Optional[int]) -> str: 150 | """Return a human readable string representing a size in bytes.""" 151 | if amount is None: 152 | return '??? B' 153 | if amount < 1: 154 | return '%.2f B' % amount 155 | powers = { 156 | 0: '', 1: 'K', 2: 'M', 3: 'G', 4: 'T' 157 | } 158 | biggest = max(i for i in powers if amount / 1024.**i >= 1) 159 | scaled = amount / 1024.**biggest 160 | return '%.2f %sB' % (scaled, powers[biggest]) 161 | 162 | 163 | def _file_tree_repr(node: 'File') -> str: 164 | """Give a nice representation for the tree.""" 165 | desc = ( 166 | ' [GHOST]' if node.is_ghost else 167 | ' [DELETED]' if node.is_deleted else '' 168 | ) 169 | tail = '/' if node.is_directory else '' 170 | data = [ 171 | ('Id', node.index), 172 | ('Offset', node.offset), 173 | ( 174 | 'Offset bytes', 175 | node.offset * sector_size 176 | if node.offset is not None else None 177 | ) 178 | # ('MAC', node.mac) 179 | ] 180 | if not node.is_directory: 181 | data += [('Size', readable_bytes(node.size))] 182 | return u'%s%s (%s) %s' % ( 183 | node.name, tail, ', '.join(a + ': ' + str(b) for a, b in data), desc 184 | ) 185 | 186 | 187 | def tree_folder(directory: 'File', padding: int = 0) -> str: 188 | """Return a tree-like textual representation of a directory.""" 189 | lines: List[str] = [] 190 | pad = ' ' * padding 191 | lines.append( 192 | pad + _file_tree_repr(directory) 193 | ) 194 | padding = padding + 2 195 | pad = ' ' * padding 196 | for entry in directory.children: 197 | if len(entry.children) or entry.is_directory: 198 | lines.append(tree_folder(entry, padding)) 199 | else: 200 | lines.append( 201 | pad + _file_tree_repr(entry) 202 | ) 203 | return '\n'.join(lines) 204 | 205 | 206 | def _bodyfile_repr(node: 'File', path: str) -> str: 207 | """Return a body file line for node.""" 208 | end = '/' if node.is_directory or len(node.children) else '' 209 | return '|'.join(str(el) for el in [ 210 | '0', # MD5 211 | path + node.name + end, # name 212 | node.index, # inode 213 | '0', '0', '0', # mode, UID, GID 214 | node.size if node.size is not None else 0, 215 | unixtime(node.mac['access']), 216 | unixtime(node.mac['modification']), 217 | unixtime(node.mac['creation']), 218 | '0' 219 | ]) 220 | 221 | 222 | def bodyfile_folder(directory: 'File', path: str = '') -> List[str]: 223 | """Create a body file compatible with TSK 3.x. 224 | 225 | Format: 226 | '#MD5|name|inode|mode_as_string|UID|GID|size|atime|mtime|ctime|crtime' 227 | See also: http://wiki.sleuthkit.org/index.php?title=Body_file""" 228 | lines: List[str] = [_bodyfile_repr(directory, path)] 229 | path += directory.name + '/' 230 | for entry in directory.children: 231 | if len(entry.children) or entry.is_directory: 232 | lines += bodyfile_folder(entry, path) 233 | else: 234 | lines.append(_bodyfile_repr(entry, path)) 235 | return lines 236 | 237 | 238 | def _ltx_clean(label: Any) -> str: 239 | """Small filter to prepare strings to be included in LaTeX code.""" 240 | clean = str(label).replace('$', r'\$').replace('_', r'\_') 241 | if clean[0] == '-': 242 | clean = r'\textminus{}' + clean[1:] 243 | return clean 244 | 245 | 246 | def _tikz_repr(node: 'File') -> str: 247 | """Represent the node for a Tikz diagram.""" 248 | return r'node %s{%s\enskip{}%s}' % ( 249 | '[ghost]' if node.is_ghost else '[deleted]' if node.is_deleted else '', 250 | _ltx_clean(node.index), _ltx_clean(node.name) 251 | ) 252 | 253 | 254 | def tikz_child(directory: 'File', padding: int = 0) -> Tuple[str, int]: 255 | """Write a child row for Tikz representation.""" 256 | pad = ' ' * padding 257 | lines: List[str] = [r'%schild {%s' % (pad, _tikz_repr(directory))] 258 | count: int = len(directory.children) 259 | for entry in directory.children: 260 | content, number = tikz_child(entry, padding+4) 261 | lines.append(content) 262 | count += number 263 | lines.append('}') 264 | for entry in range(count): 265 | lines.append('child [missing] {}') 266 | return '\n'.join(lines).replace('\n}', '}'), count 267 | 268 | 269 | def tikz_part(part: 'Partition') -> str: 270 | """Create LaTeX code to represent the directory structure as a nice Tikz 271 | diagram. 272 | 273 | See also: http://www.texample.net/tikz/examples/filesystem-tree/""" 274 | 275 | preamble = (r"""%\usepackage{tikz} 276 | %\usetikzlibrary{trees}""") 277 | 278 | begin_tree = r"""\begin{tikzpicture}[% 279 | grow via three points={one child at (1.75em,-1.75em) and 280 | two children at (1.75em,-1.75em) and (1.75em,-3.5em)}, 281 | edge from parent path={(\tikzparentnode.south) |- (\tikzchildnode.west)}] 282 | \scriptsize 283 | """ 284 | end_tree = r"""\end{tikzpicture}""" 285 | 286 | lines = [r'\node [root] {File System Structure}'] 287 | lines += [tikz_child(entry, 4)[0] for entry in (part.root, part.lost)] 288 | lines.append(';') 289 | 290 | return '%s\n\n%s\n%s\n%s' % ( 291 | preamble, begin_tree, '\n'.join(lines), end_tree 292 | ) 293 | 294 | 295 | def csv_part(part: 'Partition') -> list[str]: 296 | """Provide a CSV representation for a partition.""" 297 | contents = [ 298 | ','.join(('Id', 'Parent', 'Name', 'Full Path', 'Modification Time', 299 | 'Access Time', 'Creation Time', 'Size (bytes)', 300 | 'Size (human)', 'Offset (bytes)', 'Offset (sectors)', 301 | 'Directory', 'Deleted', 'Ghost')) 302 | ] 303 | for index in part.files: 304 | obj = part.files[index] 305 | contents.append( 306 | u'%s,%s,"%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' % ( 307 | obj.index, obj.parent, obj.name, 308 | obj.full_path(part), 309 | obj.mac['modification'], obj.mac['access'], 310 | obj.mac['creation'], obj.size, 311 | readable_bytes(obj.size), 312 | (obj.offset * sector_size 313 | if obj.offset is not None else None), 314 | obj.offset, 315 | '1' if obj.is_directory else '', 316 | '1' if obj.is_deleted else '', 317 | '1' if obj.is_ghost else '' 318 | ) 319 | ) 320 | return contents 321 | 322 | 323 | def _sub_locate(text: str, directory: 'File', part: 'Partition') -> List[Tuple['File', str]]: 324 | """Helper for locate.""" 325 | lines: List[Tuple['File', str]] = [] 326 | for entry in sorted(directory.children, key=lambda node: node.name): 327 | path = entry.full_path(part) 328 | if text in path.lower(): 329 | lines.append((entry, path)) 330 | if len(entry.children) or entry.is_directory: 331 | lines += _sub_locate(text, entry, part) 332 | return lines 333 | 334 | 335 | def locate(part: 'Partition', text: str) -> List[Tuple['File', str]]: 336 | """Return paths of files matching the text.""" 337 | lines: List[Tuple['File', str]] = [] 338 | text = text.lower() 339 | lines += _sub_locate(text, part.lost, part) 340 | lines += _sub_locate(text, part.root, part) 341 | return lines 342 | 343 | 344 | def merge(part: 'Partition', piece: 'Partition') -> None: 345 | """Merge piece into part (both are partitions).""" 346 | for index in piece.files: 347 | if ( 348 | index not in part.files or 349 | part.files[index].is_ghost 350 | ): 351 | part.add_file(piece.files[index]) 352 | --------------------------------------------------------------------------------