├── .github
    └── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── main.py
└── recuperabit
    ├── __init__.py
    ├── fs
        ├── __init__.py
        ├── constants.py
        ├── core_types.py
        ├── ntfs.py
        └── ntfs_fmt.py
    ├── logic.py
    └── utils.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | ko_fi: thelazza
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | ### Code ###
  2 | .vscode/*
  3 | !.vscode/tasks.json
  4 | !.vscode/launch.json
  5 | *.code-workspace
  6 | 
  7 | ### Python ###
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | pip-wheel-metadata/
 31 | share/python-wheels/
 32 | *.egg-info/
 33 | .installed.cfg
 34 | *.egg
 35 | MANIFEST
 36 | 
 37 | # PyInstaller
 38 | #  Usually these files are written by a python script from a template
 39 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 40 | *.manifest
 41 | *.spec
 42 | 
 43 | # Installer logs
 44 | pip-log.txt
 45 | pip-delete-this-directory.txt
 46 | 
 47 | # Unit test / coverage reports
 48 | htmlcov/
 49 | .tox/
 50 | .nox/
 51 | .coverage
 52 | .coverage.*
 53 | .cache
 54 | nosetests.xml
 55 | coverage.xml
 56 | *.cover
 57 | *.py,cover
 58 | .hypothesis/
 59 | .pytest_cache/
 60 | pytestdebug.log
 61 | 
 62 | # Translations
 63 | *.mo
 64 | *.pot
 65 | 
 66 | # Django stuff:
 67 | *.log
 68 | local_settings.py
 69 | db.sqlite3
 70 | db.sqlite3-journal
 71 | 
 72 | # Flask stuff:
 73 | instance/
 74 | .webassets-cache
 75 | 
 76 | # Scrapy stuff:
 77 | .scrapy
 78 | 
 79 | # Sphinx documentation
 80 | docs/_build/
 81 | doc/_build/
 82 | 
 83 | # PyBuilder
 84 | target/
 85 | 
 86 | # Jupyter Notebook
 87 | .ipynb_checkpoints
 88 | 
 89 | # IPython
 90 | profile_default/
 91 | ipython_config.py
 92 | 
 93 | # pyenv
 94 | .python-version
 95 | 
 96 | # pipenv
 97 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 98 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 99 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | #   install all needed dependencies.
101 | #Pipfile.lock
102 | 
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 | 
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 | 
110 | # SageMath parsed files
111 | *.sage.py
112 | 
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 | pythonenv*
122 | 
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 | 
127 | # Rope project settings
128 | .ropeproject
129 | 
130 | # mkdocs documentation
131 | /site
132 | 
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 | 
138 | # Pyre type checker
139 | .pyre/
140 | 
141 | # pytype static type analyzer
142 | .pytype/
143 | 
144 | # profiling data
145 | .prof
146 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # ![RecuperaBit](http://i.imgur.com/Q6mM385.jpg)
  4 | 
  5 | <p><a class="badge-chip" href="https://github.com/Lazza/RecuperaBit/blob/master/LICENSE"><img alt="GPL-3.0 license" src="https://badgen.net/github/license/Lazza/RecuperaBit"></a>
  6 | <a class="badge-chip" href="https://github.com/Lazza/RecuperaBit/releases"><img alt="Latest release" src="https://badgen.net/github/release/Lazza/RecuperaBit"></a>
  7 | <a class="badge-chip" href="https://github.com/Lazza/RecuperaBit/stargazers"><img alt="Stars counter" src="https://badgen.net/github/stars/Lazza/RecuperaBit"></a>
  8 | <a class="badge-chip" href="https://project-types.github.io/#stadium"><img alt="Stadium badge" src="https://badgen.net/static/project type/stadium/orange"></a>
  9 | <a class="badge-chip" href="https://ko-fi.com/thelazza"><img alt="Donate on Ko-fi" src="https://badgen.net/static/ko-fi/donate/yellow"></a></p>
 10 | 
 11 | A software tool that attempts to reconstruct file system structures and recover
 12 | files. Currently, it supports only NTFS.
 13 | 
 14 | RecuperaBit attempts reconstruction of the directory structure regardless of:
 15 | 
 16 | - missing partition table
 17 | - unknown partition boundaries
 18 | - partially-overwritten metadata
 19 | - quick format
 20 | 
 21 | You can get more information about **the reconstruction algorithms** and the
 22 | architecture used in RecuperaBit by reading
 23 | [my MSc thesis](https://www.scribd.com/doc/309337813/) or checking out [the
 24 | slides](http://www.slideshare.net/TheLazza/recuperabit-forensic-file-system-reconstruction-given-partially-corrupted-metadata).
 25 | 
 26 | ## Usage
 27 | 
 28 |     usage: main.py [-h] [-s SAVEFILE] [-w] [-o OUTPUTDIR] path
 29 | 
 30 |     Reconstruct the directory structure of possibly damaged filesystems.
 31 | 
 32 |     positional arguments:
 33 |       path                  path to the disk image
 34 | 
 35 |     optional arguments:
 36 |       -h, --help            show this help message and exit
 37 |       -s SAVEFILE, --savefile SAVEFILE
 38 |                             path of the scan save file
 39 |       -w, --overwrite       force overwrite of the save file
 40 |       -o OUTPUTDIR, --outputdir OUTPUTDIR
 41 |                             directory for restored contents and output files
 42 | 
 43 | The main argument is the `path` to a bitstream image of a disk or partition.
 44 | RecuperaBit automatically determines the sectors from which partitions start.
 45 | 
 46 | RecuperaBit does not modify the disk image; however, it does read some parts of
 47 | it multiple times during execution. It should also work on real devices,
 48 | such as `/dev/sda`, but **this is not advised** for damaged drives. RecuperaBit
 49 | might worsen the situation by "stressing" a damaged drive, or it could crash due
 50 | to an I/O error.
 51 | 
 52 | Optionally, a save file can be specified with `-s`. The first time, after the
 53 | scanning process, results are saved in the file. After the first run, the file
 54 | is read to only analyze interesting sectors and speed up the loading phase.
 55 | 
 56 | Overwriting the save file can be forced with `-w`.
 57 | 
 58 | RecuperaBit includes a small command line that allows the user to recover files
 59 | and export the contents of a partition in CSV or
 60 | [body file](http://wiki.sleuthkit.org/index.php?title=Body_file) format. These
 61 | are exported in the directory specified by `-o` (or `recuperabit_output`).
 62 | 
 63 | ### Limitation
 64 | 
 65 | Currently, RecuperaBit does not work with compressed files on NTFS filesystems.
 66 | If you have deep knowledge of the inner workings of file compression on NTFS
 67 | filesystems, your help would be much appreciated, as available documentation is
 68 | quite sparse on the topic.
 69 | 
 70 | ### PyPy
 71 | 
 72 | RecuperaBit can be run with the standard CPython implementation; however, speed
 73 | can be increased by using it with the PyPy interpreter and JIT compiler:
 74 | 
 75 |     pypy3 main.py /path/to/disk.img
 76 | 
 77 | ### Recovery of File Contents
 78 | 
 79 | Files can be restored one at a time or recursively, starting from a directory.
 80 | After the scanning process has completed, you can check the list of partitions
 81 | that can be recovered by issuing the following command at the prompt:
 82 | 
 83 |     recoverable
 84 | 
 85 | Each line shows information about a partition. Let's consider the following
 86 | output example:
 87 | 
 88 |     Partition #0 -> Partition (NTFS, 15.00 MB, 11 files, Recoverable, Offset: 2048, Offset (b): 1048576, Sec/Clus: 8, MFT offset: 2080, MFT mirror offset: 17400)
 89 | 
 90 | If you want to recover files starting from a specific directory, you can either
 91 | print the tree on screen with the `tree` command (very verbose for large drives)
 92 | or you can export a CSV list of files (see `help` for details).
 93 | 
 94 | If you would rather extract all files from the *Root* and the *Lost Files*
 95 | nodes, you need to know the identifier for the root directory, which depends on
 96 | the file system type. The following are those of file systems supported by
 97 | RecuperaBit:
 98 | 
 99 | | File System Type | Root Id |
100 | |------------------|---------|
101 | | NTFS             | 5       |
102 | 
103 | The id for *Lost Files* is -1 **for every file system.**
104 | 
105 | Therefore, to restore `Partition #0` in our example, you need to run:
106 | 
107 |     restore 0 5
108 |     restore 0 -1
109 | 
110 | The files will be saved inside the output directory specified by `-o`.
111 | 
112 | ## License
113 | 
114 | This software is released under the GNU GPLv3. See `LICENSE` for more details.
115 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lazza/RecuperaBit/e05079ef0f40a1198c7633fce9d1b9eaef9c5679/__init__.py


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """Main RecuperaBit process."""
  3 | 
  4 | # RecuperaBit
  5 | # Copyright 2014-2021 Andrea Lazzarotto
  6 | #
  7 | # This file is part of RecuperaBit.
  8 | #
  9 | # RecuperaBit is free software: you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation, either version 3 of the License, or
 12 | # (at your option) any later version.
 13 | #
 14 | # RecuperaBit is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 17 | # GNU General Public License for more details.
 18 | #
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | 
 23 | import argparse
 24 | import codecs
 25 | import itertools
 26 | import locale
 27 | import logging
 28 | import os.path
 29 | import pickle
 30 | import sys
 31 | try:
 32 |     import readline
 33 |     readline # ignore unused import warning
 34 | except ImportError:
 35 |     pass
 36 | 
 37 | from recuperabit import logic, utils
 38 | # scanners
 39 | from recuperabit.fs.ntfs import NTFSScanner
 40 | 
 41 | from typing import TYPE_CHECKING
 42 | if TYPE_CHECKING:
 43 |     from recuperabit.fs.core_types import Partition
 44 | 
 45 | __author__ = "Andrea Lazzarotto"
 46 | __copyright__ = "(c) 2014-2021, Andrea Lazzarotto"
 47 | __license__ = "GPLv3"
 48 | __version__ = "1.1.6"
 49 | __maintainer__ = "Andrea Lazzarotto"
 50 | __email__ = "andrea.lazzarotto@gmail.com"
 51 | 
 52 | 
# Classes of available scanners. main() instantiates each class with the
# opened disk image and later asks every instance for the partitions it found.
plugins = (
    NTFSScanner,
)

# (usage, description) pairs printed by the interactive `help` command.
commands = (
    ('help', 'Print this help message'),
    ('recoverable', 'List recoverable partitions'),
    ('recoverable_size <size>', 'List recoverable partitions based on the minimum <size>'),
    ('other', 'List unrecoverable partitions'),
    ('allparts', 'List all partitions'),
    ('tree <part#>', 'Show contents of partition (tree)'),
    ('csv <part#> <path>', 'Save a CSV representation in a file'),
    ('bodyfile <part#> <path>', 'Save a body file representation in a file'),
    ('tikzplot <part#> [<path>]', 'Produce LaTeX code to draw a Tikz figure'),
    ('restore <part#> <file>', 'Recursively restore files from <file>'),
    ('locate <part#> <text>', 'Print all file paths that match a string'),
    ('traceback <part#> <file>', 'Print ids and paths for all ancestors of <file>'),
    ('merge <part#> <part#>', 'Merge the two partitions into the first one'),
    ('quit', 'Close the program')
)

# Keys of the partitions on which check_valid_part() has already called
# rebuild(), so the rebuild is not repeated; the `merge` command removes the
# keys of the partitions it touches.
rebuilt = set()
 76 | 
 77 | 
 78 | def list_parts(parts, shorthands, test):
 79 |     """List partitions corresponding to test."""
 80 |     for i, part in shorthands:
 81 |         if test(parts[part]):
 82 |             print('Partition #' + str(i), '->', parts[part])
 83 | 
 84 | 
 85 | def check_valid_part(num, parts, shorthands, rebuild=True):
 86 |     """Check if the required partition is valid."""
 87 |     try:
 88 |         i = int(num)
 89 |     except ValueError:
 90 |         print('Value is not valid!')
 91 |         return None
 92 |     if i in range(len(shorthands)):
 93 |         i, par = shorthands[i]
 94 |         part = parts[par]
 95 |         if rebuild and par not in rebuilt:
 96 |             print('Rebuilding partition...')
 97 |             part.rebuild()
 98 |             rebuilt.add(par)
 99 |             print('Done')
100 |         return part
101 |     print('No partition with given ID!')
102 |     return None
103 | 
104 | 
# Accepted argument counts for every command that takes arguments.
_ARGUMENT_COUNTS = {
    'tree': (1,),
    'bodyfile': (2,),
    'csv': (2,),
    'tikzplot': (1, 2),
    'restore': (2,),
    'locate': (2,),
    'traceback': (2,),
    'merge': (2,),
    'recoverable_size': (1,),
}


def _find_node(part, index):
    """Look up a file node of ``part`` by the index typed by the user.

    The lookup is attempted with the raw string and then with its integer
    value (when it parses as one); a later hit replaces an earlier one.
    Returns ``None`` when neither key is known to the partition.
    """
    try:
        numeric = int(index)
    except ValueError:
        numeric = index
    node = None
    for key in (index, numeric):
        node = part.get(key, node)
    return node


def _save_text(fname, text, label):
    """Write ``text`` to ``fname`` as UTF-8 and report the outcome on stdout."""
    try:
        with codecs.open(fname, 'w', encoding='utf8') as outfile:
            outfile.write(text)
            print('Saved %s to %s' % (label, fname))
    except IOError:
        print('Cannot open file %s for output!' % fname)


def interpret(cmd, arguments, parts: dict[int, 'Partition'], shorthands, outdir):
    """Perform command required by user.

    Parameters:
        cmd: name of the command (first word typed at the prompt).
        arguments: list of the remaining words, all strings.
        parts: mapping from partition key to partition object; the ``merge``
            command deletes the merged-away entry from it.
        shorthands: list of ``(number, key)`` pairs translating the numbers
            shown to the user into keys of ``parts``; ``merge`` removes one
            entry and renumbers the rest in place.
        outdir: directory in which exported files and restored contents are
            placed.

    All feedback is printed on stdout and nothing is returned.  The ``quit``
    command terminates the process through ``sys.exit(0)``.
    """
    expected = _ARGUMENT_COUNTS.get(cmd)
    if expected is not None and len(arguments) not in expected:
        print('Wrong number of parameters!')
        return

    if cmd == 'help':
        print('Available commands:')
        for name, desc in commands:
            print('    %s%s' % (name.ljust(28), desc))
    elif cmd == 'tree':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            print('-' * 10)
            print(utils.tree_folder(part.root))
            print(utils.tree_folder(part.lost))
            print('-' * 10)
    elif cmd == 'bodyfile':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            contents = [
                '# ---' + repr(part) + '---',
                '# Full paths'
            ] + utils.bodyfile_folder(part.root) + [
                '# \n# Orphaned files'
            ] + utils.bodyfile_folder(part.lost)
            fname = os.path.join(outdir, arguments[1])
            _save_text(fname, '\n'.join(contents), 'body file')
    elif cmd == 'csv':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            contents = utils.csv_part(part)
            fname = os.path.join(outdir, arguments[1])
            _save_text(fname, '\n'.join(contents), 'CSV file')
    elif cmd == 'tikzplot':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            code = utils.tikz_part(part)
            if len(arguments) > 1:
                fname = os.path.join(outdir, arguments[1])
                # Written as UTF-8 like the CSV and body file exports, rather
                # than with the locale's default encoding.
                _save_text(fname, code + '\n', 'Tikz code')
            else:
                print(code)
    elif cmd == 'restore':
        partid = arguments[0]
        part = check_valid_part(partid, parts, shorthands)
        if part is not None:
            myfile = _find_node(part, arguments[1])
            if myfile is None:
                print('The index is not valid')
            else:
                # Normalise the number so that e.g. "01" and "1" restore into
                # the same directory; check_valid_part() already proved that
                # partid parses as an integer.
                partition_dir = os.path.join(
                    outdir, 'Partition' + str(int(partid))
                )
                logic.recursive_restore(myfile, part, partition_dir)
    elif cmd == 'locate':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            for node, path in utils.locate(part, arguments[1]):
                desc = (
                    ' [GHOST]' if node.is_ghost else
                    ' [DELETED]' if node.is_deleted else ''
                )
                print('[%s]: %s%s' % (node.index, path, desc))
    elif cmd == 'traceback':
        part = check_valid_part(arguments[0], parts, shorthands)
        if part is not None:
            myfile = _find_node(part, arguments[1])
            if myfile is None:
                print('The index is not valid')
            # Walk up the parent chain until a parent is unknown to the
            # partition.
            while myfile is not None:
                print('[{}] {}'.format(myfile.index, myfile.full_path(part)))
                myfile = part.get(myfile.parent)
    elif cmd == 'merge':
        part1 = check_valid_part(arguments[0], parts, shorthands, rebuild=False)
        part2 = check_valid_part(arguments[1], parts, shorthands, rebuild=False)
        if part1 is None or part2 is None:
            return
        destination_position = int(arguments[0])
        source_position = int(arguments[1])
        if source_position == destination_position:
            # Merging a partition into itself would end with the partition
            # being deleted from `parts` below.
            print('Cannot merge a partition with itself!')
            return
        if part1.fs_type != part2.fs_type:
            print('Cannot merge partitions with types (%s, %s)' % (part1.fs_type, part2.fs_type))
            return
        print('Merging partitions...')
        utils.merge(part1, part2)
        _, par_source = shorthands[source_position]
        _, par_destination = shorthands[destination_position]
        del shorthands[source_position]
        del parts[par_source]
        # check_valid_part() addresses shorthands by list position, so the
        # displayed numbers must be realigned with the positions after the
        # deletion; otherwise the listing shows numbers that no longer work.
        shorthands[:] = [
            (number, par) for number, (_, par) in enumerate(shorthands)
        ]
        # Both partitions changed: forget any previous rebuild.
        for par in (par_source, par_destination):
            rebuilt.discard(par)
        print('There are now %d partitions.' % (len(parts), ))
    elif cmd == 'recoverable':
        list_parts(parts, shorthands, lambda x: x.recoverable)
    elif cmd == 'recoverable_size':
        # Parse once, up front: a non-numeric size must not raise from inside
        # the filter and abort the interactive session.
        try:
            minimum = int(arguments[0])
        except ValueError:
            print('Value is not valid!')
            return
        list_parts(
            parts, shorthands,
            lambda x: x.size is not None and x.size > minimum
        )
    elif cmd == 'other':
        list_parts(parts, shorthands, lambda x: not x.recoverable)
    elif cmd == 'allparts':
        list_parts(parts, shorthands, lambda x: True)
    elif cmd == 'quit':
        # sys.exit() instead of the `exit` helper, which is injected by the
        # site module and is not guaranteed to exist.
        sys.exit(0)
    else:
        print('Unknown command.')
267 | 
268 | 
def _print_banner():
    """Print the ASCII-art logo followed by version, copyright and license."""
    print(r"     ___                                ___ _ _   ")
    print(r"    | _ \___ __ _  _ _ __  ___ _ _ __ _| _ |_) |_ ")
    print(r"    |   / -_) _| || | '_ \/ -_) '_/ _` | _ \ |  _|")
    print(r"    |_|_\___\__|\_,_| .__/\___|_| \__,_|___/_|\__|")
    print("                    |_|   v{}".format(__version__))
    print('   ', __copyright__, '<%s>' % __email__)
    print('    Released under the', __license__)
    print('')


def _load_indexes(savefile_path, overwrite):
    """Choose the indexes to scan and whether results must be saved.

    Without a save file (or when it must be overwritten, or cannot be opened)
    the scan covers ``itertools.count()``; otherwise the indexes are unpickled
    from the save file.  A save file that cannot be unpickled terminates the
    program with exit status 1.

    Returns a ``(indexes, write_results)`` tuple.
    """
    write_results = False
    savefile = None
    if savefile_path is not None:
        if overwrite:
            logging.info('Results will be saved to %s', savefile_path)
            write_results = True
        else:
            logging.info('Checking if results already exist.')
            try:
                savefile = open(savefile_path, 'rb')
            except IOError:
                logging.info('Unable to open save file.')
                logging.info('Results will be saved to %s', savefile_path)
                write_results = True
            else:
                logging.info('Results will be read from %s', savefile_path)

    if savefile is None:
        return itertools.count(), write_results

    logging.info('The save file exists. Trying to read it...')
    # NOTE: unpickling can execute arbitrary code; only use save files that
    # were produced by a trusted run of this program.
    try:
        # `with` closes the handle even when unpickling fails.
        with savefile:
            indexes = pickle.load(savefile)
    except (pickle.UnpicklingError, AttributeError, EOFError, ImportError,
            IndexError):
        # Exceptions the pickle documentation lists for damaged input.
        logging.error('Malformed save file!')
        sys.exit(1)
    return indexes, write_results


def main():
    """Wrap the program logic inside a function.

    Prints the banner, parses the command line, opens the disk image, loads
    or prepares the scan indexes, asks for confirmation, runs the first scan
    with every scanner in ``plugins``, optionally pickles the result to the
    save file and finally serves the interactive prompt until the user quits
    or input ends.  The function only returns by raising ``SystemExit``.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
    _print_banner()

    parser = argparse.ArgumentParser(
        description='Reconstruct the directory structure of possibly damaged '
                    'filesystems.'
    )
    parser.add_argument('path', type=str, help='path to the disk image')
    parser.add_argument(
        '-s', '--savefile', type=str, help='path of the scan save file'
    )
    parser.add_argument(
        '-w', '--overwrite', action='store_true',
        help='force overwrite of the save file'
    )
    parser.add_argument(
        '-o', '--outputdir', type=str, help='directory for restored contents'
        ' and output files'
    )
    args = parser.parse_args()

    try:
        image = open(args.path, 'rb')
    except IOError:
        logging.error('Unable to open image file!')
        sys.exit(1)

    # The scanners are constructed with this handle, so it is kept open for
    # the whole session and closed on every exit path (including sys.exit).
    with image:
        # Set output directory
        outdir = args.outputdir
        if outdir is None:
            logging.info('No output directory specified, defaulting to '
                         'recuperabit_output')
            outdir = 'recuperabit_output'

        # Try to reload information from the savefile
        indexes, write_results = _load_indexes(args.savefile, args.overwrite)

        # Ask for confirmation before beginning the process; Ctrl-C is
        # treated like end of input, as in the command loop below.
        try:
            confirm = input('Type [Enter] to start the analysis or '
                            '"exit" / "quit" / "q" to quit: ')
        except (EOFError, KeyboardInterrupt):
            print('')
            sys.exit(0)
        if confirm in ('exit', 'quit', 'q'):
            sys.exit(0)

        # Create the output directory
        if not logic.makedirs(outdir):
            logging.error('Cannot create output directory!')
            sys.exit(1)

        scanners = [pl(image) for pl in plugins]

        logging.info('Analysis started! This is going to take time...')
        interesting = utils.feed_all(image, scanners, indexes)

        logging.info('First scan completed')

        if write_results:
            logging.info('Saving results to %s', args.savefile)
            with open(args.savefile, 'wb') as savefile:
                pickle.dump(interesting, savefile)

        # Ask for partitions
        parts: dict[int, 'Partition'] = {}
        for scanner in scanners:
            parts.update(scanner.get_partitions())

        # (number shown to the user, key in `parts`) pairs.
        shorthands = list(enumerate(parts))

        logging.info('%i partitions found.', len(parts))
        while True:
            print('\nWrite command ("help" for details):')
            try:
                # split() without a separator ignores repeated and trailing
                # blanks instead of turning them into empty arguments; an
                # empty line still yields the empty command.
                command = input('> ').split() or ['']
            except (EOFError, KeyboardInterrupt):
                print('')
                sys.exit(0)
            cmd = command[0]
            arguments = command[1:]
            interpret(cmd, arguments, parts, shorthands, outdir)
387 | 
# Start the command-line workflow only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
390 | 


--------------------------------------------------------------------------------
/recuperabit/__init__.py:
--------------------------------------------------------------------------------
 1 | # RecuperaBit
 2 | # Copyright 2014-2021 Andrea Lazzarotto
 3 | #
 4 | # This file is part of RecuperaBit.
 5 | #
 6 | # RecuperaBit is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # RecuperaBit is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
18 | 


--------------------------------------------------------------------------------
/recuperabit/fs/__init__.py:
--------------------------------------------------------------------------------
 1 | # RecuperaBit
 2 | # Copyright 2014-2021 Andrea Lazzarotto
 3 | #
 4 | # This file is part of RecuperaBit.
 5 | #
 6 | # RecuperaBit is free software: you can redistribute it and/or modify
 7 | # it under the terms of the GNU General Public License as published by
 8 | # the Free Software Foundation, either version 3 of the License, or
 9 | # (at your option) any later version.
10 | #
11 | # RecuperaBit is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
18 | 


--------------------------------------------------------------------------------
/recuperabit/fs/constants.py:
--------------------------------------------------------------------------------
 1 | """Information needed by multiple plugins."""
 2 | 
 3 | # RecuperaBit
 4 | # Copyright 2014-2021 Andrea Lazzarotto
 5 | #
 6 | # This file is part of RecuperaBit.
 7 | #
 8 | # RecuperaBit is free software: you can redistribute it and/or modify
 9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # RecuperaBit is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
20 | 
21 | 
# Size of one disk sector in bytes; sector counts are multiplied by this
# value wherever a byte position or byte length is needed.
sector_size: int = 512
max_sectors: int = 256   # Maximum block size for recovery (in sectors)
24 | 


--------------------------------------------------------------------------------
/recuperabit/fs/core_types.py:
--------------------------------------------------------------------------------
  1 | """Recuperabit Core Types.
  2 | 
  3 | This module contains the class declarations of all objects which are used in
  4 | the Recuperabit meta file system. Each plug-in is supposed to extend the File
  5 | and DiskScanner classes with subclasses implementing the missing methods."""
  6 | 
  7 | # RecuperaBit
  8 | # Copyright 2014-2021 Andrea Lazzarotto
  9 | #
 10 | # This file is part of RecuperaBit.
 11 | #
 12 | # RecuperaBit is free software: you can redistribute it and/or modify
 13 | # it under the terms of the GNU General Public License as published by
 14 | # the Free Software Foundation, either version 3 of the License, or
 15 | # (at your option) any later version.
 16 | #
 17 | # RecuperaBit is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 20 | # GNU General Public License for more details.
 21 | #
 22 | # You should have received a copy of the GNU General Public License
 23 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 24 | 
 25 | 
 26 | import logging
 27 | import os.path
 28 | from typing import Optional, Dict, Set, List, Tuple, Union, Any, Iterator
 29 | from datetime import datetime
 30 | 
 31 | from .constants import sector_size
 32 | 
 33 | from ..utils import readable_bytes
 34 | 
 35 | 
 36 | class File(object):
 37 |     """Filesystem-independent representation of a file. Aka Node."""
 38 |     def __init__(self, index: Union[int, str], name: str, size: Optional[int], is_directory: bool = False,
 39 |                  is_deleted: bool = False, is_ghost: bool = False) -> None:
 40 |         self.index: Union[int, str] = index
 41 |         self.name: str = name
 42 |         self.size: Optional[int] = size
 43 |         self.is_directory: bool = is_directory
 44 |         self.is_deleted: bool = is_deleted
 45 |         self.is_ghost: bool = is_ghost
 46 |         self.parent: Optional[Union[int, str]] = None
 47 |         self.mac: Dict[str, Optional[datetime]] = {
 48 |             'modification': None,
 49 |             'access': None,
 50 |             'creation': None
 51 |         }
 52 |         self.children: Set['File'] = set()
 53 |         self.children_names: Set[str] = set()     # Avoid name clashes breaking restore
 54 |         self.offset: Optional[int] = None  # Offset from beginning of disk
 55 | 
 56 |     def set_parent(self, parent: Optional[Union[int, str]]) -> None:
 57 |         """Set a pointer to the parent directory."""
 58 |         self.parent = parent
 59 | 
 60 |     def set_mac(self, modification: Optional[datetime], access: Optional[datetime], creation: Optional[datetime]) -> None:
 61 |         """Set the modification, access and creation times."""
 62 |         self.mac['modification'] = modification
 63 |         self.mac['access'] = access
 64 |         self.mac['creation'] = creation
 65 | 
 66 |     def get_mac(self) -> List[Optional[datetime]]:
 67 |         """Get the modification, access and creation times."""
 68 |         keys = ('modification', 'access', 'creation')
 69 |         return [self.mac[k] for k in keys]
 70 | 
 71 |     def set_offset(self, offset: Optional[int]) -> None:
 72 |         """Set the offset of the file record with respect to the disk image."""
 73 |         self.offset = offset
 74 | 
 75 |     def get_offset(self) -> Optional[int]:
 76 |         """Get the offset of the file record with respect to the disk image."""
 77 |         return self.offset
 78 | 
 79 |     def add_child(self, node: 'File') -> None:
 80 |         """Add a new child to this directory."""
 81 |         original_name = node.name
 82 |         i = 0
 83 |         # Check for multiple rebuilds
 84 |         if node in self.children:
 85 |             return
 86 |         # Avoid name clashes
 87 |         while node.name in self.children_names:
 88 |             node.name = original_name + '_%03d' % i
 89 |             i += 1
 90 |         if node.name != original_name:
 91 |             logging.warning(u'Renamed {} from {}'.format(node, original_name))
 92 |         self.children.add(node)
 93 |         self.children_names.add(node.name)
 94 | 
 95 |     def full_path(self, part: 'Partition') -> str:
 96 |         """Return the full path of this file."""
 97 |         if self.parent is not None:
 98 |             parent = part[self.parent]
 99 |             return os.path.join(parent.full_path(part), self.name)
100 |         else:
101 |             return self.name
102 | 
103 |     def get_content(self, partition: 'Partition') -> Optional[Union[bytes, Iterator[bytes]]]:
104 |         # pylint: disable=W0613
105 |         """Extract the content of the file.
106 | 
107 |         This method is intentionally not implemented because it depends on each
108 |         plug-in for a specific file system."""
109 |         if self.is_directory or self.is_ghost:
110 |             return None
111 |         raise NotImplementedError
112 | 
113 |     # pylint: disable=R0201
114 |     def ignore(self) -> bool:
115 |         """The following method is used by the restore procedure to check
116 |         files that should not be recovered. For example, in NTFS file
117 |         $BadClus:$Bad shall not be recovered because it creates an output
118 |         with the same size as the partition (usually many GBs)."""
119 |         return False
120 | 
121 |     def __repr__(self) -> str:
122 |         return (
123 |             u'File(#%s, ^^%s^^, %s, offset = %s sectors)' %
124 |             (self.index, self.parent, self.name, self.offset)
125 |         )
126 | 
127 | 
class Partition(object):
    """Simplified representation of the contents of a partition.

    Parameter root_id represents the identifier assigned to the root directory
    of a partition. This can be file system dependent. Files are stored by
    identifier; a special ghost directory (identifier -1) collects the files
    whose parent is unknown."""
    def __init__(self, fs_type: str, root_id: Union[int, str], scanner: 'DiskScanner') -> None:
        self.fs_type: str = fs_type
        self.root_id: Union[int, str] = root_id
        self.size: Optional[int] = None
        self.offset: Optional[int] = None
        self.root: Optional[File] = None
        # Container for nodes that cannot be attached anywhere else
        self.lost: File = File(-1, 'LostFiles', 0, is_directory=True, is_ghost=True)
        self.files: Dict[Union[int, str], File] = {}
        self.recoverable: bool = False
        self.scanner: 'DiskScanner' = scanner

    def add_file(self, node: File) -> None:
        """Register a file in the partition under its own identifier."""
        self.files[node.index] = node

    def set_root(self, node: File) -> None:
        """Use the given directory as root of the partition.

        Raises TypeError if the node is not a directory."""
        if not node.is_directory:
            raise TypeError('Not a directory')
        self.root = node
        # The root has no parent by definition
        self.root.set_parent(None)

    def set_size(self, size: int) -> None:
        """Store the (estimated) size of the partition."""
        self.size = size

    def set_offset(self, offset: int) -> None:
        """Store the offset from the beginning of the disk."""
        self.offset = offset

    def set_recoverable(self, recoverable: bool) -> None:
        """Store whether the partition contents are also recoverable."""
        self.recoverable = recoverable

    def rebuild(self) -> None:
        """Rebuild the partition structure.

        Every known file is attached to its parent directory. A ghost root is
        created if the root is missing, a ghost directory is created under
        LostFiles for each parent that is referenced but unknown, and files
        without any parent go directly under LostFiles."""
        root_label = 'Root'
        root_key = self.root_id

        if root_key not in self.files:
            self.files[root_key] = File(
                root_key, root_label, 0, is_directory=True, is_ghost=True
            )

        # Iterate over a snapshot of the keys: ghost parents are inserted in
        # self.files while looping, which would otherwise raise RuntimeError
        for key in list(self.files):
            node = self.files[key]

            if node.index == root_key:
                self.set_root(node)
                node.name = root_label
                continue

            parent_key = node.parent
            if parent_key is None:
                # Orphan: goes straight under LostFiles
                container = self.lost
                node.set_parent(-1)
            elif parent_key in self.files:
                container = self.files[parent_key]
            else:
                # The parent is referenced but was never found: fake it
                container = File(parent_key, 'Dir_' + str(parent_key),
                                 0, is_directory=True, is_ghost=True)
                container.set_parent(-1)
                self.files[parent_key] = container
                self.lost.add_child(container)
            container.add_child(node)

    # pylint: disable=R0201
    def additional_repr(self) -> List[Tuple[str, Any]]:
        """Return additional (label, value) pairs to show in the string
        representation. The base implementation adds nothing."""
        return []

    def __repr__(self) -> str:
        if self.size is None:
            size = '??? b'
        else:
            size = readable_bytes(self.size * sector_size)

        if self.offset is None:
            byte_offset = None
        else:
            byte_offset = self.offset * sector_size

        data = [('Offset', self.offset), ('Offset (b)', byte_offset)]
        data += self.additional_repr()

        flag = ' Recoverable,' if self.recoverable else ''
        details = ', '.join(label + ': ' + str(value) for label, value in data)
        return u'Partition (%s, %s, %d files,%s %s)' % (
            self.fs_type, size, len(self.files), flag, details
        )

    def __getitem__(self, index: Union[int, str]) -> File:
        try:
            return self.files[index]
        except KeyError:
            pass
        # Not a regular file: it may still be the special LostFiles directory
        if index == self.lost.index:
            return self.lost
        raise KeyError

    def get(self, index: Union[int, str], default: Optional[File] = None) -> Optional[File]:
        """Get a file or the special LostFiles directory, falling back to
        default when the identifier is unknown."""
        try:
            return self[index]
        except KeyError:
            return default
245 | 
246 | 
class DiskScanner(object):
    """Abstract stub for the implementation of disk scanners.

    A scanner keeps a reference to the disk image it works on. Subclasses
    must implement feed() and get_partitions()."""
    def __init__(self, pointer: Any) -> None:
        self.image: Any = pointer

    def get_image(self) -> Any:
        """Return the image reference.

        This used to be defined twice (once as a method and once as a static
        method taking the scanner), so the second definition silently replaced
        the first and scanner.get_image() raised TypeError. A single regular
        method serves both call styles: scanner.get_image() and
        DiskScanner.get_image(scanner)."""
        return self.image

    def feed(self, index: int, sector: bytes) -> Optional[str]:
        """Feed a new sector.

        Must be implemented by the plug-in; the base class raises
        NotImplementedError."""
        raise NotImplementedError

    def get_partitions(self) -> Dict[int, 'Partition']:
        """Get the partitions that were found.

        Must be implemented by the plug-in; the base class raises
        NotImplementedError."""
        raise NotImplementedError
268 | 


--------------------------------------------------------------------------------
/recuperabit/fs/ntfs.py:
--------------------------------------------------------------------------------
  1 | """NTFS plug-in.
  2 | 
  3 | This plug-in contains the necessary logic to parse traces of NTFS file systems,
  4 | including MFT entries and directory indexes."""
  5 | 
  6 | # RecuperaBit
  7 | # Copyright 2014-2021 Andrea Lazzarotto
  8 | #
  9 | # This file is part of RecuperaBit.
 10 | #
 11 | # RecuperaBit is free software: you can redistribute it and/or modify
 12 | # it under the terms of the GNU General Public License as published by
 13 | # the Free Software Foundation, either version 3 of the License, or
 14 | # (at your option) any later version.
 15 | #
 16 | # RecuperaBit is distributed in the hope that it will be useful,
 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 19 | # GNU General Public License for more details.
 20 | #
 21 | # You should have received a copy of the GNU General Public License
 22 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 23 | 
 24 | 
 25 | import logging
 26 | from collections import Counter
 27 | from typing import Any, Dict, List, Optional, Tuple, Union, Iterator, Set
 28 | 
 29 | from .constants import max_sectors, sector_size
 30 | from .core_types import DiskScanner, File, Partition
 31 | from .ntfs_fmt import (attr_header_fmt, attr_names, attr_nonresident_fmt,
 32 |                       attr_resident_fmt, attr_types_fmt, attribute_list_parser,
 33 |                       boot_sector_fmt, entry_fmt, indx_dir_entry_fmt, indx_fmt,
 34 |                       indx_header_fmt)
 35 | 
 36 | from ..logic import SparseList, approximate_matching
 37 | from ..utils import merge, sectors, unpack
 38 | 
 39 | # Some attributes may appear multiple times
 40 | multiple_attributes: Set[str] = set([
 41 |     '$FILE_NAME',
 42 |     '$DATA',
 43 |     '$INDEX_ROOT',
 44 |     '$INDEX_ALLOCATION',
 45 |     '$BITMAP'
 46 | ])
 47 | 
 48 | # Size of records in sectors
 49 | FILE_size: int = 2
 50 | INDX_size: int = 8
 51 | 
 52 | 
 53 | def best_name(entries: List[Tuple[int, str]]) -> Optional[str]:
 54 |     """Return the best file name available.
 55 | 
 56 |     This function accepts a list of tuples formed by a namespace and a string.
 57 |     In case of more than one choice, it returns preferrably the one in the NTFS
 58 |     namespace (code == 3)."""
 59 |     if len(entries) == 0:
 60 |         return None
 61 | 
 62 |     entries.sort()
 63 |     if entries[-1][0] == 3:
 64 |         name = entries[-1][1]
 65 |     else:
 66 |         name = entries[0][1]
 67 |     return name if len(name) else None
 68 | 
 69 | 
 70 | def parse_mft_attr(attr: bytes) -> Tuple[Dict[str, Any], Optional[str]]:
 71 |     """Parse the contents of a MFT attribute."""
 72 |     header = unpack(attr, attr_header_fmt)
 73 |     attr_type = header['type']
 74 | 
 75 |     if attr_type not in attr_names:
 76 |         return header, None
 77 | 
 78 |     if header['non_resident']:
 79 |         nonresident = unpack(attr, attr_nonresident_fmt)
 80 |         if nonresident['runlist'] is None:
 81 |             nonresident['runlist'] = list()
 82 |         header.update(nonresident)
 83 |     else:
 84 |         resident = unpack(attr, attr_resident_fmt)
 85 |         header.update(resident)
 86 |         offset = header['content_off']
 87 |         content = attr[offset:]
 88 | 
 89 |     name = attr_names[attr_type]
 90 |     if not header['non_resident'] and name in attr_types_fmt:
 91 |         size = header['content_size']
 92 |         data = unpack(content[:size], attr_types_fmt[name])
 93 |         header['content'] = data
 94 | 
 95 |     return header, name
 96 | 
 97 | 
 98 | def _apply_fixup_values(header: Dict[str, Any], entry: bytearray) -> None:
 99 |     """Apply the fixup values to FILE and INDX records."""
100 |     offset = header['off_fixup']
101 |     for i in range(1, header['n_entries']):
102 |         pos = sector_size * i
103 |         entry[pos-2:pos] = entry[offset + 2*i:offset + 2*(i+1)]
104 | 
105 | 
106 | def _attributes_reader(entry: bytes, offset: int) -> Dict[str, Any]:
107 |     """Read every attribute."""
108 |     attributes = {}
109 |     while offset < len(entry) - 16:
110 |         try:
111 |             attr, name = parse_mft_attr(entry[offset:])
112 |         except TypeError:
113 |             # The attribute was broken, we need to terminate here
114 |             return attributes
115 |         attr['dump_offset'] = offset
116 |         if attr['length'] == 0:
117 |             # End of attribute list
118 |             break
119 |         else:
120 |             offset = offset + attr['length']
121 |             if name is None:
122 |                 # Skip broken/unknown attribute
123 |                 continue
124 |             if name not in attributes:
125 |                 if name not in multiple_attributes:
126 |                     attributes[name] = attr
127 |                 else:
128 |                     attributes[name] = [attr]
129 |             else:
130 |                 if name in multiple_attributes:
131 |                     attributes[name].append(attr)
132 |                 else:
133 |                     logging.error('Cannot handle multiple attribute %s', name)
134 |     return attributes
135 | 
136 | 
def parse_file_record(entry: bytes) -> Dict[str, Any]:
    """Parse the contents of a FILE record (MFT entry).

    The returned header always carries a 'valid' flag. A record is rejected
    (valid = False, no 'attributes' key) when its allocated size is missing or
    larger than the data provided, or when the data is shorter than a FILE
    record. Otherwise the fixup values are applied to entry in place and the
    parsed attributes are stored in header['attributes']."""
    header = unpack(entry, entry_fmt)
    available = len(entry)
    allocated = header['size_alloc']

    if (allocated is None
            or allocated > available
            or available < FILE_size * sector_size):
        header['valid'] = False
        return header

    if header['off_fixup'] < 48:
        # Old versions of NTFS don't have a MFT record number.
        header['record_n'] = None

    _apply_fixup_values(header, entry)

    parsed_attributes = _attributes_reader(entry, header['off_first'])
    header['valid'] = True
    header['attributes'] = parsed_attributes
    return header
156 | 
157 | 
def parse_indx_record(entry: bytes) -> Dict[str, Any]:
    """Parse the contents of a INDX record (directory index).

    The fixup values are applied to entry in place, then the directory
    entries between the start and the end of the entry list are decoded.
    Decoding stops at the first entry that looks like a false positive. The
    accepted entries are stored in header['entries'] and the record is marked
    as valid only if at least one entry was accepted."""
    header = unpack(entry, indx_fmt)

    _apply_fixup_values(header, entry)

    # The node header starts at byte 24 and its offsets are relative to it
    node = unpack(entry[24:], indx_header_fmt)
    for key in ('off_start_list', 'off_end_list', 'off_end_buffer'):
        node[key] += 24
    header.update(node)

    accepted = []
    cursor = header['off_start_list']
    while cursor < header['off_end_list']:
        item = unpack(entry[cursor:], indx_dir_entry_fmt)
        if item['content_length']:
            try:
                info = unpack(
                    entry[cursor + 16:],
                    attr_types_fmt['$FILE_NAME']
                )
            except (UnicodeDecodeError, TypeError):
                # Invalid file name or invalid name length
                break
            # Perform checks to avoid false positives
            has_name = info['name'] is not None
            known_namespace = 0 <= info['namespace'] <= 3
            sane_size = info['real_size'] <= info['allocated_size']
            sane_features = (
                info['flags'] != 0 or
                info['parent_seq'] <= 1024
            )
            if not (has_name and known_namespace and
                    sane_size and sane_features):
                break
            item['file_info'] = info
            accepted.append(item)

        step = item['entry_length']
        if not step:
            # A zero-length entry would never advance the cursor
            break
        cursor += step

    header['entries'] = accepted
    header['valid'] = len(accepted) > 0
    return header
202 | 
203 | 
204 | def _integrate_attribute_list(parsed: Dict[str, Any], part: 'NTFSPartition', image: Any) -> None:
205 |     """Integrate missing attributes in the parsed MTF entry."""
206 |     base_record = parsed['record_n']
207 |     attrs = parsed['attributes']
208 |     attr = attrs['$ATTRIBUTE_LIST']
209 | 
210 |     spc = part.sec_per_clus
211 |     if 'runlist' in attr:
212 |         clusters_pos = 0
213 |         entries = []
214 |         size = attr['real_size']
215 |         for entry in attr['runlist']:
216 |             clusters_pos += entry['offset']
217 |             length = min(entry['length'] * spc * sector_size, size)
218 |             size -= length
219 |             real_pos = clusters_pos * spc + part.offset
220 |             dump = sectors(image, real_pos, length, 1)
221 |             entries += attribute_list_parser(dump)
222 |         attr['content'] = {'entries': entries}
223 |     else:
224 |         entries = attr['content']['entries']
225 | 
226 |     # Divide entries by type
227 |     types = set(e['type'] for e in entries)
228 |     entries_by_type = {
229 |         t: set(
230 |             e['file_ref'] for e in entries
231 |             if e['type'] == t and e['file_ref'] is not None
232 |         )
233 |         for t in types
234 |     }
235 |     # Remove completely "local" types or empty lists
236 |     for num in list(entries_by_type):
237 |         files = entries_by_type[num]
238 |         if (
239 |             len(files) == 0 or
240 |             (len(files) == 1 and next(iter(files)) == base_record)
241 |         ):
242 |             del entries_by_type[num]
243 | 
244 |     mft_pos = part.mft_pos
245 |     for num in entries_by_type:
246 |         # Read contents of child entries
247 |         for index in entries_by_type[num]:
248 |             real_pos = mft_pos + index * FILE_size
249 |             dump = sectors(image, real_pos, FILE_size)
250 |             child_parsed = parse_file_record(dump)
251 |             if 'attributes' not in child_parsed:
252 |                 continue
253 |             # Update the main entry (parsed)
254 |             if child_parsed['base_record'] == base_record:
255 |                 child_attrs = child_parsed['attributes']
256 |                 for name in child_attrs:
257 |                     if name in multiple_attributes:
258 |                         try:
259 |                             attrs[name] += child_attrs[name]
260 |                         except KeyError:
261 |                             attrs[name] = child_attrs[name]
262 |                     else:
263 |                         attrs[name] = child_attrs[name]
264 | 
265 | 
266 | class NTFSFile(File):
267 |     """NTFS File."""
268 |     def __init__(self, parsed: Dict[str, Any], offset: Optional[int], is_ghost: bool = False, ads: str = '') -> None:
269 |         index = parsed['record_n']
270 |         ads_suffix = ':' + ads if ads != '' else ads
271 |         if ads != '':
272 |             index = str(index) + ads_suffix
273 |         attrs = parsed['attributes']
274 |         filenames = attrs['$FILE_NAME']
275 |         datas = attrs.get('$DATA', [])
276 | 
277 |         size = None
278 |         for attr in datas:
279 |             if attr['name'] == ads:
280 |                 if 'real_size' in attr:
281 |                     size = attr['real_size']
282 |                 elif not attr['non_resident']:
283 |                     size = attr['content_size']
284 |                 break
285 | 
286 |         filtered = [
287 |             f for f in filenames if 'content' in f and
288 |             f['content'] is not None and
289 |             'name_length' in f['content'] and
290 |             f['content']['name_length'] > 0 and
291 |             f['content']['name'] is not None
292 |         ]
293 |         name = best_name([
294 |             (f['content']['namespace'], f['content']['name'] + ads_suffix)
295 |             for f in filtered
296 |         ])
297 |         hasname = name is not None
298 | 
299 |         if not hasname:
300 |             name = 'File_%s' % index
301 | 
302 |         std_info = attrs.get('$STANDARD_INFORMATION')
303 | 
304 |         is_dir = (parsed['flags'] & 0x02) > 0 and not len(ads)
305 |         is_del = (parsed['flags'] & 0x01) == 0
306 |         File.__init__(self, index, name, size, is_dir, is_del, is_ghost)
307 | 
308 |         time_attribute = None
309 | 
310 |         # Additional attributes
311 |         if hasname:
312 |             first = filtered[0]['content']
313 |             parent_id = first['parent_entry']
314 |             File.set_parent(self, parent_id)
315 |             File.set_offset(self, offset)
316 |             time_attribute = std_info or filtered[0]
317 |         if time_attribute and 'content' in time_attribute:
318 |             File.set_mac(
319 |                 self, time_attribute['content']['modification_time'],
320 |                 time_attribute['content']['access_time'],
321 |                 time_attribute['content']['creation_time'],
322 |             )
323 |         self.ads = ads
324 | 
325 |     @staticmethod
326 |     def _padded_bytes(image: Any, offset: int, size: int) -> bytes:
327 |         dump = sectors(image, offset, size, 1)
328 |         if len(dump) < size:
329 |             logging.warning(
330 |                 'Failed to read byte(s). Padding with 0x00. Offset: {} Size: '
331 |                 '{}'.format(offset, size))
332 |             dump += bytearray(b'\x00' * (size - len(dump)))
333 |         return dump
334 | 
335 |     def content_iterator(self, partition: 'NTFSPartition', image: Any, datas: List[Dict[str, Any]]) -> Iterator[bytes]:
336 |         """Return an iterator for the contents of this file."""
337 |         vcn = 0
338 |         spc = partition.sec_per_clus
339 |         for attr in datas:
340 |             diff = attr['start_VCN'] - vcn
341 |             if diff > 0:
342 |                 # We do not try to fill with zeroes as this might produce huge useless files
343 |                 logging.warning(
344 |                     u'Missing part for {}, {} clusters skipped'.format(self, diff)
345 |                 )
346 |                 vcn += diff
347 |                 yield b''
348 | 
349 |             clusters_pos = 0
350 |             size = attr['real_size']
351 | 
352 |             if 'runlist' not in attr:
353 |                 logging.error(
354 |                     u'Cannot restore {}, missing runlist'.format(self)
355 |                 )
356 |                 break
357 | 
358 |             for entry in attr['runlist']:
359 |                 length = min(entry['length'] * spc * sector_size, size)
360 |                 size -= length
361 |                 # Sparse runlist
362 |                 if entry['offset'] is None:
363 |                     while length > 0:
364 |                         amount = min(max_sectors*sector_size, length)
365 |                         length -= amount
366 |                         yield b'\x00' * amount
367 |                     continue
368 |                 # Normal runlists
369 |                 clusters_pos += entry['offset']
370 |                 real_pos = clusters_pos * spc + partition.offset
371 |                 # Avoid to fill memory with huge blocks
372 |                 offset = 0
373 |                 while length > 0:
374 |                     amount = min(max_sectors*sector_size, length)
375 |                     position = real_pos*sector_size + offset
376 |                     partial = self._padded_bytes(image, position, amount)
377 |                     length -= amount
378 |                     offset += amount
379 |                     yield bytes(partial)
380 |             vcn = attr['end_VCN'] + 1
381 | 
382 |     def get_content(self, partition: 'NTFSPartition') -> Optional[Union[bytes, Iterator[bytes]]]:
383 |         """Extract the content of the file.
384 | 
385 |         This method works by extracting the $DATA attribute."""
386 |         if self.is_ghost:
387 |             logging.error(u'Cannot restore ghost file {}'.format(self))
388 |             return None
389 | 
390 |         image = DiskScanner.get_image(partition.scanner)
391 |         dump = sectors(image, File.get_offset(self), FILE_size)
392 |         parsed = parse_file_record(dump)
393 | 
394 |         if not parsed['valid'] or 'attributes' not in parsed:
395 |             logging.error(u'Invalid MFT entry for {}'.format(self))
396 |             return None
397 |         attrs = parsed['attributes']
398 |         if ('$ATTRIBUTE_LIST' in attrs and
399 |                 partition.sec_per_clus is not None):
400 |             _integrate_attribute_list(parsed, partition, image)
401 |         if '$DATA' not in attrs:
402 |             attrs['$DATA'] = []
403 |         datas = [d for d in attrs['$DATA'] if d['name'] == self.ads]
404 |         if not len(datas):
405 |             if not self.is_directory:
406 |                 logging.error(u'Cannot restore $DATA attribute(s) '
407 |                               'for {}'.format(self))
408 |             return None
409 | 
410 |         # TODO implemented compressed attributes
411 |         for d in datas:
412 |             if d['flags'] & 0x01:
413 |                 logging.error(u'Cannot restore compressed $DATA attribute(s) '
414 |                               'for {}'.format(self))
415 |                 return None
416 |             elif d['flags'] & 0x4000:
417 |                 logging.warning(u'Found encrypted $DATA attribute(s) '
418 |                                 'for {}'.format(self))
419 | 
420 |         # Handle resident file content
421 |         if len(datas) == 1 and not datas[0]['non_resident']:
422 |             single = datas[0]
423 |             start = single['dump_offset'] + single['content_off']
424 |             end = start + single['content_size']
425 |             content = dump[start:end]
426 |             return bytes(content)
427 |         else:
428 |             if partition.sec_per_clus is None:
429 |                 logging.error(u'Cannot restore non-resident $DATA '
430 |                               'attribute(s) for {}'.format(self))
431 |                 return None
432 |             non_resident = sorted(
433 |                 (d for d in attrs['$DATA'] if d['non_resident']),
434 |                 key=lambda x: x['start_VCN']
435 |             )
436 |             if len(non_resident) != len(datas):
437 |                 logging.warning(
438 |                     u'Found leftover resident $DATA attributes for '
439 |                     '{}'.format(self)
440 |                 )
441 |             return self.content_iterator(partition, image, non_resident)
442 | 
443 |     def ignore(self) -> bool:
444 |         """Determine which files should be ignored."""
445 |         return (
446 |             (self.index == '8:$Bad') or
447 |             (self.parent == 11 and self.ads == '$J')    # $UsnJrnl
448 |         )
449 | 
450 | 
class NTFSPartition(Partition):
    """NTFS flavour of Partition, with the extra fields used for recovery."""

    def __init__(self, scanner: 'NTFSScanner', position: Optional[int] = None) -> None:
        Partition.__init__(self, 'NTFS', 5, scanner)
        # Position of the MFT, as provided by the caller
        self.mft_pos: Optional[int] = position
        # Both stay unknown until the scanner works them out
        self.mftmirr_pos: Optional[int] = None
        self.sec_per_clus: Optional[int] = None

    def additional_repr(self) -> List[Tuple[str, Any]]:
        """Give the NTFS-specific (label, value) pairs for the string representation."""
        labels = ('Sec/Clus', 'MFT offset', 'MFT mirror offset')
        values = (self.sec_per_clus, self.mft_pos, self.mftmirr_pos)
        return list(zip(labels, values))
466 | 
467 | 
468 | class NTFSScanner(DiskScanner):
469 |     """NTFS Disk Scanner."""
470 |     def __init__(self, pointer: Any) -> None:
471 |         DiskScanner.__init__(self, pointer)
472 |         self.found_file: Set[int] = set()
473 |         self.parsed_file_review: Dict[int, Dict[str, Any]] = {}
474 |         self.found_indx: Set[int] = set()
475 |         self.parsed_indx: Dict[int, Dict[str, Any]] = {}
476 |         self.indx_list: Optional[SparseList[int]] = None
477 |         self.found_boot: List[int] = []
478 |         self.found_spc: List[int] = []
479 | 
480 |     def feed(self, index: int, sector: bytes) -> Optional[str]:
481 |         """Feed a new sector."""
482 |         # check boot sector
483 |         if sector.endswith(b'\x55\xAA') and b'NTFS' in sector[:8]:
484 |             self.found_boot.append(index)
485 |             return 'NTFS boot sector'
486 | 
487 |         # check file record
488 |         if sector.startswith((b'FILE', b'BAAD')):
489 |             self.found_file.add(index)
490 |             return 'NTFS file record'
491 | 
492 |         # check index record
493 |         if sector.startswith(b'INDX'):
494 |             self.found_indx.add(index)
495 |             return 'NTFS index record'
496 | 
497 |     @staticmethod
498 |     def add_indx_entries(entries: List[Dict[str, Any]], part: NTFSPartition) -> None:
499 |         """Insert new ghost files which were not already found."""
500 |         for rec in entries:
501 |             if (rec['record_n'] not in part.files and
502 |                     rec['$FILE_NAME'] is not None):
503 |                 # Compatibility with the structure of a MFT entry
504 |                 rec['attributes'] = {
505 |                     '$FILE_NAME': [{'content': rec['$FILE_NAME']}]
506 |                 }
507 |                 """Although the structure of r is similar to that of a MFT
508 |                 entry, flags were about the index, not about the file. We
509 |                 don't know if the element is a directory or not, hence we
510 |                 mark it as a file. It can be deduced if it is a directory
511 |                 by looking at the number of children, after the
512 |                 reconstruction."""
513 |                 rec['flags'] = 0x1
514 |                 part.add_file(NTFSFile(rec, None, is_ghost=True))
515 | 
516 |     def add_from_indx_root(self, parsed: Dict[str, Any], part: NTFSPartition) -> None:
517 |         """Add ghost entries to part from INDEX_ROOT attributes in parsed."""
518 |         for attribute in parsed['attributes']['$INDEX_ROOT']:
519 |             if (attribute.get('content') is None or
520 |                     attribute['content'].get('records') is None):
521 |                 continue
522 |             self.add_indx_entries(attribute['content']['records'], part)
523 | 
524 |     def most_likely_sec_per_clus(self) -> List[int]:
525 |         """Determine the most likely value of sec_per_clus of each partition,
526 |         to speed up the search."""
527 |         counter = Counter()
528 |         counter.update(self.found_spc)
529 |         counter.update(2**i for i in range(8))
530 |         return [i for i, _ in counter.most_common()]
531 | 
    def find_boundary(self, part: NTFSPartition, mft_address: int, multipliers: List[int]) -> Tuple[Optional[int], Optional[int]]:
        """Determine the starting sector of a partition with INDX records.

        The runlists of the $INDEX_ALLOCATION attributes of the files in
        `part` tell at which cluster the index records of each directory are
        expected. For every candidate sectors-per-cluster value in
        `multipliers` (tried in the given order) these positions are turned
        into a pattern, which is matched against `self.indx_list`.

        Returns a tuple `(offset, sec_per_clus)`, or `(None, None)` when no
        pattern could be built or no unambiguous match was found."""
        # Parsed MFT entries of the files in part having $INDEX_ALLOCATION
        nodes = (
            self.parsed_file_review[node.offset]
            for node in part.files.values()
            if node.offset in self.parsed_file_review and
            '$INDEX_ALLOCATION' in
            self.parsed_file_review[node.offset]['attributes']
        )

        text_list = self.indx_list
        # NOTE(review): __len__() is called directly instead of len();
        # presumably the value may exceed what len() accepts - confirm
        # against SparseList before changing this.
        width = text_list.__len__()

        # Cluster position (run offsets accumulate) -> owning record number
        base_pattern = {}
        for parsed in nodes:
            for attr in parsed['attributes']['$INDEX_ALLOCATION']:
                clusters_pos = 0
                if 'runlist' not in attr:
                    continue
                runlist = attr['runlist']
                for entry in runlist:
                    clusters_pos += entry['offset']
                    base_pattern[clusters_pos] = parsed['record_n']
        if not len(base_pattern):
            return (None, None)

        results = []
        min_support = 2
        for sec_per_clus in multipliers:
            # Same pattern, with positions expressed in sectors
            pattern = {
                i * sec_per_clus: base_pattern[i]
                for i in base_pattern
            }

            # Shift the pattern so that its first position is 0
            delta = min(pattern)
            normalized = {
                i-delta: pattern[i]
                for i in pattern if i-delta <= width
                # Avoid extremely long, useless patterns
            }
            if len(normalized) < min_support:
                continue

            pattern_list = SparseList(normalized)
            # Unpacked below as [positions, amount, perc], or None
            solution = approximate_matching(
                text_list, pattern_list, mft_address + delta, k=min_support
            )
            if solution is not None:
                # Avoid negative offsets and ambiguous situations
                solution[0] = [i-delta for i in solution[0] if i-delta >= 0]
                if len(solution[0]) == 1:
                    positions, amount, perc = solution
                    results.append((positions, perc, sec_per_clus))
                    # Reasonably, this is a correct match
                    if perc > 0.25 and amount > 256:
                        break
                # Later candidates must match at least as many elements
                min_support = max(min_support, solution[1])

        if len(results):
            # NOTE(review): the sort is ascending, hence the candidate with
            # the LOWEST perc is the one returned - confirm this is intended.
            results.sort(key=lambda r: r[1])
            positions, _, spc = results[0]
            return (positions[0], spc)
        else:
            return (None, None)
596 | 
597 |     def add_from_indx_allocation(self, parsed: Dict[str, Any], part: NTFSPartition) -> None:
598 |         """Add ghost entries to part from INDEX_ALLOCATION attributes in parsed.
599 | 
600 |         This procedure requires that the beginning of the partition has already
601 |         been discovered."""
602 |         read_again = set()
603 |         for attr in parsed['attributes']['$INDEX_ALLOCATION']:
604 |             clusters_pos = 0
605 |             if 'runlist' not in attr:
606 |                 continue
607 |             runlist = attr['runlist']
608 |             for entry in runlist:
609 |                 clusters_pos += entry['offset']
610 |                 real_pos = clusters_pos * part.sec_per_clus + part.offset
611 |                 if real_pos in self.parsed_indx:
612 |                     content = self.parsed_indx[real_pos]
613 |                     # Check if the entry matches
614 |                     if parsed['record_n'] == content['parent']:
615 |                         discovered = set(
616 |                             c for c in content['children']
617 |                             if c not in part.files
618 |                         )
619 |                         # If there are new files, read the INDX again
620 |                         if len(discovered):
621 |                             read_again.add(real_pos)
622 | 
623 |         img = DiskScanner.get_image(self)
624 |         for position in read_again:
625 |             dump = sectors(img, position, INDX_size)
626 |             entries = parse_indx_record(dump)['entries']
627 |             self.add_indx_entries(entries, part)
628 | 
629 |     def add_from_attribute_list(self, parsed: Dict[str, Any], part: NTFSPartition, offset: int) -> None:
630 |         """Add additional entries to part from attributes in ATTRIBUTE_LIST.
631 | 
632 |         Files with many attributes may have additional attributes not in the
633 |         MFT entry. When this happens, it is necessary to find the other
634 |         attributes. They may contain additional information, such as $DATA
635 |         attributes for ADS. This procedure requires that the beginning of the
636 |         partition has already been discovered."""
637 |         image = DiskScanner.get_image(self)
638 |         _integrate_attribute_list(parsed, part, image)
639 | 
640 |         attrs = parsed['attributes']
641 |         if '$DATA' in attrs:
642 |             for attribute in attrs['$DATA']:
643 |                 ads_name = attribute['name']
644 |                 if ads_name and len(ads_name):
645 |                     part.add_file(NTFSFile(parsed, offset, ads=ads_name))
646 | 
647 |     def add_from_mft_mirror(self, part: NTFSPartition) -> None:
648 |         """Fix the first file records using the MFT mirror."""
649 |         img = DiskScanner.get_image(self)
650 |         mirrpos = part.mftmirr_pos
651 |         if mirrpos is None:
652 |             return
653 | 
654 |         for i in range(4):
655 |             node = part.get(i)
656 |             if node is None or node.is_ghost:
657 |                 position = mirrpos + i * FILE_size
658 |                 dump = sectors(img, position, FILE_size)
659 |                 parsed = parse_file_record(dump)
660 |                 if parsed['valid'] and '$FILE_NAME' in parsed['attributes']:
661 |                     node = NTFSFile(parsed, position)
662 |                     part.add_file(node)
663 |                     logging.info(
664 |                         u'Repaired MFT entry #%s - %s in partition at offset '
665 |                         '%s from backup', node.index, node.name, part.offset
666 |                     )
667 | 
668 |     def finalize_reconstruction(self, part: NTFSPartition) -> None:
669 |         """Finish information gathering from a file.
670 | 
671 |         This procedure requires that the beginning of the
672 |         partition has already been discovered."""
673 |         logging.info('Adding extra attributes from $ATTRIBUTE_LIST')
674 |         # Select elements with many attributes
675 |         many_attributes_it = (
676 |             node for node in list(part.files.values())
677 |             if node.offset in self.parsed_file_review and
678 |             '$ATTRIBUTE_LIST' in
679 |             self.parsed_file_review[node.offset]['attributes']
680 |         )
681 |         for node in many_attributes_it:
682 |             parsed = self.parsed_file_review[node.offset]
683 |             self.add_from_attribute_list(parsed, part, node.offset)
684 | 
685 |         logging.info('Adding ghost entries from $INDEX_ALLOCATION')
686 |         # Select only elements with $INDEX_ALLOCATION
687 |         allocation_it = (
688 |             node for node in list(part.files.values())
689 |             if node.offset in self.parsed_file_review and
690 |             '$INDEX_ALLOCATION' in
691 |             self.parsed_file_review[node.offset]['attributes']
692 |         )
693 |         for node in allocation_it:
694 |             parsed = self.parsed_file_review[node.offset]
695 |             self.add_from_indx_allocation(parsed, part)
696 | 
    def get_partitions(self) -> Dict[int, NTFSPartition]:
        """Build the partitions out of the sectors collected by feed().

        The work goes through these phases:

        1. parse the file records and group them by the position their
           entry 0 would have;
        2. parse the INDX records and build `self.indx_list`;
        3. read the boot sectors to set the geometry of matching partitions;
        4. repair the first entries from the MFT mirror and drop the bogus
           partitions generated by the mirrors themselves;
        5. find the geometry of the remaining partitions and finalize the
           reconstruction of those with a known offset;
        6. merge the pieces of fragmented MFTs.

        Returns a dict mapping the position of MFT entry 0 to its
        NTFSPartition."""
        partitioned_files: Dict[int, NTFSPartition] = {}
        img = DiskScanner.get_image(self)

        logging.info('Parsing MFT entries')
        for position in self.found_file:
            dump = sectors(img, position, FILE_size)
            parsed = parse_file_record(dump)
            attrs = parsed.get('attributes', {})
            if not parsed['valid'] or '$FILE_NAME' not in attrs:
                continue

            # Partition files based on corresponding entry 0
            if parsed['record_n'] is not None:
                # Where entry 0 would be, if this record belongs to its MFT
                offset = position - parsed['record_n'] * FILE_size
                try:
                    part = partitioned_files[offset]
                except KeyError:
                    partitioned_files[offset] = NTFSPartition(self, offset)
                    part = partitioned_files[offset]
                attributes = parsed['attributes']
                if '$DATA' in attributes:
                    # Each named $DATA attribute (ADS) becomes a file
                    for attribute in attributes['$DATA']:
                        ads_name = attribute['name']
                        if ads_name:
                            part.add_file(NTFSFile(parsed, position, ads=ads_name))
                """Add the file again, just in case the $DATA attributes are
                missing."""
                part.add_file(NTFSFile(parsed, position))

                # Handle information deduced from INDX records
                if '$INDEX_ROOT' in attrs:
                    self.add_from_indx_root(parsed, part)
                # Save for later use
                if '$INDEX_ALLOCATION' in attrs or '$ATTRIBUTE_LIST' in attrs:
                    self.parsed_file_review[position] = parsed
            # TODO [Future] handle files for which there is no record_number

        # Parse INDX records
        logging.info('Parsing INDX records')
        for position in self.found_indx:
            dump = sectors(img, position, INDX_size)
            parsed = parse_indx_record(dump)
            if not parsed['valid']:
                continue

            entries = parsed['entries']
            # The parent is the directory most entries refer to
            referred = (el['file_info']['parent_entry'] for el in entries)
            # NOTE(review): an empty `entries` would raise IndexError here;
            # presumably a valid record always has entries - confirm.
            record_n = Counter(referred).most_common(1)[0][0]
            # Save references for future access
            self.parsed_indx[position] = {
                'parent': record_n,
                'children': set(el['record_n'] for el in entries)
            }

        # Position of each INDX record -> record number of its parent
        indx_info = self.parsed_indx
        self.indx_list = SparseList({
            pos: indx_info[pos]['parent'] for pos in indx_info
        })

        # Extract boot record information
        logging.info('Reading boot sectors')
        for index in self.found_boot:
            dump = sectors(img, index, 1)
            parsed = unpack(dump, boot_sector_fmt)
            sec_per_clus = parsed['sectors_per_cluster']
            self.found_spc.append(sec_per_clus)
            # MFT addresses are in clusters, relative to the partition start
            relative = parsed['MFT_addr'] * sec_per_clus
            mirr_relative = parsed['MFTmirr_addr'] * sec_per_clus
            part = None
            # Look for matching partition, either as boot sector or backup
            for delta in (0, parsed['sectors']):
                # First pass keeps index as is, the second one moves it back
                # by the number of sectors of the partition
                index = index - delta
                address = relative + index
                # Set partition as recoverable
                if address in partitioned_files:
                    part = partitioned_files[address]
                    part.set_recoverable(True)
                    part.set_size(parsed['sectors'])
                    part.offset = index
                    part.sec_per_clus = sec_per_clus
                    part.mftmirr_pos = mirr_relative + index
                    break

        # Repair MFT if the mirror is available
        for address in list(partitioned_files):
            # This could have been deleted in a previous iteration
            if address not in partitioned_files:
                continue
            part = partitioned_files[address]
            mirrpos = part.mftmirr_pos
            if mirrpos is None:
                entry = part.get(1)     # $MFTMirr
                if entry is None:
                    continue
                else:
                    # Infer MFT mirror position
                    dump = sectors(img, entry.offset, FILE_size)
                    mirror = parse_file_record(dump)
                    if (mirror['valid'] and 'attributes' in mirror and
                            '$DATA' in mirror['attributes']):
                        datas = mirror['attributes']['$DATA']
                        if (len(datas) == 1 and datas[0]['non_resident'] and
                                'runlist' in datas[0] and
                                len(datas[0]['runlist']) > 0 and
                                'offset' in datas[0]['runlist'][0]):
                            relative = datas[0]['runlist'][0]['offset']
                            spc = part.sec_per_clus
                            if spc is None:
                                continue
                            mirrpos = relative * spc + part.offset
                            part.mftmirr_pos = mirrpos

            self.add_from_mft_mirror(part)

            # Remove bogus partitions generated by MFT mirrors
            if mirrpos in partitioned_files:
                bogus = partitioned_files[mirrpos]
                # Check if it looks like a MFT mirror
                if len(bogus.files) == 4 and max(bogus.files) < 4:
                    logging.debug(
                        'Dropping bogus NTFS partition with MFT '
                        'position %d generated by MFT mirror of '
                        'partition at offset %d',
                        bogus.mft_pos, part.offset
                    )
                    partitioned_files.pop(mirrpos)

        # Acquire additional information from $INDEX_ALLOCATION
        logging.info('Finding partition geometry')
        most_likely = self.most_likely_sec_per_clus()
        for address in partitioned_files:
            part = partitioned_files[address]
            if part.offset is None:
                # Find geometry by approximate string matching
                offset, sec_per_clus = self.find_boundary(
                    part, address, most_likely
                )
                if offset is not None:
                    part.set_recoverable(True)
                    part.offset = offset
                    part.sec_per_clus = sec_per_clus
            else:
                offset, sec_per_clus = part.offset, part.sec_per_clus
            if offset is not None:
                logging.info(
                    'Finalizing MFT reconstruction of partition at offset %i',
                    offset
                )
                self.finalize_reconstruction(part)

        # Merge pieces from fragmented MFT
        for address in list(partitioned_files):
            # This could have been deleted in a previous iteration
            if address not in partitioned_files:
                continue
            part = partitioned_files[address]
            entry = part.get(0)     # $MFT
            if entry is None or part.sec_per_clus is None:
                continue
            dump = sectors(img, entry.offset, FILE_size)
            parsed = parse_file_record(dump)
            if not parsed['valid'] or 'attributes' not in parsed:
                continue

            if '$ATTRIBUTE_LIST' in parsed['attributes']:
                _integrate_attribute_list(parsed, part, img)
            attrs = parsed['attributes']
            if '$DATA' not in attrs or len(attrs['$DATA']) < 1:
                continue

            if 'runlist' not in attrs['$DATA'][0]:
                continue
            runlist = attrs['$DATA'][0]['runlist']
            # More than one run means that the MFT is not contiguous
            if len(runlist) > 1:
                logging.info(
                    'MFT for partition at offset %d is fragmented. Trying to '
                    'merge %d parts...', part.offset, len(runlist)
                )
                clusters_pos = runlist[0]['offset']
                spc = part.sec_per_clus
                # Clusters of the MFT preceding the run being examined
                size = runlist[0]['length']
                for entry in runlist[1:]:
                    clusters_pos += entry['offset']
                    real_pos = clusters_pos * part.sec_per_clus + part.offset
                    # Key under which the records of this run were grouped:
                    # where entry 0 would be if the MFT were contiguous
                    position = real_pos - size*spc
                    if position in partitioned_files:
                        piece = partitioned_files[position]
                        if piece.offset is None or piece.offset == part.offset:
                            # Records that both sides hold as real files
                            conflicts = [
                                i for i in piece.files if
                                not piece.files[i].is_ghost and
                                i in part.files and
                                not part.files[i].is_ghost
                            ]
                            if not len(conflicts):
                                logging.debug(
                                    'Merging partition with MFT offset %d into'
                                    ' %s (fragmented MFT)', piece.mft_pos, part
                                )
                                # Merge the partitions
                                merge(part, piece)
                                # Remove the fragment
                                partitioned_files.pop(position)
                            else:
                                logging.debug(
                                    'NOT merging partition with MFT offset %d into'
                                    ' %s (possible fragmented MFT) due to conflicts', piece.mft_pos, part
                                )
                    size += entry['length']

        return partitioned_files
910 | 


--------------------------------------------------------------------------------
/recuperabit/fs/ntfs_fmt.py:
--------------------------------------------------------------------------------
  1 | """NTFS format descriptors."""
  2 | 
  3 | # RecuperaBit
  4 | # Copyright 2014-2021 Andrea Lazzarotto
  5 | #
  6 | # This file is part of RecuperaBit.
  7 | #
  8 | # RecuperaBit is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # RecuperaBit is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 20 | 
 21 | 
 22 | from datetime import datetime, timezone, timedelta
 23 | 
 24 | from ..utils import printable, unpack
 25 | 
 26 | 
# Base date of Microsoft filetime values, see windows_time()
time_start = datetime(1601, 1, 1, tzinfo=timezone.utc)
 28 | 
 29 | def printable_name(name):
 30 |     """Return a printable name decoded in UTF-16."""
 31 |     decoded = []
 32 |     parts = (name[i:i+2] for i in range(0, len(name), 2))
 33 |     for part in parts:
 34 |         try:
 35 |             decoded.append(part.decode('utf-16'))
 36 |         except UnicodeDecodeError:
 37 |             decoded.append('\x00')
 38 |     joined = ''.join(decoded)
 39 |     # basic check for false positives
 40 |     if '\x00\x00\x00' in joined:
 41 |         return None
 42 |     return printable(joined, '#')
 43 | 
 44 | 
 45 | def windows_time(timestamp):
 46 |     """Convert a date-time value from Microsoft filetime to UTC."""
 47 |     try:
 48 |         value = int.from_bytes(timestamp, byteorder='little', signed=False)
 49 |         converted = time_start + timedelta(milliseconds = value//10000)
 50 |         return converted
 51 |     except (ValueError, OverflowError, OSError):
 52 |         return None
 53 | 
 54 | 
 55 | def index_entries(dump):
 56 |     """Interpret the entries of an index."""
 57 |     offset = 0
 58 |     entries = []
 59 |     while offset < len(dump):
 60 |         parsed = unpack(dump[offset:], indx_dir_entry_fmt)
 61 |         filename = parsed['$FILE_NAME']
 62 |         entry_length = parsed['entry_length']
 63 |         valid_length = entry_length > 0
 64 |         has_name = 'name' in filename
 65 |         valid_name = has_name and len(filename['name']) > 0
 66 |         if valid_length and valid_name:
 67 |             if parsed['content_length']:
 68 |                 entries.append(parsed)
 69 |             offset += entry_length
 70 |         else:
 71 |             break
 72 |         # Last entry
 73 |         if parsed['flags'] & 0x2:
 74 |             break
 75 |         # TODO handle carving of remnant entries in slack space
 76 |     return entries
 77 | 
 78 | 
 79 | def index_root_parser(dump):
 80 |     """Parse the entries contained in a $INDEX_ROOT attribute."""
 81 |     header = unpack(dump, indx_header_fmt)
 82 |     offset = header['off_start_list']
 83 |     entries = index_entries(dump[offset:])
 84 |     return entries
 85 | 
 86 | 
 87 | def runlist_unpack(runlist):
 88 |     """Parse an attribute runlist."""
 89 |     pieces = []
 90 |     while len(runlist) and runlist[0] != 0:
 91 |         off_bytes, len_bytes = divmod(runlist[0], 2**4)
 92 |         end = len_bytes + off_bytes
 93 |         decoded = unpack(runlist, [
 94 |             ('length', ('i', 1, len_bytes)),
 95 |             ('offset', ('+i', len_bytes + 1, end))
 96 |         ])
 97 |         if decoded['length'] is None or decoded['offset'] is None:
 98 |             break
 99 |         pieces.append(decoded)
100 |         runlist = runlist[end+1:]
101 |     return pieces
102 | 
103 | 
def attribute_list_parser(dump):
    """Parse entries contained in a $ATTRIBUTE_LIST attribute.

    Entries are read back to back, each one telling its own length. The
    parsing ends with the data or at the first entry whose length is zero
    or missing."""
    layout = [
        ('type', ('i', 0, 3)),
        ('length', ('i', 4, 5)),
        ('name_length', ('i', 6, 6)),
        ('name_off', ('i', 7, 7)),
        ('start_VCN', ('i', 8, 15)),
        ('file_ref', ('i', 16, 19)),
        ('id', ('i', 24, 24))
    ]
    entries = []
    while len(dump):
        entry = unpack(dump, layout)
        step = entry['length']
        # Check either if the length is 0 or if it is None
        if not step:
            break
        entries.append(entry)
        dump = dump[step:]
    return entries
124 | 
125 | 
def try_filename(dump):
    """Try to parse a $FILE_NAME attribute.

    Returns the parsed fields, or an empty dict if the attribute is broken
    and unpacking it raises TypeError."""
    try:
        # The result must be returned: index_entries() looks for 'name' in it
        return unpack(dump, attr_types_fmt['$FILE_NAME'])
    except TypeError:   # Broken attribute
        return {}
132 | 
# Format table in the shape taken by unpack(): every item is
# (label, (formatter, first_byte, last_byte)).
# NOTE(review): the labels (fixup offset, LSN, link count, base record)
# suggest the header of an NTFS MFT file record - confirm against callers.
entry_fmt = [
    ('signature', ('s', 0, 3)),
    ('off_fixup', ('i', 4, 5)),
    ('n_entries', ('i', 6, 7)),
    ('LSN', ('i', 8, 15)),
    ('seq_val', ('i', 16, 17)),
    ('link_count', ('i', 18, 19)),
    ('off_first', ('i', 20, 21)),
    ('flags', ('i', 22, 23)),
    ('size_used', ('i', 24, 27)),
    ('size_alloc', ('i', 28, 31)),
    ('base_record', ('i', 32, 35)),
    ('record_n', ('i', 44, 47))   # Available only for NTFS >= 3.1
]
147 | 
# Fields extracted from a boot sector, as (label, (formatter, first_byte,
# last_byte)) items; the last item covers bytes 510-511 of the sector.
# NOTE(review): units of MFT_addr / MFTmirr_addr (clusters vs sectors) are
# not visible here - confirm against the code using this table.
boot_sector_fmt = [
    ('OEM_name', ('s', 3, 10)),
    ('bytes_per_sector', ('i', 11, 12)),
    ('sectors_per_cluster', ('i', 13, 13)),
    ('sectors', ('i', 40, 47)),
    ('MFT_addr', ('i', 48, 55)),
    ('MFTmirr_addr', ('i', 56, 63)),
    ('MFT_entry_size', ('i', 64, 64)),
    ('idx_size', ('i', 68, 68)),
    ('signature', ('s', 510, 511))
]
159 | 
# Leading fields of an index record, as (label, (formatter, first_byte,
# last_byte)) items. The layout matches the first five items of entry_fmt.
# NOTE(review): presumably the header of an INDX record - confirm.
indx_fmt = [
    ('signature', ('s', 0, 3)),
    ('off_fixup', ('i', 4, 5)),
    ('n_entries', ('i', 6, 7)),
    ('LSN', ('i', 8, 15)),
    ('seq_val', ('i', 16, 17))
]
167 | 
# Index header: four consecutive 4-byte integers. index_root_parser decodes
# it from the start of its buffer and uses 'off_start_list' as the offset of
# the first index entry.
indx_header_fmt = [
    ('off_start_list', ('i', 0, 3)),
    ('off_end_list', ('i', 4, 7)),
    ('off_end_buffer', ('i', 8, 11)),
    ('flags', ('i', 12, 15))
]
174 | 
# Layout of a single directory index entry.
# The '$FILE_NAME' item starts at byte 16 and its upper bound is computed
# from the already decoded 'content_length' (a missing content_length counts
# as 0); the selected bytes are parsed with try_filename.
indx_dir_entry_fmt = [
    ('record_n', ('i', 0, 3)),
    ('entry_length', ('i', 8, 9)),
    ('content_length', ('i', 10, 11)),
    ('flags', ('i', 12, 15)),
    ('$FILE_NAME', (
        try_filename, 16, lambda r: 15 + (
            r['content_length'] if r['content_length'] is not None else 0
        )
    ))
    # The following is not very useful so it's not worth computing
    # 'VCN_child', (
    #     lambda s: int(str(s[::-1]).encode('hex'),16) if len(s) else None,
    #     lambda r: r['entry_length'] - (8 if r['flags'] & 0x1 else 0),
    #     lambda r: r['entry_length']
    # )
]
192 | 
# Header fields shared by attributes, as (label, (formatter, first_byte,
# last_byte)) items.
# The 'name' item is located through the decoded 'name_off' and spans
# name_length * 2 bytes; it is converted with printable_name.
# NOTE(review): two bytes per character suggests UTF-16 names - confirm in
# printable_name.
attr_header_fmt = [
    ('type', ('i', 0, 3)),
    ('length', ('i', 4, 7)),
    ('non_resident', ('i', 8, 8)),
    ('name_length', ('i', 9, 9)),
    ('name_off', ('i', 10, 11)),
    ('flags', ('i', 12, 13)),
    ('id', ('i', 14, 15)),
    ('name', (
        printable_name,
        lambda r: r['name_off'],
        lambda r: r['name_off'] + r['name_length']*2 - 1
    ))
]
207 | 
# Extra fields starting at byte 16: size and offset of the content.
# NOTE(review): presumably applied to resident attributes, after the common
# header described by attr_header_fmt - confirm against callers.
attr_resident_fmt = [
    ('content_size', ('i', 16, 19)),
    ('content_off', ('i', 20, 21))
]
212 | 
# Extra fields starting at byte 16, including the runlist.
# The 'runlist' item is decoded by runlist_unpack from the bytes between the
# decoded 'runlist_offset' and 'allocated_size'.
# NOTE(review): using allocated_size as upper bound looks like a deliberately
# generous limit, since runlist_unpack stops by itself at a zero header byte
# - confirm.
attr_nonresident_fmt = [
    ('start_VCN', ('i', 16, 23)),
    ('end_VCN', ('i', 24, 31)),
    ('runlist_offset', ('i', 32, 33)),
    ('compression_unit', ('i', 34, 35)),
    ('allocated_size', ('i', 40, 47)),
    ('real_size', ('i', 48, 55)),
    ('initialized_size', ('i', 56, 63)),
    ('runlist', (
        runlist_unpack,
        lambda r: r['runlist_offset'],
        lambda r: r['allocated_size']
    ))
]
227 | 
# Numeric attribute type code -> attribute name. The names of the attributes
# whose content is decoded are also the keys of attr_types_fmt.
attr_names = {
    16: '$STANDARD_INFORMATION',
    32: '$ATTRIBUTE_LIST',
    48: '$FILE_NAME',
    80: '$SECURITY_DESCRIPTOR',
    96: '$VOLUME_NAME',
    112: '$VOLUME_INFORMATION',
    128: '$DATA',
    144: '$INDEX_ROOT',
    160: '$INDEX_ALLOCATION',
    176: '$BITMAP'
}
240 | 
# This structure extracts only interesting attributes.
# Attribute name -> layout of the attribute content, as
# (label, (formatter, first_byte, last_byte)) items. Timestamps are decoded
# with windows_time, names with printable_name.
attr_types_fmt = {
    '$STANDARD_INFORMATION': [
        ('creation_time', (windows_time, 0, 7)),
        ('modification_time', (windows_time, 8, 15)),
        ('MFT_modification_time', (windows_time, 16, 23)),
        ('access_time', (windows_time, 24, 31)),
        ('flags', ('i', 32, 35))
    ],
    '$ATTRIBUTE_LIST': [
        # Fixed upper bound of 1024: longer lists are not read past it
        ('entries', (attribute_list_parser, 0, 1024))
    ],
    '$FILE_NAME': [
        ('parent_entry', ('i', 0, 5)),
        ('parent_seq', ('i', 6, 7)),
        ('creation_time', (windows_time, 8, 15)),
        ('modification_time', (windows_time, 16, 23)),
        ('MFT_modification_time', (windows_time, 24, 31)),
        ('access_time', (windows_time, 32, 39)),
        ('allocated_size', ('i', 40, 47)),
        ('real_size', ('i', 48, 55)),
        ('flags', ('i', 56, 59)),
        ('name_length', ('i', 64, 64)),
        ('namespace', ('i', 65, 65)),
        # The name starts at byte 66 and takes name_length * 2 bytes
        ('name', (printable_name, 66, lambda r: r['name_length']*2 + 65))
    ],
    '$INDEX_ROOT': [
        ('attr_type', ('i', 0, 3)),
        ('sorting_rule', ('i', 4, 7)),
        ('record_bytes', ('i', 8, 11)),
        ('record_clusters', ('i', 12, 12)),
        # Bytes from 16 up to the decoded record_bytes go to index_root_parser
        ('records', (index_root_parser, 16, lambda r: r['record_bytes']))
    ]
}
275 | 


--------------------------------------------------------------------------------
/recuperabit/logic.py:
--------------------------------------------------------------------------------
  1 | """Filesystem-independent algorithmic logic."""
  2 | 
  3 | # RecuperaBit
  4 | # Copyright 2014-2021 Andrea Lazzarotto
  5 | #
  6 | # This file is part of RecuperaBit.
  7 | #
  8 | # RecuperaBit is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # RecuperaBit is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 20 | 
 21 | 
 22 | import bisect
 23 | import codecs
 24 | import logging
 25 | import os
 26 | import os.path
 27 | import sys
 28 | import time
 29 | import types
 30 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, Iterator, Set, Tuple, TypeVar, Generic
 31 | 
 32 | T = TypeVar('T')
 33 | 
 34 | if TYPE_CHECKING:
 35 |     from .fs.core_types import File, Partition
 36 | 
 37 | 
 38 | class SparseList(Generic[T]):
 39 |     """List which only stores values at some places."""
 40 |     def __init__(self, data: Optional[Dict[int, T]] = None, default: Optional[T] = None) -> None:
 41 |         self.keys: List[int] = []  # This is always kept in order
 42 |         self.elements: Dict[int, T] = {}
 43 |         self.default: Optional[T] = default
 44 |         if data is not None:
 45 |             self.keys = sorted(data)
 46 |             self.elements.update(data)
 47 | 
 48 |     def __len__(self) -> int:
 49 |         try:
 50 |             return self.keys[-1] + 1
 51 |         except IndexError:
 52 |             return 0
 53 | 
 54 |     def __getitem__(self, index: int) -> Optional[T]:
 55 |         return self.elements.get(index, self.default)
 56 | 
 57 |     def __setitem__(self, index: int, item: T) -> None:
 58 |         if item == self.default:
 59 |             if index in self.elements:
 60 |                 del self.elements[index]
 61 |                 del self.keys[bisect.bisect_left(self.keys, index)]
 62 |         else:
 63 |             if index not in self.elements:
 64 |                 bisect.insort(self.keys, index)
 65 |             self.elements[index] = item
 66 | 
 67 |     def __contains__(self, element: T) -> bool:
 68 |         return element in self.elements.values()
 69 | 
 70 |     def __iter__(self) -> Iterator[int]:
 71 |         return self.keys.__iter__()
 72 | 
 73 |     def __repr__(self) -> str:
 74 |         elems = []
 75 |         prevk = 0
 76 |         if len(self.elements) > 0:
 77 |             k = self.keys[0]
 78 |             elems.append(str(k) + ' -> ' + repr(self.elements[k]))
 79 |             prevk = self.keys[0]
 80 |         for i in range(1, len(self.elements)):
 81 |             nextk = self.keys[i]
 82 |             if nextk <= prevk + 2:
 83 |                 while prevk < nextk - 1:
 84 |                     elems.append('__')
 85 |                     prevk += 1
 86 |                 elems.append(repr(self.elements[nextk]))
 87 |             else:
 88 |                 elems.append('\n... ' + str(nextk) + ' -> ' +
 89 |                              repr(self.elements[nextk]))
 90 |             prevk = nextk
 91 | 
 92 |         return '[' + ', '.join(elems) + ']'
 93 | 
 94 |     def iterkeys(self) -> Iterator[int]:
 95 |         """An iterator over the keys of actual elements."""
 96 |         return self.__iter__()
 97 | 
 98 |     def iterkeys_rev(self) -> Iterator[int]:
 99 |         """An iterator over the keys of actual elements (reversed)."""
100 |         i = len(self.keys)
101 |         while i > 0:
102 |             i -= 1
103 |             yield self.keys[i]
104 | 
105 |     def itervalues(self) -> Iterator[T]:
106 |         """An iterator over the elements."""
107 |         for k in self.keys:
108 |             yield self.elements[k]
109 | 
110 |     def wipe_interval(self, bottom: int, top: int) -> None:
111 |         """Remove elements between bottom and top."""
112 |         new_keys = set()
113 |         if bottom > top:
114 |             for k in self.keys:
115 |                 if top <= k < bottom:
116 |                     new_keys.add(k)
117 |                 else:
118 |                     del self.elements[k]
119 |         else:
120 |             for k in self.keys:
121 |                 if bottom <= k < top:
122 |                     del self.elements[k]
123 |                 else:
124 |                     new_keys.add(k)
125 |         self.keys = sorted(new_keys)
126 | 
127 | 
def preprocess_pattern(pattern: 'SparseList[T]') -> Dict[T, List[int]]:
    """Preprocess a SparseList for approximate string matching.

    This function performs preprocessing for the Baeza-Yates--Perleberg
    fast and practical approximate string matching algorithm.

    The result maps every element of the pattern to the list of its
    distances from the end of the pattern (length - index - 1), one for each
    index at which the element occurs, in iteration order."""
    result: Dict[T, List[int]] = {}
    length = len(pattern)
    for k in pattern:
        # Every occurrence is recorded. Indexes are unique, so the offsets
        # are too; comparing the element itself with the last offset (as
        # done previously) could wrongly skip an occurrence whenever the
        # elements are integers.
        result.setdefault(pattern[k], []).append(length - k - 1)
    return result
142 | 
143 | 
def approximate_matching(records: SparseList[T], pattern: SparseList[T], stop: int, k: int = 1) -> Optional[List[Union[Set[int], int, float]]]:
    """Look for the best alignment of a pattern over some records.

    Both arguments are SparseLists; the pattern is preprocessed here, as
    required by the Baeza-Yates--Perleberg algorithm. Records located after
    position stop + len(pattern) - 1 are not examined. k is the minimum
    number of matching elements (support) for a position to be reported.

    Returns None when either list is empty or no position reaches the
    required support. Otherwise returns a list with the set of best
    positions, their weight and the weight divided by the number of elements
    of the pattern."""

    window = pattern.__len__()
    if window == 0 or records.__len__() == 0:
        return None

    offsets_for = preprocess_pattern(pattern)
    tally: SparseList[int] = SparseList(default=0)
    best_positions: Set[int] = set()

    previous = 0   # index of the record examined in the previous iteration
    last_useful = stop + window - 1

    # Only indexes that actually hold a record are visited
    for position in records:
        if position > last_useful:
            break

        # Reset the counters of the places that were skipped
        tally.wipe_interval(previous % window, position % window)
        previous = position

        for shift in set(offsets_for.get(records[position], [])):
            slot = (position + shift) % window
            tally[slot] += 1
            score = tally[slot]
            candidate = position + shift - window + 1
            if score == k:
                best_positions.add(candidate)
            elif score > k:
                # Better weight found: earlier candidates are discarded
                k = score
                best_positions = set([candidate])

    if not len(best_positions):
        return None

    pattern_size = len(pattern.keys)
    logging.debug(
        'Found MATCH in positions {} '
        'with weight {} ({}%)'.format(
            best_positions, k,
            k * 100.0 / pattern_size
        )
    )
    return [best_positions, k, float(k) / pattern_size]
197 | 
198 | 
def makedirs(path: str) -> bool:
    """Make directories if they do not exist.

    Returns True when the directories were created or when something already
    exists at `path`. Any other OSError is logged and False is returned.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # The directory already exists = no problem
        pass
    except OSError as error:
        logging.error(error)
        return False
    return True
210 | 
211 | 
def recursive_restore(node: 'File', part: 'Partition', outputdir: str, make_dirs: bool = True) -> None:
    """Restore a directory structure starting from a file node.

    The content of `node` is written below `outputdir`, at the path given by
    the full path of its parent joined with its name. Directories (nodes
    flagged as such or having children) are created and their non-ignored
    children restored recursively. When a directory also has data content,
    the data is saved next to it with the '_recuperabit_content' suffix.

    make_dirs tells whether the parent directory has to be created first; it
    is disabled in recursive calls because the parent was just restored.

    Errors are logged and never raised: a failure to create a directory
    stops the restore of that node, other failures only skip the step.
    """
    parent_path = str(
        part[node.parent].full_path(part) if node.parent is not None
        else ''
    )

    file_path = os.path.join(parent_path, node.name)
    restore_parent_path = os.path.join(outputdir, parent_path)
    restore_path = os.path.join(outputdir, file_path)

    try:
        content = node.get_content(part)
    except NotImplementedError:
        logging.error(u'Restore of #%s %s is not supported', node.index,
                      file_path)
        content = None

    if make_dirs:
        if not makedirs(restore_parent_path):
            return

    is_directory = node.is_directory or len(node.children) > 0

    if is_directory:
        logging.info(u'Restoring #%s %s', node.index, file_path)
        if not makedirs(restore_path):
            return

    if is_directory and content is not None:
        logging.warning(u'Directory %s has data content!', file_path)
        restore_path += '_recuperabit_content'

    try:
        if content is not None:
            logging.info(u'Restoring #%s %s', node.index, file_path)
            # Builtin open() behaves like codecs.open() when no encoding is
            # given, and codecs.open() is deprecated in recent Python
            with open(restore_path, 'wb') as outfile:
                if isinstance(content, types.GeneratorType):
                    for piece in content:
                        outfile.write(piece)
                else:
                    outfile.write(content)
        elif not is_directory:
            # Empty file
            with open(restore_path, 'wb'):
                pass
    except IOError:
        logging.error(u'IOError when trying to create %s', restore_path)

    try:
        # Restore Modification + Access time
        mtime, atime, _ = node.get_mac()
        if mtime is not None:
            if atime is None:
                # os.utime needs both values: reuse the modification time
                atime = mtime
            atime = time.mktime(atime.astimezone().timetuple())
            mtime = time.mktime(mtime.astimezone().timetuple())
            os.utime(restore_path, (atime, mtime))
    except IOError:
        logging.error(u'IOError while setting atime and mtime of %s', restore_path)
    except (OverflowError, ValueError):
        # A timestamp that cannot be converted must not abort the restore
        # of the remaining files
        logging.error(u'Invalid timestamp, cannot set atime and mtime of %s',
                      restore_path)

    if is_directory:
        for child in node.children:
            if not child.ignore():
                recursive_restore(child, part, outputdir, make_dirs=False)
            else:
                logging.info(u'Skipping ignored file {}'.format(child))
277 | 


--------------------------------------------------------------------------------
/recuperabit/utils.py:
--------------------------------------------------------------------------------
  1 | """Collection of utility functions."""
  2 | 
  3 | # RecuperaBit
  4 | # Copyright 2014-2021 Andrea Lazzarotto
  5 | #
  6 | # This file is part of RecuperaBit.
  7 | #
  8 | # RecuperaBit is free software: you can redistribute it and/or modify
  9 | # it under the terms of the GNU General Public License as published by
 10 | # the Free Software Foundation, either version 3 of the License, or
 11 | # (at your option) any later version.
 12 | #
 13 | # RecuperaBit is distributed in the hope that it will be useful,
 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 16 | # GNU General Public License for more details.
 17 | #
 18 | # You should have received a copy of the GNU General Public License
 19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
 20 | 
 21 | 
 22 | from datetime import datetime
 23 | import logging
 24 | import pprint
 25 | import string
 26 | import sys
 27 | import time
 28 | from typing import TYPE_CHECKING, Any, Iterable, Optional, List, Dict, Tuple, Union, Callable
 29 | import unicodedata
 30 | import io
 31 | 
 32 | from .fs.constants import sector_size
 33 | 
 34 | printer: pprint.PrettyPrinter = pprint.PrettyPrinter(indent=4)
 35 | all_chars = (chr(i) for i in range(sys.maxunicode))
 36 | unicode_printable: set[str] = set(
 37 |     c for c in all_chars
 38 |     if not unicodedata.category(c)[0].startswith('C')
 39 | )
 40 | ascii_printable: set[str] = set(string.printable[:-5])
 41 | 
 42 | if TYPE_CHECKING:
 43 |     from .fs.core_types import File, Partition
 44 | 
 45 | 
 46 | def sectors(image: io.BufferedReader, offset: int, size: int, bsize: int = sector_size, fill: bool = True) -> Optional[bytearray]:
 47 |     """Read from a file descriptor."""
 48 |     read = True
 49 |     try:
 50 |         image.seek(offset * bsize)
 51 |     except (IOError, OverflowError, ValueError):
 52 |         read = False
 53 |     if read:
 54 |         try:
 55 |             dump = image.read(size * bsize)
 56 |         except (IOError, MemoryError):
 57 |             logging.warning(
 58 |                 "Cannot read sector(s). Filling with 0x00. Offset: {} Size: "
 59 |                 "{} Bsize: {}".format(offset, size, bsize)
 60 |             )
 61 |             read = False
 62 |     if not read:
 63 |         if fill:
 64 |             dump = size * bsize * b'\x00'
 65 |         else:
 66 |             return None
 67 |     return bytearray(dump)
 68 | 
 69 | def unixtime(dtime: Optional[datetime]) -> float:
 70 |     """Convert datetime to UNIX epoch."""
 71 |     if dtime is None:
 72 |         return 0.0
 73 |     try:
 74 |         return time.mktime(dtime.timetuple())
 75 |     except ValueError:
 76 |         return 0.0
 77 | 
 78 | 
 79 | # format:
 80 | # [(label, (formatter, lower, higher)), ...]
 81 | def unpack(data: bytes, fmt: List[Tuple[str, Tuple[Union[str, Callable[[bytes], Any]], Union[int, Callable[[Dict[str, Any]], Optional[int]]], Union[int, Callable[[Dict[str, Any]], Optional[int]]]]]]) -> Dict[str, Any]:
 82 |     """Extract formatted information from a string of bytes."""
 83 |     result: Dict[str, Any] = {}
 84 |     for label, description in fmt:
 85 |         formatter, lower, higher = description
 86 |         # If lower is a function, then apply it
 87 |         low = lower(result) if callable(lower) else lower
 88 |         high = higher(result) if callable(higher) else higher
 89 | 
 90 |         if low is None or high is None:
 91 |             result[label] = None
 92 |             continue
 93 | 
 94 |         if callable(formatter):
 95 |             result[label] = formatter(data[low:high+1])
 96 |         else:
 97 |             if formatter == 's':
 98 |                 result[label] = str(data[low:high+1])
 99 |             if formatter.startswith('utf'):
100 |                 result[label] = data[low:high+1].decode(formatter)
101 |             if formatter.endswith('i') and len(formatter) < 4:
102 |                 # Use little-endian by default. Big-endian with >i.
103 |                 # Force sign-extension of first bit with >+i / +i.
104 |                 chunk = data[low:high+1]
105 | 
106 |                 signed = False
107 |                 if '+' in formatter:
108 |                     signed = True
109 | 
110 |                 byteorder = 'little'
111 |                 if formatter.startswith('>'):
112 |                     byteorder = 'big'
113 | 
114 |                 if len(chunk):
115 |                     result[label] = int.from_bytes(chunk, byteorder=byteorder, signed=signed)
116 |                 else:
117 |                     result[label] = None
118 |     return result
119 | 
120 | 
def feed_all(image: io.BufferedReader, scanners: List[Any], indexes: Iterable[int]) -> List[int]:
    # Read the requested sectors of the disk image and pass each of them to
    # every scanner. The scan stops at the first sector that cannot be read
    # (or is empty). The index of a sector is collected once for each
    # scanner that reports a result for it.
    hits: List[int] = []
    for index in indexes:
        sector = sectors(image, index, 1, fill=False)
        if not sector:
            break

        for scanner in scanners:
            found = scanner.feed(index, sector)
            if found is None:
                continue
            logging.info('Found {} at sector {}'.format(found, index))
            hits.append(index)
    return hits
135 | 
136 | 
def printable(text: str, default: str = '.', alphabet: Optional[set[str]] = None) -> str:
    """Return `text` with every character outside `alphabet` replaced.

    Replaced characters become `default`. Without an explicit alphabet, the
    module-level set of printable Unicode characters is used."""
    allowed = unicode_printable if alphabet is None else alphabet
    return ''.join([char if char in allowed else default for char in text])
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
def readable_bytes(amount: Optional[int]) -> str:
    """Format a size in bytes for humans, using binary multiples up to TB.

    An unknown size (None) is rendered as '??? B'."""
    if amount is None:
        return '??? B'
    if amount < 1:
        return '%.2f B' % amount
    suffixes = ('', 'K', 'M', 'G', 'T')
    # Largest power of 1024 that still gives a value of at least 1
    exponent = max(
        power for power, _ in enumerate(suffixes)
        if amount / 1024.**power >= 1
    )
    return '%.2f %sB' % (amount / 1024.**exponent, suffixes[exponent])
161 | 
162 | 
def _file_tree_repr(node: 'File') -> str:
    """Describe a node on one line, for the textual tree.

    The line contains the name (followed by '/' for directories), the id,
    the offset in sectors and in bytes, the size for nodes not flagged as
    directories, and a [GHOST] or [DELETED] marker when applicable."""
    if node.is_ghost:
        marker = ' [GHOST]'
    elif node.is_deleted:
        marker = ' [DELETED]'
    else:
        marker = ''
    suffix = '/' if node.is_directory else ''
    byte_offset = None if node.offset is None else node.offset * sector_size
    fields = [
        ('Id', node.index),
        ('Offset', node.offset),
        ('Offset bytes', byte_offset)
    ]
    if not node.is_directory:
        fields.append(('Size', readable_bytes(node.size)))
    summary = ', '.join('%s: %s' % (key, value) for key, value in fields)
    return u'%s%s (%s) %s' % (node.name, suffix, summary, marker)
185 | 
186 | 
def tree_folder(directory: 'File', padding: int = 0) -> str:
    """Build the textual tree of a directory, one node per line.

    The directory itself is indented by `padding` spaces and each nesting
    level adds two more. Children that are directories or have children of
    their own are expanded recursively."""
    rows: List[str] = [' ' * padding + _file_tree_repr(directory)]
    child_padding = padding + 2
    child_indent = ' ' * child_padding
    for child in directory.children:
        if len(child.children) or child.is_directory:
            rows.append(tree_folder(child, child_padding))
        else:
            rows.append(child_indent + _file_tree_repr(child))
    return '\n'.join(rows)
204 | 
205 | 
def _bodyfile_repr(node: 'File', path: str) -> str:
    """Build the body file line of a node located under `path`.

    Fields are separated by '|'. MD5, mode, UID, GID and the last field are
    always '0'; a missing size is written as 0. Names of directories (or of
    nodes with children) get a trailing '/'."""
    is_folder = node.is_directory or len(node.children)
    fields = [
        '0',                                            # MD5
        path + node.name + ('/' if is_folder else ''),  # name
        node.index,                                     # inode
        '0', '0', '0',                                  # mode, UID, GID
        0 if node.size is None else node.size,
        unixtime(node.mac['access']),
        unixtime(node.mac['modification']),
        unixtime(node.mac['creation']),
        '0'
    ]
    return '|'.join(map(str, fields))
220 | 
221 | 
def bodyfile_folder(directory: 'File', path: str = '') -> List[str]:
    """Return the lines of a TSK 3.x compatible body file for a directory.

    The first line describes the directory itself, the following ones its
    descendants, visited depth-first.

    Format:
    '#MD5|name|inode|mode_as_string|UID|GID|size|atime|mtime|ctime|crtime'
    See also: http://wiki.sleuthkit.org/index.php?title=Body_file"""
    rows: List[str] = [_bodyfile_repr(directory, path)]
    inner_path = path + directory.name + '/'
    for child in directory.children:
        if len(child.children) or child.is_directory:
            rows.extend(bodyfile_folder(child, inner_path))
        else:
            rows.append(_bodyfile_repr(child, inner_path))
    return rows
236 | 
237 | 
# Characters with a special meaning in LaTeX and their escaped form. The
# table is applied in a single pass, so the braces and backslashes of the
# replacements are never escaped again.
_LTX_ESCAPES = str.maketrans({
    '\\': r'\textbackslash{}',
    '$': r'\$',
    '_': r'\_',
    '&': r'\&',
    '%': r'\%',
    '#': r'\#',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def _ltx_clean(label: Any) -> str:
    """Small filter to prepare strings to be included in LaTeX code.

    The label is converted to a string, LaTeX special characters are escaped
    and a leading '-' is replaced by \\textminus{}. An empty label gives an
    empty string."""
    clean = str(label).translate(_LTX_ESCAPES)
    # startswith is safe on empty strings, unlike indexing the first char
    if clean.startswith('-'):
        clean = r'\textminus{}' + clean[1:]
    return clean
244 | 
245 | 
def _tikz_repr(node: 'File') -> str:
    """Build the Tikz node statement of a file node.

    Ghost and deleted nodes get the [ghost] or [deleted] style; index and
    name are filtered for inclusion in LaTeX code."""
    if node.is_ghost:
        style = '[ghost]'
    elif node.is_deleted:
        style = '[deleted]'
    else:
        style = ''
    index = _ltx_clean(node.index)
    name = _ltx_clean(node.name)
    return r'node %s{%s\enskip{}%s}' % (style, index, name)
252 | 
253 | 
def tikz_child(directory: 'File', padding: int = 0) -> Tuple[str, int]:
    """Build the Tikz child block of a node and of all its descendants.

    Returns the generated code together with the number of descendants. One
    'child [missing] {}' row per descendant is added after the block."""
    indent = ' ' * padding
    pieces: List[str] = [r'%schild {%s' % (indent, _tikz_repr(directory))]
    descendants: int = len(directory.children)
    for sub in directory.children:
        text, nested = tikz_child(sub, padding + 4)
        pieces.append(text)
        descendants += nested
    pieces.append('}')
    pieces.extend(['child [missing] {}'] * descendants)
    # Closing braces are pulled up to the end of the previous line
    return '\n'.join(pieces).replace('\n}', '}'), descendants
267 | 
268 | 
def tikz_part(part: 'Partition') -> str:
    """Generate the LaTeX code of a Tikz diagram showing the directory
    structure of a partition (its root and lost nodes).

    The required packages are listed in comments before the picture.

    See also: http://www.texample.net/tikz/examples/filesystem-tree/"""

    header = (r"""%\usepackage{tikz}
    %\usetikzlibrary{trees}""")

    opening = r"""\begin{tikzpicture}[%
    grow via three points={one child at (1.75em,-1.75em) and
    two children at (1.75em,-1.75em) and (1.75em,-3.5em)},
    edge from parent path={(\tikzparentnode.south) |- (\tikzchildnode.west)}]
    \scriptsize
    """
    closing = r"""\end{tikzpicture}"""

    rows = [r'\node [root] {File System Structure}']
    for top in (part.root, part.lost):
        code, _ = tikz_child(top, 4)
        rows.append(code)
    rows.append(';')

    return '\n'.join([header, '', opening, '\n'.join(rows), closing])
293 | 
294 | 
def _csv_escape(value: object) -> str:
    """Double the quotes of a value that goes inside a quoted CSV field."""
    return str(value).replace('"', '""')


def csv_part(part: 'Partition') -> list[str]:
    """Provide a CSV representation for a partition.

    Returns the list of lines: a header followed by one row for each file of
    the partition. Name and full path are written as quoted fields, with
    embedded double quotes doubled so that the row stays well-formed."""
    contents = [
        ','.join(('Id', 'Parent', 'Name', 'Full Path', 'Modification Time',
                  'Access Time', 'Creation Time', 'Size (bytes)',
                  'Size (human)', 'Offset (bytes)', 'Offset (sectors)',
                  'Directory', 'Deleted', 'Ghost'))
    ]
    for index in part.files:
        obj = part.files[index]
        contents.append(
                u'%s,%s,"%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' % (
                    obj.index, obj.parent, _csv_escape(obj.name),
                    _csv_escape(obj.full_path(part)),
                    obj.mac['modification'], obj.mac['access'],
                    obj.mac['creation'], obj.size,
                    readable_bytes(obj.size),
                    (obj.offset * sector_size
                     if obj.offset is not None else None),
                    obj.offset,
                    '1' if obj.is_directory else '',
                    '1' if obj.is_deleted else '',
                    '1' if obj.is_ghost else ''
                )
        )
    return contents
321 | 
322 | 
def _sub_locate(text: str, directory: 'File', part: 'Partition') -> List[Tuple['File', str]]:
    """Collect the descendants of a directory whose path contains `text`.

    Children are visited in order of name, depth-first. `text` is compared
    with the lowercased full path, so it is expected to be lowercase."""
    matches: List[Tuple['File', str]] = []
    for child in sorted(directory.children, key=lambda node: node.name):
        full = child.full_path(part)
        if text in full.lower():
            matches.append((child, full))
        if len(child.children) or child.is_directory:
            matches.extend(_sub_locate(text, child, part))
    return matches
333 | 
334 | 
def locate(part: 'Partition', text: str) -> List[Tuple['File', str]]:
    """Find the files of a partition whose path contains `text`.

    The search ignores case. Results are (file, path) pairs: matches under
    the lost files node come first, then those under the root."""
    needle = text.lower()
    return (
        _sub_locate(needle, part.lost, part) +
        _sub_locate(needle, part.root, part)
    )
342 | 
343 | 
def merge(part: 'Partition', piece: 'Partition') -> None:
    """Add to `part` the files of `piece` (both are partitions).

    A file is added when `part` has no file with the same index, or when
    the file it has at that index is a ghost."""
    for index in piece.files:
        # Real (non-ghost) files already known to part are left alone
        if index in part.files and not part.files[index].is_ghost:
            continue
        part.add_file(piece.files[index])
352 | 


--------------------------------------------------------------------------------