├── .github
└── FUNDING.yml
├── .gitignore
├── LICENSE
├── README.md
├── __init__.py
├── main.py
└── recuperabit
├── __init__.py
├── fs
├── __init__.py
├── constants.py
├── core_types.py
├── ntfs.py
└── ntfs_fmt.py
├── logic.py
└── utils.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | ko_fi: thelazza
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ### Code ###
2 | .vscode/*
3 | !.vscode/tasks.json
4 | !.vscode/launch.json
5 | *.code-workspace
6 |
7 | ### Python ###
8 | # Byte-compiled / optimized / DLL files
9 | __pycache__/
10 | *.py[cod]
11 | *$py.class
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | build/
19 | develop-eggs/
20 | dist/
21 | downloads/
22 | eggs/
23 | .eggs/
24 | lib/
25 | lib64/
26 | parts/
27 | sdist/
28 | var/
29 | wheels/
30 | pip-wheel-metadata/
31 | share/python-wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 | MANIFEST
36 |
37 | # PyInstaller
38 | # Usually these files are written by a python script from a template
39 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
40 | *.manifest
41 | *.spec
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 | pytestdebug.log
61 |
62 | # Translations
63 | *.mo
64 | *.pot
65 |
66 | # Django stuff:
67 | *.log
68 | local_settings.py
69 | db.sqlite3
70 | db.sqlite3-journal
71 |
72 | # Flask stuff:
73 | instance/
74 | .webassets-cache
75 |
76 | # Scrapy stuff:
77 | .scrapy
78 |
79 | # Sphinx documentation
80 | docs/_build/
81 | doc/_build/
82 |
83 | # PyBuilder
84 | target/
85 |
86 | # Jupyter Notebook
87 | .ipynb_checkpoints
88 |
89 | # IPython
90 | profile_default/
91 | ipython_config.py
92 |
93 | # pyenv
94 | .python-version
95 |
96 | # pipenv
97 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
99 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
100 | # install all needed dependencies.
101 | #Pipfile.lock
102 |
103 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104 | __pypackages__/
105 |
106 | # Celery stuff
107 | celerybeat-schedule
108 | celerybeat.pid
109 |
110 | # SageMath parsed files
111 | *.sage.py
112 |
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 | pythonenv*
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | # pytype static type analyzer
142 | .pytype/
143 |
144 | # profiling data
145 | .prof
146 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | {one line to give the program's name and a brief idea of what it does.}
635 | Copyright (C) {year} {name of author}
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | {project} Copyright (C) {year} {fullname}
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/philosophy/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # 
4 |
5 |
6 |
7 |
8 |
9 | 
10 |
11 | A software which attempts to reconstruct file system structures and recover
12 | files. Currently it supports only NTFS.
13 |
14 | RecuperaBit attempts reconstruction of the directory structure regardless of:
15 |
16 | - missing partition table
17 | - unknown partition boundaries
18 | - partially-overwritten metadata
19 | - quick format
20 |
21 | You can get more information about **the reconstruction algorithms** and the
22 | architecture used in RecuperaBit by reading
23 | [my MSc thesis](https://www.scribd.com/doc/309337813/) or checking out [the
24 | slides](http://www.slideshare.net/TheLazza/recuperabit-forensic-file-system-reconstruction-given-partially-corrupted-metadata).
25 |
26 | ## Usage
27 |
28 | usage: main.py [-h] [-s SAVEFILE] [-w] [-o OUTPUTDIR] path
29 |
30 | Reconstruct the directory structure of possibly damaged filesystems.
31 |
32 | positional arguments:
33 | path path to the disk image
34 |
35 | optional arguments:
36 | -h, --help show this help message and exit
37 | -s SAVEFILE, --savefile SAVEFILE
38 | path of the scan save file
39 | -w, --overwrite force overwrite of the save file
40 | -o OUTPUTDIR, --outputdir OUTPUTDIR
41 | directory for restored contents and output files
42 |
43 | The main argument is the `path` to a bitstream image of a disk or partition.
44 | RecuperaBit automatically determines the sectors from which partitions start.
45 |
46 | RecuperaBit does not modify the disk image, however it does read some parts of
47 | it multiple times throughout the execution. It should also work on real devices,
48 | such as `/dev/sda` but **this is not advised** for damaged drives. RecuperaBit
49 | might worsen the situation by "stressing" a damaged drive or it could crash due
50 | to an I/O error.
51 |
52 | Optionally, a save file can be specified with `-s`. The first time, after the
53 | scanning process, results are saved in the file. After the first run, the file
54 | is read to only analyze interesting sectors and speed up the loading phase.
55 |
56 | Overwriting the save file can be forced with `-w`.
57 |
58 | RecuperaBit includes a small command line that allows the user to recover files
59 | and export the contents of a partition in CSV or
60 | [body file](http://wiki.sleuthkit.org/index.php?title=Body_file) format. These
61 | are exported in the directory specified by `-o` (or `recuperabit_output`).
62 |
63 | ### Limitation
64 |
65 | Currently RecuperaBit does not work with compressed files on an NTFS filesystem.
66 | If you have deep knowledge of the inner workings of file compression on NTFS
67 | filesystem, your help would be much appreciated, as available documentation is
68 | quite sparse on the topic.
69 |
70 | ### Pypy
71 |
72 | RecuperaBit can be run with the standard cPython implementation, however speed
73 | can be increased by using it with the Pypy interpreter and JIT compiler:
74 |
75 | pypy3 main.py /path/to/disk.img
76 |
77 | ### Recovery of File Contents
78 |
79 | Files can be restored one at a time or recursively, starting from a directory.
80 | After the scanning process has completed, you can check the list of partitions
81 | that can be recovered by issuing the following command at the prompt:
82 |
83 | recoverable
84 |
85 | Each line shows information about a partition. Let's consider the following
86 | output example:
87 |
88 | Partition #0 -> Partition (NTFS, 15.00 MB, 11 files, Recoverable, Offset: 2048, Offset (b): 1048576, Sec/Clus: 8, MFT offset: 2080, MFT mirror offset: 17400)
89 |
90 | If you want to recover files starting from a specific directory, you can either
91 | print the tree on screen with the `tree` command (very verbose for large drives)
92 | or you can export a CSV list of files (see `help` for details).
93 |
94 | If you would rather extract all files from the *Root* and the *Lost Files*
95 | nodes, you need to know the identifier for the root directory, depending on
96 | the file system type. The following are those of file systems supported by
97 | RecuperaBit:
98 |
99 | | File System Type | Root Id |
100 | |------------------|---------|
101 | | NTFS | 5 |
102 |
103 | The id for *Lost Files* is -1 **for every file system.**
104 |
105 | Therefore, to restore `Partition #0` in our example, you need to run:
106 |
107 | restore 0 5
108 | restore 0 -1
109 |
110 | The files will be saved inside the output directory specified by `-o`.
111 |
112 | ## License
113 |
114 | This software is released under the GNU GPLv3. See `LICENSE` for more details.
115 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lazza/RecuperaBit/e05079ef0f40a1198c7633fce9d1b9eaef9c5679/__init__.py
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Main RecuperaBit process."""
3 |
4 | # RecuperaBit
5 | # Copyright 2014-2021 Andrea Lazzarotto
6 | #
7 | # This file is part of RecuperaBit.
8 | #
9 | # RecuperaBit is free software: you can redistribute it and/or modify
10 | # it under the terms of the GNU General Public License as published by
11 | # the Free Software Foundation, either version 3 of the License, or
12 | # (at your option) any later version.
13 | #
14 | # RecuperaBit is distributed in the hope that it will be useful,
15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | # GNU General Public License for more details.
18 | #
19 | # You should have received a copy of the GNU General Public License
20 | # along with RecuperaBit. If not, see <https://www.gnu.org/licenses/>.
21 |
22 |
23 | import argparse
24 | import codecs
25 | import itertools
26 | import locale
27 | import logging
28 | import os.path
29 | import pickle
30 | import sys
31 | try:
32 | import readline
33 | readline # ignore unused import warning
34 | except ImportError:
35 | pass
36 |
37 | from recuperabit import logic, utils
38 | # scanners
39 | from recuperabit.fs.ntfs import NTFSScanner
40 |
41 | from typing import TYPE_CHECKING
42 | if TYPE_CHECKING:
43 | from recuperabit.fs.core_types import Partition
44 |
45 | __author__ = "Andrea Lazzarotto"
46 | __copyright__ = "(c) 2014-2021, Andrea Lazzarotto"
47 | __license__ = "GPLv3"
48 | __version__ = "1.1.6"
49 | __maintainer__ = "Andrea Lazzarotto"
50 | __email__ = "andrea.lazzarotto@gmail.com"
51 |
52 |
53 | # classes of available scanners
54 | plugins = (
55 | NTFSScanner,
56 | )
57 |
58 | commands = (
59 | ('help', 'Print this help message'),
60 | ('recoverable', 'List recoverable partitions'),
61 | ('recoverable_size ', 'List recoverable partitions based on the minimum '),
62 | ('other', 'List unrecoverable partitions'),
63 | ('allparts', 'List all partitions'),
64 | ('tree ', 'Show contents of partition (tree)'),
65 | ('csv ', 'Save a CSV representation in a file'),
66 | ('bodyfile ', 'Save a body file representation in a file'),
67 | ('tikzplot []', 'Produce LaTeX code to draw a Tikz figure'),
68 | ('restore ', 'Recursively restore files from '),
69 | ('locate ', 'Print all file paths that match a string'),
70 | ('traceback ', 'Print ids and paths for all ancestors of '),
71 | ('merge ', 'Merge the two partitions into the first one'),
72 | ('quit', 'Close the program')
73 | )
74 |
75 | rebuilt = set()
76 |
77 |
78 | def list_parts(parts, shorthands, test):
79 | """List partitions corresponding to test."""
80 | for i, part in shorthands:
81 | if test(parts[part]):
82 | print('Partition #' + str(i), '->', parts[part])
83 |
84 |
85 | def check_valid_part(num, parts, shorthands, rebuild=True):
86 | """Check if the required partition is valid."""
87 | try:
88 | i = int(num)
89 | except ValueError:
90 | print('Value is not valid!')
91 | return None
92 | if i in range(len(shorthands)):
93 | i, par = shorthands[i]
94 | part = parts[par]
95 | if rebuild and par not in rebuilt:
96 | print('Rebuilding partition...')
97 | part.rebuild()
98 | rebuilt.add(par)
99 | print('Done')
100 | return part
101 | print('No partition with given ID!')
102 | return None
103 |
104 |
105 | def interpret(cmd, arguments, parts: dict[int, 'Partition'], shorthands, outdir):
106 | """Perform command required by user."""
107 | if cmd == 'help':
108 | print('Available commands:')
109 | for name, desc in commands:
110 | print(' %s%s' % (name.ljust(28), desc))
111 | elif cmd == 'tree':
112 | if len(arguments) != 1:
113 | print('Wrong number of parameters!')
114 | else:
115 | part = check_valid_part(arguments[0], parts, shorthands)
116 | if part is not None:
117 | print('-'*10)
118 | print(utils.tree_folder(part.root))
119 | print(utils.tree_folder(part.lost))
120 | print('-'*10)
121 | elif cmd == 'bodyfile':
122 | if len(arguments) != 2:
123 | print('Wrong number of parameters!')
124 | else:
125 | part = check_valid_part(arguments[0], parts, shorthands)
126 | if part is not None:
127 | contents = [
128 | '# ---' + repr(part) + '---',
129 | '# Full paths'
130 | ] + utils.bodyfile_folder(part.root) + [
131 | '# \n# Orphaned files'
132 | ] + utils.bodyfile_folder(part.lost)
133 | fname = os.path.join(outdir, arguments[1])
134 | try:
135 | with codecs.open(fname, 'w', encoding='utf8') as outfile:
136 | outfile.write('\n'.join(contents))
137 | print('Saved body file to %s' % fname)
138 | except IOError:
139 | print('Cannot open file %s for output!' % fname)
140 | elif cmd == 'csv':
141 | if len(arguments) != 2:
142 | print('Wrong number of parameters!')
143 | else:
144 | part = check_valid_part(arguments[0], parts, shorthands)
145 | if part is not None:
146 | contents = utils.csv_part(part)
147 | fname = os.path.join(outdir, arguments[1])
148 | try:
149 | with codecs.open(fname, 'w', encoding='utf8') as outfile:
150 | outfile.write(
151 | '\n'.join(contents)
152 | )
153 | print('Saved CSV file to %s' % fname)
154 | except IOError:
155 | print('Cannot open file %s for output!' % fname)
156 | elif cmd == 'tikzplot':
157 | if len(arguments) not in (1, 2):
158 | print('Wrong number of parameters!')
159 | else:
160 | part = check_valid_part(arguments[0], parts, shorthands)
161 | if part is not None:
162 | if len(arguments) > 1:
163 | fname = os.path.join(outdir, arguments[1])
164 | try:
165 | with codecs.open(fname, 'w') as outfile:
166 | outfile.write(utils.tikz_part(part) + '\n')
167 | print('Saved Tikz code to %s' % fname)
168 | except IOError:
169 | print('Cannot open file %s for output!' % fname)
170 | else:
171 | print(utils.tikz_part(part))
172 | elif cmd == 'restore':
173 | if len(arguments) != 2:
174 | print('Wrong number of parameters!')
175 | else:
176 | partid = arguments[0]
177 | part = check_valid_part(partid, parts, shorthands)
178 | if part is not None:
179 | index = arguments[1]
180 | partition_dir = os.path.join(outdir, 'Partition' + str(partid))
181 | myfile = None
182 | try:
183 | indexi = int(index)
184 | except ValueError:
185 | indexi = index
186 | for i in [index, indexi]:
187 | myfile = part.get(i, myfile)
188 | if myfile is None:
189 | print('The index is not valid')
190 | else:
191 | logic.recursive_restore(myfile, part, partition_dir)
192 | elif cmd == 'locate':
193 | if len(arguments) != 2:
194 | print('Wrong number of parameters!')
195 | else:
196 | part = check_valid_part(arguments[0], parts, shorthands)
197 | if part is not None:
198 | text = arguments[1]
199 | results = utils.locate(part, text)
200 | for node, path in results:
201 | desc = (
202 | ' [GHOST]' if node.is_ghost else
203 | ' [DELETED]' if node.is_deleted else ''
204 | )
205 | print('[%s]: %s%s' % (node.index, path, desc))
206 | elif cmd == 'traceback':
207 | if len(arguments) != 2:
208 | print('Wrong number of parameters!')
209 | else:
210 | partid = arguments[0]
211 | part = check_valid_part(partid, parts, shorthands)
212 | if part is not None:
213 | index = arguments[1]
214 | myfile = None
215 | try:
216 | indexi = int(index)
217 | except ValueError:
218 | indexi = index
219 | for i in [index, indexi]:
220 | myfile = part.get(i, myfile)
221 | if myfile is None:
222 | print('The index is not valid')
223 | else:
224 | while myfile is not None:
225 | print('[{}] {}'.format(myfile.index, myfile.full_path(part)))
226 | myfile = part.get(myfile.parent)
227 | elif cmd == 'merge':
228 | if len(arguments) != 2:
229 | print('Wrong number of parameters!')
230 | else:
231 | part1 = check_valid_part(arguments[0], parts, shorthands, rebuild=False)
232 | part2 = check_valid_part(arguments[1], parts, shorthands, rebuild=False)
233 | if None in (part1, part2):
234 | return
235 | if part1.fs_type != part2.fs_type:
236 | print('Cannot merge partitions with types (%s, %s)' % (part1.fs_type, part2.fs_type))
237 | return
238 | print('Merging partitions...')
239 | utils.merge(part1, part2)
240 | source_position = int(arguments[1])
241 | destination_position = int(arguments[0])
242 | _, par_source = shorthands[source_position]
243 | _, par_destination = shorthands[destination_position]
244 | del shorthands[source_position]
245 | del parts[par_source]
246 | for par in (par_source, par_destination):
247 | try:
248 | rebuilt.remove(par)
249 | except:
250 | pass
251 | print('There are now %d partitions.' % (len(parts), ))
252 | elif cmd == 'recoverable':
253 | list_parts(parts, shorthands, lambda x: x.recoverable)
254 | elif cmd == 'recoverable_size':
255 | if len(arguments) != 1:
256 | print('Wrong number of parameters!')
257 | else:
258 | list_parts(parts, shorthands, lambda x: x.size is not None and x.size > int(arguments[0]))
259 | elif cmd == 'other':
260 | list_parts(parts, shorthands, lambda x: not x.recoverable)
261 | elif cmd == 'allparts':
262 | list_parts(parts, shorthands, lambda x: True)
263 | elif cmd == 'quit':
264 | exit(0)
265 | else:
266 | print('Unknown command.')
267 |
268 |
269 | def main():
270 | """Wrap the program logic inside a function."""
271 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
272 |
273 | print(r" ___ ___ _ _ ")
274 | print(r" | _ \___ __ _ _ _ __ ___ _ _ __ _| _ |_) |_ ")
275 | print(r" | / -_) _| || | '_ \/ -_) '_/ _` | _ \ | _|")
276 | print(r" |_|_\___\__|\_,_| .__/\___|_| \__,_|___/_|\__|")
277 | print(" |_| v{}".format(__version__))
278 | print(' ', __copyright__, '<%s>' % __email__)
279 | print(' Released under the', __license__)
280 | print('')
281 |
282 | parser = argparse.ArgumentParser(
283 | description='Reconstruct the directory structure of possibly damaged '
284 | 'filesystems.'
285 | )
286 | parser.add_argument('path', type=str, help='path to the disk image')
287 | parser.add_argument(
288 | '-s', '--savefile', type=str, help='path of the scan save file'
289 | )
290 | parser.add_argument(
291 | '-w', '--overwrite', action='store_true',
292 | help='force overwrite of the save file'
293 | )
294 | parser.add_argument(
295 | '-o', '--outputdir', type=str, help='directory for restored contents'
296 | ' and output files'
297 | )
298 | args = parser.parse_args()
299 |
300 | try:
301 | image = open(args.path, 'rb')
302 | except IOError:
303 | logging.error('Unable to open image file!')
304 | exit(1)
305 |
306 | read_results = False
307 | write_results = False
308 |
309 | # Set output directory
310 | if args.outputdir is None:
311 | logging.info('No output directory specified, defaulting to '
312 | 'recuperabit_output')
313 | args.outputdir = 'recuperabit_output'
314 |
315 | # Try to reload information from the savefile
316 | if args.savefile is not None:
317 | if args.overwrite:
318 | logging.info('Results will be saved to %s', args.savefile)
319 | write_results = True
320 | else:
321 | logging.info('Checking if results already exist.')
322 | try:
323 | savefile = open(args.savefile, 'rb')
324 | logging.info('Results will be read from %s', args.savefile)
325 | read_results = True
326 | except IOError:
327 | logging.info('Unable to open save file.')
328 | logging.info('Results will be saved to %s', args.savefile)
329 | write_results = True
330 |
331 | if read_results:
332 | logging.info('The save file exists. Trying to read it...')
333 | try:
334 | indexes = pickle.load(savefile)
335 | savefile.close()
336 | except IndexError:
337 | logging.error('Malformed save file!')
338 | exit(1)
339 | else:
340 | indexes = itertools.count()
341 |
342 | # Ask for confirmation before beginning the process
343 | try:
344 | confirm = input('Type [Enter] to start the analysis or '
345 | '"exit" / "quit" / "q" to quit: ')
346 | except EOFError:
347 | print('')
348 | exit(0)
349 | if confirm in ('exit', 'quit', 'q'):
350 | exit(0)
351 |
352 | # Create the output directory
353 | if not logic.makedirs(args.outputdir):
354 | logging.error('Cannot create output directory!')
355 | exit(1)
356 |
357 | scanners = [pl(image) for pl in plugins]
358 |
359 | logging.info('Analysis started! This is going to take time...')
360 | interesting = utils.feed_all(image, scanners, indexes)
361 |
362 | logging.info('First scan completed')
363 |
364 | if write_results:
365 | logging.info('Saving results to %s', args.savefile)
366 | with open(args.savefile, 'wb') as savefile:
367 | pickle.dump(interesting, savefile)
368 |
369 | # Ask for partitions
370 | parts: dict[int, 'Partition'] = {}
371 | for scanner in scanners:
372 | parts.update(scanner.get_partitions())
373 |
374 | shorthands = list(enumerate(parts))
375 |
376 | logging.info('%i partitions found.', len(parts))
377 | while True:
378 | print('\nWrite command ("help" for details):')
379 | try:
380 | command = input('> ').split(' ')
381 | except (EOFError, KeyboardInterrupt):
382 | print('')
383 | exit(0)
384 | cmd = command[0]
385 | arguments = command[1:]
386 | interpret(cmd, arguments, parts, shorthands, args.outputdir)
387 |
388 | if __name__ == '__main__':
389 | main()
390 |
--------------------------------------------------------------------------------
/recuperabit/__init__.py:
--------------------------------------------------------------------------------
1 | # RecuperaBit
2 | # Copyright 2014-2021 Andrea Lazzarotto
3 | #
4 | # This file is part of RecuperaBit.
5 | #
6 | # RecuperaBit is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # RecuperaBit is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with RecuperaBit. If not, see <https://www.gnu.org/licenses/>.
18 |
--------------------------------------------------------------------------------
/recuperabit/fs/__init__.py:
--------------------------------------------------------------------------------
1 | # RecuperaBit
2 | # Copyright 2014-2021 Andrea Lazzarotto
3 | #
4 | # This file is part of RecuperaBit.
5 | #
6 | # RecuperaBit is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # RecuperaBit is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with RecuperaBit. If not, see <https://www.gnu.org/licenses/>.
18 |
--------------------------------------------------------------------------------
/recuperabit/fs/constants.py:
--------------------------------------------------------------------------------
1 | """Information needed by multiple plugins."""
2 |
3 | # RecuperaBit
4 | # Copyright 2014-2021 Andrea Lazzarotto
5 | #
6 | # This file is part of RecuperaBit.
7 | #
8 | # RecuperaBit is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # RecuperaBit is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with RecuperaBit. If not, see <https://www.gnu.org/licenses/>.
20 |
21 |
22 | sector_size: int = 512
23 | max_sectors: int = 256 # Maximum block size for recovery
24 |
--------------------------------------------------------------------------------
/recuperabit/fs/core_types.py:
--------------------------------------------------------------------------------
1 | """Recuperabit Core Types.
2 |
3 | This module contains the class declarations of all objects which are used in
4 | the Recuperabit meta file system. Each plug-in is supposed to extend the File
5 | and DiskScanner classes with subclasses implementing the missing methods."""
6 |
7 | # RecuperaBit
8 | # Copyright 2014-2021 Andrea Lazzarotto
9 | #
10 | # This file is part of RecuperaBit.
11 | #
12 | # RecuperaBit is free software: you can redistribute it and/or modify
13 | # it under the terms of the GNU General Public License as published by
14 | # the Free Software Foundation, either version 3 of the License, or
15 | # (at your option) any later version.
16 | #
17 | # RecuperaBit is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | # GNU General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU General Public License
23 | # along with RecuperaBit. If not, see <https://www.gnu.org/licenses/>.
24 |
25 |
26 | import logging
27 | import os.path
28 | from typing import Optional, Dict, Set, List, Tuple, Union, Any, Iterator
29 | from datetime import datetime
30 |
31 | from .constants import sector_size
32 |
33 | from ..utils import readable_bytes
34 |
35 |
class File(object):
    """Filesystem-independent representation of a file. Aka Node."""

    def __init__(self, index: Union[int, str], name: str, size: Optional[int], is_directory: bool = False,
                 is_deleted: bool = False, is_ghost: bool = False) -> None:
        self.index: Union[int, str] = index
        self.name: str = name
        self.size: Optional[int] = size
        self.is_directory: bool = is_directory
        self.is_deleted: bool = is_deleted
        self.is_ghost: bool = is_ghost
        # Identifier of the parent directory, filled in by set_parent.
        self.parent: Optional[Union[int, str]] = None
        # Modification / access / creation timestamps (all unknown for now).
        self.mac: Dict[str, Optional[datetime]] = dict.fromkeys(
            ('modification', 'access', 'creation')
        )
        self.children: Set['File'] = set()
        # Names already taken by children, to avoid clashes breaking restore.
        self.children_names: Set[str] = set()
        # Offset of the record from the beginning of the disk (in sectors).
        self.offset: Optional[int] = None

    def set_parent(self, parent: Optional[Union[int, str]]) -> None:
        """Set a pointer to the parent directory."""
        self.parent = parent

    def set_mac(self, modification: Optional[datetime], access: Optional[datetime], creation: Optional[datetime]) -> None:
        """Set the modification, access and creation times."""
        self.mac.update(
            modification=modification,
            access=access,
            creation=creation,
        )

    def get_mac(self) -> List[Optional[datetime]]:
        """Get the modification, access and creation times."""
        return [self.mac[key] for key in ('modification', 'access', 'creation')]

    def set_offset(self, offset: Optional[int]) -> None:
        """Set the offset of the file record with respect to the disk image."""
        self.offset = offset

    def get_offset(self) -> Optional[int]:
        """Get the offset of the file record with respect to the disk image."""
        return self.offset

    def add_child(self, node: 'File') -> None:
        """Add a new child to this directory."""
        if node in self.children:
            # Already attached (e.g. by a previous rebuild): nothing to do.
            return
        wanted = node.name
        counter = 0
        # On a name clash, append a progressive three-digit suffix.
        while node.name in self.children_names:
            node.name = '%s_%03d' % (wanted, counter)
            counter += 1
        if node.name != wanted:
            logging.warning(u'Renamed {} from {}'.format(node, wanted))
        self.children.add(node)
        self.children_names.add(node.name)

    def full_path(self, part: 'Partition') -> str:
        """Return the full path of this file."""
        if self.parent is None:
            return self.name
        return os.path.join(part[self.parent].full_path(part), self.name)

    def get_content(self, partition: 'Partition') -> Optional[Union[bytes, Iterator[bytes]]]:
        # pylint: disable=W0613
        """Extract the content of the file.

        Intentionally unimplemented here: each file system plug-in
        provides its own version. Directories and ghosts have no content."""
        if self.is_directory or self.is_ghost:
            return None
        raise NotImplementedError

    # pylint: disable=R0201
    def ignore(self) -> bool:
        """Tell the restore procedure whether to skip this file.

        Plug-ins override this for entries that must not be recovered;
        e.g. NTFS $BadClus:$Bad, whose output would be as large as the
        whole partition (usually many GBs)."""
        return False

    def __repr__(self) -> str:
        return u'File(#%s, ^^%s^^, %s, offset = %s sectors)' % (
            self.index, self.parent, self.name, self.offset
        )
126 |
127 |
class Partition(object):
    """Simplified representation of the contents of a partition.

    Parameter root_id represents the identifier assigned to the root directory
    of a partition. This can be file system dependent."""

    def __init__(self, fs_type: str, root_id: Union[int, str], scanner: 'DiskScanner') -> None:
        self.fs_type: str = fs_type
        self.root_id: Union[int, str] = root_id
        self.size: Optional[int] = None      # estimated size, in sectors
        self.offset: Optional[int] = None    # start sector on the disk
        self.root: Optional[File] = None
        # Catch-all directory for nodes whose real parent cannot be found.
        self.lost: File = File(-1, 'LostFiles', 0, is_directory=True, is_ghost=True)
        self.files: Dict[Union[int, str], File] = {}
        self.recoverable: bool = False
        self.scanner: 'DiskScanner' = scanner

    def add_file(self, node: File) -> None:
        """Insert a new file in the partition."""
        self.files[node.index] = node

    def set_root(self, node: File) -> None:
        """Set the root directory."""
        if not node.is_directory:
            raise TypeError('Not a directory')
        self.root = node
        self.root.set_parent(None)

    def set_size(self, size: int) -> None:
        """Set the (estimated) size of the partition."""
        self.size = size

    def set_offset(self, offset: int) -> None:
        """Set the offset from the beginning of the disk."""
        self.offset = offset

    def set_recoverable(self, recoverable: bool) -> None:
        """State if the partition contents are also recoverable."""
        self.recoverable = recoverable

    def rebuild(self) -> None:
        """Rebuild the partition structure.

        Processes the collected files and reconstructs the directory tree
        as accurately as possible, creating ghost directories (attached to
        LostFiles) for parents that were never found."""
        root_id = self.root_id
        rootname = 'Root'

        # Make sure a root node exists, even if only as a ghost.
        if root_id not in self.files:
            self.files[root_id] = File(
                root_id, rootname, 0, is_directory=True, is_ghost=True
            )

        # Iterate over a snapshot, since ghost parents are added on the fly.
        for identifier in list(self.files):
            node = self.files[identifier]
            if node.index == root_id:
                self.set_root(node)
                node.name = rootname
                continue
            parent_id = node.parent
            if parent_id is not None and parent_id in self.files:
                parent_node = self.files[parent_id]
            elif parent_id is not None:
                # The parent was never found on disk: materialize it as a
                # ghost directory hanging off LostFiles.
                parent_node = File(parent_id, 'Dir_' + str(parent_id),
                                   0, is_directory=True, is_ghost=True)
                parent_node.set_parent(-1)
                self.files[parent_id] = parent_node
                self.lost.add_child(parent_node)
            else:
                # No parent information at all: file goes to LostFiles.
                parent_node = self.lost
                node.set_parent(-1)
            parent_node.add_child(node)

    # pylint: disable=R0201
    def additional_repr(self) -> List[Tuple[str, Any]]:
        """Return additional values to show in the string representation."""
        return []

    def __repr__(self) -> str:
        if self.size is not None:
            size = readable_bytes(self.size * sector_size)
        else:
            size = '??? b'
        offset_bytes = (
            self.offset * sector_size if self.offset is not None else None
        )
        data = [('Offset', self.offset), ('Offset (b)', offset_bytes)]
        data += self.additional_repr()
        details = ', '.join(label + ': ' + str(value) for label, value in data)
        return u'Partition (%s, %s, %d files,%s %s)' % (
            self.fs_type,
            size,
            len(self.files),
            ' Recoverable,' if self.recoverable else '',
            details
        )

    def __getitem__(self, index: Union[int, str]) -> File:
        try:
            return self.files[index]
        except KeyError:
            if index == self.lost.index:
                return self.lost
            raise KeyError

    def get(self, index: Union[int, str], default: Optional[File] = None) -> Optional[File]:
        """Get a file or the special LostFiles directory."""
        try:
            return self[index]
        except KeyError:
            return default
245 |
246 |
class DiskScanner(object):
    """Abstract stub for the implementation of disk scanners.

    Plug-ins subclass this and implement feed() and get_partitions()."""

    def __init__(self, pointer: Any) -> None:
        # Reference to the disk image (whatever object sectors() accepts).
        self.image: Any = pointer

    def get_image(self) -> Any:
        """Return the image reference.

        Works both as a bound call (scanner.get_image()) and in unbound
        form (DiskScanner.get_image(scanner)), the latter being how the
        NTFS plug-in invokes it. NOTE: a duplicate @staticmethod
        definition of this name used to shadow the instance method and
        broke bound calls; keep a single definition."""
        return self.image

    def feed(self, index: int, sector: bytes) -> Optional[str]:
        """Feed a new sector.

        Returns a short description of the recognized sector type, or
        None. Must be implemented by each plug-in."""
        raise NotImplementedError

    def get_partitions(self) -> Dict[int, 'Partition']:
        """Get a list of the found partitions.

        Must be implemented by each plug-in."""
        raise NotImplementedError
268 |
--------------------------------------------------------------------------------
/recuperabit/fs/ntfs.py:
--------------------------------------------------------------------------------
1 | """NTFS plug-in.
2 |
3 | This plug-in contains the necessary logic to parse traces of NTFS file systems,
4 | including MFT entries and directory indexes."""
5 |
6 | # RecuperaBit
7 | # Copyright 2014-2021 Andrea Lazzarotto
8 | #
9 | # This file is part of RecuperaBit.
10 | #
11 | # RecuperaBit is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU General Public License as published by
13 | # the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # RecuperaBit is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU General Public License
22 | # along with RecuperaBit. If not, see .
23 |
24 |
25 | import logging
26 | from collections import Counter
27 | from typing import Any, Dict, List, Optional, Tuple, Union, Iterator, Set
28 |
29 | from .constants import max_sectors, sector_size
30 | from .core_types import DiskScanner, File, Partition
31 | from .ntfs_fmt import (attr_header_fmt, attr_names, attr_nonresident_fmt,
32 | attr_resident_fmt, attr_types_fmt, attribute_list_parser,
33 | boot_sector_fmt, entry_fmt, indx_dir_entry_fmt, indx_fmt,
34 | indx_header_fmt)
35 |
36 | from ..logic import SparseList, approximate_matching
37 | from ..utils import merge, sectors, unpack
38 |
# Attributes that may legitimately appear more than once in one MFT entry.
multiple_attributes: Set[str] = {
    '$FILE_NAME',
    '$DATA',
    '$INDEX_ROOT',
    '$INDEX_ALLOCATION',
    '$BITMAP',
}

# Size of records in sectors.
FILE_size: int = 2
INDX_size: int = 8
51 |
52 |
def best_name(entries: List[Tuple[int, str]]) -> Optional[str]:
    """Return the best file name available.

    Accepts a list of (namespace, name) tuples. When more than one choice
    is available, the name in the NTFS namespace (code == 3) is preferred.
    Returns None for an empty list or an empty winning name. The input
    list is sorted in place."""
    if not entries:
        return None

    entries.sort()
    namespace, name = entries[-1]
    if namespace != 3:
        # No NTFS-namespace entry: fall back to the lowest-sorting one.
        name = entries[0][1]
    return name or None
68 |
69 |
def parse_mft_attr(attr: bytes) -> Tuple[Dict[str, Any], Optional[str]]:
    """Parse the contents of a MFT attribute.

    Returns the parsed header — enriched with resident/non-resident fields
    and, for known resident attributes, a decoded 'content' entry — plus
    the attribute name, or None when the type code is unknown."""
    header = unpack(attr, attr_header_fmt)
    attr_type = header['type']

    if attr_type not in attr_names:
        # Unknown attribute type: return the bare header without a name.
        return header, None

    if header['non_resident']:
        extra = unpack(attr, attr_nonresident_fmt)
        # Normalize a missing runlist to an empty one.
        if extra['runlist'] is None:
            extra['runlist'] = []
        header.update(extra)
    else:
        header.update(unpack(attr, attr_resident_fmt))
        # Resident payload starts at content_off inside the attribute.
        content = attr[header['content_off']:]

    name = attr_names[attr_type]
    # Decode the payload of known resident attribute types.
    if not header['non_resident'] and name in attr_types_fmt:
        size = header['content_size']
        header['content'] = unpack(content[:size], attr_types_fmt[name])

    return header, name
96 |
97 |
def _apply_fixup_values(header: Dict[str, Any], entry: bytearray) -> None:
    """Apply the fixup values to FILE and INDX records.

    The last two bytes of every sector in the record are replaced with the
    original values saved in the fixup array located at off_fixup."""
    fixup = header['off_fixup']
    for i in range(1, header['n_entries']):
        sector_end = sector_size * i
        source = fixup + 2 * i
        entry[sector_end - 2:sector_end] = entry[source:source + 2]
104 |
105 |
def _attributes_reader(entry: bytes, offset: int) -> Dict[str, Any]:
    """Read every attribute of a MFT entry, starting at the given offset.

    Attributes listed in multiple_attributes are collected into lists;
    the others are stored as single values."""
    attributes: Dict[str, Any] = {}
    # No attribute header fits in the last 16 bytes of the record.
    while offset < len(entry) - 16:
        try:
            attr, name = parse_mft_attr(entry[offset:])
        except TypeError:
            # The attribute was broken, we need to terminate here
            return attributes
        attr['dump_offset'] = offset
        if attr['length'] == 0:
            # A zero length marks the end of the attribute list.
            break
        offset += attr['length']
        if name is None:
            # Skip broken/unknown attributes.
            continue
        if name in multiple_attributes:
            attributes.setdefault(name, []).append(attr)
        elif name not in attributes:
            attributes[name] = attr
        else:
            logging.error('Cannot handle multiple attribute %s', name)
    return attributes
135 |
136 |
def parse_file_record(entry: bytes) -> Dict[str, Any]:
    """Parse the contents of a FILE record (MFT entry).

    Returns the parsed header; 'valid' is False when the record fails the
    basic sanity checks, otherwise 'attributes' holds the parsed
    attributes."""
    header = unpack(entry, entry_fmt)
    size_alloc = header['size_alloc']
    # Sanity checks: allocated size must be known and fit inside the dump.
    if (size_alloc is None or size_alloc > len(entry)
            or len(entry) < FILE_size * sector_size):
        header['valid'] = False
        return header

    # Old versions of NTFS don't have a MFT record number.
    if header['off_fixup'] < 48:
        header['record_n'] = None

    _apply_fixup_values(header, entry)

    header['attributes'] = _attributes_reader(entry, header['off_first'])
    header['valid'] = True
    return header
156 |
157 |
def parse_indx_record(entry: bytes) -> Dict[str, Any]:
    """Parse the contents of a INDX record (directory index).

    Returns the parsed header with an 'entries' list of decoded directory
    entries and a 'valid' flag, set when at least one entry was found."""
    header = unpack(entry, indx_fmt)

    _apply_fixup_values(header, entry)

    # The index node header starts 24 bytes into the record and its
    # offsets are relative to that point: rebase them on the record start.
    node_data = unpack(entry[24:], indx_header_fmt)
    node_data['off_start_list'] += 24
    node_data['off_end_list'] += 24
    node_data['off_end_buffer'] += 24
    header.update(node_data)

    offset = header['off_start_list']
    entries = []
    # Walk the entry list until its declared end (or a broken entry).
    while offset < header['off_end_list']:
        entry_data = unpack(entry[offset:], indx_dir_entry_fmt)
        if entry_data['content_length']:
            try:
                # The $FILE_NAME content starts 16 bytes into the entry.
                file_name = unpack(
                    entry[offset + 16:],
                    attr_types_fmt['$FILE_NAME']
                )
            except (UnicodeDecodeError, TypeError):  # Invalid file name or invalid name length
                break
            # Perform checks to avoid false positives
            name_ok = file_name['name'] is not None
            namespace_ok = 0 <= file_name['namespace'] <= 3
            size_ok = file_name['real_size'] <= file_name['allocated_size']
            # NOTE(review): entries with zero flags but a very large parent
            # sequence number are presumably garbage — confirm threshold.
            features_ok = not (
                file_name['flags'] == 0 and
                file_name['parent_seq'] > 1024
            )
            if name_ok and namespace_ok and size_ok and features_ok:
                entry_data['file_info'] = file_name
                entries.append(entry_data)
            else:
                # First failed check: assume the rest is garbage too.
                break
        if entry_data['entry_length']:
            offset += entry_data['entry_length']
        else:
            # Avoid an infinite loop on a zero-length entry.
            break
    header['entries'] = entries
    header['valid'] = len(entries) > 0
    return header
202 |
203 |
def _integrate_attribute_list(parsed: Dict[str, Any], part: 'NTFSPartition', image: Any) -> None:
    """Integrate missing attributes in the parsed MFT entry.

    A $ATTRIBUTE_LIST attribute enumerates attributes stored in other
    (child) MFT entries. This reads the list — from disk when it is
    non-resident — locates the child entries and merges their attributes
    into parsed['attributes'] in place."""
    base_record = parsed['record_n']
    attrs = parsed['attributes']
    attr = attrs['$ATTRIBUTE_LIST']

    spc = part.sec_per_clus
    if 'runlist' in attr:
        # Non-resident list: read its content cluster run by cluster run.
        clusters_pos = 0
        entries = []
        size = attr['real_size']
        for entry in attr['runlist']:
            # Runlist offsets are relative to the previous run.
            clusters_pos += entry['offset']
            length = min(entry['length'] * spc * sector_size, size)
            size -= length
            real_pos = clusters_pos * spc + part.offset
            dump = sectors(image, real_pos, length, 1)
            entries += attribute_list_parser(dump)
        attr['content'] = {'entries': entries}
    else:
        # Resident list: entries were already decoded.
        entries = attr['content']['entries']

    # Divide entries by type
    types = set(e['type'] for e in entries)
    entries_by_type = {
        t: set(
            e['file_ref'] for e in entries
            if e['type'] == t and e['file_ref'] is not None
        )
        for t in types
    }
    # Remove completely "local" types or empty lists
    for num in list(entries_by_type):
        files = entries_by_type[num]
        if (
            len(files) == 0 or
            (len(files) == 1 and next(iter(files)) == base_record)
        ):
            del entries_by_type[num]

    mft_pos = part.mft_pos
    for num in entries_by_type:
        # Read contents of child entries
        for index in entries_by_type[num]:
            real_pos = mft_pos + index * FILE_size
            dump = sectors(image, real_pos, FILE_size)
            child_parsed = parse_file_record(dump)
            if 'attributes' not in child_parsed:
                continue
            # Update the main entry (parsed), but only with attributes of
            # children that actually reference this base record.
            if child_parsed['base_record'] == base_record:
                child_attrs = child_parsed['attributes']
                for name in child_attrs:
                    if name in multiple_attributes:
                        try:
                            attrs[name] += child_attrs[name]
                        except KeyError:
                            attrs[name] = child_attrs[name]
                    else:
                        attrs[name] = child_attrs[name]
264 |
265 |
class NTFSFile(File):
    """NTFS File."""
    def __init__(self, parsed: Dict[str, Any], offset: Optional[int], is_ghost: bool = False, ads: str = '') -> None:
        """Build a File from a parsed MFT entry.

        parsed   -- dictionary produced by parse_file_record
        offset   -- sector offset of the MFT entry (None for ghost files)
        is_ghost -- True when the entry was only deduced from an index
        ads      -- name of the alternate data stream ('' = main stream)
        """
        index = parsed['record_n']
        # ADS entries get a distinct "record:stream" identifier.
        ads_suffix = ':' + ads if ads != '' else ads
        if ads != '':
            index = str(index) + ads_suffix
        attrs = parsed['attributes']
        filenames = attrs['$FILE_NAME']
        datas = attrs.get('$DATA', [])

        # Stream size, taken from the $DATA attribute matching this ADS.
        size = None
        for attr in datas:
            if attr['name'] == ads:
                if 'real_size' in attr:
                    size = attr['real_size']
                elif not attr['non_resident']:
                    size = attr['content_size']
                break

        # Keep only $FILE_NAME attributes carrying a usable name.
        filtered = [
            f for f in filenames if 'content' in f and
            f['content'] is not None and
            'name_length' in f['content'] and
            f['content']['name_length'] > 0 and
            f['content']['name'] is not None
        ]
        name = best_name([
            (f['content']['namespace'], f['content']['name'] + ads_suffix)
            for f in filtered
        ])
        hasname = name is not None

        if not hasname:
            # Fall back to a synthetic name based on the record number.
            name = 'File_%s' % index

        std_info = attrs.get('$STANDARD_INFORMATION')

        # Flag 0x02 marks directories (never true for an ADS);
        # flag 0x01 marks in-use entries, so its absence means deleted.
        is_dir = (parsed['flags'] & 0x02) > 0 and not len(ads)
        is_del = (parsed['flags'] & 0x01) == 0
        File.__init__(self, index, name, size, is_dir, is_del, is_ghost)

        time_attribute = None

        # Additional attributes
        if hasname:
            first = filtered[0]['content']
            parent_id = first['parent_entry']
            File.set_parent(self, parent_id)
            File.set_offset(self, offset)
            # Prefer $STANDARD_INFORMATION timestamps, falling back to the
            # ones embedded in the first usable $FILE_NAME attribute.
            time_attribute = std_info or filtered[0]
        if time_attribute and 'content' in time_attribute:
            File.set_mac(
                self, time_attribute['content']['modification_time'],
                time_attribute['content']['access_time'],
                time_attribute['content']['creation_time'],
            )
        # Name of the alternate data stream ('' = main $DATA stream).
        self.ads = ads

    @staticmethod
    def _padded_bytes(image: Any, offset: int, size: int) -> bytes:
        """Read size bytes at byte offset, padding short reads with 0x00."""
        dump = sectors(image, offset, size, 1)
        if len(dump) < size:
            logging.warning(
                'Failed to read byte(s). Padding with 0x00. Offset: {} Size: '
                '{}'.format(offset, size))
            dump += bytearray(b'\x00' * (size - len(dump)))
        return dump

    def content_iterator(self, partition: 'NTFSPartition', image: Any, datas: List[Dict[str, Any]]) -> Iterator[bytes]:
        """Return an iterator for the contents of this file.

        datas must hold the non-resident $DATA attributes, sorted by
        start_VCN; the file content is yielded in bounded chunks."""
        vcn = 0
        spc = partition.sec_per_clus
        for attr in datas:
            diff = attr['start_VCN'] - vcn
            if diff > 0:
                # We do not try to fill with zeroes as this might produce huge useless files
                logging.warning(
                    u'Missing part for {}, {} clusters skipped'.format(self, diff)
                )
                vcn += diff
                yield b''

            clusters_pos = 0
            size = attr['real_size']

            if 'runlist' not in attr:
                logging.error(
                    u'Cannot restore {}, missing runlist'.format(self)
                )
                break

            for entry in attr['runlist']:
                length = min(entry['length'] * spc * sector_size, size)
                size -= length
                # Sparse runlist: no offset means a run of zero bytes.
                if entry['offset'] is None:
                    while length > 0:
                        amount = min(max_sectors*sector_size, length)
                        length -= amount
                        yield b'\x00' * amount
                    continue
                # Normal runlists
                clusters_pos += entry['offset']
                real_pos = clusters_pos * spc + partition.offset
                # Avoid to fill memory with huge blocks
                offset = 0
                while length > 0:
                    amount = min(max_sectors*sector_size, length)
                    position = real_pos*sector_size + offset
                    partial = self._padded_bytes(image, position, amount)
                    length -= amount
                    offset += amount
                    yield bytes(partial)
            vcn = attr['end_VCN'] + 1

    def get_content(self, partition: 'NTFSPartition') -> Optional[Union[bytes, Iterator[bytes]]]:
        """Extract the content of the file.

        This method works by extracting the $DATA attribute. Returns bytes
        for resident content, an iterator for non-resident content, or
        None when the content cannot be restored."""
        if self.is_ghost:
            logging.error(u'Cannot restore ghost file {}'.format(self))
            return None

        # Re-read and re-parse the MFT entry from disk.
        image = DiskScanner.get_image(partition.scanner)
        dump = sectors(image, File.get_offset(self), FILE_size)
        parsed = parse_file_record(dump)

        if not parsed['valid'] or 'attributes' not in parsed:
            logging.error(u'Invalid MFT entry for {}'.format(self))
            return None
        attrs = parsed['attributes']
        # Pull in attributes stored in child MFT entries, if any.
        if ('$ATTRIBUTE_LIST' in attrs and
                partition.sec_per_clus is not None):
            _integrate_attribute_list(parsed, partition, image)
        if '$DATA' not in attrs:
            attrs['$DATA'] = []
        # Only the $DATA attributes of this specific stream matter here.
        datas = [d for d in attrs['$DATA'] if d['name'] == self.ads]
        if not len(datas):
            if not self.is_directory:
                logging.error(u'Cannot restore $DATA attribute(s) '
                              'for {}'.format(self))
            return None

        # TODO implement compressed attributes
        for d in datas:
            if d['flags'] & 0x01:
                logging.error(u'Cannot restore compressed $DATA attribute(s) '
                              'for {}'.format(self))
                return None
            elif d['flags'] & 0x4000:
                logging.warning(u'Found encrypted $DATA attribute(s) '
                                'for {}'.format(self))

        # Handle resident file content
        if len(datas) == 1 and not datas[0]['non_resident']:
            single = datas[0]
            start = single['dump_offset'] + single['content_off']
            end = start + single['content_size']
            content = dump[start:end]
            return bytes(content)
        else:
            if partition.sec_per_clus is None:
                logging.error(u'Cannot restore non-resident $DATA '
                              'attribute(s) for {}'.format(self))
                return None
            non_resident = sorted(
                (d for d in attrs['$DATA'] if d['non_resident']),
                key=lambda x: x['start_VCN']
            )
            if len(non_resident) != len(datas):
                logging.warning(
                    u'Found leftover resident $DATA attributes for '
                    '{}'.format(self)
                )
            return self.content_iterator(partition, image, non_resident)

    def ignore(self) -> bool:
        """Determine which files should be ignored.

        $BadClus:$Bad would be as large as the partition; $UsnJrnl:$J
        (child of record 11, $Extend) is the change journal."""
        return (
            (self.index == '8:$Bad') or
            (self.parent == 11 and self.ads == '$J')  # $UsnJrnl
        )
449 |
450 |
class NTFSPartition(Partition):
    """Partition with additional fields for NTFS recovery."""

    def __init__(self, scanner: 'NTFSScanner', position: Optional[int] = None) -> None:
        # On NTFS the root directory is MFT entry number 5.
        Partition.__init__(self, 'NTFS', 5, scanner)
        self.sec_per_clus: Optional[int] = None    # sectors per cluster
        self.mft_pos: Optional[int] = position     # MFT offset, in sectors
        self.mftmirr_pos: Optional[int] = None     # MFT mirror offset

    def additional_repr(self) -> List[Tuple[str, Any]]:
        """Return additional values to show in the string representation."""
        labels = ('Sec/Clus', 'MFT offset', 'MFT mirror offset')
        values = (self.sec_per_clus, self.mft_pos, self.mftmirr_pos)
        return list(zip(labels, values))
466 |
467 |
468 | class NTFSScanner(DiskScanner):
469 | """NTFS Disk Scanner."""
    def __init__(self, pointer: Any) -> None:
        """Initialize empty scanner state over the given image pointer."""
        DiskScanner.__init__(self, pointer)
        # Sector positions of FILE (MFT) records found while scanning.
        self.found_file: Set[int] = set()
        # Parsed MFT entries kept for later review, keyed by sector offset.
        self.parsed_file_review: Dict[int, Dict[str, Any]] = {}
        # Sector positions of INDX records and their parsed contents.
        self.found_indx: Set[int] = set()
        self.parsed_indx: Dict[int, Dict[str, Any]] = {}
        # Sparse list over INDX positions, consumed by find_boundary.
        self.indx_list: Optional[SparseList[int]] = None
        # Positions of NTFS boot sectors found on disk.
        self.found_boot: List[int] = []
        # Candidate sectors-per-cluster values collected during the scan.
        self.found_spc: List[int] = []
479 |
480 | def feed(self, index: int, sector: bytes) -> Optional[str]:
481 | """Feed a new sector."""
482 | # check boot sector
483 | if sector.endswith(b'\x55\xAA') and b'NTFS' in sector[:8]:
484 | self.found_boot.append(index)
485 | return 'NTFS boot sector'
486 |
487 | # check file record
488 | if sector.startswith((b'FILE', b'BAAD')):
489 | self.found_file.add(index)
490 | return 'NTFS file record'
491 |
492 | # check index record
493 | if sector.startswith(b'INDX'):
494 | self.found_indx.add(index)
495 | return 'NTFS index record'
496 |
497 | @staticmethod
498 | def add_indx_entries(entries: List[Dict[str, Any]], part: NTFSPartition) -> None:
499 | """Insert new ghost files which were not already found."""
500 | for rec in entries:
501 | if (rec['record_n'] not in part.files and
502 | rec['$FILE_NAME'] is not None):
503 | # Compatibility with the structure of a MFT entry
504 | rec['attributes'] = {
505 | '$FILE_NAME': [{'content': rec['$FILE_NAME']}]
506 | }
507 | """Although the structure of r is similar to that of a MFT
508 | entry, flags were about the index, not about the file. We
509 | don't know if the element is a directory or not, hence we
510 | mark it as a file. It can be deduced if it is a directory
511 | by looking at the number of children, after the
512 | reconstruction."""
513 | rec['flags'] = 0x1
514 | part.add_file(NTFSFile(rec, None, is_ghost=True))
515 |
516 | def add_from_indx_root(self, parsed: Dict[str, Any], part: NTFSPartition) -> None:
517 | """Add ghost entries to part from INDEX_ROOT attributes in parsed."""
518 | for attribute in parsed['attributes']['$INDEX_ROOT']:
519 | if (attribute.get('content') is None or
520 | attribute['content'].get('records') is None):
521 | continue
522 | self.add_indx_entries(attribute['content']['records'], part)
523 |
524 | def most_likely_sec_per_clus(self) -> List[int]:
525 | """Determine the most likely value of sec_per_clus of each partition,
526 | to speed up the search."""
527 | counter = Counter()
528 | counter.update(self.found_spc)
529 | counter.update(2**i for i in range(8))
530 | return [i for i, _ in counter.most_common()]
531 |
532 | def find_boundary(self, part: NTFSPartition, mft_address: int, multipliers: List[int]) -> Tuple[Optional[int], Optional[int]]:
533 | """Determine the starting sector of a partition with INDX records."""
534 | nodes = (
535 | self.parsed_file_review[node.offset]
536 | for node in part.files.values()
537 | if node.offset in self.parsed_file_review and
538 | '$INDEX_ALLOCATION' in
539 | self.parsed_file_review[node.offset]['attributes']
540 | )
541 |
542 | text_list = self.indx_list
543 | width = text_list.__len__()
544 |
545 | base_pattern = {}
546 | for parsed in nodes:
547 | for attr in parsed['attributes']['$INDEX_ALLOCATION']:
548 | clusters_pos = 0
549 | if 'runlist' not in attr:
550 | continue
551 | runlist = attr['runlist']
552 | for entry in runlist:
553 | clusters_pos += entry['offset']
554 | base_pattern[clusters_pos] = parsed['record_n']
555 | if not len(base_pattern):
556 | return (None, None)
557 |
558 | results = []
559 | min_support = 2
560 | for sec_per_clus in multipliers:
561 | pattern = {
562 | i * sec_per_clus: base_pattern[i]
563 | for i in base_pattern
564 | }
565 |
566 | delta = min(pattern)
567 | normalized = {
568 | i-delta: pattern[i]
569 | for i in pattern if i-delta <= width
570 | # Avoid extremely long, useless patterns
571 | }
572 | if len(normalized) < min_support:
573 | continue
574 |
575 | pattern_list = SparseList(normalized)
576 | solution = approximate_matching(
577 | text_list, pattern_list, mft_address + delta, k=min_support
578 | )
579 | if solution is not None:
580 | # Avoid negative offsets and ambiguous situations
581 | solution[0] = [i-delta for i in solution[0] if i-delta >= 0]
582 | if len(solution[0]) == 1:
583 | positions, amount, perc = solution
584 | results.append((positions, perc, sec_per_clus))
585 | # Reasonably, this is a correct match
586 | if perc > 0.25 and amount > 256:
587 | break
588 | min_support = max(min_support, solution[1])
589 |
590 | if len(results):
591 | results.sort(key=lambda r: r[1])
592 | positions, _, spc = results[0]
593 | return (positions[0], spc)
594 | else:
595 | return (None, None)
596 |
    def add_from_indx_allocation(self, parsed: Dict[str, Any], part: NTFSPartition) -> None:
        """Add ghost entries to part from INDEX_ALLOCATION attributes in parsed.

        This procedure requires that the beginning of the partition has already
        been discovered."""
        # Positions of INDX records worth re-reading from disk.
        read_again = set()
        for attr in parsed['attributes']['$INDEX_ALLOCATION']:
            clusters_pos = 0
            if 'runlist' not in attr:
                continue
            runlist = attr['runlist']
            for entry in runlist:
                # Runlist offsets are relative: accumulate them.
                clusters_pos += entry['offset']
                real_pos = clusters_pos * part.sec_per_clus + part.offset
                if real_pos in self.parsed_indx:
                    content = self.parsed_indx[real_pos]
                    # Check if the entry matches
                    if parsed['record_n'] == content['parent']:
                        discovered = set(
                            c for c in content['children']
                            if c not in part.files
                        )
                        # If there are new files, read the INDX again
                        if len(discovered):
                            read_again.add(real_pos)

        img = DiskScanner.get_image(self)
        # Re-parse the selected INDX records and add their entries as
        # ghost files.
        for position in read_again:
            dump = sectors(img, position, INDX_size)
            entries = parse_indx_record(dump)['entries']
            self.add_indx_entries(entries, part)
628 |
629 | def add_from_attribute_list(self, parsed: Dict[str, Any], part: NTFSPartition, offset: int) -> None:
630 | """Add additional entries to part from attributes in ATTRIBUTE_LIST.
631 |
632 | Files with many attributes may have additional attributes not in the
633 | MFT entry. When this happens, it is necessary to find the other
634 | attributes. They may contain additional information, such as $DATA
635 | attributes for ADS. This procedure requires that the beginning of the
636 | partition has already been discovered."""
637 | image = DiskScanner.get_image(self)
638 | _integrate_attribute_list(parsed, part, image)
639 |
640 | attrs = parsed['attributes']
641 | if '$DATA' in attrs:
642 | for attribute in attrs['$DATA']:
643 | ads_name = attribute['name']
644 | if ads_name and len(ads_name):
645 | part.add_file(NTFSFile(parsed, offset, ads=ads_name))
646 |
647 | def add_from_mft_mirror(self, part: NTFSPartition) -> None:
648 | """Fix the first file records using the MFT mirror."""
649 | img = DiskScanner.get_image(self)
650 | mirrpos = part.mftmirr_pos
651 | if mirrpos is None:
652 | return
653 |
654 | for i in range(4):
655 | node = part.get(i)
656 | if node is None or node.is_ghost:
657 | position = mirrpos + i * FILE_size
658 | dump = sectors(img, position, FILE_size)
659 | parsed = parse_file_record(dump)
660 | if parsed['valid'] and '$FILE_NAME' in parsed['attributes']:
661 | node = NTFSFile(parsed, position)
662 | part.add_file(node)
663 | logging.info(
664 | u'Repaired MFT entry #%s - %s in partition at offset '
665 | '%s from backup', node.index, node.name, part.offset
666 | )
667 |
668 | def finalize_reconstruction(self, part: NTFSPartition) -> None:
669 | """Finish information gathering from a file.
670 |
671 | This procedure requires that the beginning of the
672 | partition has already been discovered."""
673 | logging.info('Adding extra attributes from $ATTRIBUTE_LIST')
674 | # Select elements with many attributes
675 | many_attributes_it = (
676 | node for node in list(part.files.values())
677 | if node.offset in self.parsed_file_review and
678 | '$ATTRIBUTE_LIST' in
679 | self.parsed_file_review[node.offset]['attributes']
680 | )
681 | for node in many_attributes_it:
682 | parsed = self.parsed_file_review[node.offset]
683 | self.add_from_attribute_list(parsed, part, node.offset)
684 |
685 | logging.info('Adding ghost entries from $INDEX_ALLOCATION')
686 | # Select only elements with $INDEX_ALLOCATION
687 | allocation_it = (
688 | node for node in list(part.files.values())
689 | if node.offset in self.parsed_file_review and
690 | '$INDEX_ALLOCATION' in
691 | self.parsed_file_review[node.offset]['attributes']
692 | )
693 | for node in allocation_it:
694 | parsed = self.parsed_file_review[node.offset]
695 | self.add_from_indx_allocation(parsed, part)
696 |
    def get_partitions(self) -> Dict[int, NTFSPartition]:
        """Get a list of the found partitions.

        Returns a dict mapping the sector position of each candidate MFT
        to its NTFSPartition. The reconstruction proceeds in phases:
        (1) parse MFT entries and group them by the deduced position of
        record #0, (2) parse INDX records, (3) read boot sectors to pin
        down geometry, (4) repair records from the MFT mirror, (5) find
        geometry of leftover partitions by approximate matching, and
        (6) merge fragments of fragmented MFTs."""
        partitioned_files: Dict[int, NTFSPartition] = {}
        img = DiskScanner.get_image(self)

        # Phase 1: parse every sector previously flagged as a FILE record.
        logging.info('Parsing MFT entries')
        for position in self.found_file:
            dump = sectors(img, position, FILE_size)
            parsed = parse_file_record(dump)
            attrs = parsed.get('attributes', {})
            if not parsed['valid'] or '$FILE_NAME' not in attrs:
                continue

            # Partition files based on corresponding entry 0
            if parsed['record_n'] is not None:
                # The MFT is contiguous here: record N sits N*FILE_size
                # sectors after record 0, so the MFT start is deducible.
                offset = position - parsed['record_n'] * FILE_size
                try:
                    part = partitioned_files[offset]
                except KeyError:
                    partitioned_files[offset] = NTFSPartition(self, offset)
                    part = partitioned_files[offset]
                attributes = parsed['attributes']
                # Named $DATA attributes are Alternate Data Streams: each
                # one becomes its own file entry.
                if '$DATA' in attributes:
                    for attribute in attributes['$DATA']:
                        ads_name = attribute['name']
                        if ads_name:
                            part.add_file(NTFSFile(parsed, position, ads=ads_name))
                """Add the file again, just in case the $DATA attributes are
                missing."""
                part.add_file(NTFSFile(parsed, position))

                # Handle information deduced from INDX records
                if '$INDEX_ROOT' in attrs:
                    self.add_from_indx_root(parsed, part)
                # Save for later use
                if '$INDEX_ALLOCATION' in attrs or '$ATTRIBUTE_LIST' in attrs:
                    self.parsed_file_review[position] = parsed
            # TODO [Future] handle files for which there is no record_number

        # Phase 2: parse INDX records
        logging.info('Parsing INDX records')
        for position in self.found_indx:
            dump = sectors(img, position, INDX_size)
            parsed = parse_indx_record(dump)
            if not parsed['valid']:
                continue

            entries = parsed['entries']
            # The most common parent reference is assumed to be the
            # directory owning this index node.
            referred = (el['file_info']['parent_entry'] for el in entries)
            record_n = Counter(referred).most_common(1)[0][0]
            # Save references for future access
            self.parsed_indx[position] = {
                'parent': record_n,
                'children': set(el['record_n'] for el in entries)
            }

        indx_info = self.parsed_indx
        self.indx_list = SparseList({
            pos: indx_info[pos]['parent'] for pos in indx_info
        })

        # Phase 3: extract boot record information
        logging.info('Reading boot sectors')
        for index in self.found_boot:
            dump = sectors(img, index, 1)
            parsed = unpack(dump, boot_sector_fmt)
            sec_per_clus = parsed['sectors_per_cluster']
            self.found_spc.append(sec_per_clus)
            relative = parsed['MFT_addr'] * sec_per_clus
            mirr_relative = parsed['MFTmirr_addr'] * sec_per_clus
            part = None
            # Look for matching partition, either as boot sector or backup
            # (the backup boot sector lives `sectors` sectors later, hence
            # the second delta rebases index to the partition start).
            for delta in (0, parsed['sectors']):
                index = index - delta
                address = relative + index
                # Set partition as recoverable
                if address in partitioned_files:
                    part = partitioned_files[address]
                    part.set_recoverable(True)
                    part.set_size(parsed['sectors'])
                    part.offset = index
                    part.sec_per_clus = sec_per_clus
                    part.mftmirr_pos = mirr_relative + index
                    break

        # Phase 4: repair MFT if the mirror is available
        for address in list(partitioned_files):
            # This could have been deleted in a previous iteration
            if address not in partitioned_files:
                continue
            part = partitioned_files[address]
            mirrpos = part.mftmirr_pos
            if mirrpos is None:
                entry = part.get(1)  # $MFTMirr
                if entry is None:
                    continue
                else:
                    # Infer MFT mirror position from the runlist of the
                    # $MFTMirr record's single non-resident $DATA attribute.
                    dump = sectors(img, entry.offset, FILE_size)
                    mirror = parse_file_record(dump)
                    if (mirror['valid'] and 'attributes' in mirror and
                            '$DATA' in mirror['attributes']):
                        datas = mirror['attributes']['$DATA']
                        if (len(datas) == 1 and datas[0]['non_resident'] and
                                'runlist' in datas[0] and
                                len(datas[0]['runlist']) > 0 and
                                'offset' in datas[0]['runlist'][0]):
                            relative = datas[0]['runlist'][0]['offset']
                            spc = part.sec_per_clus
                            if spc is None:
                                continue
                            mirrpos = relative * spc + part.offset
                            part.mftmirr_pos = mirrpos

            self.add_from_mft_mirror(part)

            # Remove bogus partitions generated by MFT mirrors: the mirror
            # itself parses as a tiny "partition" holding only records 0-3.
            if mirrpos in partitioned_files:
                bogus = partitioned_files[mirrpos]
                # Check if it looks like a MFT mirror
                if len(bogus.files) == 4 and max(bogus.files) < 4:
                    logging.debug(
                        'Dropping bogus NTFS partition with MFT '
                        'position %d generated by MFT mirror of '
                        'partition at offset %d',
                        bogus.mft_pos, part.offset
                    )
                    partitioned_files.pop(mirrpos)

        # Phase 5: acquire additional information from $INDEX_ALLOCATION
        logging.info('Finding partition geometry')
        most_likely = self.most_likely_sec_per_clus()
        for address in partitioned_files:
            part = partitioned_files[address]
            if part.offset is None:
                # Find geometry by approximate string matching
                offset, sec_per_clus = self.find_boundary(
                    part, address, most_likely
                )
                if offset is not None:
                    part.set_recoverable(True)
                    part.offset = offset
                    part.sec_per_clus = sec_per_clus
            else:
                offset, sec_per_clus = part.offset, part.sec_per_clus
            if offset is not None:
                logging.info(
                    'Finalizing MFT reconstruction of partition at offset %i',
                    offset
                )
                self.finalize_reconstruction(part)

        # Phase 6: merge pieces from fragmented MFT
        for address in list(partitioned_files):
            # This could have been deleted in a previous iteration
            if address not in partitioned_files:
                continue
            part = partitioned_files[address]
            entry = part.get(0)  # $MFT
            if entry is None or part.sec_per_clus is None:
                continue
            dump = sectors(img, entry.offset, FILE_size)
            parsed = parse_file_record(dump)
            if not parsed['valid'] or 'attributes' not in parsed:
                continue

            if '$ATTRIBUTE_LIST' in parsed['attributes']:
                _integrate_attribute_list(parsed, part, img)
            attrs = parsed['attributes']
            if '$DATA' not in attrs or len(attrs['$DATA']) < 1:
                continue

            if 'runlist' not in attrs['$DATA'][0]:
                continue
            runlist = attrs['$DATA'][0]['runlist']
            if len(runlist) > 1:
                logging.info(
                    'MFT for partition at offset %d is fragmented. Trying to '
                    'merge %d parts...', part.offset, len(runlist)
                )
                # Runlist offsets are relative to the previous run; walk
                # them accumulating the absolute cluster position.
                clusters_pos = runlist[0]['offset']
                spc = part.sec_per_clus
                size = runlist[0]['length']
                for entry in runlist[1:]:
                    clusters_pos += entry['offset']
                    real_pos = clusters_pos * part.sec_per_clus + part.offset
                    # A fragment was (wrongly) detected as its own partition
                    # whose deduced MFT start is real_pos - size*spc.
                    position = real_pos - size*spc
                    if position in partitioned_files:
                        piece = partitioned_files[position]
                        if piece.offset is None or piece.offset == part.offset:
                            # Refuse to merge if both sides hold a concrete
                            # (non-ghost) file with the same record number.
                            conflicts = [
                                i for i in piece.files if
                                not piece.files[i].is_ghost and
                                i in part.files and
                                not part.files[i].is_ghost
                            ]
                            if not len(conflicts):
                                logging.debug(
                                    'Merging partition with MFT offset %d into'
                                    ' %s (fragmented MFT)', piece.mft_pos, part
                                )
                                # Merge the partitions
                                merge(part, piece)
                                # Remove the fragment
                                partitioned_files.pop(position)
                            else:
                                logging.debug(
                                    'NOT merging partition with MFT offset %d into'
                                    ' %s (possible fragmented MFT) due to conflicts', piece.mft_pos, part
                                )
                    size += entry['length']

        return partitioned_files
910 |
--------------------------------------------------------------------------------
/recuperabit/fs/ntfs_fmt.py:
--------------------------------------------------------------------------------
1 | """NTFS format descriptors."""
2 |
3 | # RecuperaBit
4 | # Copyright 2014-2021 Andrea Lazzarotto
5 | #
6 | # This file is part of RecuperaBit.
7 | #
8 | # RecuperaBit is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # RecuperaBit is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
20 |
21 |
22 | from datetime import datetime, timezone, timedelta
23 |
24 | from ..utils import printable, unpack
25 |
26 |
27 | time_start = datetime(1601, 1, 1, tzinfo=timezone.utc)
28 |
def printable_name(name):
    """Return a printable name decoded in UTF-16."""
    decoded_chars = []
    for start in range(0, len(name), 2):
        pair = name[start:start + 2]
        try:
            decoded_chars.append(pair.decode('utf-16'))
        except UnicodeDecodeError:
            # Undecodable pair: keep a NUL placeholder.
            decoded_chars.append('\x00')
    text = ''.join(decoded_chars)
    # basic check for false positives: a run of NULs means garbage
    if '\x00\x00\x00' in text:
        return None
    return printable(text, '#')
43 |
44 |
def windows_time(timestamp):
    """Convert a date-time value from Microsoft filetime to UTC."""
    try:
        # Filetime counts 100 ns ticks; 10000 ticks make one millisecond.
        ticks = int.from_bytes(timestamp, byteorder='little', signed=False)
        return time_start + timedelta(milliseconds=ticks // 10000)
    except (ValueError, OverflowError, OSError):
        # Out-of-range values cannot be represented: signal with None.
        return None
53 |
54 |
def index_entries(dump):
    """Interpret the entries of an index.

    Entries are parsed sequentially until the dump is exhausted, a
    broken or empty entry is found, or the last-entry flag (0x2) is set.
    Returns the list of parsed entries that carry actual content.
    """
    offset = 0
    entries = []
    while offset < len(dump):
        parsed = unpack(dump[offset:], indx_dir_entry_fmt)
        filename = parsed['$FILE_NAME']
        entry_length = parsed['entry_length']
        # Bug fix: a truncated record yields None fields and a broken
        # $FILE_NAME can yield a None name; in Python 3 `None > 0` and
        # `len(None)` raise TypeError, so treat those as invalid entries
        # instead of crashing.
        valid_length = entry_length is not None and entry_length > 0
        valid_name = bool(filename and filename.get('name'))
        if not (valid_length and valid_name):
            break
        if parsed['content_length']:
            entries.append(parsed)
        offset += entry_length
        # Last entry
        if parsed['flags'] & 0x2:
            break
    # TODO handle carving of remnant entries in slack space
    return entries
77 |
78 |
def index_root_parser(dump):
    """Parse the entries contained in a $INDEX_ROOT attribute."""
    start = unpack(dump, indx_header_fmt)['off_start_list']
    return index_entries(dump[start:])
85 |
86 |
def runlist_unpack(runlist):
    """Parse an attribute runlist.

    Each run starts with a header byte whose low nibble is the size of the
    length field and whose high nibble is the size of the offset field.
    A zero header byte terminates the list.
    """
    pieces = []
    while len(runlist) and runlist[0] != 0:
        off_bytes, len_bytes = divmod(runlist[0], 16)
        end = len_bytes + off_bytes
        piece = unpack(runlist, [
            ('length', ('i', 1, len_bytes)),
            ('offset', ('+i', len_bytes + 1, end))
        ])
        # Stop on a run that cannot be decoded completely.
        if piece['length'] is None or piece['offset'] is None:
            break
        pieces.append(piece)
        runlist = runlist[end + 1:]
    return pieces
102 |
103 |
def attribute_list_parser(dump):
    """Parse entries contained in a $ATTRIBUTE_LIST attribute."""
    # The entry layout is fixed, so build the descriptor once.
    entry_descriptor = [
        ('type', ('i', 0, 3)),
        ('length', ('i', 4, 5)),
        ('name_length', ('i', 6, 6)),
        ('name_off', ('i', 7, 7)),
        ('start_VCN', ('i', 8, 15)),
        ('file_ref', ('i', 16, 19)),
        ('id', ('i', 24, 24))
    ]
    content = []
    while len(dump):
        entry = unpack(dump, entry_descriptor)
        length = entry['length']
        # A zero or unparsable (None) length would loop forever: stop.
        if not length:
            break
        content.append(entry)
        dump = dump[length:]
    return content
124 |
125 |
def try_filename(dump):
    """Try to parse a $FILE_NAME attribute.

    Returns the parsed attribute dict, or an empty dict for a broken
    attribute (e.g. when a None name_length makes the bounds callable
    raise TypeError).
    """
    try:
        # Bug fix: the parsed result was previously discarded (missing
        # `return`), so the function always returned None on success and
        # callers such as index_entries crashed on `'name' in None`.
        return unpack(dump, attr_types_fmt['$FILE_NAME'])
    except TypeError:  # Broken attribute
        return {}
132 |
# Each descriptor below is a list of (label, (formatter, first, last))
# tuples consumed by utils.unpack: `formatter` is either a format code
# ('i' little-endian int, '+i' signed, '>i' big-endian, 's' raw string)
# or a callable, and first/last are inclusive byte bounds (possibly
# computed from already-parsed fields).

# Header of a FILE (MFT) record.
entry_fmt = [
    ('signature', ('s', 0, 3)),
    ('off_fixup', ('i', 4, 5)),
    ('n_entries', ('i', 6, 7)),
    ('LSN', ('i', 8, 15)),
    ('seq_val', ('i', 16, 17)),
    ('link_count', ('i', 18, 19)),
    ('off_first', ('i', 20, 21)),
    ('flags', ('i', 22, 23)),
    ('size_used', ('i', 24, 27)),
    ('size_alloc', ('i', 28, 31)),
    ('base_record', ('i', 32, 35)),
    ('record_n', ('i', 44, 47))  # Available only for NTFS >= 3.1
]

# NTFS boot sector (VBR) fields used for geometry reconstruction.
boot_sector_fmt = [
    ('OEM_name', ('s', 3, 10)),
    ('bytes_per_sector', ('i', 11, 12)),
    ('sectors_per_cluster', ('i', 13, 13)),
    ('sectors', ('i', 40, 47)),
    ('MFT_addr', ('i', 48, 55)),
    ('MFTmirr_addr', ('i', 56, 63)),
    ('MFT_entry_size', ('i', 64, 64)),
    ('idx_size', ('i', 68, 68)),
    ('signature', ('s', 510, 511))
]

# Header of an INDX (index allocation) record.
indx_fmt = [
    ('signature', ('s', 0, 3)),
    ('off_fixup', ('i', 4, 5)),
    ('n_entries', ('i', 6, 7)),
    ('LSN', ('i', 8, 15)),
    ('seq_val', ('i', 16, 17))
]

# Index node header; offsets are relative to the start of the header.
indx_header_fmt = [
    ('off_start_list', ('i', 0, 3)),
    ('off_end_list', ('i', 4, 7)),
    ('off_end_buffer', ('i', 8, 11)),
    ('flags', ('i', 12, 15))
]

# One directory index entry; the embedded $FILE_NAME attribute is parsed
# with try_filename and its end depends on content_length.
indx_dir_entry_fmt = [
    ('record_n', ('i', 0, 3)),
    ('entry_length', ('i', 8, 9)),
    ('content_length', ('i', 10, 11)),
    ('flags', ('i', 12, 15)),
    ('$FILE_NAME', (
        try_filename, 16, lambda r: 15 + (
            r['content_length'] if r['content_length'] is not None else 0
        )
    ))
    # The following is not very useful so it's not worth computing
    # 'VCN_child', (
    #     lambda s: int(str(s[::-1]).encode('hex'),16) if len(s) else None,
    #     lambda r: r['entry_length'] - (8 if r['flags'] & 0x1 else 0),
    #     lambda r: r['entry_length']
    # )
]

# Common header of every MFT attribute; the optional name is stored as
# UTF-16 at name_off.
attr_header_fmt = [
    ('type', ('i', 0, 3)),
    ('length', ('i', 4, 7)),
    ('non_resident', ('i', 8, 8)),
    ('name_length', ('i', 9, 9)),
    ('name_off', ('i', 10, 11)),
    ('flags', ('i', 12, 13)),
    ('id', ('i', 14, 15)),
    ('name', (
        printable_name,
        lambda r: r['name_off'],
        lambda r: r['name_off'] + r['name_length']*2 - 1
    ))
]

# Extra header fields for resident attributes (content stored in place).
attr_resident_fmt = [
    ('content_size', ('i', 16, 19)),
    ('content_off', ('i', 20, 21))
]

# Extra header fields for non-resident attributes (content in clusters
# described by the runlist).
attr_nonresident_fmt = [
    ('start_VCN', ('i', 16, 23)),
    ('end_VCN', ('i', 24, 31)),
    ('runlist_offset', ('i', 32, 33)),
    ('compression_unit', ('i', 34, 35)),
    ('allocated_size', ('i', 40, 47)),
    ('real_size', ('i', 48, 55)),
    ('initialized_size', ('i', 56, 63)),
    ('runlist', (
        runlist_unpack,
        lambda r: r['runlist_offset'],
        lambda r: r['allocated_size']
    ))
]

# Mapping from attribute type identifiers to their canonical names.
attr_names = {
    16: '$STANDARD_INFORMATION',
    32: '$ATTRIBUTE_LIST',
    48: '$FILE_NAME',
    80: '$SECURITY_DESCRIPTOR',
    96: '$VOLUME_NAME',
    112: '$VOLUME_INFORMATION',
    128: '$DATA',
    144: '$INDEX_ROOT',
    160: '$INDEX_ALLOCATION',
    176: '$BITMAP'
}

# This structure extracts only interesting attributes.
attr_types_fmt = {
    '$STANDARD_INFORMATION': [
        ('creation_time', (windows_time, 0, 7)),
        ('modification_time', (windows_time, 8, 15)),
        ('MFT_modification_time', (windows_time, 16, 23)),
        ('access_time', (windows_time, 24, 31)),
        ('flags', ('i', 32, 35))
    ],
    '$ATTRIBUTE_LIST': [
        ('entries', (attribute_list_parser, 0, 1024))
    ],
    '$FILE_NAME': [
        ('parent_entry', ('i', 0, 5)),
        ('parent_seq', ('i', 6, 7)),
        ('creation_time', (windows_time, 8, 15)),
        ('modification_time', (windows_time, 16, 23)),
        ('MFT_modification_time', (windows_time, 24, 31)),
        ('access_time', (windows_time, 32, 39)),
        ('allocated_size', ('i', 40, 47)),
        ('real_size', ('i', 48, 55)),
        ('flags', ('i', 56, 59)),
        ('name_length', ('i', 64, 64)),
        ('namespace', ('i', 65, 65)),
        ('name', (printable_name, 66, lambda r: r['name_length']*2 + 65))
    ],
    '$INDEX_ROOT': [
        ('attr_type', ('i', 0, 3)),
        ('sorting_rule', ('i', 4, 7)),
        ('record_bytes', ('i', 8, 11)),
        ('record_clusters', ('i', 12, 12)),
        ('records', (index_root_parser, 16, lambda r: r['record_bytes']))
    ]
}
275 |
--------------------------------------------------------------------------------
/recuperabit/logic.py:
--------------------------------------------------------------------------------
1 | """Filesystem-independent algorithmic logic."""
2 |
3 | # RecuperaBit
4 | # Copyright 2014-2021 Andrea Lazzarotto
5 | #
6 | # This file is part of RecuperaBit.
7 | #
8 | # RecuperaBit is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # RecuperaBit is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
20 |
21 |
22 | import bisect
23 | import codecs
24 | import logging
25 | import os
26 | import os.path
27 | import sys
28 | import time
29 | import types
30 | from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, Iterator, Set, Tuple, TypeVar, Generic
31 |
32 | T = TypeVar('T')
33 |
34 | if TYPE_CHECKING:
35 | from .fs.core_types import File, Partition
36 |
37 |
class SparseList(Generic[T]):
    """List which only stores values at some places.

    Unset positions read as `default`; `keys` mirrors the populated
    indexes of `elements` and is kept sorted at all times.
    """
    def __init__(self, data: Optional[Dict[int, T]] = None, default: Optional[T] = None) -> None:
        self.keys: List[int] = []  # This is always kept in order
        self.elements: Dict[int, T] = {}
        self.default: Optional[T] = default
        if data is not None:
            self.keys = sorted(data)
            self.elements.update(data)

    def __len__(self) -> int:
        # Length is one past the highest populated index.
        if not self.keys:
            return 0
        return self.keys[-1] + 1

    def __getitem__(self, index: int) -> Optional[T]:
        return self.elements.get(index, self.default)

    def __setitem__(self, index: int, item: T) -> None:
        if item == self.default:
            # Assigning the default value erases the slot instead.
            if index in self.elements:
                self.elements.pop(index)
                self.keys.pop(bisect.bisect_left(self.keys, index))
            return
        if index not in self.elements:
            bisect.insort(self.keys, index)
        self.elements[index] = item

    def __contains__(self, element: T) -> bool:
        # Membership is tested against stored values, not indexes.
        return element in self.elements.values()

    def __iter__(self) -> Iterator[int]:
        return iter(self.keys)

    def __repr__(self) -> str:
        parts = []
        previous = 0
        if self.elements:
            first = self.keys[0]
            parts.append('{} -> {}'.format(first, repr(self.elements[first])))
            previous = first
        for pos in range(1, len(self.elements)):
            key = self.keys[pos]
            if key <= previous + 2:
                # Draw short gaps as '__' placeholders.
                while previous < key - 1:
                    parts.append('__')
                    previous += 1
                parts.append(repr(self.elements[key]))
            else:
                # Long gaps restart the listing with an explicit index.
                parts.append('\n... {} -> {}'.format(key, repr(self.elements[key])))
            previous = key

        return '[' + ', '.join(parts) + ']'

    def iterkeys(self) -> Iterator[int]:
        """An iterator over the keys of actual elements."""
        return iter(self)

    def iterkeys_rev(self) -> Iterator[int]:
        """An iterator over the keys of actual elements (reversed)."""
        for key in reversed(self.keys):
            yield key

    def itervalues(self) -> Iterator[T]:
        """An iterator over the elements."""
        return (self.elements[key] for key in self.keys)

    def wipe_interval(self, bottom: int, top: int) -> None:
        """Remove elements between bottom and top.

        When bottom > top the interval wraps around: only keys in
        [top, bottom) survive."""
        if bottom > top:
            survives = lambda key: top <= key < bottom
        else:
            survives = lambda key: not (bottom <= key < top)
        kept = set()
        for key in self.keys:
            if survives(key):
                kept.add(key)
            else:
                del self.elements[key]
        self.keys = sorted(kept)
126 |
127 |
def preprocess_pattern(pattern: SparseList[T]) -> Dict[T, List[int]]:
    """Preprocess a SparseList for approximate string matching.

    This function performs preprocessing for the Baeza-Yates--Perleberg
    fast and practical approximate string matching algorithm: it maps
    each pattern value to the list of its offsets from the pattern end.
    """
    result: Dict[T, List[int]] = {}
    length = len(pattern)
    for k in pattern:
        name = pattern[k]
        # Bug fix: the previous `elif name != result[name][-1]` compared
        # the value against the last stored *offset*, which could wrongly
        # drop an offset when values are integers. Keys are iterated in
        # ascending order, so offsets are strictly decreasing and can
        # always be appended.
        result.setdefault(name, []).append(length - k - 1)
    return result
142 |
143 |
def approximate_matching(records: SparseList[T], pattern: SparseList[T], stop: int, k: int = 1) -> Optional[List[Union[Set[int], int, float]]]:
    """Find the best match for a given pattern.

    The Baeza-Yates--Perleberg algorithm requires a preprocessed pattern.
    This function takes as input a SparseList of records and a pattern
    that will be preprocessed. The records in the SparseList should be
    formed by single elements. k is the minimum value for support.

    Returns [match_offsets, support, support_ratio] for the best
    alignment(s) of pattern against records before index `stop`, or None
    when there is no match (or either list is empty)."""

    msize = pattern.__len__()
    if records.__len__() == 0 or msize == 0:
        return None

    # lookup maps each value to its offsets from the end of the pattern.
    lookup = preprocess_pattern(pattern)
    # count is a circular buffer (indexes mod msize) of votes per
    # candidate alignment; default 0 keeps it sparse.
    count: SparseList[int] = SparseList(default=0)
    match_offsets: Set[int] = set()

    i = 0
    j = 0  # previous value of i

    # logging.debug('Starting approximate matching up to %i', stop)
    # Loop only on indexes where there are elements
    for i in records:
        if i > stop+msize-1:
            break

        # zero-out the parts that were skipped: counters between the
        # previous and the current position (mod msize) belong to
        # alignments that can no longer be voted for.
        count.wipe_interval(j % msize, i % msize)
        j = i

        offsets = set(lookup.get(records[i], []))
        for off in offsets:
            # A record matching at offset `off` votes for the alignment
            # starting at i+off-msize+1.
            count[(i + off) % msize] += 1
            score = count[(i + off) % msize]
            if score == k:
                match_offsets.add(i+off-msize+1)
            if score > k:
                # A better support level was found: raise the bar and
                # keep only this alignment.
                k = score
                match_offsets = set([i+off-msize+1])

    if len(match_offsets):
        logging.debug(
            'Found MATCH in positions {} '
            'with weight {} ({}%)'.format(
                match_offsets, k,
                k * 100.0 / len(pattern.keys)
            )
        )
        return [match_offsets, k, float(k) / len(pattern.keys)]
    else:
        # logging.debug('No match found')
        return None
197 |
198 |
def makedirs(path: str) -> bool:
    """Make directories if they do not exist.

    Returns True on success (including when the directory already
    exists), False after logging the error otherwise."""
    try:
        # exist_ok replaces the old errno==17 (EEXIST) check and avoids
        # the race between checking and creating. Note: if `path` exists
        # but is not a directory, this now correctly reports failure
        # instead of silently returning True.
        os.makedirs(path, exist_ok=True)
    except OSError as error:
        logging.error(error)
        return False
    return True
210 |
211 |
def recursive_restore(node: 'File', part: 'Partition', outputdir: str, make_dirs: bool = True) -> None:
    """Restore a directory structure starting from a file node.

    Writes the node (and, recursively, its children) under outputdir,
    recreating the directory hierarchy and restoring modification and
    access times where available. make_dirs is disabled on recursive
    calls because the parent directory has just been created."""
    parent_path = str(
        part[node.parent].full_path(part) if node.parent is not None
        else ''
    )

    file_path = os.path.join(parent_path, node.name)
    restore_parent_path = os.path.join(outputdir, parent_path)
    restore_path = os.path.join(outputdir, file_path)

    try:
        content = node.get_content(part)
    except NotImplementedError:
        logging.error(u'Restore of #%s %s is not supported', node.index,
                      file_path)
        content = None

    if make_dirs:
        if not makedirs(restore_parent_path):
            return

    # A node with children is treated as a directory even if not flagged
    # as one (e.g. ghost directories rebuilt from index records).
    is_directory = node.is_directory or len(node.children) > 0

    if is_directory:
        logging.info(u'Restoring #%s %s', node.index, file_path)
        if not makedirs(restore_path):
            return

    if is_directory and content is not None:
        # Unusual case: write the data stream next to the directory.
        logging.warning(u'Directory %s has data content!', file_path)
        restore_path += '_recuperabit_content'

    try:
        if content is not None:
            logging.info(u'Restoring #%s %s', node.index, file_path)
            with codecs.open(restore_path, 'wb') as outfile:
                # get_content may stream pieces lazily via a generator.
                if isinstance(content, types.GeneratorType):
                    for piece in content:
                        outfile.write(piece)
                else:
                    outfile.write(content)
        else:
            if not is_directory:
                # Empty file
                open(restore_path, 'wb').close()
    except IOError:
        logging.error(u'IOError when trying to create %s', restore_path)

    try:
        # Restore Modification + Access time
        mtime, atime, _ = node.get_mac()
        if mtime is not None:
            # NOTE(review): assumes atime is set whenever mtime is; an
            # mtime without atime would raise AttributeError here —
            # confirm against the File implementations.
            atime = time.mktime(atime.astimezone().timetuple())
            mtime = time.mktime(mtime.astimezone().timetuple())
            os.utime(restore_path, (atime, mtime))
    except IOError:
        logging.error(u'IOError while setting atime and mtime of %s', restore_path)

    if is_directory:
        for child in node.children:
            if not child.ignore():
                recursive_restore(child, part, outputdir, make_dirs=False)
            else:
                logging.info(u'Skipping ignored file {}'.format(child))
277 |
--------------------------------------------------------------------------------
/recuperabit/utils.py:
--------------------------------------------------------------------------------
1 | """Collection of utility functions."""
2 |
3 | # RecuperaBit
4 | # Copyright 2014-2021 Andrea Lazzarotto
5 | #
6 | # This file is part of RecuperaBit.
7 | #
8 | # RecuperaBit is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU General Public License as published by
10 | # the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # RecuperaBit is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU General Public License
19 | # along with RecuperaBit. If not, see <http://www.gnu.org/licenses/>.
20 |
21 |
22 | from datetime import datetime
23 | import logging
24 | import pprint
25 | import string
26 | import sys
27 | import time
28 | from typing import TYPE_CHECKING, Any, Iterable, Optional, List, Dict, Tuple, Union, Callable
29 | import unicodedata
30 | import io
31 |
32 | from .fs.constants import sector_size
33 |
# Shared pretty-printer used for debug dumps.
printer: pprint.PrettyPrinter = pprint.PrettyPrinter(indent=4)
# All Unicode code points; a generator consumed once just below.
all_chars = (chr(i) for i in range(sys.maxunicode))
# Code points whose Unicode category is not 'C*' (control, format,
# surrogate, private use, unassigned) — i.e. everything displayable.
unicode_printable: set[str] = set(
    c for c in all_chars
    if not unicodedata.category(c)[0].startswith('C')
)
# Printable ASCII, excluding the trailing whitespace characters of
# string.printable ('\t\n\r\x0b\x0c' — the space is kept).
ascii_printable: set[str] = set(string.printable[:-5])

if TYPE_CHECKING:
    from .fs.core_types import File, Partition
45 |
def sectors(image: io.BufferedReader, offset: int, size: int, bsize: int = sector_size, fill: bool = True) -> Optional[bytearray]:
    """Read `size` blocks of `bsize` bytes starting at block `offset`.

    On failure the result is zero-filled when `fill` is True, otherwise
    None is returned."""
    dump = None
    try:
        image.seek(offset * bsize)
    except (IOError, OverflowError, ValueError):
        # Unreachable position (e.g. negative or absurdly large offset).
        pass
    else:
        try:
            dump = image.read(size * bsize)
        except (IOError, MemoryError):
            logging.warning(
                "Cannot read sector(s). Filling with 0x00. Offset: {} Size: "
                "{} Bsize: {}".format(offset, size, bsize)
            )
    if dump is None:
        if not fill:
            return None
        dump = size * bsize * b'\x00'
    return bytearray(dump)
68 |
def unixtime(dtime: Optional[datetime]) -> float:
    """Convert datetime to UNIX epoch."""
    try:
        # Dates outside the platform's representable range map to 0.
        return time.mktime(dtime.timetuple()) if dtime is not None else 0.0
    except ValueError:
        return 0.0
77 |
78 |
# format:
# [(label, (formatter, lower, higher)), ...]
def unpack(data: bytes, fmt: List[Tuple[str, Tuple[Union[str, Callable[[bytes], Any]], Union[int, Callable[[Dict[str, Any]], Optional[int]]], Union[int, Callable[[Dict[str, Any]], Optional[int]]]]]]) -> Dict[str, Any]:
    """Extract formatted information from a string of bytes.

    Each descriptor is (label, (formatter, lower, higher)) with inclusive
    byte bounds. Bounds may be callables receiving the fields parsed so
    far; a None bound yields a None field. The formatter is a callable,
    's' (repr-style string), a codec name starting with 'utf', or an
    integer code: 'i' little-endian unsigned, '>i' big-endian, a '+'
    requests sign extension."""
    result: Dict[str, Any] = {}
    for label, (formatter, lower, higher) in fmt:
        # Bounds may depend on previously decoded fields.
        low = lower(result) if callable(lower) else lower
        high = higher(result) if callable(higher) else higher

        if low is None or high is None:
            result[label] = None
            continue

        piece = data[low:high + 1]
        if callable(formatter):
            result[label] = formatter(piece)
            continue
        if formatter == 's':
            result[label] = str(piece)
        if formatter.startswith('utf'):
            result[label] = piece.decode(formatter)
        if formatter.endswith('i') and len(formatter) < 4:
            # Use little-endian by default. Big-endian with >i.
            # Force sign-extension of first bit with >+i / +i.
            if len(piece):
                result[label] = int.from_bytes(
                    piece,
                    byteorder='big' if formatter.startswith('>') else 'little',
                    signed='+' in formatter
                )
            else:
                result[label] = None
    return result
119 |
120 |
def feed_all(image: io.BufferedReader, scanners: List[Any], indexes: Iterable[int]) -> List[int]:
    """Scan the disk image and feed every scanner with each sector.

    Returns the list of sector indexes at least one scanner flagged as
    interesting. Scanning stops at the first unreadable/empty sector."""
    interesting: List[int] = []
    for index in indexes:
        sector = sectors(image, index, 1, fill=False)
        if not sector:
            break

        for scanner in scanners:
            outcome = scanner.feed(index, sector)
            if outcome is None:
                continue
            logging.info('Found {} at sector {}'.format(outcome, index))
            interesting.append(index)
    return interesting
135 |
136 |
def printable(text: str, default: str = '.', alphabet: Optional[set[str]] = None) -> str:
    """Replace unprintable characters in a text with a default one."""
    allowed = unicode_printable if alphabet is None else alphabet
    return ''.join(char if char in allowed else default for char in text)
142 |
143 |
144 |
145 |
146 |
147 |
148 |
def readable_bytes(amount: Optional[int]) -> str:
    """Return a human readable string representing a size in bytes."""
    if amount is None:
        return '??? B'
    if amount < 1:
        return '%.2f B' % amount
    prefixes = {0: '', 1: 'K', 2: 'M', 3: 'G', 4: 'T'}
    # Pick the largest power of 1024 that keeps the value >= 1.
    exponent = max(p for p in prefixes if amount / 1024.**p >= 1)
    return '%.2f %sB' % (amount / 1024.**exponent, prefixes[exponent])
161 |
162 |
163 | def _file_tree_repr(node: 'File') -> str:
164 | """Give a nice representation for the tree."""
165 | desc = (
166 | ' [GHOST]' if node.is_ghost else
167 | ' [DELETED]' if node.is_deleted else ''
168 | )
169 | tail = '/' if node.is_directory else ''
170 | data = [
171 | ('Id', node.index),
172 | ('Offset', node.offset),
173 | (
174 | 'Offset bytes',
175 | node.offset * sector_size
176 | if node.offset is not None else None
177 | )
178 | # ('MAC', node.mac)
179 | ]
180 | if not node.is_directory:
181 | data += [('Size', readable_bytes(node.size))]
182 | return u'%s%s (%s) %s' % (
183 | node.name, tail, ', '.join(a + ': ' + str(b) for a, b in data), desc
184 | )
185 |
186 |
def tree_folder(directory: 'File', padding: int = 0) -> str:
    """Return a tree-like textual representation of a directory."""
    indent = ' ' * padding
    output: List[str] = [indent + _file_tree_repr(directory)]
    child_padding = padding + 2
    child_indent = ' ' * child_padding
    for child in directory.children:
        # Recurse into anything that is (or contains) a directory entry.
        if len(child.children) or child.is_directory:
            output.append(tree_folder(child, child_padding))
        else:
            output.append(child_indent + _file_tree_repr(child))
    return '\n'.join(output)
204 |
205 |
def _bodyfile_repr(node: 'File', path: str) -> str:
    """Return a body file line for node."""
    suffix = '/' if node.is_directory or len(node.children) else ''
    size = node.size if node.size is not None else 0
    fields = [
        '0',                        # MD5
        path + node.name + suffix,  # name
        node.index,                 # inode
        '0', '0', '0',              # mode, UID, GID
        size,
        unixtime(node.mac['access']),
        unixtime(node.mac['modification']),
        unixtime(node.mac['creation']),
        '0'
    ]
    return '|'.join(str(field) for field in fields)
220 |
221 |
def bodyfile_folder(directory: 'File', path: str = '') -> List[str]:
    """Create a body file compatible with TSK 3.x.

    Format:
    '#MD5|name|inode|mode_as_string|UID|GID|size|atime|mtime|ctime|crtime'
    See also: http://wiki.sleuthkit.org/index.php?title=Body_file"""
    output: List[str] = [_bodyfile_repr(directory, path)]
    prefix = path + directory.name + '/'
    for child in directory.children:
        if len(child.children) or child.is_directory:
            output.extend(bodyfile_folder(child, prefix))
        else:
            output.append(_bodyfile_repr(child, prefix))
    return output
236 |
237 |
238 | def _ltx_clean(label: Any) -> str:
239 | """Small filter to prepare strings to be included in LaTeX code."""
240 | clean = str(label).replace('$', r'\$').replace('_', r'\_')
241 | if clean[0] == '-':
242 | clean = r'\textminus{}' + clean[1:]
243 | return clean
244 |
245 |
def _tikz_repr(node: 'File') -> str:
    """Represent the node for a Tikz diagram."""
    if node.is_ghost:
        style = '[ghost]'
    elif node.is_deleted:
        style = '[deleted]'
    else:
        style = ''
    return r'node %s{%s\enskip{}%s}' % (
        style, _ltx_clean(node.index), _ltx_clean(node.name)
    )
252 |
253 |
def tikz_child(directory: 'File', padding: int = 0) -> Tuple[str, int]:
    """Write a child row for Tikz representation.

    Returns the Tikz source for this subtree together with the number of
    descendants, so callers can emit matching [missing] placeholders."""
    indent = ' ' * padding
    rows: List[str] = [r'%schild {%s' % (indent, _tikz_repr(directory))]
    total: int = len(directory.children)
    for child in directory.children:
        subtree, descendants = tikz_child(child, padding + 4)
        rows.append(subtree)
        total += descendants
    rows.append('}')
    # One placeholder per descendant keeps the 'grow via three points'
    # layout aligned.
    rows.extend('child [missing] {}' for _ in range(total))
    return '\n'.join(rows).replace('\n}', '}'), total
267 |
268 |
def tikz_part(part: 'Partition') -> str:
    """Create LaTeX code to represent the directory structure as a nice Tikz
    diagram.

    See also: http://www.texample.net/tikz/examples/filesystem-tree/"""

    preamble = (r"""%\usepackage{tikz}
%\usetikzlibrary{trees}""")

    begin_tree = r"""\begin{tikzpicture}[%
  grow via three points={one child at (1.75em,-1.75em) and
  two children at (1.75em,-1.75em) and (1.75em,-3.5em)},
  edge from parent path={(\tikzparentnode.south) |- (\tikzchildnode.west)}]
  \scriptsize
"""
    end_tree = r"""\end{tikzpicture}"""

    rows = [r'\node [root] {File System Structure}']
    # Both the recovered root and the lost-files tree become children.
    for subtree in (part.root, part.lost):
        rows.append(tikz_child(subtree, 4)[0])
    rows.append(';')

    return '%s\n\n%s\n%s\n%s' % (
        preamble, begin_tree, '\n'.join(rows), end_tree
    )
293 |
294 |
def csv_part(part: 'Partition') -> list[str]:
    """Provide a CSV representation for a partition.

    Returns a header line followed by one line per file in part.files."""
    contents = [
        ','.join(('Id', 'Parent', 'Name', 'Full Path', 'Modification Time',
                  'Access Time', 'Creation Time', 'Size (bytes)',
                  'Size (human)', 'Offset (bytes)', 'Offset (sectors)',
                  'Directory', 'Deleted', 'Ghost'))
    ]
    for index in part.files:
        obj = part.files[index]
        # Double any embedded '"' (RFC 4180) so names and paths that
        # contain quotes do not break the quoted CSV fields.
        name = str(obj.name).replace('"', '""')
        full_path = str(obj.full_path(part)).replace('"', '""')
        contents.append(
            u'%s,%s,"%s","%s",%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' % (
                obj.index, obj.parent, name,
                full_path,
                obj.mac['modification'], obj.mac['access'],
                obj.mac['creation'], obj.size,
                readable_bytes(obj.size),
                (obj.offset * sector_size
                 if obj.offset is not None else None),
                obj.offset,
                '1' if obj.is_directory else '',
                '1' if obj.is_deleted else '',
                '1' if obj.is_ghost else ''
            )
        )
    return contents
321 |
322 |
323 | def _sub_locate(text: str, directory: 'File', part: 'Partition') -> List[Tuple['File', str]]:
324 | """Helper for locate."""
325 | lines: List[Tuple['File', str]] = []
326 | for entry in sorted(directory.children, key=lambda node: node.name):
327 | path = entry.full_path(part)
328 | if text in path.lower():
329 | lines.append((entry, path))
330 | if len(entry.children) or entry.is_directory:
331 | lines += _sub_locate(text, entry, part)
332 | return lines
333 |
334 |
def locate(part: 'Partition', text: str) -> List[Tuple['File', str]]:
    """Return paths of files matching the text (case-insensitive)."""
    needle = text.lower()
    results: List[Tuple['File', str]] = []
    # Lost files are searched before the regular tree, as before.
    for root in (part.lost, part.root):
        results += _sub_locate(needle, root, part)
    return results
342 |
343 |
def merge(part: 'Partition', piece: 'Partition') -> None:
    """Merge piece into part (both are partitions).

    Files from piece are added when part lacks them or only holds a
    ghost entry for the same index."""
    for index, entry in piece.files.items():
        existing = part.files.get(index)
        if existing is None or existing.is_ghost:
            part.add_file(entry)
352 |
--------------------------------------------------------------------------------