├── .github
    └── workflows
    │   └── build.yml
├── CMakeLists.txt
├── LICENCE
├── README.md
├── ntfs2btrfs.8.in
└── src
    ├── blake2-impl.h
    ├── blake2b-ref.c
    ├── btrfs.h
    ├── compress.cpp
    ├── config.h.in
    ├── crc32c-gas.S
    ├── crc32c-masm.asm
    ├── crc32c.c
    ├── crc32c.h
    ├── decomp.cpp
    ├── ebiggers
        ├── aligned_malloc.c
        ├── common_defs.h
        ├── decompress_common.c
        ├── decompress_common.h
        ├── lzx_common.c
        ├── lzx_common.h
        ├── lzx_constants.h
        ├── lzx_decompress.c
        ├── system_compression.h
        ├── xpress_constants.h
        └── xpress_decompress.c
    ├── ntfs.cpp
    ├── ntfs.h
    ├── ntfs2btrfs.cpp
    ├── ntfs2btrfs.h
    ├── rollback.cpp
    ├── sha256.c
    ├── xxhash.c
    └── xxhash.h


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build x86_64-pc-linux-gnu
 2 | on: [push]
 3 | jobs:
 4 |   x86_64-pc-linux-gnu:
 5 |     runs-on: ubuntu-rolling
 6 |     steps:
 7 |       - run: apt-get update
 8 |       - run: apt-get install -y g++ git cmake nodejs pkg-config libfmt-dev liblzo2-dev libzstd-dev zlib1g-dev
 9 |       # The first 8 characters of the commit hash name the checkout directory.
10 |       - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV
11 |       - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA}
12 |       - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }}
13 |       - run: mkdir -p debug-work
14 |       - run: mkdir -p release-work
15 |       # Debug build, installed under install/debug.
16 |       - run: |
17 |           cmake -DCMAKE_BUILD_TYPE=Debug \
18 |             -DCMAKE_INSTALL_PREFIX=${PWD}/install/debug \
19 |             -S ${SHORT_SHA} -B debug-work && \
20 |           cmake --build debug-work --parallel `nproc` && \
21 |           cmake --install debug-work
22 |       # RelWithDebInfo build, installed under install.
23 |       - run: |
24 |           cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
25 |             -DCMAKE_INSTALL_PREFIX=${PWD}/install \
26 |             -S ${SHORT_SHA} -B release-work && \
27 |           cmake --build release-work --parallel `nproc` && \
28 |           cmake --install release-work
29 |       # The "overwrite" input is only understood by upload-artifact v4 and later.
30 |       - uses: actions/upload-artifact@v4
31 |         with:
32 |           name: ${{ github.sha }}
33 |           overwrite: true
34 |           path: |
35 |             install
36 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14.3)
 2 | 
 3 | # CMP0091 only exists in CMake 3.15+; setting an unknown policy is a hard error,
 4 | # so guard it for the 3.14.x releases the minimum version above still admits.
 5 | if(POLICY CMP0091)
 6 |     cmake_policy(SET CMP0091 NEW)
 7 | endif()
 8 | set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
 9 | 
10 | project(ntfs2btrfs VERSION 20240115)
11 | 
12 | include(GNUInstallDirs)
13 | 
14 | option(WITH_ZLIB "Include zlib support" ON)
15 | option(WITH_LZO "Include lzo support" ON)
16 | option(WITH_ZSTD "Include zstd support" ON)
17 | 
18 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
19 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ntfs2btrfs.8.in ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8)
20 | 
21 | set(CMAKE_CXX_STANDARD 20)
22 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
23 | 
24 | find_package(fmt REQUIRED)
25 | 
26 | if(WITH_ZLIB)
27 |     find_package(ZLIB REQUIRED)
28 | endif()
29 | 
30 | # pkg-config is only used to locate lzo and zstd, so it is not required (it is
31 | # usually absent on Windows/MSVC) when both of those options are switched off.
32 | if(WITH_LZO OR WITH_ZSTD)
33 |     find_package(PkgConfig REQUIRED)
34 | endif()
35 | 
36 | # IMPORTED_TARGET creates PkgConfig::<prefix> targets carrying the include
37 | # directories and compile flags from the .pc file along with the libraries.
38 | if(WITH_LZO)
39 |     pkg_check_modules(LZO REQUIRED IMPORTED_TARGET lzo2)
40 | endif()
41 | if(WITH_ZSTD)
42 |     pkg_check_modules(ZSTD REQUIRED IMPORTED_TARGET libzstd)
43 | endif()
44 | 
45 | set(SRC_FILES src/ntfs2btrfs.cpp
46 |     src/ntfs.cpp
47 |     src/decomp.cpp
48 |     src/compress.cpp
49 |     src/rollback.cpp
50 |     src/crc32c.c
51 |     src/xxhash.c
52 |     src/sha256.c
53 |     src/blake2b-ref.c
54 |     src/ebiggers/lzx_decompress.c
55 |     src/ebiggers/lzx_common.c
56 |     src/ebiggers/aligned_malloc.c
57 |     src/ebiggers/decompress_common.c
58 |     src/ebiggers/xpress_decompress.c)
59 | 
60 | if(MSVC)
61 |     enable_language(ASM_MASM)
62 |     list(APPEND SRC_FILES src/crc32c-masm.asm)
63 | else()
64 |     enable_language(ASM)
65 |     list(APPEND SRC_FILES src/crc32c-gas.S)
66 | endif()
67 | 
68 | add_executable(ntfs2btrfs ${SRC_FILES})
69 | 
70 | # Per-config generator expression, so Debug is also detected under multi-config generators.
71 | target_compile_definitions(ntfs2btrfs PRIVATE $<$<CONFIG:Debug>:_GLIBCXX_DEBUG>)
72 | target_include_directories(ntfs2btrfs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
73 | 
74 | target_link_libraries(ntfs2btrfs PRIVATE fmt::fmt-header-only)
75 | if(WITH_ZLIB)
76 |     target_link_libraries(ntfs2btrfs PRIVATE ZLIB::ZLIB)
77 | endif()
78 | if(WITH_LZO)
79 |     target_link_libraries(ntfs2btrfs PRIVATE PkgConfig::LZO)
80 | endif()
81 | if(WITH_ZSTD)
82 |     target_link_libraries(ntfs2btrfs PRIVATE PkgConfig::ZSTD)
83 | endif()
84 | 
85 | # Work around bug in MSVC version of cmake - see https://gitlab.kitware.com/cmake/cmake/-/merge_requests/4257
86 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded         "")
87 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL      "")
88 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug    "")
89 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "")
90 | 
91 | if(MSVC)
92 |     target_compile_options(ntfs2btrfs PRIVATE /W4)
93 | else()
94 |     target_compile_options(ntfs2btrfs PRIVATE -Wall -Wextra -Wno-address-of-packed-member -Wconversion -Wno-unknown-pragmas -Werror=pointer-arith)
95 | endif()
96 | 
97 | install(TARGETS ntfs2btrfs DESTINATION ${CMAKE_INSTALL_SBINDIR})
98 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8 DESTINATION ${CMAKE_INSTALL_MANDIR}/man8)
99 | 


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Ntfs2btrfs
  2 | ==========
  3 | 
  4 | Ntfs2btrfs is a tool which does in-place conversion of Microsoft's NTFS
  5 | filesystem to the open-source filesystem Btrfs, much as `btrfs-convert`
  6 | does for ext2. The original image is saved as a reflink copy at
  7 | `image/ntfs.img`, and if you want to keep the conversion you can delete
  8 | this to free up space.
  9 | 
 10 | Although I believe this tool to be stable, please note that I take no
 11 | responsibility if something goes awry!
 12 | 
 13 | You're probably also interested in [WinBtrfs](https://github.com/maharmstone/btrfs),
 14 | which is a Btrfs filesystem driver for Windows.
 15 | 
 16 | Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's
 17 | "WOF compressed data", and whose code I've used here.
 18 | 
 19 | Usage
 20 | -----
 21 | 
 22 | On Windows, from an Administrator command prompt:
 23 | 
 24 | `ntfs2btrfs.exe D:\`
 25 | 
 26 | Bear in mind that it won't work with your boot drive or a drive containing a
 27 | pagefile that's currently in use.
 28 | 
 29 | If you are using WinBtrfs, you will need to clear the readonly flag on the
 30 | `image` subvolume before you can delete it.
 31 | 
 32 | On Linux, as root:
 33 | 
 34 | `ntfs2btrfs /dev/sda1`
 35 | 
 36 | Installation
 37 | ------------
 38 | 
 39 | On Windows, go to the [Releases page](https://github.com/maharmstone/ntfs2btrfs/releases) and
 40 | download the latest Zip file, or use [Scoop](https://github.com/ScoopInstaller/Main/blob/master/bucket/ntfs2btrfs.json).
 41 | 
 42 | For Linux:
 43 | * [Arch](https://aur.archlinux.org/packages/ntfs2btrfs)
 44 | * [Fedora](https://src.fedoraproject.org/rpms/ntfs2btrfs) (thanks to [Conan-Kudo](https://github.com/Conan-Kudo))
 45 | * Gentoo - available as sys-fs/ntfs2btrfs in the guru repository
 46 | * [Debian](https://packages.debian.org/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
 47 | * [Ubuntu](https://packages.ubuntu.com/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko))
 48 | * [openSUSE](https://build.opensuse.org/package/show/filesystems/ntfs2btrfs) (thanks to David Sterba)
 49 | 
 50 | For other distributions or operating systems, you will need to compile it yourself - see
 51 | below.
 52 | 
 53 | Changelog
 54 | ---------
 55 | 
 56 | * 20240115
 57 |   * Fixed compilation on GCC 14 (`-Werror=incompatible-pointer-types` now enabled by default)
 58 | 
 59 | * 20230501
 60 |   * Fixed inline extent items being written out of order (not diagnosed by `btrfs check`)
 61 |   * Fixed metadata items being written with wrong level value (not diagnosed by `btrfs check`)
 62 |   * ADSes with overly-long names now get skipped
 63 | 
 64 | * 20220812
 65 |   * Added --no-datasum option, to skip calculating checksums
 66 |   * LXSS / WSL metadata is now preserved
 67 |   * Fixed lowercase drive letters not being recognized
 68 |   * Fixed crash due to iterator invalidation (thanks to nyanpasu64)
 69 |   * Fixed corruption when NTFS places file in last megabyte of disk
 70 | 
 71 | * 20210923
 72 |   * Added (Btrfs) compression support (zlib, lzo, and zstd)
 73 |   * Added support for other hash algorithms: xxhash, sha256, and blake2
 74 |   * Added support for rolling back to NTFS
 75 |   * Added support for NT4-style security descriptors
 76 |   * Increased conversion speed for volumes with many inodes
 77 |   * Fixed bug when fragmented file was in superblock location
 78 |   * Fixed buffer overflow when reading security descriptors
 79 |   * Fixed bug where filesystems would be corrupted in a way that `btrfs check` doesn't pick up
 80 | 
 81 | * 20210523
 82 |   * Improved handling of large compressed files
 83 | 
 84 | * 20210402 (source code only release)
 85 |   * Fixes for compilation on non-amd64 architectures
 86 | 
 87 | * 20210105
 88 |   * Added support for NTFS compression
 89 |   * Added support for "WOF compressed data"
 90 |   * Fixed problems caused by sparse files
 91 |   * Miscellaneous bug fixes
 92 | 
 93 | * 20201108
 94 |   * Improved error handling
 95 |   * Added better message if NTFS is corrupted or unclean
 96 |   * Better handling of relocations
 97 | 
 98 | * 20200330
 99 |   * Initial release
100 | 
101 | Compilation
102 | -----------
103 | 
104 | On Windows, open the source directory in a recent version of MSVC, right-click
105 | on CMakeLists.txt, and click Compile.
106 | 
107 | On Linux:
108 | 
109 |     mkdir build
110 |     cd build
111 |     cmake ..
112 |     make
113 | 
114 | You'll also need [libfmt](https://github.com/fmtlib/fmt) installed - it should be
115 | in your package manager.
116 | 
117 | Compression support requires zlib, lzo, and/or zstd - again, they will be in your
118 | package manager. See also the cmake options WITH_ZLIB, WITH_LZO, and WITH_ZSTD,
119 | if you want to disable this.
120 | 
121 | What works
122 | ----------
123 | 
124 | * Files
125 | * Directories
126 | * Symlinks
127 | * Other reparse points
128 | * Security descriptors
129 | * Alternate data streams
130 | * DOS attributes (hidden, system, etc.)
131 | * Rollback to original NTFS image
132 | * Preservation of LXSS metadata
133 | 
134 | What doesn't work
135 | -----------------
136 | 
137 | * Windows' old extended attributes (you're not using these)
138 | * Large (i.e. >16KB) ADSes (you're not using these either)
139 | * Preservation of the case-sensitivity flag
140 | * Unusual cluster sizes (i.e. not 4 KB)
141 | * Encrypted files
142 | 
143 | Can I boot Windows from Btrfs with this?
144 | ----------------------------------------
145 | 
146 | Yes, if the stars are right. See [Quibble](https://github.com/maharmstone/quibble).
147 | 


--------------------------------------------------------------------------------
/ntfs2btrfs.8.in:
--------------------------------------------------------------------------------
 1 | .TH NTFS2BTRFS "8" "January 2024" "ntfs2btrfs @PROJECT_VERSION@" "System Administration"
 2 | .SH NAME
 3 | ntfs2btrfs \- convert ntfs filesystem to btrfs filesystem
 4 | .SH SYNOPSIS
 5 | \fBntfs2btrfs\fR [options] \fIdevice\fR
 6 | .SH DESCRIPTION
 7 | This is a tool which does in-place conversion of Microsoft's NTFS filesystem
 8 | to the open-source filesystem Btrfs, much as \fBbtrfs\-convert\fR does for ext2.
 9 | .SH OPTIONS
10 | .PP
11 | \-c \fI<ALGO>\fR, \-\-compress=\fI<ALGO>\fR
12 | .RS 4
13 | Uses the specified algorithm to recompress files that are compressed on the
14 | NTFS volume; valid choices are \fIzstd\fR, \fIlzo\fR, \fIzlib\fR, or \fInone\fR.
15 | If you don't specify any value, \fIzstd\fR will be used, assuming it's been
16 | compiled in. Note that this will be ignored if you also select \-\-no\-datasum (see
17 | below).
18 | .RE
19 | .PP
20 | \-h \fI<ALGO>\fR, \-\-hash=\fI<ALGO>\fR
21 | .RS 4
22 | Uses the specified checksumming algorithm; valid choices are \fIcrc32c\fR,
23 | \fIxxhash\fR, \fIsha256\fR, and \fIblake2\fR. The first of these will be used by
24 | default, and should be fine for most purposes.
25 | .RE
26 | .PP
27 | \-r, \-\-rollback
28 | .RS 4
29 | Tries to restore the original NTFS filesystem. See \fBROLLBACK\fR below.
30 | .RE
31 | .PP
32 | \-d, \-\-no\-datasum
33 | .RS 4
34 | Skips calculating checksums for existing data. Don't choose this unless you're
35 | sure it's what you want.
36 | .RE
37 | .SH ROLLBACK
38 | The original filesystem image is saved as \fIimage/ntfs.img\fR as a reflink copy. You
39 | can restore this at any time by using the rollback option, provided that you've
40 | not moved the data by doing a balance. Bear in mind that this restores the volume
41 | to how it was when you did the conversion, meaning that any changes you've made
42 | since will be lost.
43 | .PP
44 | If you decide to keep the conversion, you can remove the \fIimage\fR subvolume at
45 | any point to free up space.
46 | .SH XATTRS
47 | Various bits of NTFS-specific data are stored as Btrfs xattrs, in a manner that
48 | the Windows btrfs driver understands (\fBhttps://github.com/maharmstone/btrfs\fR). Some
49 | should also be understood by tools such as Wine and Samba, but YMMV.
50 | .IP \[bu] 2
51 | The NTFS attribute value is stored as a hex string at \fIuser.DOSATTRIB\fR.
52 | .IP \[bu] 2
53 | The reparse points on directories are stored at \fIuser.reparse\fR. NTFS symlinks should
54 | be converted into POSIX symlinks. The data for other reparse points will be stored as
55 | the contents of the files.
56 | .IP \[bu] 2
57 | The NT security descriptor is stored as \fIsecurity.NTACL\fR.
58 | .IP \[bu] 2
59 | Alternate data streams on files are stored in the \fIuser\fR namespace, e.g. \fI:Zone.Identifier\fR
60 | becomes \fIuser.Zone.Identifier\fR.
61 | .SH SEE ALSO
62 | .BR btrfs (8),
63 | .BR mkfs.btrfs (8).
64 | .SH AUTHOR
65 | Written by Mark Harmstone (\fBmark@harmstone.com\fR).
66 | 
67 | .SH WEB
68 | .IP https://github.com/maharmstone/ntfs2btrfs
69 | 


--------------------------------------------------------------------------------
/src/blake2-impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |    BLAKE2 reference source code package - reference C implementations
  3 | 
  4 |    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
  5 |    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
  6 |    your option.  The terms of these licenses can be found at:
  7 | 
  8 |    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
  9 |    - OpenSSL license   : https://www.openssl.org/source/license.html
 10 |    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
 11 | 
 12 |    More information about the BLAKE2 hash function can be found at
 13 |    https://blake2.net.
 14 | */
 15 | #pragma once
 16 | 
 17 | #include <stdint.h>
 18 | #include <string.h>
 19 | 
 20 | #define NATIVE_LITTLE_ENDIAN
 21 | 
 22 | #if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
 23 |   #if   defined(_MSC_VER)
 24 |     #define BLAKE2_INLINE __inline
 25 |   #elif defined(__GNUC__)
 26 |     #define BLAKE2_INLINE __inline__
 27 |   #else
 28 |     #define BLAKE2_INLINE
 29 |   #endif
 30 | #else
 31 |   #define BLAKE2_INLINE inline
 32 | #endif
 33 | 
 34 | static BLAKE2_INLINE uint32_t load32( const void *src )
 35 | {
 36 | #if defined(NATIVE_LITTLE_ENDIAN)
 37 |   uint32_t w;
 38 |   memcpy(&w, src, sizeof w);
 39 |   return w;
 40 | #else
 41 |   const uint8_t *p = ( const uint8_t * )src;
 42 |   return (( uint32_t )( p[0] ) <<  0) |
 43 |          (( uint32_t )( p[1] ) <<  8) |
 44 |          (( uint32_t )( p[2] ) << 16) |
 45 |          (( uint32_t )( p[3] ) << 24) ;
 46 | #endif
 47 | }
 48 | 
 49 | static BLAKE2_INLINE uint64_t load64( const void *src )
 50 | {
 51 | #if defined(NATIVE_LITTLE_ENDIAN)
 52 |   uint64_t w;
 53 |   memcpy(&w, src, sizeof w);
 54 |   return w;
 55 | #else
 56 |   const uint8_t *p = ( const uint8_t * )src;
 57 |   return (( uint64_t )( p[0] ) <<  0) |
 58 |          (( uint64_t )( p[1] ) <<  8) |
 59 |          (( uint64_t )( p[2] ) << 16) |
 60 |          (( uint64_t )( p[3] ) << 24) |
 61 |          (( uint64_t )( p[4] ) << 32) |
 62 |          (( uint64_t )( p[5] ) << 40) |
 63 |          (( uint64_t )( p[6] ) << 48) |
 64 |          (( uint64_t )( p[7] ) << 56) ;
 65 | #endif
 66 | }
 67 | 
 68 | static BLAKE2_INLINE uint16_t load16( const void *src )
 69 | {
 70 | #if defined(NATIVE_LITTLE_ENDIAN)
 71 |   uint16_t w;
 72 |   memcpy(&w, src, sizeof w);
 73 |   return w;
 74 | #else
 75 |   const uint8_t *p = ( const uint8_t * )src;
 76 |   return ( uint16_t )((( uint32_t )( p[0] ) <<  0) |
 77 |                       (( uint32_t )( p[1] ) <<  8));
 78 | #endif
 79 | }
 80 | 
 81 | static BLAKE2_INLINE void store16( void *dst, uint16_t w )
 82 | {
 83 | #if defined(NATIVE_LITTLE_ENDIAN)
 84 |   memcpy(dst, &w, sizeof w);
 85 | #else
 86 |   uint8_t *p = ( uint8_t * )dst;
 87 |   *p++ = ( uint8_t )w; w >>= 8;
 88 |   *p++ = ( uint8_t )w;
 89 | #endif
 90 | }
 91 | 
 92 | static BLAKE2_INLINE void store32( void *dst, uint32_t w )
 93 | {
 94 | #if defined(NATIVE_LITTLE_ENDIAN)
 95 |   memcpy(dst, &w, sizeof w);
 96 | #else
 97 |   uint8_t *p = ( uint8_t * )dst;
 98 |   p[0] = (uint8_t)(w >>  0);
 99 |   p[1] = (uint8_t)(w >>  8);
100 |   p[2] = (uint8_t)(w >> 16);
101 |   p[3] = (uint8_t)(w >> 24);
102 | #endif
103 | }
104 | 
/* Write w to dst as eight bytes, least significant first (no alignment requirement). */
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
  /* Host byte order already matches: copy the value verbatim. */
  memcpy( dst, &w, sizeof( w ) );
#else
  uint8_t *bytes = ( uint8_t * )dst;
  unsigned idx;

  /* Byte idx receives bits [8*idx, 8*idx + 7] of w. */
  for( idx = 0; idx < 8; ++idx )
    bytes[idx] = ( uint8_t )( w >> ( 8 * idx ) );
#endif
}
121 | 
/* Read six bytes from src as a little-endian value; the top 16 bits of the result are zero. */
static BLAKE2_INLINE uint64_t load48( const void *src )
{
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t value = 0;
  int idx;

  /* Assemble from the most significant byte (index 5) down to index 0. */
  for( idx = 5; idx >= 0; --idx )
    value = ( value << 8 ) | bytes[idx];

  return value;
}
132 | 
/* Write the low 48 bits of w to dst as six bytes, least significant first; bits 48-63 are dropped. */
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
  uint8_t *bytes = ( uint8_t * )dst;
  unsigned idx;

  /* Byte idx receives bits [8*idx, 8*idx + 7] of w. */
  for( idx = 0; idx < 6; ++idx )
    bytes[idx] = ( uint8_t )( w >> ( 8 * idx ) );
}
143 | 
/*
 * Rotate the 32-bit value w right by c bits.
 *
 * The count is reduced modulo 32 and the left-shift amount is masked, so
 * neither shift can reach the type width.  c == 0 and c == 32 therefore
 * return w unchanged instead of invoking undefined behaviour.
 */
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
  const unsigned n = c & 31u;

  return ( w >> n ) | ( w << ( ( 32u - n ) & 31u ) );
}
148 | 
/*
 * Rotate the 64-bit value w right by c bits.
 *
 * The count is reduced modulo 64 and the left-shift amount is masked, so
 * neither shift can reach the type width.  c == 0 and c == 64 therefore
 * return w unchanged instead of invoking undefined behaviour.
 */
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
  const unsigned n = c & 63u;

  return ( w >> n ) | ( w << ( ( 64u - n ) & 63u ) );
}
153 | 
      /* BLAKE2_PACKED(x): wraps declaration x so that it is laid out without padding:
         pack(push, 1) / pack(pop) pragmas on MSVC, __attribute__((packed)) elsewhere. */
154 | #if defined(_MSC_VER)
155 | #define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop))
156 | #else
157 | #define BLAKE2_PACKED(x) x __attribute__((packed))
158 | #endif
159 | 
      /* Fixed sizes used by the BLAKE2b declarations in this header. */
160 | enum blake2b_constant
161 | {
162 |     BLAKE2B_BLOCKBYTES = 128, /* length of blake2b_state::buf */
163 |     BLAKE2B_OUTBYTES   = 64,
164 |     BLAKE2B_KEYBYTES   = 64,
165 |     BLAKE2B_SALTBYTES  = 16, /* length of blake2b_param__::salt */
166 |     BLAKE2B_PERSONALBYTES = 16 /* length of blake2b_param__::personal */
167 | };
168 | 
      /* Running hash state shared by the blake2b_* routines in blake2b-ref.c. */
169 | typedef struct blake2b_state__
170 | {
171 |     uint64_t h[8]; /* eight state words, seeded from blake2b_IV */
172 |     uint64_t t[2]; /* 128-bit counter: t[0] is the low word, t[1] takes the carry */
173 |     uint64_t f[2]; /* flag words: f[0] set by blake2b_set_lastblock, f[1] by blake2b_set_lastnode */
174 |     uint8_t  buf[BLAKE2B_BLOCKBYTES]; /* input buffered until a whole block can be compressed */
175 |     size_t   buflen; /* number of valid bytes in buf */
176 |     size_t   outlen; /* digest length, copied from blake2b_param__::digest_length */
177 |     uint8_t  last_node; /* when non-zero, blake2b_set_lastblock also sets f[1] */
178 | } blake2b_state;
179 | 
      /* Packed 64-byte parameter block; blake2b_init_param reads it as eight 64-bit words.
         The number in each trailing comment is the cumulative byte offset at the END of that field. */
180 | BLAKE2_PACKED(struct blake2b_param__
181 | {
182 |     uint8_t  digest_length; /* 1 */
183 |     uint8_t  key_length;    /* 2 */
184 |     uint8_t  fanout;        /* 3 */
185 |     uint8_t  depth;         /* 4 */
186 |     uint32_t leaf_length;   /* 8 */
187 |     uint32_t node_offset;   /* 12 */
188 |     uint32_t xof_length;    /* 16 */
189 |     uint8_t  node_depth;    /* 17 */
190 |     uint8_t  inner_length;  /* 18 */
191 |     uint8_t  reserved[14];  /* 32 */
192 |     uint8_t  salt[BLAKE2B_SALTBYTES]; /* 48 */
193 |     uint8_t  personal[BLAKE2B_PERSONALBYTES];  /* 64 */
194 | });
195 | 
196 | typedef struct blake2b_param__ blake2b_param;
197 | 


--------------------------------------------------------------------------------
/src/blake2b-ref.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    BLAKE2 reference source code package - reference C implementations
  3 | 
  4 |    Copyright 2012, Samuel Neves <sneves@dei.uc.pt>.  You may use this under the
  5 |    terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
  6 |    your option.  The terms of these licenses can be found at:
  7 | 
  8 |    - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
  9 |    - OpenSSL license   : https://www.openssl.org/source/license.html
 10 |    - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
 11 | 
 12 |    More information about the BLAKE2 hash function can be found at
 13 |    https://blake2.net.
 14 | */
 15 | 
 16 | #include <stdint.h>
 17 | #include <string.h>
 18 | #include <stdio.h>
 19 | 
 20 | #include "blake2-impl.h"
 21 | 
      /* Initial values: copied into S->h by blake2b_init0 and into v[8..15] by blake2b_compress. */
 22 | static const uint64_t blake2b_IV[8] =
 23 | {
 24 |   0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
 25 |   0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
 26 |   0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
 27 |   0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
 28 | };
 29 | 
      /* Message schedule: row r lists the indices into m[] that the G macro uses in round r.
         There are 12 rows, one per ROUND call; rows 10 and 11 repeat rows 0 and 1. */
 30 | static const uint8_t blake2b_sigma[12][16] =
 31 | {
 32 |   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
 33 |   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
 34 |   { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
 35 |   {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
 36 |   {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
 37 |   {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
 38 |   { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
 39 |   { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
 40 |   {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
 41 |   { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
 42 |   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
 43 |   { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
 44 | };
 45 | 
 46 | static int blake2b_update(blake2b_state* S, const void* in, size_t inlen); /* forward declaration; the definition follows blake2b_compress */
 47 | 
 48 | static void blake2b_set_lastnode( blake2b_state *S )
 49 | {
 50 |   S->f[1] = (uint64_t)-1;
 51 | }
 52 | 
 53 | /* Some helper functions, not necessarily useful */
 54 | static int blake2b_is_lastblock( const blake2b_state *S )
 55 | {
 56 |   return S->f[0] != 0;
 57 | }
 58 | 
 59 | static void blake2b_set_lastblock( blake2b_state *S )
 60 | {
 61 |   if( S->last_node ) blake2b_set_lastnode( S );
 62 | 
 63 |   S->f[0] = (uint64_t)-1;
 64 | }
 65 | 
 66 | static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
 67 | {
 68 |   S->t[0] += inc;
 69 |   S->t[1] += ( S->t[0] < inc );
 70 | }
 71 | 
 72 | static void blake2b_init0( blake2b_state *S )
 73 | {
 74 |   size_t i;
 75 |   memset( S, 0, sizeof( blake2b_state ) );
 76 | 
 77 |   for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];
 78 | }
 79 | 
 80 | /* init xors IV with input parameter block */
 81 | static void blake2b_init_param( blake2b_state *S, const blake2b_param *P )
 82 | {
 83 |   const uint8_t *p = ( const uint8_t * )( P );
 84 |   size_t i;
 85 | 
 86 |   blake2b_init0( S );
 87 | 
 88 |   /* IV XOR ParamBlock */
 89 |   for( i = 0; i < 8; ++i )
 90 |     S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );
 91 | 
 92 |   S->outlen = P->digest_length;
 93 | }
 94 | 
 95 | 
 96 | 
 97 | static void blake2b_init( blake2b_state *S, size_t outlen )
 98 | {
 99 |   blake2b_param P[1];
100 | 
101 |   P->digest_length = (uint8_t)outlen;
102 |   P->key_length    = 0;
103 |   P->fanout        = 1;
104 |   P->depth         = 1;
105 |   store32( &P->leaf_length, 0 );
106 |   store32( &P->node_offset, 0 );
107 |   store32( &P->xof_length, 0 );
108 |   P->node_depth    = 0;
109 |   P->inner_length  = 0;
110 |   memset( P->reserved, 0, sizeof( P->reserved ) );
111 |   memset( P->salt,     0, sizeof( P->salt ) );
112 |   memset( P->personal, 0, sizeof( P->personal ) );
113 | 
114 |   blake2b_init_param( S, P );
115 | }
116 | 
      /* G(r,i,a,b,c,d): one mixing step on four words of v[].  It adds in the two message
         words chosen by blake2b_sigma[r][2*i] and blake2b_sigma[r][2*i+1] and rotates right
         by 32, 24, 16 and 63 bits.  It expects m[] to be in scope at the point of expansion. */
117 | #define G(r,i,a,b,c,d)                      \
118 |   do {                                      \
119 |     a = a + b + m[blake2b_sigma[r][2*i+0]]; \
120 |     d = rotr64(d ^ a, 32);                  \
121 |     c = c + d;                              \
122 |     b = rotr64(b ^ c, 24);                  \
123 |     a = a + b + m[blake2b_sigma[r][2*i+1]]; \
124 |     d = rotr64(d ^ a, 16);                  \
125 |     c = c + d;                              \
126 |     b = rotr64(b ^ c, 63);                  \
127 |   } while(0)
128 | 
      /* ROUND(r): eight G steps.  Steps 0-3 take v[] as four columns of a 4x4 grid,
         steps 4-7 take its four diagonals.  It expects v[] to be in scope. */
129 | #define ROUND(r)                    \
130 |   do {                              \
131 |     G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
132 |     G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
133 |     G(r,2,v[ 2],v[ 6],v[10],v[14]); \
134 |     G(r,3,v[ 3],v[ 7],v[11],v[15]); \
135 |     G(r,4,v[ 0],v[ 5],v[10],v[15]); \
136 |     G(r,5,v[ 1],v[ 6],v[11],v[12]); \
137 |     G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
138 |     G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
139 |   } while(0)
140 | 
      /* Compress one BLAKE2B_BLOCKBYTES-sized block into S->h.
         S->t and S->f are read but not modified here; callers update them beforehand. */
141 | static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
142 | {
143 |   uint64_t m[16];
144 |   uint64_t v[16];
145 |   size_t i;
146 | 
        /* Decode the block into sixteen little-endian 64-bit message words. */
147 |   for( i = 0; i < 16; ++i ) {
148 |     m[i] = load64( block + i * sizeof( m[i] ) );
149 |   }
150 | 
        /* Work vector: lower half is the current state, upper half is the IV
           with the counter and flag words XORed into its last four entries. */
151 |   for( i = 0; i < 8; ++i ) {
152 |     v[i] = S->h[i];
153 |   }
154 | 
155 |   v[ 8] = blake2b_IV[0];
156 |   v[ 9] = blake2b_IV[1];
157 |   v[10] = blake2b_IV[2];
158 |   v[11] = blake2b_IV[3];
159 |   v[12] = blake2b_IV[4] ^ S->t[0];
160 |   v[13] = blake2b_IV[5] ^ S->t[1];
161 |   v[14] = blake2b_IV[6] ^ S->f[0];
162 |   v[15] = blake2b_IV[7] ^ S->f[1];
163 | 
        /* Twelve rounds, one per row of blake2b_sigma. */
164 |   ROUND( 0 );
165 |   ROUND( 1 );
166 |   ROUND( 2 );
167 |   ROUND( 3 );
168 |   ROUND( 4 );
169 |   ROUND( 5 );
170 |   ROUND( 6 );
171 |   ROUND( 7 );
172 |   ROUND( 8 );
173 |   ROUND( 9 );
174 |   ROUND( 10 );
175 |   ROUND( 11 );
176 | 
        /* Fold both halves of the work vector back into the state. */
177 |   for( i = 0; i < 8; ++i ) {
178 |     S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
179 |   }
180 | }
181 | 
182 | #undef G
183 | #undef ROUND
184 | 
185 | static int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
186 | {
187 |   const unsigned char * in = (const unsigned char *)pin;
188 |   if( inlen > 0 )
189 |   {
190 |     size_t left = S->buflen;
191 |     size_t fill = BLAKE2B_BLOCKBYTES - left;
192 |     if( inlen > fill )
193 |     {
194 |       S->buflen = 0;
195 |       memcpy( S->buf + left, in, fill ); /* Fill buffer */
196 |       blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
197 |       blake2b_compress( S, S->buf ); /* Compress */
198 |       in += fill; inlen -= fill;
199 |       while(inlen > BLAKE2B_BLOCKBYTES) {
200 |         blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
201 |         blake2b_compress( S, in );
202 |         in += BLAKE2B_BLOCKBYTES;
203 |         inlen -= BLAKE2B_BLOCKBYTES;
204 |       }
205 |     }
206 |     memcpy( S->buf + S->buflen, in, inlen );
207 |     S->buflen += inlen;
208 |   }
209 |   return 0;
210 | }
211 | 
212 | static int blake2b_final( blake2b_state *S, void *out, size_t outlen )
213 | {
214 |   uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
215 |   size_t i;
216 | 
217 |   if( out == NULL || outlen < S->outlen )
218 |     return -1;
219 | 
220 |   if( blake2b_is_lastblock( S ) )
221 |     return -1;
222 | 
223 |   blake2b_increment_counter( S, S->buflen );
224 |   blake2b_set_lastblock( S );
225 |   memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
226 |   blake2b_compress( S, S->buf );
227 | 
228 |   for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
229 |     store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );
230 | 
231 |   memcpy( out, buffer, S->outlen );
232 | 
233 |   return 0;
234 | }
235 | 
236 | /* inlen, at least, should be uint64_t. Others can be size_t. */
237 | void blake2b( void *out, size_t outlen, const void *in, size_t inlen )
238 | {
239 |   blake2b_state S[1];
240 | 
241 |   blake2b_init( S, outlen );
242 | 
243 |   blake2b_update( S, ( const uint8_t * )in, inlen );
244 |   blake2b_final( S, out, outlen );
245 | }
246 | 


--------------------------------------------------------------------------------
/src/btrfs.h:
--------------------------------------------------------------------------------
  1 | /* btrfs.h
  2 |  * Generic btrfs header file. Thanks to whoever it was who wrote
  3 |  * https://btrfs.wiki.kernel.org/index.php/On-disk_Format - you saved me a lot of time!
  4 |  *
  5 |  * I release this file, and this file only, into the public domain - do whatever
  6 |  * you want with it. You don't have to, but I'd appreciate it if you let me know if you
  7 |  * use it for anything cool - mark@harmstone.com. */
  8 | 
  9 | #pragma once
 10 | 
 11 | #include <stdint.h>
 12 | 
      /* Superblock addresses; the trailing 0 presumably terminates the list (confirm against the code that walks it). */
 13 | static const uint64_t superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4000000000000, 0 };
 14 | 
 15 | #define BTRFS_MAGIC         0x4d5f53665248425f /* the bytes "_BHRfS_M" when stored little-endian */
 16 | #define MAX_LABEL_SIZE      0x100 /* length of superblock::label */
 17 | #define SUBVOL_ROOT_INODE   0x100
 18 | 
      /* One-byte item type; this is the type of KEY::obj_type. */
 19 | enum class btrfs_key_type : uint8_t {
 20 |     INODE_ITEM = 0x01,
 21 |     INODE_REF = 0x0C,
 22 |     INODE_EXTREF = 0x0D,
 23 |     XATTR_ITEM = 0x18,
 24 |     ORPHAN_INODE = 0x30,
 25 |     DIR_ITEM = 0x54,
 26 |     DIR_INDEX = 0x60,
 27 |     EXTENT_DATA = 0x6C,
 28 |     EXTENT_CSUM = 0x80,
 29 |     ROOT_ITEM = 0x84,
 30 |     ROOT_BACKREF = 0x90,
 31 |     ROOT_REF = 0x9C,
 32 |     EXTENT_ITEM = 0xA8,
 33 |     METADATA_ITEM = 0xA9,
 34 |     TREE_BLOCK_REF = 0xB0,
 35 |     EXTENT_DATA_REF = 0xB2,
 36 |     EXTENT_REF_V0 = 0xB4,
 37 |     SHARED_BLOCK_REF = 0xB6,
 38 |     SHARED_DATA_REF = 0xB8,
 39 |     BLOCK_GROUP_ITEM = 0xC0,
 40 |     FREE_SPACE_INFO = 0xC6,
 41 |     FREE_SPACE_EXTENT = 0xC7,
 42 |     FREE_SPACE_BITMAP = 0xC8,
 43 |     DEV_EXTENT = 0xCC,
 44 |     DEV_ITEM = 0xD8,
 45 |     CHUNK_ITEM = 0xE4,
 46 |     TEMP_ITEM = 0xF8,
 47 |     DEV_STATS = 0xF9,
 48 |     SUBVOL_UUID = 0xFB,
 49 |     SUBVOL_REC_UUID = 0xFC
 50 | };
 51 | 
      /* Fixed numeric IDs, one per BTRFS_ROOT_* name (presumably tree object IDs; confirm against the on-disk format docs).
         Note that 8 is not assigned here. */
 52 | #define BTRFS_ROOT_ROOT         1
 53 | #define BTRFS_ROOT_EXTENT       2
 54 | #define BTRFS_ROOT_CHUNK        3
 55 | #define BTRFS_ROOT_DEVTREE      4
 56 | #define BTRFS_ROOT_FSTREE       5
 57 | #define BTRFS_ROOT_TREEDIR      6
 58 | #define BTRFS_ROOT_CHECKSUM     7
 59 | #define BTRFS_ROOT_UUID         9
 60 | #define BTRFS_ROOT_FREE_SPACE   0xa
 61 | #define BTRFS_ROOT_DATA_RELOC   0xFFFFFFFFFFFFFFF7 /* equals (uint64_t)-9 */
 62 | 
      /* Compression algorithm code; this is the type of EXTENT_DATA::compression. */
 63 | enum class btrfs_compression : uint8_t {
 64 |     none = 0,
 65 |     zlib = 1,
 66 |     lzo = 2,
 67 |     zstd = 3
 68 | };
 69 | 
      /* Only "none" is defined here for the encryption and encoding fields of EXTENT_DATA. */
 70 | #define BTRFS_ENCRYPTION_NONE   0
 71 | 
 72 | #define BTRFS_ENCODING_NONE     0
 73 | 
      /* Extent kind; this is the type of EXTENT_DATA::type. */
 74 | enum class btrfs_extent_type : uint8_t {
 75 |     inline_extent = 0,
 76 |     regular = 1,
 77 |     prealloc = 2
 78 | };
 79 | 
      /* BLOCK_FLAG_*: single-bit masks, bits 0-10. */
 80 | #define BLOCK_FLAG_DATA         0x001
 81 | #define BLOCK_FLAG_SYSTEM       0x002
 82 | #define BLOCK_FLAG_METADATA     0x004
 83 | #define BLOCK_FLAG_RAID0        0x008
 84 | #define BLOCK_FLAG_RAID1        0x010
 85 | #define BLOCK_FLAG_DUPLICATE    0x020
 86 | #define BLOCK_FLAG_RAID10       0x040
 87 | #define BLOCK_FLAG_RAID5        0x080
 88 | #define BLOCK_FLAG_RAID6        0x100
 89 | #define BLOCK_FLAG_RAID1C3      0x200
 90 | #define BLOCK_FLAG_RAID1C4      0x400
 91 | 
      /* Reserved 64-bit IDs near the top of the range: (uint64_t)-11, -10 and -4. */
 92 | #define FREE_SPACE_CACHE_ID     0xFFFFFFFFFFFFFFF5
 93 | #define EXTENT_CSUM_ID          0xFFFFFFFFFFFFFFF6
 94 | #define BALANCE_ITEM_ID         0xFFFFFFFFFFFFFFFC
 95 | 
      /* BTRFS_INODE_*: single-bit masks, bits 0-11 (presumably for INODE_ITEM::flags; confirm). */
 96 | #define BTRFS_INODE_NODATASUM   0x001
 97 | #define BTRFS_INODE_NODATACOW   0x002
 98 | #define BTRFS_INODE_READONLY    0x004
 99 | #define BTRFS_INODE_NOCOMPRESS  0x008
100 | #define BTRFS_INODE_PREALLOC    0x010
101 | #define BTRFS_INODE_SYNC        0x020
102 | #define BTRFS_INODE_IMMUTABLE   0x040
103 | #define BTRFS_INODE_APPEND      0x080
104 | #define BTRFS_INODE_NODUMP      0x100
105 | #define BTRFS_INODE_NOATIME     0x200
106 | #define BTRFS_INODE_DIRSYNC     0x400
107 | #define BTRFS_INODE_COMPRESS    0x800
108 | 
109 | #define BTRFS_SUBVOL_READONLY   0x1
110 | 
      /* Feature bits; the names match superblock::compat_ro_flags and superblock::incompat_flags. */
111 | #define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE          0x1
112 | #define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID    0x2
113 | 
114 | #define BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF      0x0001
115 | #define BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL     0x0002
116 | #define BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS       0x0004
117 | #define BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO       0x0008
118 | #define BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD      0x0010
119 | #define BTRFS_INCOMPAT_FLAGS_BIG_METADATA       0x0020
120 | #define BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF      0x0040
121 | #define BTRFS_INCOMPAT_FLAGS_RAID56             0x0080
122 | #define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA    0x0100
123 | #define BTRFS_INCOMPAT_FLAGS_NO_HOLES           0x0200
124 | #define BTRFS_INCOMPAT_FLAGS_METADATA_UUID      0x0400
125 | #define BTRFS_INCOMPAT_FLAGS_RAID1C34           0x0800
126 | 
127 | #define BTRFS_SUPERBLOCK_FLAGS_SEEDING   0x100000000 /* bit 32 */
128 | 
129 | #define BTRFS_ORPHAN_INODE_OBJID         0xFFFFFFFFFFFFFFFB /* equals (uint64_t)-5 */
130 | 
      /* Checksum algorithm code (16-bit); this is the type of superblock::csum_type. */
131 | enum class btrfs_csum_type : uint16_t {
132 |     crc32c = 0,
133 |     xxhash = 1,
134 |     sha256 = 2,
135 |     blake2 = 3
136 | };
137 | 
      /* From here on structures are byte-packed (no padding between fields).
         The matching pack(pop) is not visible in this part of the file. */
138 | #pragma pack(push, 1)
139 | 
140 | typedef struct {
141 |     uint8_t uuid[16];
142 | } BTRFS_UUID;
143 | 
      /* Item key: 8 + 1 + 8 = 17 bytes under 1-byte packing. */
144 | typedef struct {
145 |     uint64_t obj_id;
146 |     btrfs_key_type obj_type;
147 |     uint64_t offset;
148 | } KEY;
149 | 
      /* HEADER_FLAG_*: bits 0 and 1, plus bit 56 (presumably for tree_header::flags; confirm). */
150 | #define HEADER_FLAG_WRITTEN         0x000000000000001
151 | #define HEADER_FLAG_SHARED_BACKREF  0x000000000000002
152 | #define HEADER_FLAG_MIXED_BACKREF   0x100000000000000
153 | 
      /* Tree block header: 101 bytes under 1-byte packing. */
154 | typedef struct {
155 |     uint8_t csum[32];
156 |     BTRFS_UUID fs_uuid;
157 |     uint64_t address;
158 |     uint64_t flags;
159 |     BTRFS_UUID chunk_tree_uuid;
160 |     uint64_t generation;
161 |     uint64_t tree_id;
162 |     uint32_t num_items;
163 |     uint8_t level;
164 | } tree_header;
165 | 
      /* Leaf entry: a key plus a 32-bit offset and size (25 bytes). */
166 | typedef struct {
167 |     KEY key;
168 |     uint32_t offset;
169 |     uint32_t size;
170 | } leaf_node;
171 | 
      /* Internal-node entry: a key plus a 64-bit address and generation (33 bytes). */
172 | typedef struct {
173 |     KEY key;
174 |     uint64_t address;
175 |     uint64_t generation;
176 | } internal_node;
177 | 
      /* Device description: 98 bytes under 1-byte packing; embedded in superblock as dev_item. */
178 | typedef struct {
179 |     uint64_t dev_id;
180 |     uint64_t num_bytes;
181 |     uint64_t bytes_used;
182 |     uint32_t optimal_io_align;
183 |     uint32_t optimal_io_width;
184 |     uint32_t minimal_io_size;
185 |     uint64_t type;
186 |     uint64_t generation;
187 |     uint64_t start_offset;
188 |     uint32_t dev_group;
189 |     uint8_t seek_speed;
190 |     uint8_t bandwidth;
191 |     BTRFS_UUID device_uuid;
192 |     BTRFS_UUID fs_uuid;
193 | } DEV_ITEM;
194 | 
195 | #define SYS_CHUNK_ARRAY_SIZE 0x800 /* length of superblock::sys_chunk_array */
196 | #define BTRFS_NUM_BACKUP_ROOTS 4 /* number of entries in superblock::backup */
197 | 
      /* Backup-root record: 168 bytes under 1-byte packing. */
198 | typedef struct {
199 |     uint64_t root_tree_addr;
200 |     uint64_t root_tree_generation;
201 |     uint64_t chunk_tree_addr;
202 |     uint64_t chunk_tree_generation;
203 |     uint64_t extent_tree_addr;
204 |     uint64_t extent_tree_generation;
205 |     uint64_t fs_tree_addr;
206 |     uint64_t fs_tree_generation;
207 |     uint64_t dev_root_addr;
208 |     uint64_t dev_root_generation;
209 |     uint64_t csum_root_addr;
210 |     uint64_t csum_root_generation;
211 |     uint64_t total_bytes;
212 |     uint64_t bytes_used;
213 |     uint64_t num_devices;
214 |     uint64_t reserved[4];
215 |     uint8_t root_level;
216 |     uint8_t chunk_root_level;
217 |     uint8_t extent_root_level;
218 |     uint8_t fs_root_level;
219 |     uint8_t dev_root_level;
220 |     uint8_t csum_root_level;
221 |     uint8_t reserved2[10];
222 | } superblock_backup;
223 | 
      /* Superblock: the fields add up to exactly 4096 bytes under 1-byte packing;
         reserved2[565] is the tail padding that makes up the difference. */
224 | typedef struct {
225 |     uint8_t checksum[32];
226 |     BTRFS_UUID uuid;
227 |     uint64_t sb_phys_addr;
228 |     uint64_t flags;
229 |     uint64_t magic;
230 |     uint64_t generation;
231 |     uint64_t root_tree_addr;
232 |     uint64_t chunk_tree_addr;
233 |     uint64_t log_tree_addr;
234 |     uint64_t log_root_transid;
235 |     uint64_t total_bytes;
236 |     uint64_t bytes_used;
237 |     uint64_t root_dir_objectid;
238 |     uint64_t num_devices;
239 |     uint32_t sector_size;
240 |     uint32_t node_size;
241 |     uint32_t leaf_size;
242 |     uint32_t stripe_size;
243 |     uint32_t n; /* NOTE(review): presumably the number of valid bytes in sys_chunk_array; confirm */
244 |     uint64_t chunk_root_generation;
245 |     uint64_t compat_flags;
246 |     uint64_t compat_ro_flags;
247 |     uint64_t incompat_flags;
248 |     enum btrfs_csum_type csum_type;
249 |     uint8_t root_level;
250 |     uint8_t chunk_root_level;
251 |     uint8_t log_root_level;
252 |     DEV_ITEM dev_item;
253 |     char label[MAX_LABEL_SIZE];
254 |     uint64_t cache_generation;
255 |     uint64_t uuid_tree_generation;
256 |     uint64_t reserved[30];
257 |     uint8_t sys_chunk_array[SYS_CHUNK_ARRAY_SIZE];
258 |     superblock_backup backup[BTRFS_NUM_BACKUP_ROOTS];
259 |     uint8_t reserved2[565];
260 | } superblock;
261 | 
      /* File type code; this is the type of DIR_ITEM::type. */
262 | enum class btrfs_inode_type : uint8_t {
263 |     unknown = 0,
264 |     file = 1,
265 |     directory = 2,
266 |     chardev = 3,
267 |     blockdev = 4,
268 |     fifo = 5,
269 |     socket = 6,
270 |     symlink = 7,
271 |     ea = 8
272 | };
273 | 
      /* Directory entry header: 30 fixed bytes followed by name[]. */
274 | typedef struct {
275 |     KEY key;
276 |     uint64_t transid;
277 |     uint16_t m; /* NOTE(review): presumably the length of trailing data; confirm */
278 |     uint16_t n; /* NOTE(review): presumably the length of name; confirm */
279 |     enum btrfs_inode_type type;
280 |     char name[1]; /* one-element placeholder; presumably variable-length in practice (confirm) */
281 | } DIR_ITEM;
282 | 
      /* Timestamp: 64-bit seconds plus 32-bit nanoseconds (12 bytes). */
283 | typedef struct {
284 |     uint64_t seconds;
285 |     uint32_t nanoseconds;
286 | } BTRFS_TIME;
287 | 
      /* Inode record: 160 bytes under 1-byte packing. */
288 | typedef struct {
289 |     uint64_t generation;
290 |     uint64_t transid;
291 |     uint64_t st_size;
292 |     uint64_t st_blocks;
293 |     uint64_t block_group;
294 |     uint32_t st_nlink;
295 |     uint32_t st_uid;
296 |     uint32_t st_gid;
297 |     uint32_t st_mode;
298 |     uint64_t st_rdev;
299 |     uint64_t flags;
300 |     uint64_t sequence;
301 |     uint8_t reserved[32];
302 |     BTRFS_TIME st_atime;
303 |     BTRFS_TIME st_ctime;
304 |     BTRFS_TIME st_mtime;
305 |     BTRFS_TIME otime;
306 | } INODE_ITEM;
307 | 
      /* Root record: begins with an embedded INODE_ITEM; 439 bytes under 1-byte packing. */
308 | typedef struct {
309 |     INODE_ITEM inode;
310 |     uint64_t generation;
311 |     uint64_t objid;
312 |     uint64_t block_number;
313 |     uint64_t byte_limit;
314 |     uint64_t bytes_used;
315 |     uint64_t last_snapshot_generation;
316 |     uint64_t flags;
317 |     uint32_t num_references;
318 |     KEY drop_progress;
319 |     uint8_t drop_level;
320 |     uint8_t root_level;
321 |     uint64_t generation2;
322 |     BTRFS_UUID uuid;
323 |     BTRFS_UUID parent_uuid;
324 |     BTRFS_UUID received_uuid;
325 |     uint64_t ctransid;
326 |     uint64_t otransid;
327 |     uint64_t stransid;
328 |     uint64_t rtransid;
329 |     BTRFS_TIME ctime;
330 |     BTRFS_TIME otime;
331 |     BTRFS_TIME stime;
332 |     BTRFS_TIME rtime;
333 |     uint64_t reserved[8];
334 | } ROOT_ITEM;
335 | 
      /* Chunk record: 48 bytes under 1-byte packing.
         NOTE(review): presumably followed by num_stripes CHUNK_ITEM_STRIPE entries; confirm. */
336 | typedef struct {
337 |     uint64_t size;
338 |     uint64_t root_id;
339 |     uint64_t stripe_length;
340 |     uint64_t type;
341 |     uint32_t opt_io_alignment;
342 |     uint32_t opt_io_width;
343 |     uint32_t sector_size;
344 |     uint16_t num_stripes;
345 |     uint16_t sub_stripes;
346 | } CHUNK_ITEM;
347 | 
      /* One stripe: device ID, offset and device UUID (32 bytes). */
348 | typedef struct {
349 |     uint64_t dev_id;
350 |     uint64_t offset;
351 |     BTRFS_UUID dev_uuid;
352 | } CHUNK_ITEM_STRIPE;
353 | 
      /* File extent header: 21 fixed bytes followed by data[]. */
354 | typedef struct {
355 |     uint64_t generation;
356 |     uint64_t decoded_size;
357 |     enum btrfs_compression compression;
358 |     uint8_t encryption;
359 |     uint16_t encoding;
360 |     enum btrfs_extent_type type;
361 |     uint8_t data[1]; /* one-element placeholder; presumably inline bytes or an EXTENT_DATA2 depending on type (confirm) */
362 | } EXTENT_DATA;
363 | 
      /* Four 64-bit fields (32 bytes). */
364 | typedef struct {
365 |     uint64_t address;
366 |     uint64_t size;
367 |     uint64_t offset;
368 |     uint64_t num_bytes;
369 | } EXTENT_DATA2;
370 | 
371 | typedef struct {
372 |     uint64_t index;
373 |     uint16_t n; /* NOTE(review): presumably the length of name; confirm */
374 |     char name[1];
375 | } INODE_REF;
376 | 
      /* Same layout as INODE_REF with a leading 64-bit dir field. */
377 | typedef struct {
378 |     uint64_t dir;
379 |     uint64_t index;
380 |     uint16_t n;
381 |     char name[1];
382 | } INODE_EXTREF;
383 | 
      /* EXTENT_ITEM_* bit masks (presumably for EXTENT_ITEM::flags; confirm). */
384 | #define EXTENT_ITEM_DATA            0x001
385 | #define EXTENT_ITEM_TREE_BLOCK      0x002
386 | #define EXTENT_ITEM_SHARED_BACKREFS 0x100
387 | 
      /* Extent record header: three 64-bit fields (24 bytes). */
388 | typedef struct {
389 |     uint64_t refcount;
390 |     uint64_t generation;
391 |     uint64_t flags;
392 | } EXTENT_ITEM;
393 | 
      /* The same two fields that EXTENT_ITEM_TREE carries after its embedded EXTENT_ITEM. */
394 | typedef struct {
395 |     KEY firstitem;
396 |     uint8_t level;
397 | } EXTENT_ITEM2;
398 | 
399 | typedef struct {
400 |     uint32_t refcount;
401 | } EXTENT_ITEM_V0;
402 | 
      /* EXTENT_ITEM followed by a key and a level byte (42 bytes). */
403 | typedef struct {
404 |     EXTENT_ITEM extent_item;
405 |     KEY firstitem;
406 |     uint8_t level;
407 | } EXTENT_ITEM_TREE;
408 | 
409 | typedef struct {
410 |     uint64_t offset;
411 | } TREE_BLOCK_REF;
412 | 
413 | typedef struct {
414 |     uint64_t root;
415 |     uint64_t objid;
416 |     uint64_t offset;
417 |     uint32_t count;
418 | } EXTENT_DATA_REF;
419 | 
420 | typedef struct {
421 |     uint64_t used;
422 |     uint64_t chunk_tree;
423 |     uint64_t flags;
424 | } BLOCK_GROUP_ITEM;
425 | 
426 | typedef struct {
427 |     uint64_t root;
428 |     uint64_t gen;
429 |     uint64_t objid;
430 |     uint32_t count;
431 | } EXTENT_REF_V0;
432 | 
433 | typedef struct {
434 |     uint64_t offset;
435 | } SHARED_BLOCK_REF;
436 | 
437 | typedef struct {
438 |     uint64_t offset;
439 |     uint32_t count;
440 | } SHARED_DATA_REF;
441 | 
      /* Plain constants 1 and 2 (presumably values for FREE_SPACE_ENTRY::type; confirm).
         They are distinct from the scoped btrfs_key_type enumerators of the same names. */
442 | static const uint8_t FREE_SPACE_EXTENT = 1;
443 | static const uint8_t FREE_SPACE_BITMAP = 2;
444 | 
445 | typedef struct {
446 |     uint64_t offset;
447 |     uint64_t size;
448 |     uint8_t type;
449 | } FREE_SPACE_ENTRY;
450 | 
451 | typedef struct {
452 |     KEY key;
453 |     uint64_t generation;
454 |     uint64_t num_entries;
455 |     uint64_t num_bitmaps;
456 | } FREE_SPACE_ITEM;
457 | 
      /* Field-for-field the same layout as INODE_EXTREF. */
458 | typedef struct {
459 |     uint64_t dir;
460 |     uint64_t index;
461 |     uint16_t n;
462 |     char name[1];
463 | } ROOT_REF;
464 | 
465 | typedef struct {
466 |     uint64_t chunktree;
467 |     uint64_t objid;
468 |     uint64_t address;
469 |     uint64_t length;
470 |     BTRFS_UUID chunktree_uuid;
471 | } DEV_EXTENT;
472 | 
473 | #define BALANCE_FLAGS_DATA          0x1
474 | #define BALANCE_FLAGS_SYSTEM        0x2
475 | #define BALANCE_FLAGS_METADATA      0x4
476 | /* BALANCE_FLAGS_* above: one bit each for data, system and metadata; presumably stored in BALANCE_ITEM.flags - TODO confirm. */
477 | #define BALANCE_ARGS_FLAGS_PROFILES         0x001
478 | #define BALANCE_ARGS_FLAGS_USAGE            0x002
479 | #define BALANCE_ARGS_FLAGS_DEVID            0x004
480 | #define BALANCE_ARGS_FLAGS_DRANGE           0x008
481 | #define BALANCE_ARGS_FLAGS_VRANGE           0x010
482 | #define BALANCE_ARGS_FLAGS_LIMIT            0x020
483 | #define BALANCE_ARGS_FLAGS_LIMIT_RANGE      0x040
484 | #define BALANCE_ARGS_FLAGS_STRIPES_RANGE    0x080
485 | #define BALANCE_ARGS_FLAGS_CONVERT          0x100
486 | #define BALANCE_ARGS_FLAGS_SOFT             0x200
487 | #define BALANCE_ARGS_FLAGS_USAGE_RANGE      0x400
488 | /* BALANCE_ARGS_FLAGS_* above: eleven single-bit masks (0x001 to 0x400); presumably stored in BALANCE_ARGS.flags - TODO confirm. */
489 | typedef struct {
490 |     uint64_t profiles;
491 |     /* u1: one 64-bit usage value, or a 32-bit start/end pair occupying the same eight bytes */
492 |     union {
493 |             uint64_t usage;
494 |             struct {
495 |                     uint32_t usage_start;
496 |                     uint32_t usage_end;
497 |             } s;
498 |     } u1;
499 | 
500 |     uint64_t devid;
501 |     uint64_t drange_start;
502 |     uint64_t drange_end;
503 |     uint64_t vrange_start;
504 |     uint64_t vrange_end;
505 |     uint64_t convert;
506 |     uint64_t flags;
507 |     /* u2: one 64-bit limit value, or a 32-bit start/end pair occupying the same eight bytes */
508 |     union {
509 |             uint64_t limit;
510 |             struct {
511 |                     uint32_t limit_start;
512 |                     uint32_t limit_end;
513 |             } s;
514 |     } u2;
515 | 
516 |     uint32_t stripes_start;
517 |     uint32_t stripes_end;
518 |     uint8_t reserved[48]; /* the members add up to 136 bytes when no padding is inserted */
519 | } BALANCE_ARGS;
520 | /* BALANCE_ITEM: a flags word, one BALANCE_ARGS each for data, metadata and system, then 32 reserved bytes. */
521 | typedef struct {
522 |     uint64_t flags;
523 |     BALANCE_ARGS data;
524 |     BALANCE_ARGS metadata;
525 |     BALANCE_ARGS system;
526 |     uint8_t reserved[32];
527 | } BALANCE_ITEM;
528 | 
529 | #define BTRFS_FREE_SPACE_USING_BITMAPS      1
530 | /* FREE_SPACE_INFO: 32-bit count and flags; presumably BTRFS_FREE_SPACE_USING_BITMAPS is a flags bit - TODO confirm. */
531 | typedef struct {
532 |     uint32_t count;
533 |     uint32_t flags;
534 | } FREE_SPACE_INFO;
535 | /* BTRFS_DEV_STAT_*: consecutive values 0 to 4, one per error category named in the macro. */
536 | #define BTRFS_DEV_STAT_WRITE_ERRORS          0
537 | #define BTRFS_DEV_STAT_READ_ERRORS           1
538 | #define BTRFS_DEV_STAT_FLUSH_ERRORS          2
539 | #define BTRFS_DEV_STAT_CORRUPTION_ERRORS     3
540 | #define BTRFS_DEV_STAT_GENERATION_ERRORS     4
541 | /* BTRFS_SEND_CMD_*: consecutive command numbers 1 to 22; presumably the values carried in btrfs_send_command.cmd - TODO confirm. */
542 | #define BTRFS_SEND_CMD_SUBVOL          1
543 | #define BTRFS_SEND_CMD_SNAPSHOT        2
544 | #define BTRFS_SEND_CMD_MKFILE          3
545 | #define BTRFS_SEND_CMD_MKDIR           4
546 | #define BTRFS_SEND_CMD_MKNOD           5
547 | #define BTRFS_SEND_CMD_MKFIFO          6
548 | #define BTRFS_SEND_CMD_MKSOCK          7
549 | #define BTRFS_SEND_CMD_SYMLINK         8
550 | #define BTRFS_SEND_CMD_RENAME          9
551 | #define BTRFS_SEND_CMD_LINK           10
552 | #define BTRFS_SEND_CMD_UNLINK         11
553 | #define BTRFS_SEND_CMD_RMDIR          12
554 | #define BTRFS_SEND_CMD_SET_XATTR      13
555 | #define BTRFS_SEND_CMD_REMOVE_XATTR   14
556 | #define BTRFS_SEND_CMD_WRITE          15
557 | #define BTRFS_SEND_CMD_CLONE          16
558 | #define BTRFS_SEND_CMD_TRUNCATE       17
559 | #define BTRFS_SEND_CMD_CHMOD          18
560 | #define BTRFS_SEND_CMD_CHOWN          19
561 | #define BTRFS_SEND_CMD_UTIMES         20
562 | #define BTRFS_SEND_CMD_END            21
563 | #define BTRFS_SEND_CMD_UPDATE_EXTENT  22
564 | /* BTRFS_SEND_TLV_*: consecutive attribute numbers 1 to 24; presumably the values carried in btrfs_send_tlv.type - TODO confirm. */
565 | #define BTRFS_SEND_TLV_UUID             1
566 | #define BTRFS_SEND_TLV_TRANSID          2
567 | #define BTRFS_SEND_TLV_INODE            3
568 | #define BTRFS_SEND_TLV_SIZE             4
569 | #define BTRFS_SEND_TLV_MODE             5
570 | #define BTRFS_SEND_TLV_UID              6
571 | #define BTRFS_SEND_TLV_GID              7
572 | #define BTRFS_SEND_TLV_RDEV             8
573 | #define BTRFS_SEND_TLV_CTIME            9
574 | #define BTRFS_SEND_TLV_MTIME           10
575 | #define BTRFS_SEND_TLV_ATIME           11
576 | #define BTRFS_SEND_TLV_OTIME           12
577 | #define BTRFS_SEND_TLV_XATTR_NAME      13
578 | #define BTRFS_SEND_TLV_XATTR_DATA      14
579 | #define BTRFS_SEND_TLV_PATH            15
580 | #define BTRFS_SEND_TLV_PATH_TO         16
581 | #define BTRFS_SEND_TLV_PATH_LINK       17
582 | #define BTRFS_SEND_TLV_OFFSET          18
583 | #define BTRFS_SEND_TLV_DATA            19
584 | #define BTRFS_SEND_TLV_CLONE_UUID      20
585 | #define BTRFS_SEND_TLV_CLONE_CTRANSID  21
586 | #define BTRFS_SEND_TLV_CLONE_PATH      22
587 | #define BTRFS_SEND_TLV_CLONE_OFFSET    23
588 | #define BTRFS_SEND_TLV_CLONE_LENGTH    24
589 | /* Twelve characters; with the terminating NUL the literal is 13 bytes, matching btrfs_send_header.magic[13]. */
590 | #define BTRFS_SEND_MAGIC "btrfs-stream"
591 | 
592 | typedef struct { /* stream header: 13 magic bytes followed by a 32-bit version */
593 |     uint8_t magic[13]; /* same size as the BTRFS_SEND_MAGIC literal including its NUL */
594 |     uint32_t version;
595 | } btrfs_send_header;
596 | /* btrfs_send_command: 32-bit length, 16-bit cmd and 32-bit csum, declared in that order. */
597 | typedef struct {
598 |     uint32_t length;
599 |     uint16_t cmd;
600 |     uint32_t csum;
601 | } btrfs_send_command;
602 | /* btrfs_send_tlv: 16-bit type followed by a 16-bit length. */
603 | typedef struct {
604 |     uint16_t type;
605 |     uint16_t length;
606 | } btrfs_send_tlv;
607 | 
608 | #pragma pack(pop)
609 | 


--------------------------------------------------------------------------------
/src/compress.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2021
  2 |  *
  3 |  * This file is part of ntfs2btrfs.
  4 |  *
  5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
 17 | 
 18 | #include "ntfs2btrfs.h"
 19 | 
 20 | #ifdef WITH_ZLIB
 21 | #include <zlib.h>
 22 | #endif
 23 | 
 24 | #ifdef WITH_LZO
 25 | #include <lzo/lzo1x.h>
 26 | #endif
 27 | 
 28 | #ifdef WITH_ZSTD
 29 | #include <zstd.h>
 30 | #endif
 31 | 
 32 | using namespace std;
 33 | 
 34 | #ifdef WITH_ZLIB
 35 | // Deflates `data` with zlib at the default level. Returns the output zero-padded to a multiple of cluster_size
 36 | // (assumed to be a power of two), or nullopt unless at least one cluster is saved. Throws formatted_error on zlib failure.
 37 | optional<buffer_t> zlib_compress(string_view data, uint32_t cluster_size) {
 38 |     // shorter than one cluster: nothing can be saved, and the unsigned subtraction further down would wrap
 39 |     if (data.length() < cluster_size)
 40 |         return nullopt;
 41 | 
 42 |     z_stream c_stream;
 43 |     buffer_t out(data.length()); // output is capped at the input size
 44 | 
 45 |     c_stream.zalloc = Z_NULL;
 46 |     c_stream.zfree = Z_NULL;
 47 |     c_stream.opaque = (voidpf)0;
 48 | 
 49 |     int ret = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION);
 50 |     if (ret != Z_OK)
 51 |         throw formatted_error("deflateInit returned {}", ret);
 52 | 
 53 |     c_stream.next_in = (uint8_t*)data.data();
 54 |     c_stream.avail_in = (unsigned int)data.length();
 55 |     c_stream.next_out = (uint8_t*)out.data();
 56 |     c_stream.avail_out = (unsigned int)out.size();
 57 | 
 58 |     do {
 59 |         ret = deflate(&c_stream, Z_FINISH);
 60 |         if (ret != Z_OK && ret != Z_STREAM_END) {
 61 |             deflateEnd(&c_stream);
 62 |             throw formatted_error("deflate returned {}", ret);
 63 |         }
 64 |         if (c_stream.avail_in == 0 || c_stream.avail_out == 0)
 65 |             break;
 66 |     } while (ret != Z_STREAM_END);
 67 | 
 68 |     deflateEnd(&c_stream);
 69 | 
 70 |     if (c_stream.avail_in > 0) // compressed version would be longer than uncompressed
 71 |         return nullopt;
 72 |     if (c_stream.total_out > data.length() - cluster_size) // space saving less than one sector
 73 |         return nullopt;
 74 | 
 75 |     // trim to the bytes produced, then grow to the sector boundary so the padding is zero-filled (as lzo/zstd do)
 76 |     out.resize(c_stream.total_out);
 77 |     out.resize((out.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
 78 |     return out;
 79 | }
 80 | #endif
 81 | 
 82 | #ifdef WITH_LZO
 83 | static __inline size_t lzo_max_outlen(size_t inlen) { // largest output LZO may produce for inlen input bytes
 84 |     return 64 + 3 + inlen + (inlen >> 4); // formula comes from LZO.FAQ
 85 | }
 86 | 
 87 | // Compresses `data` one cluster_size page at a time with LZO1X-1. Layout built here: a 32-bit total length, then per
 88 | // page a 32-bit compressed length followed by the data. Only whole pages are read. Returns the stream zero-padded to a
 89 | // multiple of cluster_size, or nullopt unless at least one cluster is saved. Throws formatted_error on LZO failure.
 90 | optional<buffer_t> lzo_compress(string_view data, uint32_t cluster_size) {
 91 |     // less than one page: nothing can be saved, and the unsigned subtraction further down would wrap
 92 |     if (data.length() < cluster_size)
 93 |         return nullopt;
 94 | 
 95 |     size_t num_pages = data.length() / cluster_size;
 96 | 
 97 |     // Four-byte overall header, then for each page: a four-byte length header, at most
 98 |     // lzo_max_outlen(cluster_size) bytes of data, and another four bytes for possible padding
 99 |     buffer_t outbuf(sizeof(uint32_t) + ((lzo_max_outlen(cluster_size) + (2 * sizeof(uint32_t))) * num_pages));
100 |     buffer_t wrkmem(LZO1X_MEM_COMPRESS);
101 | 
102 |     auto out_size = (uint32_t*)outbuf.data(); // running total, kept in the stream's own header slot
103 |     *out_size = sizeof(uint32_t);
104 |     auto in = (lzo_bytep)data.data();
105 |     auto out = (lzo_bytep)(outbuf.data() + (2 * sizeof(uint32_t))); // always four bytes past the next page header
106 | 
107 |     for (unsigned int i = 0; i < num_pages; i++) {
108 |         auto pagelen = (uint32_t*)(out - sizeof(uint32_t));
109 |         lzo_uint outlen;
110 | 
111 |         auto ret = lzo1x_1_compress(in, cluster_size, out, &outlen, wrkmem.data());
112 |         if (ret != LZO_E_OK)
113 |             throw formatted_error("lzo1x_1_compress returned {}", ret);
114 | 
115 |         *pagelen = (uint32_t)outlen;
116 |         *out_size += (uint32_t)(outlen + sizeof(uint32_t));
117 |         in += cluster_size;
118 |         out += outlen + sizeof(uint32_t);
119 | 
120 |         // if fewer than four bytes remain before the next cluster boundary, skip them so the next header starts on it
121 |         auto pad = cluster_size - (*out_size % cluster_size);
122 |         if (pad < sizeof(uint32_t)) {
123 |             memset(out - sizeof(uint32_t), 0, pad); // the gap begins where the next header would otherwise have gone
124 |             out += pad;
125 |             *out_size += pad;
126 |         }
127 | 
128 |         if (*out_size >= data.length())
129 |             return nullopt;
130 |     }
131 | 
132 |     outbuf.resize(*out_size);
133 |     if (outbuf.size() > data.length() - cluster_size)
134 |         return nullopt;
135 | 
136 |     outbuf.resize((outbuf.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
137 |     return outbuf;
138 | }
139 | #endif
140 | 
141 | #ifdef WITH_ZSTD
142 | // One-shot zstd compression of `data` at level 1. Returns the result zero-padded to a multiple of cluster_size,
143 | // or nullopt unless at least one cluster is saved. Throws formatted_error if zstd reports an error.
144 | optional<buffer_t> zstd_compress(string_view data, uint32_t cluster_size) {
145 |     if (data.length() < cluster_size) // cannot save a cluster; also stops the subtraction below from wrapping
146 |         return nullopt;
147 |     buffer_t out(ZSTD_compressBound(data.length()));
148 |     auto ret = ZSTD_compress(out.data(), out.size(), data.data(), data.length(), 1);
149 |     if (ZSTD_isError(ret)) // ret is an opaque error code here, so report its name as well
150 |         throw formatted_error("ZSTD_compress returned {} ({})", ret, ZSTD_getErrorName(ret));
151 |     if (ret > data.length() - cluster_size)
152 |         return nullopt;
153 |     out.resize(ret); // trim first so that the padding added next is zero-filled
154 |     out.resize((out.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0);
155 |     return out;
156 | }
157 | #endif
158 | 


--------------------------------------------------------------------------------
/src/config.h.in:
--------------------------------------------------------------------------------
1 | #pragma once
2 | /* Template header: the version placeholder and the #cmakedefine lines below are filled in by CMake's configure_file. */
3 | #define PROJECT_VER  "@PROJECT_VERSION@"
4 | #cmakedefine WITH_ZLIB 1
5 | #cmakedefine WITH_LZO 1
6 | #cmakedefine WITH_ZSTD 1
7 | 


--------------------------------------------------------------------------------
/src/crc32c-gas.S:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2020
  2 |  *
  3 |  * This file is part of WinBtrfs.
  4 |  *
  5 |  * WinBtrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU Lesser General Public Licence as published by
  7 |  * the Free Software Foundation, either version 3 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * WinBtrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU Lesser General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU Lesser General Public Licence
 16 |  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
 17 | 
 18 | #ifdef __i386__
 19 | 
 20 | .intel_syntax noprefix
 21 | 
 22 | #ifdef __MINGW32__
 23 | .extern _crctable
 24 | .global _calc_crc32c_sw@12
 25 | .global _calc_crc32c_hw@12
 26 | #else
 27 | .extern crctable
 28 | .global calc_crc32c_sw
 29 | .global calc_crc32c_hw
 30 | #endif
 31 | 
 32 | /* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
 33 | /* Table-driven CRC: one message byte per pass through crcloop, looked up in crctable. */
 34 | #ifdef __MINGW32__
 35 | _calc_crc32c_sw@12:
 36 | #else
 37 | calc_crc32c_sw:
 38 | #endif
 39 | 
 40 | push ebp
 41 | mov ebp, esp
 42 | /* esi and ebx are used as scratch below, so they are saved here and restored at crcend */
 43 | push esi
 44 | push ebx
 45 | /* load the three stack arguments in prototype order: seed, msg, msglen */
 46 | mov eax, [ebp+8]
 47 | mov edx, [ebp+12]
 48 | mov ebx, [ebp+16]
 49 | 
 50 | /* eax = crc / seed
 51 |  * ebx = len
 52 |  * esi = tmp
 53 |  * edx = buf
 54 |  * ecx = tmp2 */
 55 | 
 56 | crcloop:
 57 | test ebx, ebx
 58 | jz crcend
 59 | /* esi = crc >> 8; eax = ((crc ^ next byte) & 0xff) * 4, the byte offset of a 32-bit table entry */
 60 | mov esi, eax
 61 | shr esi, 8
 62 | mov cl, byte ptr [edx]
 63 | xor al, cl
 64 | and eax, 255
 65 | shl eax, 2
 66 | 
 67 | #ifdef __MINGW32__
 68 | mov eax, [_crctable + eax]
 69 | #else
 70 | mov eax, [crctable + eax]
 71 | #endif
 72 | /* new crc = table entry ^ (old crc >> 8) */
 73 | xor eax, esi
 74 | 
 75 | inc edx
 76 | dec ebx
 77 | 
 78 | jmp crcloop
 79 | 
 80 | crcend:
 81 | pop ebx
 82 | pop esi
 83 | 
 84 | pop ebp
 85 | /* result is left in eax; "ret 12" also removes the 12 bytes of arguments from the stack */
 86 | ret 12
 87 | 
 88 | /****************************************************/
 89 | 
 90 | /* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
 91 | /* Uses the crc32 instruction: four bytes per pass, then at most one 2-byte step, then single bytes. */
 92 | #ifdef __MINGW32__
 93 | _calc_crc32c_hw@12:
 94 | #else
 95 | calc_crc32c_hw:
 96 | #endif
 97 | 
 98 | push ebp
 99 | mov ebp, esp
100 | /* load the three stack arguments in prototype order: seed, msg, msglen */
101 | mov eax, [ebp+8]
102 | mov edx, [ebp+12]
103 | mov ecx, [ebp+16]
104 | 
105 | /* eax = crc / seed
106 |  * ecx = len
107 |  * edx = buf */
108 | /* NOTE(review): jl is a signed compare, so a length of 0x80000000 or more skips straight to the byte loop below */
109 | crchw_loop:
110 | cmp ecx, 4
111 | jl crchw_stragglers
112 | 
113 | crc32 eax, dword ptr [edx]
114 | 
115 | add edx, 4
116 | sub ecx, 4
117 | jmp crchw_loop
118 | /* fewer than four bytes left: take one 2-byte step if possible */
119 | crchw_stragglers:
120 | cmp ecx, 2
121 | jl crchw_stragglers2
122 | 
123 | crc32 eax, word ptr [edx]
124 | 
125 | add edx, 2
126 | sub ecx, 2
127 | /* whatever remains is consumed one byte at a time */
128 | crchw_stragglers2:
129 | test ecx, ecx
130 | jz crchw_end
131 | 
132 | crc32 eax, byte ptr [edx]
133 | inc edx
134 | dec ecx
135 | jmp crchw_stragglers2
136 | 
137 | crchw_end:
138 | pop ebp
139 | /* result is left in eax; "ret 12" also removes the 12 bytes of arguments from the stack */
140 | ret 12
141 | 
142 | #elif defined(__x86_64__)
143 | 
144 | .intel_syntax noprefix
145 | 
146 | .extern crctable
147 | .global calc_crc32c_sw
148 | .global calc_crc32c_hw
149 | 
150 | /* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
151 | /* Table-driven CRC, one message byte per pass through crcloop. Arguments are read from rcx, rdx and r8. */
152 | calc_crc32c_sw:
153 | 
154 | /* rax = crc / seed
155 |  * rdx = buf
156 |  * r8 = len
157 |  * rcx = tmp
158 |  * r10 = tmp2
159 |  * r11 = crctable */
160 | /* seed and msglen are 32-bit: writing eax / r8d zero-extends, so stale upper register bits cannot reach the loop */
161 | lea r11, [rip + crctable]
162 | mov eax, ecx
163 | mov r8d, r8d
164 | crcloop:
165 | test r8, r8
166 | jz crcend
167 | /* rcx = crc >> 8; rax = ((crc ^ next byte) & 0xff) * 4, the byte offset of a 32-bit table entry */
168 | mov rcx, rax
169 | shr rcx, 8
170 | mov r10b, byte ptr [rdx]
171 | xor al, r10b
172 | and rax, 255
173 | shl rax, 2
174 | mov eax, [r11 + rax]
175 | xor rax, rcx
176 | /* new crc is now in rax; advance to the next byte */
177 | inc rdx
178 | dec r8
179 | 
180 | jmp crcloop
181 | 
182 | crcend:
183 | ret
184 | 
185 | /****************************************************/
186 | 
187 | /* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
188 | /* Uses the crc32 instruction: eight bytes per pass, then at most one 4-byte and one 2-byte step, then single bytes. */
189 | calc_crc32c_hw:
190 | 
191 | /* rax = crc / seed
192 |  * rdx = buf
193 |  * r8 = len */
194 | /* seed and msglen are 32-bit: writing eax / r8d zero-extends, so stale upper register bits cannot affect the length tests */
195 | mov eax, ecx
196 | mov r8d, r8d
197 | crchw_loop:
198 | cmp r8, 8
199 | jl crchw_stragglers
200 | 
201 | crc32 rax, qword ptr [rdx]
202 | 
203 | add rdx, 8
204 | sub r8, 8
205 | jmp crchw_loop
206 | /* fewer than eight bytes left: take one 4-byte step if possible */
207 | crchw_stragglers:
208 | cmp r8, 4
209 | jl crchw_stragglers2
210 | 
211 | crc32 eax, dword ptr [rdx]
212 | 
213 | add rdx, 4
214 | sub r8, 4
215 | /* then one 2-byte step if possible */
216 | crchw_stragglers2:
217 | cmp r8, 2
218 | jl crchw_stragglers3
219 | 
220 | crc32 eax, word ptr [rdx]
221 | 
222 | add rdx, 2
223 | sub r8, 2
224 | /* whatever remains is consumed one byte at a time */
225 | crchw_stragglers3:
226 | test r8, r8
227 | jz crchw_end
228 | 
229 | crc32 eax, byte ptr [rdx]
230 | inc rdx
231 | dec r8
232 | jmp crchw_stragglers3
233 | 
234 | crchw_end:
235 | ret
236 | 
237 | #endif
238 | 
239 | #if defined(__linux__) && defined(__ELF__)
240 | .section .note.GNU-stack,"",%progbits
241 | #endif
242 | 


--------------------------------------------------------------------------------
/src/crc32c-masm.asm:
--------------------------------------------------------------------------------
  1 | ; Copyright (c) Mark Harmstone 2020
  2 | ;
  3 | ; This file is part of WinBtrfs.
  4 | ;
  5 | ; WinBtrfs is free software: you can redistribute it and/or modify
  6 | ; it under the terms of the GNU Lesser General Public Licence as published by
  7 | ; the Free Software Foundation, either version 3 of the Licence, or
  8 | ; (at your option) any later version.
  9 | ;
 10 | ; WinBtrfs is distributed in the hope that it will be useful,
 11 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 | ; GNU Lesser General Public Licence for more details.
 14 | ;
 15 | ; You should have received a copy of the GNU Lesser General Public Licence
 16 | ; along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>.
 17 | 
 18 | IFDEF RAX
 19 | ELSE
 20 | .686P
 21 | ENDIF
 22 | 
 23 | _TEXT  SEGMENT
 24 | 
 25 | IFDEF RAX
 26 | 
 27 | EXTERN crctable:qword
 28 | 
 29 | PUBLIC calc_crc32c_sw
 30 | 
 31 | ; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
 32 | ; Table-driven CRC, one message byte per pass through crcloop. Arguments are read from rcx, rdx and r8.
 33 | calc_crc32c_sw:
 34 | 
 35 | ; rax = crc / seed
 36 | ; rdx = buf
 37 | ; r8 = len
 38 | ; rcx = tmp
 39 | ; r10 = tmp2
 40 | ; seed and msglen are 32-bit: writing eax / r8d zero-extends, so stale upper register bits cannot reach the loop
 41 | mov eax, ecx
 42 | mov r8d, r8d
 43 | crcloop:
 44 | test r8, r8
 45 | jz crcend
 46 | ; rcx = crc >> 8; rax = ((crc ^ next byte) & 0xff) * 4, the byte offset of a 32-bit table entry
 47 | mov rcx, rax
 48 | shr rcx, 8
 49 | mov r10b, byte ptr [rdx]
 50 | xor al, r10b
 51 | and rax, 255
 52 | shl rax, 2
 53 | mov r10, offset crctable
 54 | mov eax, dword ptr [r10 + rax]
 55 | xor rax, rcx
 56 | ; new crc is now in rax; advance to the next byte
 57 | inc rdx
 58 | dec r8
 59 | 
 60 | jmp crcloop
 61 | 
 62 | crcend:
 63 | ret
 64 | 
 65 | ; ****************************************************
 66 | 
 67 | ; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
 68 | ; Uses the crc32 instruction: eight bytes per pass, then at most one 4-byte and one 2-byte step, then single bytes.
 69 | PUBLIC calc_crc32c_hw
 70 | 
 71 | calc_crc32c_hw:
 72 | 
 73 | ; rax = crc / seed
 74 | ; rdx = buf
 75 | ; r8 = len
 76 | ; seed and msglen are 32-bit: writing eax / r8d zero-extends, so stale upper register bits cannot affect the length tests
 77 | mov eax, ecx
 78 | mov r8d, r8d
 79 | crchw_loop:
 80 | cmp r8, 8
 81 | jl crchw_stragglers
 82 | 
 83 | crc32 rax, qword ptr [rdx]
 84 | 
 85 | add rdx, 8
 86 | sub r8, 8
 87 | jmp crchw_loop
 88 | ; fewer than eight bytes left: take one 4-byte step if possible
 89 | crchw_stragglers:
 90 | cmp r8, 4
 91 | jl crchw_stragglers2
 92 | 
 93 | crc32 eax, dword ptr [rdx]
 94 | 
 95 | add rdx, 4
 96 | sub r8, 4
 97 | ; then one 2-byte step if possible
 98 | crchw_stragglers2:
 99 | cmp r8, 2
100 | jl crchw_stragglers3
101 | 
102 | crc32 eax, word ptr [rdx]
103 | 
104 | add rdx, 2
105 | sub r8, 2
106 | ; whatever remains is consumed one byte at a time
107 | crchw_stragglers3:
108 | test r8, r8
109 | jz crchw_end
110 | 
111 | crc32 eax, byte ptr [rdx]
112 | inc rdx
113 | dec r8
114 | jmp crchw_stragglers3
115 | 
116 | crchw_end:
117 | ret
118 | 
119 | ELSE
120 | 
121 | EXTERN _crctable:ABS
122 | 
123 | ; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
124 | ; Table-driven CRC: one message byte per pass through crcloop, looked up in _crctable.
125 | PUBLIC _calc_crc32c_sw@12
126 | 
127 | _calc_crc32c_sw@12:
128 | 
129 | push ebp
130 | mov ebp, esp
131 | ; esi and ebx are used as scratch below, so they are saved here and restored at crcend
132 | push esi
133 | push ebx
134 | ; load the three stack arguments in prototype order: seed, msg, msglen
135 | mov eax, [ebp+8]
136 | mov edx, [ebp+12]
137 | mov ebx, [ebp+16]
138 | 
139 | ; eax = crc / seed
140 | ; ebx = len
141 | ; esi = tmp
142 | ; edx = buf
143 | ; ecx = tmp2
144 | 
145 | crcloop:
146 | test ebx, ebx
147 | jz crcend
148 | ; esi = crc >> 8; eax = ((crc ^ next byte) & 0xff) * 4, the byte offset of a 32-bit table entry
149 | mov esi, eax
150 | shr esi, 8
151 | mov cl, byte ptr [edx]
152 | xor al, cl
153 | and eax, 255
154 | shl eax, 2
155 | mov eax, [_crctable + eax]
156 | xor eax, esi
157 | ; new crc is now in eax; advance to the next byte
158 | inc edx
159 | dec ebx
160 | 
161 | jmp crcloop
162 | 
163 | crcend:
164 | pop ebx
165 | pop esi
166 | 
167 | pop ebp
168 | ; result is left in eax; "ret 12" also removes the 12 bytes of arguments from the stack
169 | ret 12
170 | 
171 | ; ****************************************************
172 | 
173 | ; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
174 | ; Uses the crc32 instruction: four bytes per pass, then at most one 2-byte step, then single bytes.
175 | PUBLIC _calc_crc32c_hw@12
176 | 
177 | _calc_crc32c_hw@12:
178 | 
179 | push ebp
180 | mov ebp, esp
181 | ; load the three stack arguments in prototype order: seed, msg, msglen
182 | mov eax, [ebp+8]
183 | mov edx, [ebp+12]
184 | mov ecx, [ebp+16]
185 | 
186 | ; eax = crc / seed
187 | ; ecx = len
188 | ; edx = buf
189 | ; NOTE(review): jl is a signed compare, so a length of 0x80000000 or more skips straight to the byte loop below
190 | crchw_loop:
191 | cmp ecx, 4
192 | jl crchw_stragglers
193 | 
194 | crc32 eax, dword ptr [edx]
195 | 
196 | add edx, 4
197 | sub ecx, 4
198 | jmp crchw_loop
199 | ; fewer than four bytes left: take one 2-byte step if possible
200 | crchw_stragglers:
201 | cmp ecx, 2
202 | jl crchw_stragglers2
203 | 
204 | crc32 eax, word ptr [edx]
205 | 
206 | add edx, 2
207 | sub ecx, 2
208 | ; whatever remains is consumed one byte at a time
209 | crchw_stragglers2:
210 | test ecx, ecx
211 | jz crchw_end
212 | 
213 | crc32 eax, byte ptr [edx]
214 | inc edx
215 | dec ecx
216 | jmp crchw_stragglers2
217 | 
218 | crchw_end:
219 | pop ebp
220 | ; result is left in eax; "ret 12" also removes the 12 bytes of arguments from the stack
221 | ret 12
222 | 
223 | ENDIF
224 | 
225 | _TEXT  ENDS
226 | 
227 | end
228 | 


--------------------------------------------------------------------------------
/src/crc32c.c:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) Mark Harmstone 2016-17
 2 |  *
 3 |  * This file is part of WinBtrfs.
 4 |  *
 5 |  * WinBtrfs is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU Lesser General Public Licence as published by
 7 |  * the Free Software Foundation, either version 3 of the Licence, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * WinBtrfs is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU Lesser General Public Licence for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public Licence
16 |  * along with WinBtrfs.  If not, see <http://www.gnu.org/licenses/>. */
17 | 
18 | #include "crc32c.h"
19 | #include <stdint.h>
20 | #include <stdbool.h>
21 | 
22 | crc_func calc_crc32c = calc_crc32c_sw; // dispatch pointer; starts out at the software implementation
23 | 
24 | #ifdef __cplusplus
25 | extern "C"
26 | {
27 | #endif
28 | 
29 | const uint32_t crctable[] = { // 256 entries (32 rows of 8), indexed by one byte; entry 0x80 is 0x82f63b78
30 |     0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
31 |     0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
32 |     0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
33 |     0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
34 |     0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
35 |     0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
36 |     0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
37 |     0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
38 |     0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
39 |     0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
40 |     0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
41 |     0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
42 |     0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
43 |     0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
44 |     0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
45 |     0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
46 |     0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
47 |     0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
48 |     0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
49 |     0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
50 |     0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
51 |     0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
52 |     0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
53 |     0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
54 |     0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
55 |     0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
56 |     0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
57 |     0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
58 |     0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
59 |     0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
60 |     0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
61 |     0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
62 | };
63 | 
64 | // x86 and amd64 versions live in asm files
65 | #if !defined(__i386__) && !defined(__x86_64__) && !defined(_M_IX86) && !defined(_M_X64)
66 | uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen) {
67 |     uint32_t crc = seed; // running CRC; each message byte is folded in through crctable
68 | 
69 |     while (msglen-- > 0)
70 |         crc = (crc >> 8) ^ crctable[(crc ^ *msg++) & 0xff];
71 | 
72 |     // with msglen == 0 the loop body never runs and the seed comes back unchanged
73 |     return crc;
74 | }
75 | #endif
76 | 
77 | #ifdef __cplusplus
78 | }
79 | #endif
80 | 


--------------------------------------------------------------------------------
/src/crc32c.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) Mark Harmstone 2020
 2 |  *
 3 |  * This file is part of ntfs2btrfs.
 4 |  *
 5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public Licence as published by
 7 |  * the Free Software Foundation, either version 2 of the Licence, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 |  * GNU General Public Licence for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public Licence
16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
17 | 
18 | #pragma once
19 | 
20 | #include <stdint.h>
21 | 
22 | #ifndef _WIN32 // outside Windows, define __stdcall here: stdcall on i386, ms_abi on x86_64, nothing elsewhere
23 | #ifdef __i386__
24 | #define __stdcall __attribute__((stdcall))
25 | #elif defined(__x86_64__)
26 | #define __stdcall __attribute__((ms_abi))
27 | #else
28 | #define __stdcall
29 | #endif
30 | #endif
31 | 
32 | #ifdef __cplusplus
33 | extern "C"
34 | {
35 | #endif
36 | // NOTE(review): crc32c.c also tests _M_IX86 / _M_X64 in its asm guard; this one does not - confirm MSVC builds need no declaration.
37 | #if defined(__i386__) || defined(__x86_64__)
38 | uint32_t __stdcall calc_crc32c_hw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
39 | #endif
40 | // Software implementation: assembly on x86 / amd64, the C loop in crc32c.c on other targets.
41 | uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen);
42 | 
43 | typedef uint32_t (__stdcall *crc_func)(uint32_t seed, const uint8_t* msg, uint32_t msglen);
44 | // Dispatch pointer; crc32c.c initialises it to calc_crc32c_sw.
45 | extern crc_func calc_crc32c;
46 | 
47 | #ifdef __cplusplus
48 | }
49 | #endif
50 | 


--------------------------------------------------------------------------------
/src/decomp.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2020
  2 |  *
  3 |  * This file is part of ntfs2btrfs.
  4 |  *
  5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
 17 | 
 18 | #include "ntfs2btrfs.h"
 19 | #include "ebiggers/system_compression.h"
 20 | 
 21 | #define LZX_CHUNK_SIZE 32768
 22 | 
 23 | using namespace std;
 24 | 
 25 | // Decompresses the payload of one compressed LZNT1 chunk. The payload is a series of groups: one flag byte, then
 26 | // up to eight tokens taken from the flag's lowest bit upwards (0 = literal byte, 1 = 16-bit back-reference).
 27 | // Running out of input between tokens ends the chunk normally. Throws formatted_error if a back-reference is
 28 | // truncated, appears before any output exists, or reaches back further than the output produced so far.
 29 | static buffer_t lznt1_decompress_chunk(string_view data) {
 30 |     buffer_t s;
 31 | 
 32 |     while (!data.empty()) {
 33 |         auto fg = (uint8_t)data[0];
 34 |         data = data.substr(1);
 35 | 
 36 |         if (fg == 0) { // all eight tokens of this group are literals
 37 |             if (data.length() < 8) {
 38 |                 s.insert(s.end(), data.begin(), data.end());
 39 |                 return s;
 40 |             } else {
 41 |                 s.insert(s.end(), data.begin(), data.begin() + 8);
 42 |                 data = data.substr(8);
 43 |             }
 44 |         } else {
 45 |             for (unsigned int i = 0; i < 8; i++) {
 46 |                 if (data.empty())
 47 |                     return s;
 48 | 
 49 |                 if (!(fg & 1)) {
 50 |                     s.insert(s.end(), data.begin(), data.begin() + 1);
 51 |                     data = data.substr(1);
 52 |                 } else {
 53 |                     if (data.length() < sizeof(uint16_t))
 54 |                         throw formatted_error("Compressed chunk was {} bytes, expected at least 2.", data.length());
 55 |                     // a back-reference needs earlier output to copy from; without this, s.size() - 1 below would wrap
 56 |                     if (s.empty())
 57 |                         throw formatted_error("Back-reference found before any data in compressed chunk.");
 58 | 
 59 |                     // See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-xca/90fc6a28-f627-4ee5-82ce-445a6cf98b22
 60 |                     auto v = *(uint16_t*)data.data();
 61 |                     data = data.substr(2);
 62 |                     // Shamelessly stolen from https://github.com/you0708/lznt1 - thank you!
 63 |                     uint64_t u = s.size() - 1;
 64 |                     uint64_t lm = 0xfff;
 65 |                     uint64_t os = 12;
 66 |                     while (u >= 0x10) { // the more output there is, the more bits of v go to the displacement
 67 |                         lm >>= 1;
 68 |                         os--;
 69 |                         u >>= 1;
 70 |                     }
 71 | 
 72 |                     auto l = (v & lm) + 3;
 73 |                     auto d = (v >> os) + 1;
 74 |                     if ((uint64_t)d > s.size()) // the copy source would lie before the first output byte
 75 |                         throw formatted_error("Back-reference displacement was {}, but only {} bytes of output exist.", d, s.size());
 76 |                     s.reserve((uint32_t)(s.size() + l));
 77 |                     while (l > 0) { // byte by byte, so a run may overlap the bytes it is producing
 78 |                         s.resize(s.size() + 1);
 79 |                         s[s.size() - 1] = s[s.size() - d - 1];
 80 |                         l--;
 81 |                     }
 82 |                 }
 83 | 
 84 |                 fg >>= 1;
 85 |             }
 86 |         }
 87 |     }
 88 | 
 89 |     return s;
 90 | }
 91 | 
 92 | // Expands an LZNT1 stream into a buffer of exactly `size` bytes. The buffer is zero-filled first, so anything the
 93 | // stream does not cover stays zero. Stops at a zero chunk header or as soon as the buffer is full.
 94 | // Throws formatted_error on truncated input or an unexpected chunk signature.
 95 | buffer_t lznt1_decompress(string_view compdata, uint32_t size) {
 96 |     buffer_t ret(size);
 97 |     size_t filled = 0; // bytes of ret written so far
 98 | 
 99 |     memset(ret.data(), 0, ret.size());
100 | 
101 |     // Copies n bytes from src into ret, clipped to the space left; returns true once ret is full.
102 |     auto append = [&](const void* src, size_t n) {
103 |         if (filled + n >= size) {
104 |             memcpy(ret.data() + filled, src, size - filled);
105 |             return true;
106 |         }
107 | 
108 |         memcpy(ret.data() + filled, src, n);
109 |         filled += n;
110 | 
111 |         return false;
112 |     };
113 | 
114 |     for (;;) {
115 |         if (compdata.length() < sizeof(uint16_t))
116 |             throw formatted_error("compdata was {} bytes, expected at least 2.", compdata.length());
117 | 
118 |         const auto h = *(uint16_t*)compdata.data(); // 16-bit chunk header
119 | 
120 |         if (h == 0)
121 |             return ret;
122 | 
123 |         compdata = compdata.substr(2);
124 | 
125 |         const auto sig = (h & 0x7000) >> 12;
126 |         if (sig != 3)
127 |             throw formatted_error("Compression signature was {}, expected 3.", sig);
128 | 
129 |         // the low 12 bits of the header hold the payload length minus one
130 |         const auto len = (uint32_t)(((uint64_t)h & 0xfff) + 1);
131 |         if (compdata.length() < len)
132 |             throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), len);
133 | 
134 |         const auto data = string_view(compdata.data(), len);
135 | 
136 |         compdata = compdata.substr(len);
137 | 
138 |         bool full;
139 |         if (h & 0x8000) { // top bit set: the payload goes through lznt1_decompress_chunk
140 |             auto c = lznt1_decompress_chunk(data);
141 |             full = append(c.data(), c.size());
142 |         } else { // otherwise the payload is copied as it is
143 |             full = append(data.data(), data.length());
144 |         }
145 | 
146 |         if (full)
147 |             return ret;
148 |     }
149 | }
150 | 
151 | // Decompresses LZX data laid out as (num_chunks - 1) uint32 chunk end offsets followed by the chunk
152 | // data, producing exactly `size` bytes.  Throws formatted_error if the offset table or any chunk
153 | // falls outside compdata, or if a chunk fails to decompress.
154 | buffer_t do_lzx_decompress(string_view compdata, uint32_t size) {
155 |     buffer_t ret(size); // created before ctx so that ctx cannot leak if this allocation throws
156 |     if (size == 0) // no chunks; also keeps num_chunks - 1 below from wrapping round
157 |         return ret;
158 | 
159 |     uint64_t num_chunks = ((uint64_t)size + LZX_CHUNK_SIZE - 1) / LZX_CHUNK_SIZE;
160 |     uint64_t table_len = (num_chunks - 1) * sizeof(uint32_t);
161 | 
162 |     if (compdata.length() < table_len)
163 |         throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), table_len);
164 | 
165 |     auto data = compdata.substr((size_t)table_len);
166 |     auto ctx = lzx_allocate_decompressor(LZX_CHUNK_SIZE);
167 | 
168 |     if (!ctx)
169 |         throw formatted_error("lzx_allocate_decompressor returned NULL.");
170 | 
171 |     for (uint64_t i = 0, off = 0; i < num_chunks; i++) {
172 |         uint64_t outoff = i * LZX_CHUNK_SIZE;
173 |         uint64_t outlen = i == num_chunks - 1 ? size - outoff : LZX_CHUNK_SIZE;
174 |         uint32_t end = (uint32_t)data.length(); // the last chunk runs to the end of data
175 |         if (i != num_chunks - 1) // table entries may be unaligned, so copy rather than cast
176 |             memcpy(&end, compdata.data() + (i * sizeof(uint32_t)), sizeof(end));
177 |         if (end < off || end > data.length()) { // reject offsets that go backwards or past the data
178 |             lzx_free_decompressor(ctx);
179 |             throw formatted_error("Chunk {} ended at offset {}, expected between {} and {}.", i, end, off, data.length());
180 |         }
181 | 
182 |         if (end - off == outlen) { // stored uncompressed
183 |             memcpy(ret.data() + outoff, data.data() + off, (size_t)outlen);
184 |         } else if (auto err = lzx_decompress(ctx, data.data() + off, (uint32_t)(end - off),
185 |                                              ret.data() + outoff, (uint32_t)outlen)) {
186 |             lzx_free_decompressor(ctx);
187 |             throw formatted_error("lzx_decompress returned {}.", err);
188 |         }
189 |         off = end; // the next chunk starts where this one ended
190 |     }
191 |     lzx_free_decompressor(ctx);
192 |     return ret;
193 | }
194 | 
195 | // Decompresses Xpress data laid out as (num_chunks - 1) uint32 chunk end offsets followed by the
196 | // chunk data, producing exactly `size` bytes in pieces of `chunk_size`.  Throws formatted_error if
197 | // the offset table or any chunk falls outside compdata, or if a chunk fails to decompress.
198 | buffer_t do_xpress_decompress(string_view compdata, uint32_t size, uint32_t chunk_size) {
199 |     buffer_t ret(size); // created before ctx so that ctx cannot leak if this allocation throws
200 |     if (size == 0) // no chunks; also keeps num_chunks - 1 below from wrapping round
201 |         return ret;
202 | 
203 |     uint64_t num_chunks = ((uint64_t)size + chunk_size - 1) / chunk_size;
204 |     uint64_t table_len = (num_chunks - 1) * sizeof(uint32_t);
205 | 
206 |     if (compdata.length() < table_len)
207 |         throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), table_len);
208 | 
209 |     auto data = compdata.substr((size_t)table_len);
210 |     auto ctx = xpress_allocate_decompressor();
211 | 
212 |     if (!ctx)
213 |         throw formatted_error("xpress_allocate_decompressor returned NULL.");
214 | 
215 |     for (uint64_t i = 0, off = 0; i < num_chunks; i++) {
216 |         uint64_t outoff = i * chunk_size;
217 |         uint64_t outlen = i == num_chunks - 1 ? size - outoff : chunk_size;
218 |         uint32_t end = (uint32_t)data.length(); // the last chunk runs to the end of data
219 |         if (i != num_chunks - 1) // table entries may be unaligned, so copy rather than cast
220 |             memcpy(&end, compdata.data() + (i * sizeof(uint32_t)), sizeof(end));
221 |         if (end < off || end > data.length()) { // reject offsets that go backwards or past the data
222 |             xpress_free_decompressor(ctx);
223 |             throw formatted_error("Chunk {} ended at offset {}, expected between {} and {}.", i, end, off, data.length());
224 |         }
225 | 
226 |         if (end - off == outlen) { // stored uncompressed
227 |             memcpy(ret.data() + outoff, data.data() + off, (size_t)outlen);
228 |         } else if (auto err = xpress_decompress(ctx, data.data() + off, (uint32_t)(end - off),
229 |                                                 ret.data() + outoff, (size_t)outlen)) {
230 |             xpress_free_decompressor(ctx);
231 |             throw formatted_error("xpress_decompress returned {}.", err);
232 |         }
233 |         off = end; // the next chunk starts where this one ended
234 |     }
235 |     xpress_free_decompressor(ctx);
236 |     return ret;
237 | }
238 | 


--------------------------------------------------------------------------------
/src/ebiggers/aligned_malloc.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * aligned_malloc.c - aligned memory allocation
 3 |  *
 4 |  * This file provides portable aligned memory allocation functions that only use
 5 |  * malloc() and free().  This avoids portability problems with posix_memalign(),
 6 |  * aligned_alloc(), etc.
 7 |  */
 8 | 
 9 | #include <stdlib.h>
10 | 
11 | #include "common_defs.h"
12 | 
13 | /*
14 |  * Allocate 'size' bytes whose address is a multiple of 'alignment', which must
15 |  * be a nonzero power of two.  Returns NULL if 'alignment' is invalid, if the
16 |  * padded request would overflow size_t, or if malloc() fails.  The result must
17 |  * be released with aligned_free(), not free().
18 |  */
19 | void *
20 | aligned_malloc(size_t size, size_t alignment)
21 | {
22 | 	const uintptr_t mask = alignment - 1;
23 | 	char *ptr = NULL;
24 | 	char *raw_ptr;
25 | 
26 | 	/* The mask-based rounding below is only correct for a nonzero power
27 | 	 * of two; anything else would yield a misaligned or NULL pointer. */
28 | 	if (alignment == 0 || (alignment & mask) != 0)
29 | 		return NULL;
30 | 
31 | 	/* Refuse sizes for which the padded request would wrap around. */
32 | 	if (size > (size_t)-1 - sizeof(size_t) - mask)
33 | 		return NULL;
34 | 
35 | 	raw_ptr = malloc(mask + sizeof(size_t) + size);
36 | 	if (raw_ptr) {
37 | 		/* Leave room for the header, then round up to the alignment. */
38 | 		ptr = (char *)raw_ptr + sizeof(size_t);
39 | 		ptr = (void *)(((uintptr_t)ptr + mask) & ~mask);
40 | 		/* Header: distance back to raw_ptr, read by aligned_free(). */
41 | 		*((size_t *)ptr - 1) = ptr - raw_ptr;
42 | 	}
43 | 	return ptr;
44 | }
45 | 
46 | /*
47 |  * Free memory obtained from aligned_malloc().  A NULL 'ptr' is ignored.  The
48 |  * size_t stored just below 'ptr' gives the distance back to the pointer that
49 |  * malloc() originally returned.
50 |  */
51 | void
52 | aligned_free(void *ptr)
53 | {
54 | 	if (ptr)
55 | 		free((char *)ptr - *((size_t *)ptr - 1));
56 | }
57 | 


--------------------------------------------------------------------------------
/src/ebiggers/common_defs.h:
--------------------------------------------------------------------------------
  1 | #ifndef _COMMON_DEFS_H
  2 | #define _COMMON_DEFS_H
  3 | 
  4 | // #include <ntfs-3g/endians.h>
  5 | // #include <ntfs-3g/types.h>
  6 | #include <stdint.h>
  7 | 
  8 | typedef uint8_t u8;
  9 | typedef uint16_t u16;
 10 | typedef uint32_t u32;
 11 | typedef uint64_t u64;
 12 | typedef int32_t s32;
 13 | 
 14 | /* ========================================================================== */
 15 | /*                              Type definitions                              */
 16 | /* ========================================================================== */
 17 | 
 18 | /*
 19 |  * Type of a machine word.  'unsigned long' would be logical, but that is only
 20 |  * 32 bits on x86_64 Windows.  The same applies to 'uint_fast32_t'.  So the best
 21 |  * we can do without a bunch of #ifdefs appears to be 'size_t'.
 22 |  */
 23 | typedef size_t machine_word_t;
 24 | 
 25 | #define WORDBYTES	sizeof(machine_word_t)
 26 | #define WORDBITS	(8 * WORDBYTES)
 27 | 
 28 | /* ========================================================================== */
 29 | /*                         Compiler-specific definitions                      */
 30 | /* ========================================================================== */
 31 | 
 32 | #ifdef __GNUC__  /* GCC, or GCC-compatible compiler such as clang */
 33 | #  define forceinline		inline __attribute__((always_inline))
 34 | #  define likely(expr)		__builtin_expect(!!(expr), 1)
 35 | #  define unlikely(expr)	__builtin_expect(!!(expr), 0)
 36 | #  define _aligned_attribute(n)	__attribute__((aligned(n)))
 37 | #  define bsr32(n)		(31 - __builtin_clz(n))
 38 | #  define bsr64(n)		(63 - __builtin_clzll(n))
 39 | #  define bsf32(n)		__builtin_ctz(n)
 40 | #  define bsf64(n)		__builtin_ctzll(n)
 41 | #  ifndef min
 42 | #    define min(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
 43 | 			(_a < _b) ? _a : _b; })
 44 | #  endif
 45 | #  ifndef max
 46 | #    define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
 47 | 			(_a > _b) ? _a : _b; })
 48 | #  endif
 49 | 
 50 | #  define DEFINE_UNALIGNED_TYPE(type)				\
 51 | struct type##_unaligned {					\
 52 | 	type v;							\
 53 | } __attribute__((packed));					\
 54 | 								\
 55 | static inline type						\
 56 | load_##type##_unaligned(const void *p)				\
 57 | {								\
 58 | 	return ((const struct type##_unaligned *)p)->v;		\
 59 | }								\
 60 | 								\
 61 | static inline void						\
 62 | store_##type##_unaligned(type val, void *p)			\
 63 | {								\
 64 | 	((struct type##_unaligned *)p)->v = val;		\
 65 | }
 66 | 
 67 | #endif /* __GNUC__ */
 68 | 
 69 | /* Declare that the annotated function should always be inlined.  This might be
 70 |  * desirable in highly tuned code, e.g. compression codecs */
 71 | #ifndef forceinline
 72 | #  define forceinline		inline
 73 | #endif
 74 | 
 75 | /* Hint that the expression is usually true */
 76 | #ifndef likely
 77 | #  define likely(expr)		(expr)
 78 | #endif
 79 | 
 80 | /* Hint that the expression is usually false */
 81 | #ifndef unlikely
 82 | #  define unlikely(expr)	(expr)
 83 | #endif
 84 | 
 85 | /* Declare that the annotated variable, or variables of the annotated type, are
 86 |  * to be aligned on n-byte boundaries */
 87 | #ifndef _aligned_attribute
 88 | #  define _aligned_attribute(n)
 89 | #endif
 90 | 
 91 | /* min() and max() macros */
 92 | #ifndef min
 93 | #  define min(a, b)	((a) < (b) ? (a) : (b))
 94 | #endif
 95 | #ifndef max
 96 | #  define max(a, b)	((a) > (b) ? (a) : (b))
 97 | #endif
 98 | 
 99 | /* STATIC_ASSERT() - verify the truth of an expression at compilation time */
100 | #define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))
101 | 
102 | /* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
103 |  * and also produce a result of value '0' to be used in constant expressions */
104 | #define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)]))
105 | 
106 | /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
107 |  * can be performed efficiently on the target platform.  */
108 | #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
109 | #  define UNALIGNED_ACCESS_IS_FAST 1
110 | #else
111 | #  define UNALIGNED_ACCESS_IS_FAST 0
112 | #endif
113 | 
114 | /*
115 |  * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
116 |  * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
117 |  * which load and store variables of type 'type' from/to unaligned memory
118 |  * addresses.
119 |  */
120 | #ifndef DEFINE_UNALIGNED_TYPE
121 | 
122 | #include <string.h>
123 | /*
124 |  * Although memcpy() may seem inefficient, it *usually* gets optimized
125 |  * appropriately by modern compilers.  It's portable and may be the best we can
126 |  * do for a fallback...
127 |  */
128 | #define DEFINE_UNALIGNED_TYPE(type)				\
129 | 								\
130 | static forceinline type						\
131 | load_##type##_unaligned(const void *p)				\
132 | {								\
133 | 	type v;							\
134 | 	memcpy(&v, p, sizeof(v));				\
135 | 	return v;						\
136 | }								\
137 | 								\
138 | static forceinline void						\
139 | store_##type##_unaligned(type v, void *p)			\
140 | {								\
141 | 	memcpy(p, &v, sizeof(v));				\
142 | }
143 | 
144 | #endif /* !DEFINE_UNALIGNED_TYPE */
145 | 
146 | 
147 | /* ========================================================================== */
148 | /*                          Unaligned memory accesses                         */
149 | /* ========================================================================== */
150 | 
151 | #define load_word_unaligned	load_machine_word_t_unaligned
152 | #define store_word_unaligned	store_machine_word_t_unaligned
153 | 
154 | /* ========================================================================== */
155 | /*                             Bit scan functions                             */
156 | /* ========================================================================== */
157 | 
158 | /*
159 |  * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
160 |  * significant end) of the *most* significant 1 bit in the input value.  The
161 |  * input value must be nonzero!
162 |  */
163 | 
164 | #ifndef bsr32
165 | static forceinline unsigned
166 | bsr32(u32 v)
167 | {
168 | 	unsigned idx;	/* right shifts needed to reduce v to its top bit */
169 | 	for (idx = 0; v > 1; v >>= 1)
170 | 		idx++;
171 | 	return idx;
172 | }
173 | #endif
174 | 
175 | #ifndef bsr64
176 | static forceinline unsigned
177 | bsr64(u64 v)
178 | {
179 | 	unsigned idx = 0;	/* index of the highest set bit found so far */
180 | 	for (v >>= 1; v != 0; v >>= 1)
181 | 		idx++;
182 | 	return idx;
183 | }
184 | #endif
185 | 
186 | static forceinline unsigned
187 | bsrw(machine_word_t v)
188 | {
189 | 	/* Dispatch on the word size; only 32- and 64-bit words are accepted. */
190 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
191 | 	if (WORDBITS != 32)
192 | 		return bsr64(v);
193 | 	return bsr32(v);
194 | }
195 | 
196 | /*
197 |  * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
198 |  * significant end) of the *least* significant 1 bit in the input value.  The
199 |  * input value must be nonzero!
200 |  */
201 | 
202 | #ifndef bsf32
203 | static forceinline unsigned
204 | bsf32(u32 v)
205 | {
206 | 	unsigned idx = 0;	/* zero bits shifted out below the lowest 1 bit */
207 | 	for (; (v & 1) == 0; v >>= 1)
208 | 		idx++;
209 | 	return idx;
210 | }
211 | #endif
212 | 
213 | #ifndef bsf64
214 | static forceinline unsigned
215 | bsf64(u64 v)
216 | {
217 | 	unsigned idx = 0;	/* zero bits shifted out below the lowest 1 bit */
218 | 	for (; (v & 1) == 0; v >>= 1)
219 | 		idx++;
220 | 	return idx;
221 | }
222 | #endif
223 | 
224 | static forceinline unsigned
225 | bsfw(machine_word_t v)
226 | {
227 | 	/* Dispatch on the word size; only 32- and 64-bit words are accepted. */
228 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
229 | 	if (WORDBITS != 32)
230 | 		return bsf64(v);
231 | 	return bsf32(v);
232 | }
233 | 
234 | /* Return the log base 2 of 'n', rounded up to the nearest integer. */
235 | static forceinline unsigned
236 | ilog2_ceil(size_t n)
237 | {
238 |         /* 0 and 1 both yield 0.  For larger n the bit scan is given n - 1,
239 |          * which is nonzero as the scan requires. */
240 |         return (n <= 1) ? 0 : 1 + bsrw(n - 1);
241 | }
242 | 
243 | /* ========================================================================== */
244 | /*                          Aligned memory allocation                         */
245 | /* ========================================================================== */
246 | 
247 | extern void *aligned_malloc(size_t size, size_t alignment);
248 | extern void aligned_free(void *ptr);
249 | 
250 | #endif /* _COMMON_DEFS_H */
251 | 


--------------------------------------------------------------------------------
/src/ebiggers/decompress_common.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * decompress_common.c
  3 |  *
  4 |  * Code for decompression shared among multiple compression formats.
  5 |  *
  6 |  * The following copying information applies to this specific source code file:
  7 |  *
  8 |  * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
  9 |  *
 10 |  * To the extent possible under law, the author(s) have dedicated all copyright
 11 |  * and related and neighboring rights to this software to the public domain
 12 |  * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
 13 |  * Dedication (the "CC0").
 14 |  *
 15 |  * This software is distributed in the hope that it will be useful, but WITHOUT
 16 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 17 |  * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
 18 |  *
 19 |  * You should have received a copy of the CC0 along with this software; if not
 20 |  * see <http://creativecommons.org/publicdomain/zero/1.0/>.
 21 |  */
 22 | 
 23 | #ifdef HAVE_CONFIG_H
 24 | #  include "config.h"
 25 | #endif
 26 | 
 27 | #include <string.h>
 28 | 
 29 | #ifdef __SSE2__
 30 | #  include <emmintrin.h>
 31 | #endif
 32 | 
 33 | #include "decompress_common.h"
 34 | 
 35 | /*
 36 |  * make_huffman_decode_table() -
 37 |  *
 38 |  * Given an alphabet of symbols and the length of each symbol's codeword in a
 39 |  * canonical prefix code, build a table for quickly decoding symbols that were
 40 |  * encoded with that code.
 41 |  *
 42 |  * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
 43 |  * such that no whole codeword is a prefix of any other.  A prefix code might be
 44 |  * a _Huffman code_, which means that it is an optimum prefix code for a given
 45 |  * list of symbol frequencies and was generated by the Huffman algorithm.
 46 |  * Although the prefix codes processed here will ordinarily be "Huffman codes",
 47 |  * strictly speaking the decoder cannot know whether a given code was actually
 48 |  * generated by the Huffman algorithm or not.
 49 |  *
 50 |  * A prefix code is _canonical_ if and only if a longer codeword never
 51 |  * lexicographically precedes a shorter codeword, and the lexicographic ordering
 52 |  * of codewords of equal length is the same as the lexicographic ordering of the
 53 |  * corresponding symbols.  The advantage of using a canonical prefix code is
 54 |  * that the codewords can be reconstructed from only the symbol => codeword
 55 |  * length mapping.  This eliminates the need to transmit the codewords
 56 |  * explicitly.  Instead, they can be enumerated in lexicographic order after
 57 |  * sorting the symbols primarily by increasing codeword length and secondarily
 58 |  * by increasing symbol value.
 59 |  *
 60 |  * However, the decoder's real goal is to decode symbols with the code, not just
 61 |  * generate the list of codewords.  Consequently, this function directly builds
 62 |  * a table for efficiently decoding symbols using the code.  The basic idea is
 63 |  * that given the next 'max_codeword_len' bits of input, the decoder can look up
 64 |  * the next decoded symbol by indexing a table containing '2^max_codeword_len'
 65 |  * entries.  A codeword with length 'max_codeword_len' will have exactly one
 66 |  * entry in this table, whereas a codeword shorter than 'max_codeword_len' will
 67 |  * have multiple entries in this table.  Precisely, a codeword of length 'n'
 68 |  * will have '2^(max_codeword_len - n)' entries.  The index of each such entry,
 69 |  * considered as a bitstring of length 'max_codeword_len', will contain the
 70 |  * corresponding codeword as a prefix.
 71 |  *
 72 |  * That's the basic idea, but we extend it in two ways:
 73 |  *
 74 |  * - Often the maximum codeword length is too long for it to be efficient to
 75 |  *   build the full decode table whenever a new code is used.  Instead, we build
 76 |  *   a "root" table using only '2^table_bits' entries, where 'table_bits <=
 77 |  *   max_codeword_len'.  Then, a lookup of 'table_bits' bits produces either a
 78 |  *   symbol directly (for codewords not longer than 'table_bits'), or the index
 79 |  *   of a subtable which must be indexed with additional bits of input to fully
 80 |  *   decode the symbol (for codewords longer than 'table_bits').
 81 |  *
 82 |  * - Whenever the decoder decodes a symbol, it needs to know the codeword length
 83 |  *   so that it can remove the appropriate number of input bits.  The obvious
 84 |  *   solution would be to simply retain the codeword lengths array and use the
 85 |  *   decoded symbol as an index into it.  However, that would require two array
 86 |  *   accesses when decoding each symbol.  Our strategy is to instead store the
 87 |  *   codeword length directly in the decode table entry along with the symbol.
 88 |  *
 89 |  * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
 90 |  * entries, and see read_huffsym() for full details on how symbols are decoded.
 91 |  *
 92 |  * @decode_table:
 93 |  *	The array in which to build the decode table.  This must have been
 94 |  *	declared by the DECODE_TABLE() macro.  This may alias @lens, since all
 95 |  *	@lens are consumed before the decode table is written to.
 96 |  *
 97 |  * @num_syms:
 98 |  *	The number of symbols in the alphabet.
 99 |  *
100 |  * @table_bits:
101 |  *	The log base 2 of the number of entries in the root table.
102 |  *
103 |  * @lens:
104 |  *	An array of length @num_syms, indexed by symbol, that gives the length
105 |  *	of the codeword, in bits, for each symbol.  The length can be 0, which
106 |  *	means that the symbol does not have a codeword assigned.  In addition,
107 |  *	@lens may alias @decode_table, as noted above.
108 |  *
109 |  * @max_codeword_len:
110 |  *	The maximum codeword length permitted for this code.  All entries in
111 |  *	'lens' must be less than or equal to this value.
112 |  *
113 |  * @working_space
114 |  *	A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
115 |  *
116 |  * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
117 |  */
118 | int
119 | make_huffman_decode_table(u16 decode_table[], unsigned num_syms,
120 | 			  unsigned table_bits, const u8 lens[],
121 | 			  unsigned max_codeword_len, u16 working_space[])
122 | {
123 | 	u16 * const len_counts = &working_space[0];
124 | 	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
125 | 	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
126 | 	s32 remainder = 1;	/* codespace still unclaimed; see the completeness check below */
127 | 	uint8_t *entry_ptr = (uint8_t *)decode_table;	/* byte-wise write cursor for the root table fill */
128 | 	unsigned codeword_len = 1;	/* length currently being filled; the root fill starts at 1 bit */
129 | 	unsigned sym_idx;
130 | 	unsigned codeword;
131 | 	unsigned subtable_pos;
132 | 	unsigned subtable_bits;
133 | 	unsigned subtable_prefix;
134 | 
135 | 	/* Count how many codewords have each length, including 0.  */
136 | 	for (unsigned len = 0; len <= max_codeword_len; len++)
137 | 		len_counts[len] = 0;
138 | 	for (unsigned sym = 0; sym < num_syms; sym++)
139 | 		len_counts[lens[sym]]++;
140 | 
141 | 	/* It is already guaranteed that all lengths are <= max_codeword_len,
142 | 	 * but it cannot be assumed they form a complete prefix code.  A
143 | 	 * codeword of length n should require a proportion of the codespace
144 | 	 * equaling (1/2)^n.  The code is complete if and only if, by this
145 | 	 * measure, the codespace is exactly filled by the lengths.  */
146 | 	for (unsigned len = 1; len <= max_codeword_len; len++) {
147 | 		remainder = (remainder << 1) - len_counts[len];
148 | 		/* Do the lengths overflow the codespace? */
149 | 		if (unlikely(remainder < 0))
150 | 			return -1;
151 | 	}
152 | 
153 | 	if (remainder != 0) {
154 | 		/* The lengths do not fill the codespace; that is, they form an
155 | 		 * incomplete code.  This is permitted only if the code is empty
156 | 		 * (contains no symbols). */
157 | 
158 | 		if (unlikely(remainder != 1U << max_codeword_len))
159 | 			return -1;
160 | 
161 | 		/* The code is empty.  When processing a well-formed stream, the
162 | 		 * decode table need not be initialized in this case.  However,
163 | 		 * we cannot assume the stream is well-formed, so we must
164 | 		 * initialize the decode table anyway.  Setting all entries to 0
165 | 		 * makes the decode table always produce symbol '0' without
166 | 		 * consuming any bits, which is good enough. */
167 | 		memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
168 | 		return 0;
169 | 	}
170 | 
171 | 	/* Sort the symbols primarily by increasing codeword length and
172 | 	 * secondarily by increasing symbol value. */
173 | 
174 | 	/* Initialize 'offsets' so that 'offsets[len]' is the number of
175 | 	 * codewords shorter than 'len' bits, including length 0. */
176 | 	offsets[0] = 0;
177 | 	for (unsigned len = 0; len < max_codeword_len; len++)
178 | 		offsets[len + 1] = offsets[len] + len_counts[len];
179 | 
180 | 	/* Use the 'offsets' array to sort the symbols. */
181 | 	for (unsigned sym = 0; sym < num_syms; sym++)
182 | 		sorted_syms[offsets[lens[sym]]++] = sym;
183 | 
184 | 	/*
185 | 	 * Fill the root table entries for codewords no longer than table_bits.
186 | 	 *
187 | 	 * The table will start with entries for the shortest codeword(s), which
188 | 	 * will have the most entries.  From there, the number of entries per
189 | 	 * codeword will decrease.  As an optimization, we may begin filling
190 | 	 * entries with SSE2 vector accesses (8 entries/store), then change to
191 | 	 * word accesses (2 or 4 entries/store), then change to 16-bit accesses
192 | 	 * (1 entry/store).
193 | 	 */
194 | 	sym_idx = offsets[0];	/* the sort advanced offsets[0] past every zero-length (unused) symbol */
195 | 
196 | #ifdef __SSE2__
197 | 	/* Fill entries one 128-bit vector (8 entries) at a time. */
198 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
199 | 				    (sizeof(__m128i) / sizeof(decode_table[0]));
200 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
201 | 	{
202 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
203 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
204 | 			/* Note: unlike in the "word" version below, the __m128i
205 | 			 * type already has __attribute__((may_alias)), so using
206 | 			 * it to access an array of u16 will not violate strict
207 | 			 * aliasing.  */
208 | 			__m128i v = _mm_set1_epi16(
209 | 				MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
210 | 							codeword_len));
211 | 			unsigned n = stores_per_loop;
212 | 			do {
213 | 				*(__m128i *)entry_ptr = v;
214 | 				entry_ptr += sizeof(v);
215 | 			} while (--n);
216 | 		}
217 | 	}
218 | #endif /* __SSE2__ */
219 | 
220 | #ifdef __GNUC__
221 | 	/* Fill entries one word (2 or 4 entries) at a time. */
222 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
223 | 					(WORDBYTES / sizeof(decode_table[0]));
224 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
225 | 	{
226 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
227 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
228 | 
229 | 			/* Accessing the array of u16 as u32 or u64 would
230 | 			 * violate strict aliasing and would require compiling
231 | 			 * the code with -fno-strict-aliasing to guarantee
232 | 			 * correctness.  To work around this problem, use the
233 | 			 * gcc 'may_alias' extension.  */
234 | 			typedef machine_word_t
235 | 				__attribute__((may_alias)) aliased_word_t;
236 | 			aliased_word_t v = repeat_u16(
237 | 				MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
238 | 							codeword_len));
239 | 			unsigned n = stores_per_loop;
240 | 			do {
241 | 				*(aliased_word_t *)entry_ptr = v;
242 | 				entry_ptr += sizeof(v);
243 | 			} while (--n);
244 | 		}
245 | 	}
246 | #endif /* __GNUC__ */
247 | 
248 | 	/* Fill entries one at a time. */
249 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
250 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
251 | 	{
252 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
253 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
254 | 			u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
255 | 							codeword_len);
256 | 			unsigned n = stores_per_loop;
257 | 			do {
258 | 				*(u16 *)entry_ptr = v;
259 | 				entry_ptr += sizeof(v);
260 | 			} while (--n);
261 | 		}
262 | 	}
263 | 
264 | 	/* If all symbols were processed, then no subtables are required. */
265 | 	if (sym_idx == num_syms)
266 | 		return 0;
267 | 
268 | 	/* At least one subtable is required.  Process the remaining symbols. */
269 | 	codeword = ((u16 *)entry_ptr - decode_table) << 1;	/* root entries filled so far, doubled; NOTE(review): codeword_len appears to be table_bits + 1 here - confirm */
270 | 	subtable_pos = 1U << table_bits;
271 | 	subtable_bits = table_bits;
272 | 	subtable_prefix = -1;	/* sentinel (wraps to UINT_MAX): no subtable has been started yet */
273 | 	do {
274 | 		while (len_counts[codeword_len] == 0) {
275 | 			codeword_len++;
276 | 			codeword <<= 1;
277 | 		}
278 | 
279 | 		unsigned prefix = codeword >> (codeword_len - table_bits);
280 | 
281 | 		/* Start a new subtable if the first 'table_bits' bits of the
282 | 		 * codeword don't match the prefix for the previous subtable, or
283 | 		 * if this will be the first subtable. */
284 | 		if (prefix != subtable_prefix) {
285 | 
286 | 			subtable_prefix = prefix;
287 | 
288 | 			/*
289 | 			 * Calculate the subtable length.  If the codeword
290 | 			 * length exceeds 'table_bits' by n, then the subtable
291 | 			 * needs at least 2^n entries.  But it may need more; if
292 | 			 * there are fewer than 2^n codewords of length
293 | 			 * 'table_bits + n' remaining, then n will need to be
294 | 			 * incremented to bring in longer codewords until the
295 | 			 * subtable can be filled completely.  Note that it
296 | 			 * always will, eventually, be possible to fill the
297 | 			 * subtable, since it was previously verified that the
298 | 			 * code is complete.
299 | 			 */
300 | 			subtable_bits = codeword_len - table_bits;
301 | 			remainder = (s32)1 << subtable_bits;
302 | 			for (;;) {
303 | 				remainder -= len_counts[table_bits +
304 | 							subtable_bits];
305 | 				if (remainder <= 0)
306 | 					break;
307 | 				subtable_bits++;
308 | 				remainder <<= 1;
309 | 			}
310 | 
311 | 			/* Create the entry that points from the root table to
312 | 			 * the subtable.  This entry contains the index of the
313 | 			 * start of the subtable and the number of bits with
314 | 			 * which the subtable is indexed (the log base 2 of the
315 | 			 * number of entries it contains).  */
316 | 			decode_table[subtable_prefix] =
317 | 				MAKE_DECODE_TABLE_ENTRY(subtable_pos,
318 | 							subtable_bits);
319 | 		}
320 | 
321 | 		/* Fill the subtable entries for this symbol. */
322 | 		u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
323 | 						    codeword_len - table_bits);
324 | 		unsigned n = 1U << (subtable_bits - (codeword_len -
325 | 						     table_bits));
326 | 		do {
327 | 			decode_table[subtable_pos++] = entry;
328 | 		} while (--n);
329 | 
330 | 		len_counts[codeword_len]--;
331 | 		codeword++;
332 | 	} while (++sym_idx < num_syms);
333 | 
334 | 	return 0;
335 | }
336 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_common.c - Common code for LZX compression and decompression.
  3 |  */
  4 | 
  5 | /*
  6 |  * Copyright (C) 2012-2016 Eric Biggers
  7 |  *
  8 |  * This program is free software: you can redistribute it and/or modify it under
  9 |  * the terms of the GNU General Public License as published by the Free Software
 10 |  * Foundation, either version 2 of the License, or (at your option) any later
 11 |  * version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 14 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 15 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 16 |  * details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License along with
 19 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 20 |  */
 21 | 
 22 | #ifdef HAVE_CONFIG_H
 23 | #  include "config.h"
 24 | #endif
 25 | 
 26 | #include <string.h>
 27 | 
 28 | #ifdef __SSE2__
 29 | #  include <emmintrin.h>
 30 | #endif
 31 | 
 32 | #ifdef __AVX2__
 33 | #  include <immintrin.h>
 34 | #endif
 35 | 
 36 | #include "common_defs.h"
 37 | #include "lzx_common.h"
 38 | 
 39 | /* Mapping: offset slot => first match offset that uses that offset slot.  The
 40 |  * repeat offset slots map to "fake" offsets < 1; entry [50] is an end bound.  */
 41 | const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
 42 |         -2     , -1     , 0      , 1      , 2      ,    /* 0  --- 4  */
 43 |         4      , 6      , 10     , 14     , 22     ,    /* 5  --- 9  */
 44 |         30     , 46     , 62     , 94     , 126    ,    /* 10 --- 14 */
 45 |         190    , 254    , 382    , 510    , 766    ,    /* 15 --- 19 */
 46 |         1022   , 1534   , 2046   , 3070   , 4094   ,    /* 20 --- 24 */
 47 |         6142   , 8190   , 12286  , 16382  , 24574  ,    /* 25 --- 29 */
 48 |         32766  , 49150  , 65534  , 98302  , 131070 ,    /* 30 --- 34 */
 49 |         196606 , 262142 , 393214 , 524286 , 655358 ,    /* 35 --- 39 */
 50 |         786430 , 917502 , 1048574, 1179646, 1310718,    /* 40 --- 44 */
 51 |         1441790, 1572862, 1703934, 1835006, 1966078,    /* 45 --- 49 */
 52 |         2097150                                         /* extra     */
 53 | };
 54 | 
 55 | /* Mapping: offset slot => how many extra bits must be read and added to the
 56 |  * corresponding offset slot base to decode the match offset.  */
 57 | const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
 58 | 	0 , 0 , 0 , 0 , 1 ,
 59 | 	1 , 2 , 2 , 3 , 3 ,
 60 | 	4 , 4 , 5 , 5 , 6 ,
 61 | 	6 , 7 , 7 , 8 , 8 ,
 62 | 	9 , 9 , 10, 10, 11,
 63 | 	11, 12, 12, 13, 13,
 64 | 	14, 14, 15, 15, 16,
 65 | 	16, 17, 17, 17, 17,
 66 | 	17, 17, 17, 17, 17,
 67 | 	17, 17, 17, 17, 17,
 68 | };
 69 | 
 70 | /* Return the order (log2) of the smallest valid LZX window size that can hold
 71 |  * 'max_bufsize' bytes.  Return 0 if 'max_bufsize' is 0 or exceeds the largest
 72 |  * valid LZX window size.  */
 73 | unsigned
 74 | lzx_get_window_order(size_t max_bufsize)
 75 | {
 76 | 	unsigned order = 0;
 77 | 	if (max_bufsize != 0 && max_bufsize <= LZX_MAX_WINDOW_SIZE)
 78 | 		order = max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
 79 | 	return order;
 80 | }
 81 | 
 82 | /* Return the number of symbols in the main Huffman code for the given (valid)
 83 |  * LZX window order.  */
 84 | unsigned
 85 | lzx_get_num_main_syms(unsigned window_order)
 86 | {
 87 | 	/* The largest offset is one less than 'window_size - LZX_MIN_MATCH_LEN':
 88 | 	 * the format disallows the case where the first two bytes match the
 89 | 	 * last two bytes, which saves one offset slot.  */
 90 | 	const u32 max_off = ((u32)1 << window_order) - LZX_MIN_MATCH_LEN - 1;
 91 | 	unsigned nslots;
 92 | 
 93 | 	/* Start from 30 slots and keep adding slots while the next slot's base
 94 | 	 * is still reachable by the largest offset.  */
 95 | 	for (nslots = 30; max_off >= lzx_offset_slot_base[nslots]; nslots++)
 96 | 		;
 97 | 
 98 | 	return LZX_NUM_CHARS + (nslots * LZX_NUM_LEN_HEADERS);
 99 | }
100 | 
101 | /* E8 preprocessing: turn the relative 32-bit CALL target at @target into an
102 |  * absolute one.  @input_pos is the position of the E8 byte in the buffer.  */
103 | static void
104 | do_translate_target(void *target, s32 input_pos)
105 | {
106 | 	s32 abs_offset, rel_offset;
107 | 
108 | 	/* @target may be misaligned, so go through memcpy() rather than a cast.  */
109 | 	memcpy(&rel_offset, target, sizeof(rel_offset));
110 | 	if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
111 | 		if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos)
112 | 			abs_offset = rel_offset + input_pos; /* "good translation" */
113 | 		else	/* "compensating translation" */
114 | 			abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
115 | 		memcpy(target, &abs_offset, sizeof(abs_offset));
116 | 	}
117 | }
118 | 
119 | /* E8 postprocessing: turn the absolute 32-bit CALL target at @target back into
120 |  * a relative one.  @input_pos is the position of the E8 byte in the buffer.  */
121 | static void
122 | undo_translate_target(void *target, s32 input_pos)
123 | {
124 | 	s32 abs_offset, rel_offset;
125 | 
126 | 	/* @target may be misaligned, so go through memcpy() rather than a cast.  */
127 | 	memcpy(&abs_offset, target, sizeof(abs_offset));
128 | 	if (abs_offset >= 0) {
129 | 		if (abs_offset >= LZX_WIM_MAGIC_FILESIZE)
130 | 			return;		/* out of range: left untranslated */
131 | 		rel_offset = abs_offset - input_pos;	/* "good translation" */
132 | 	} else {
133 | 		if (abs_offset < -input_pos)
134 | 			return;		/* out of range: left untranslated */
135 | 		rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE; /* "compensating" */
136 | 	}
137 | 	memcpy(target, &rel_offset, sizeof(rel_offset));
138 | }
139 | 
140 | /*
141 |  * Do or undo the 'E8' preprocessing used in LZX.  Before compression, the
142 |  * uncompressed data is preprocessed by changing the targets of x86 CALL
143 |  * instructions from relative offsets to absolute offsets.  After decompression,
144 |  * the translation is undone by changing the targets of x86 CALL instructions
145 |  * from absolute offsets to relative offsets.
146 |  *
147 |  * Note that despite its intent, E8 preprocessing can be done on any data even
148 |  * if it is not actually x86 machine code.  In fact, E8 preprocessing appears to
149 |  * always be used in LZX-compressed resources in WIM files; there is no bit to
150 |  * indicate whether it is used or not, unlike in the LZX compressed format as
151 |  * used in cabinet files, where a bit is reserved for that purpose.
152 |  *
153 |  * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
154 |  * which really means the 5-byte call instruction cannot start in the last 10
155 |  * bytes of the uncompressed data.  This is one of the errors in the LZX
156 |  * documentation.
157 |  *
158 |  * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
159 |  * WIM resource, which apparently is another difference from the LZX compression
160 |  * used in cabinet files.
161 |  *
162 |  * E8 processing is supposed to take the file size as a parameter, as it is used
163 |  * in calculating the translated jump targets.  But in WIM files, this file size
164 |  * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
165 |  */
166 | static void
167 | lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
168 | {
169 | 	/* @process_target receives (pointer just past the E8 byte, E8 position).  */
170 | #if !defined(__SSE2__) && !defined(__AVX2__)
171 | 	/*
172 | 	 * A worthwhile optimization is to push the end-of-buffer check into the
173 | 	 * relatively rare E8 case.  This is possible if we replace the last six
174 | 	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
175 | 	 * before reaching end-of-buffer.  In addition, this scheme guarantees
176 | 	 * that no translation can begin following an E8 byte in the last 10
177 | 	 * bytes because a 4-byte offset containing E8 as its high byte is a
178 | 	 * large negative number that is not valid for translation.  That is
179 | 	 * exactly what we need.
180 | 	 */
181 | 	u8 *tail;
182 | 	u8 saved_bytes[6];
183 | 	u8 *p;
184 | 
185 | 	if (size <= 10)
186 | 		return;
187 | 
188 | 	tail = &data[size - 6];
189 | 	memcpy(saved_bytes, tail, 6);
190 | 	memset(tail, 0xE8, 6);
191 | 	p = data;
192 | 	for (;;) {
193 | 		while (*p != 0xE8)
194 | 			p++;
195 | 		if (p >= tail)
196 | 			break;
197 | 		(*process_target)(p + 1, p - data);
198 | 		p += 5;
199 | 	}
200 | 	memcpy(tail, saved_bytes, 6);
201 | #else
202 | 	/* SSE2 or AVX-2 optimized version for x86_64  */
203 | 
204 | 	u8 *p = data;
205 | 	u64 valid_mask = ~0;
206 | 
207 | 	if (size <= 10)
208 | 		return;
209 | #ifdef __AVX2__
210 | #  define ALIGNMENT_REQUIRED 32
211 | #else
212 | #  define ALIGNMENT_REQUIRED 16
213 | #endif
214 | 
215 | 	/* Process one byte at a time until the pointer is properly aligned.  */
216 | 	while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
217 | 		if (p >= data + size - 10)
218 | 			return;
219 | 		if (*p == 0xE8 && (valid_mask & 1)) {
220 | 			(*process_target)(p + 1, p - data);
221 | 			valid_mask &= ~0x1F;
222 | 		}
223 | 		p++;
224 | 		valid_mask >>= 1;
225 | 		valid_mask |= (u64)1 << 63;
226 | 	}
227 | 
228 | 	if (data + size - p >= 64) {
229 | 
230 | 		/* Vectorized processing  */
231 | 
232 | 		/* Note: we use a "trap" E8 byte to eliminate the need to check
233 | 		 * for end-of-buffer in the inner loop.  This byte is carefully
234 | 		 * positioned so that it will never be changed by a previous
235 | 		 * translation before it is detected.  */
236 | 
237 | 		u8 *trap = p + ((data + size - p) & ~31) - 32 + 4;
238 | 		u8 saved_byte = *trap;
239 | 		*trap = 0xE8;
240 | 
241 | 		for (;;) {
242 | 			u32 e8_mask;
243 | 			u8 *orig_p = p;
244 | 		#ifdef __AVX2__
245 | 			const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
246 | 			for (;;) {
247 | 				__m256i bytes = *(const __m256i *)p;
248 | 				__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
249 | 				e8_mask = _mm256_movemask_epi8(cmpresult);
250 | 				if (e8_mask)
251 | 					break;
252 | 				p += 32;
253 | 			}
254 | 		#else
255 | 			const __m128i e8_bytes = _mm_set1_epi8(0xE8);
256 | 			for (;;) {
257 | 				/* Read the next 32 bytes of data and test them
258 | 				 * for E8 bytes.  */
259 | 				__m128i bytes1 = *(const __m128i *)p;
260 | 				__m128i bytes2 = *(const __m128i *)(p + 16);
261 | 				__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
262 | 				__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
263 | 				u32 mask1 = _mm_movemask_epi8(cmpresult1);
264 | 				u32 mask2 = _mm_movemask_epi8(cmpresult2);
265 | 				/* The masks have a bit set for each E8 byte.
266 | 				 * We stay in this fast inner loop as long as
267 | 				 * there are no E8 bytes.  */
268 | 				if (mask1 | mask2) {
269 | 					e8_mask = mask1 | (mask2 << 16);
270 | 					break;
271 | 				}
272 | 				p += 32;
273 | 			}
274 | 		#endif
275 | 
276 | 			/* Did we pass over data with no E8 bytes?  */
277 | 			if (p != orig_p)
278 | 				valid_mask = ~0;
279 | 
280 | 			/* Are we nearing end-of-buffer?  */
281 | 			if (p == trap - 4)
282 | 				break;
283 | 
284 | 			/* Process the E8 bytes.  However, the AND with
285 | 			 * 'valid_mask' ensures we never process an E8 byte that
286 | 			 * was itself part of a translation target.  */
287 | 			while ((e8_mask &= valid_mask)) {
288 | 				unsigned bit = bsf32(e8_mask);
289 | 				(*process_target)(p + bit + 1, p + bit - data);
290 | 				valid_mask &= ~((u64)0x1F << bit);
291 | 			}
292 | 
293 | 			valid_mask >>= 32;
294 | 			valid_mask |= 0xFFFFFFFF00000000;
295 | 			p += 32;
296 | 		}
297 | 
298 | 		*trap = saved_byte;
299 | 	}
300 | 
301 | 	/* Approaching the end of the buffer; process one byte at a time.  */
302 | 	while (p < data + size - 10) {
303 | 		if (*p == 0xE8 && (valid_mask & 1)) {
304 | 			(*process_target)(p + 1, p - data);
305 | 			valid_mask &= ~0x1F;
306 | 		}
307 | 		p++;
308 | 		valid_mask >>= 1;
309 | 		valid_mask |= (u64)1 << 63;
310 | 	}
311 | #endif /* __SSE2__ || __AVX2__ */
312 | }
313 | 
314 | void
315 | lzx_preprocess(u8 *data, u32 size)
316 | {
317 | 	lzx_e8_filter(data, size, do_translate_target); /* relative -> absolute */
318 | }
319 | 
320 | void
321 | lzx_postprocess(u8 *data, u32 size)
322 | {
323 | 	lzx_e8_filter(data, size, undo_translate_target); /* absolute -> relative */
324 | }
325 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * lzx_common.h
 3 |  *
 4 |  * Declarations shared between LZX compression and decompression.
 5 |  */
 6 | 
 7 | #ifndef _LZX_COMMON_H
 8 | #define _LZX_COMMON_H
 9 | 
10 | #include "lzx_constants.h"
11 | #include "common_defs.h"
12 | 
13 | extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
14 | 
15 | extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
16 | 
17 | extern unsigned
18 | lzx_get_window_order(size_t max_bufsize);
19 | 
20 | extern unsigned
21 | lzx_get_num_main_syms(unsigned window_order);
22 | 
23 | extern void
24 | lzx_preprocess(u8 *data, u32 size);
25 | 
26 | extern void
27 | lzx_postprocess(u8 *data, u32 size);
28 | 
29 | #endif /* _LZX_COMMON_H */
30 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_constants.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_constants.h
  3 |  *
  4 |  * Constants for the LZX compression format.
  5 |  */
  6 | 
  7 | #ifndef _LZX_CONSTANTS_H
  8 | #define _LZX_CONSTANTS_H
  9 | 
 10 | /* Number of literal byte values.  */
 11 | #define LZX_NUM_CHARS	256
 12 | 
 13 | /* The smallest and largest allowed match lengths.  */
 14 | #define LZX_MIN_MATCH_LEN	2
 15 | #define LZX_MAX_MATCH_LEN	257
 16 | 
 17 | /* Number of distinct match lengths that can be represented.  */
 18 | #define LZX_NUM_LENS		(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
 19 | 
 20 | /* Number of match lengths for which no length symbol is required.  */
 21 | #define LZX_NUM_PRIMARY_LENS	7
 22 | #define LZX_NUM_LEN_HEADERS	(LZX_NUM_PRIMARY_LENS + 1)
 23 | 
 24 | /* Valid values of the 3-bit block type field.  */
 25 | #define LZX_BLOCKTYPE_VERBATIM       1
 26 | #define LZX_BLOCKTYPE_ALIGNED        2
 27 | #define LZX_BLOCKTYPE_UNCOMPRESSED   3
 28 | 
29 | /* LZX_MIN_WINDOW_ORDER and LZX_MAX_WINDOW_ORDER are the base-2 logarithms of
30 |  * LZX_MIN_WINDOW_SIZE and LZX_MAX_WINDOW_SIZE, the sliding window size limits.  */
 31 | #define LZX_MIN_WINDOW_ORDER	15
 32 | #define LZX_MAX_WINDOW_ORDER	21
 33 | #define LZX_MIN_WINDOW_SIZE	(1UL << LZX_MIN_WINDOW_ORDER)  /* 32768   */
 34 | #define LZX_MAX_WINDOW_SIZE	(1UL << LZX_MAX_WINDOW_ORDER)  /* 2097152 */
 35 | 
 36 | /* Maximum number of offset slots.  (The actual number of offset slots depends
 37 |  * on the window size.)  */
 38 | #define LZX_MAX_OFFSET_SLOTS	50
 39 | 
 40 | /* Maximum number of symbols in the main code.  (The actual number of symbols in
 41 |  * the main code depends on the window size.)  */
 42 | #define LZX_MAINCODE_MAX_NUM_SYMBOLS	\
 43 | 	(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
 44 | 
 45 | /* Number of symbols in the length code.  */
 46 | #define LZX_LENCODE_NUM_SYMBOLS		(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
 47 | 
 48 | /* Number of symbols in the pre-code.  */
 49 | #define LZX_PRECODE_NUM_SYMBOLS		20
 50 | 
 51 | /* Number of bits in which each pre-code codeword length is represented.  */
 52 | #define LZX_PRECODE_ELEMENT_SIZE	4
 53 | 
 54 | /* Number of low-order bits of each match offset that are entropy-encoded in
 55 |  * aligned offset blocks.  */
 56 | #define LZX_NUM_ALIGNED_OFFSET_BITS	3
 57 | 
 58 | /* Number of symbols in the aligned offset code.  */
 59 | #define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)
 60 | 
 61 | /* Mask for the match offset bits that are entropy-encoded in aligned offset
 62 |  * blocks.  */
 63 | #define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
 64 | 
 65 | /* Number of bits in which each aligned offset codeword length is represented.  */
 66 | #define LZX_ALIGNEDCODE_ELEMENT_SIZE	3
 67 | 
 68 | /* The first offset slot which requires an aligned offset symbol in aligned
 69 |  * offset blocks.  */
 70 | #define LZX_MIN_ALIGNED_OFFSET_SLOT	8
 71 | 
 72 | /* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT.  */
 73 | #define LZX_MIN_ALIGNED_OFFSET		14
 74 | 
 75 | /* The maximum number of extra offset bits in verbatim blocks.  (One would need
 76 |  * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
 77 |  * bits in *aligned* blocks.)  */
 78 | #define LZX_MAX_NUM_EXTRA_BITS		17
 79 | 
 80 | /* Maximum lengths (in bits) for length-limited Huffman code construction.  */
 81 | #define LZX_MAX_MAIN_CODEWORD_LEN	16
 82 | #define LZX_MAX_LEN_CODEWORD_LEN	16
 83 | #define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
 84 | #define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
 85 | 
 86 | /* For LZX-compressed blocks in WIM resources, this value is always used as the
 87 |  * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
 88 |  * though the blocks themselves are not this size, and the size of the actual
 89 |  * file resource in the WIM file is very likely to be something entirely
 90 |  * different as well.  */
 91 | #define LZX_WIM_MAGIC_FILESIZE	12000000
 92 | 
 93 | /* Assumed LZX block size when the encoded block size begins with a 0 bit.
 94 |  * This is probably WIM-specific.  */
 95 | #define LZX_DEFAULT_BLOCK_SIZE	32768
 96 | 
 97 | /* Number of offsets in the recent (or "repeat") offsets queue.  */
 98 | #define LZX_NUM_RECENT_OFFSETS	3
 99 | 
100 | /* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT).  */
101 | #define LZX_OFFSET_ADJUSTMENT	(LZX_NUM_RECENT_OFFSETS - 1)
102 | 
103 | #endif /* _LZX_CONSTANTS_H */
104 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_decompress.c
  3 |  *
  4 |  * A decompressor for the LZX compression format, as used in WIM files.
  5 |  */
  6 | 
  7 | /*
  8 |  * Copyright (C) 2012-2016 Eric Biggers
  9 |  *
 10 |  * This program is free software: you can redistribute it and/or modify it under
 11 |  * the terms of the GNU General Public License as published by the Free Software
 12 |  * Foundation, either version 2 of the License, or (at your option) any later
 13 |  * version.
 14 |  *
 15 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 16 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 17 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 18 |  * details.
 19 |  *
 20 |  * You should have received a copy of the GNU General Public License along with
 21 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 22 |  */
 23 | 
 24 | /*
 25 |  * LZX is an LZ77 and Huffman-code based compression format that has many
 26 |  * similarities to DEFLATE (the format used by zlib/gzip).  The compression
 27 |  * ratio is as good or better than DEFLATE.  See lzx_compress.c for a format
 28 |  * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
 29 |  * historical overview.  Here I make some pragmatic notes.
 30 |  *
 31 |  * The old specification for LZX is the document "Microsoft LZX Data Compression
 32 |  * Format" (1997).  It defines the LZX format as used in cabinet files.  Allowed
 33 |  * window sizes are 2^n where 15 <= n <= 21.  However, this document contains
 34 |  * several errors, so don't read too much into it...
 35 |  *
 36 |  * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
 37 |  * Compression and Decompression" (2014).  It defines the LZX format as used by
 38 |  * Microsoft's binary patcher.  It corrects several errors in the 1997 document
 39 |  * and extends the format in several ways --- namely, optional reference data,
 40 |  * up to 2^25 byte windows, and longer match lengths.
 41 |  *
 42 |  * WIM files use a more restricted form of LZX.  No LZX DELTA extensions are
 43 |  * present, the window is not "sliding", E8 preprocessing is done
 44 |  * unconditionally with a fixed file size, and the maximum window size is always
 45 |  * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
 46 |  * This code is primarily intended to implement this form of LZX.  But although
 47 |  * not compatible with WIMGAPI, this code also supports maximum window sizes up
 48 |  * to 2^21 bytes.
 49 |  *
 50 |  * TODO: Add support for window sizes up to 2^25 bytes.
 51 |  */
 52 | 
 53 | #ifdef HAVE_CONFIG_H
 54 | #  include "config.h"
 55 | #endif
 56 | 
 57 | #include <string.h>
 58 | 
 59 | #include "decompress_common.h"
 60 | #include "lzx_common.h"
 61 | #include "system_compression.h"
 62 | 
 63 | /* These values are chosen for fast decompression.  */
 64 | #define LZX_MAINCODE_TABLEBITS		11
 65 | #define LZX_LENCODE_TABLEBITS		9
 66 | #define LZX_PRECODE_TABLEBITS		6
 67 | #define LZX_ALIGNEDCODE_TABLEBITS	7
 68 | 
 69 | #define LZX_READ_LENS_MAX_OVERRUN 50
 70 | 
 71 | struct lzx_decompressor {
 72 | 	/* Decoder state: per-code decode tables, codeword lengths, scratch space */
 73 | 	DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS,
 74 | 		     LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
 75 | 	u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 76 | 
 77 | 	DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS,
 78 | 		     LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
 79 | 	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 80 | 
 81 | 	union {
 82 | 		DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS,
 83 | 			     LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
 84 | 		u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
 85 | 	};
 86 | 
 87 | 	union {
 88 | 		DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS,
 89 | 			     LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
 90 | 		u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
 91 | 		u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
 92 | 	};
 93 | 
 94 | 	union {
 95 | 		DECODE_TABLE_WORKING_SPACE(maincode_working_space,
 96 | 					   LZX_MAINCODE_MAX_NUM_SYMBOLS,
 97 | 					   LZX_MAX_MAIN_CODEWORD_LEN);
 98 | 		DECODE_TABLE_WORKING_SPACE(lencode_working_space,
 99 | 					   LZX_LENCODE_NUM_SYMBOLS,
100 | 					   LZX_MAX_LEN_CODEWORD_LEN);
101 | 		DECODE_TABLE_WORKING_SPACE(alignedcode_working_space,
102 | 					   LZX_ALIGNEDCODE_NUM_SYMBOLS,
103 | 					   LZX_MAX_ALIGNED_CODEWORD_LEN);
104 | 		DECODE_TABLE_WORKING_SPACE(precode_working_space,
105 | 					   LZX_PRECODE_NUM_SYMBOLS,
106 | 					   LZX_MAX_PRE_CODEWORD_LEN);
107 | 	};
108 | 
109 | 	unsigned window_order;
110 | 	unsigned num_main_syms;
111 | 
112 | 	/* Like lzx_extra_offset_bits[], but does not include the entropy-coded
113 | 	 * bits of aligned offset blocks */
114 | 	u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
115 | 
116 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT);
117 | 
118 | /* Decode the next symbol from @is using the precode's decode table. */
119 | static forceinline unsigned
120 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
121 | {
122 | 	return read_huffsym(is, d->precode_decode_table, LZX_PRECODE_TABLEBITS,
123 | 			    LZX_MAX_PRE_CODEWORD_LEN);
124 | }
125 | 
126 | /* Decode the next symbol from @is using the main code's decode table. */
127 | static forceinline unsigned
128 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
129 | {
130 | 	return read_huffsym(is, d->maincode_decode_table, LZX_MAINCODE_TABLEBITS,
131 | 			    LZX_MAX_MAIN_CODEWORD_LEN);
132 | }
133 | 
134 | /* Decode the next symbol from @is using the length code's decode table. */
135 | static forceinline unsigned
136 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
137 | {
138 | 	return read_huffsym(is, d->lencode_decode_table, LZX_LENCODE_TABLEBITS,
139 | 			    LZX_MAX_LEN_CODEWORD_LEN);
140 | }
141 | 
142 | /* Decode the next symbol from @is using the aligned offset code's table. */
143 | static forceinline unsigned
144 | read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
145 | {
146 | 	return read_huffsym(is, d->alignedcode_decode_table, LZX_ALIGNEDCODE_TABLEBITS,
147 | 			    LZX_MAX_ALIGNED_CODEWORD_LEN);
148 | }
149 | 
150 | /*
151 |  * Read a precode from the compressed input bitstream, then use it to decode
152 |  * @num_lens codeword lengths into @lens.  Return 0, or -1 on invalid data.
153 |  */
154 | static int
155 | lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
156 | 		       u8 *lens, unsigned num_lens)
157 | {
158 | 	u8 *len_ptr = lens;
159 | 	u8 *lens_end = lens + num_lens;
160 | 
161 | 	/* Read the lengths of the precode codewords.  These are stored
162 | 	 * explicitly. */
163 | 	for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
164 | 		d->precode_lens[i] =
165 | 			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
166 | 	}
167 | 
168 | 	/* Build the decoding table for the precode. */
169 | 	if (make_huffman_decode_table(d->precode_decode_table,
170 | 				      LZX_PRECODE_NUM_SYMBOLS,
171 | 				      LZX_PRECODE_TABLEBITS,
172 | 				      d->precode_lens,
173 | 				      LZX_MAX_PRE_CODEWORD_LEN,
174 | 				      d->precode_working_space))
175 | 		return -1;
176 | 
177 | 	/* Decode the codeword lengths.  */
178 | 	do {
179 | 		unsigned presym;
180 | 		u8 len;
181 | 
182 | 		/* Read the next precode symbol.  */
183 | 		presym = read_presym(d, is);
184 | 		if (presym < 17) {
185 | 			/* Delta from the old length, modulo 17  */
186 | 			len = *len_ptr - presym;
187 | 			if ((int8_t)len < 0)
188 | 				len += 17;
189 | 			*len_ptr++ = len;
190 | 		} else {
191 | 			/* Special RLE values  */
192 | 
193 | 			unsigned run_len;
194 | 
195 | 			if (presym == 17) {
196 | 				/* Run of 0's  */
197 | 				run_len = 4 + bitstream_read_bits(is, 4);
198 | 				len = 0;
199 | 			} else if (presym == 18) {
200 | 				/* Longer run of 0's  */
201 | 				run_len = 20 + bitstream_read_bits(is, 5);
202 | 				len = 0;
203 | 			} else {
204 | 				/* Run of identical lengths  */
205 | 				run_len = 4 + bitstream_read_bits(is, 1);
206 | 				presym = read_presym(d, is);
207 | 				if (unlikely(presym > 17))
208 | 					return -1;
209 | 				len = *len_ptr - presym;
210 | 				if ((int8_t)len < 0)
211 | 					len += 17;
212 | 			}
213 | 
214 | 			do {
215 | 				*len_ptr++ = len;
216 | 			} while (--run_len);
217 | 			/*
218 | 			 * The worst case overrun is when presym == 18,
219 | 			 * run_len == 20 + 31, and only 1 length was remaining.
220 | 			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
221 | 			 *
222 | 			 * Overrun while reading the first half of maincode_lens
223 | 			 * can corrupt the previous values in the second half.
224 | 			 * This doesn't really matter because the resulting
225 | 			 * lengths will still be in range, and data that
226 | 			 * generates overruns is invalid anyway.
227 | 			 */
228 | 		}
229 | 	} while (len_ptr < lens_end);
230 | 
231 | 	return 0;
232 | }
233 | 
234 | /*
235 |  * Read the header of an LZX block.  On success, return 0 with the block type
236 |  * and size saved in *block_type_ret and *block_size_ret.  For compressed
237 |  * blocks, the codeword lengths are also saved; for uncompressed blocks, the
238 |  * recent offsets queue is updated.  Return -1 if the header is invalid.
239 |  */
240 | static int
241 | lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
242 | 		      u32 recent_offsets[], int *block_type_ret,
243 | 		      u32 *block_size_ret)
244 | {
245 | 	int block_type;
246 | 	u32 block_size;
247 | 
248 | 	bitstream_ensure_bits(is, 4);
249 | 
250 | 	/* Read the block type. */
251 | 	block_type = bitstream_pop_bits(is, 3);
252 | 
253 | 	/* Read the block size. */
254 | 	if (bitstream_pop_bits(is, 1)) {
255 | 		block_size = LZX_DEFAULT_BLOCK_SIZE;
256 | 	} else {
257 | 		block_size = bitstream_read_bits(is, 16);
258 | 		if (d->window_order >= 16) {
259 | 			block_size <<= 8;
260 | 			block_size |= bitstream_read_bits(is, 8);
261 | 		}
262 | 	}
263 | 
264 | 	switch (block_type) {
265 | 
266 | 	case LZX_BLOCKTYPE_ALIGNED:
267 | 
268 | 		/* Read the aligned offset codeword lengths. */
269 | 
270 | 		for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
271 | 			d->alignedcode_lens[i] =
272 | 				bitstream_read_bits(is,
273 | 						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
274 | 		}
275 | 
276 | 		/* Fall through, since the rest of the header for aligned offset
277 | 		 * blocks is the same as that for verbatim blocks.  */
278 | 
279 | 	case LZX_BLOCKTYPE_VERBATIM:
280 | 
281 | 		/* Read the main codeword lengths, which are divided into two
282 | 		 * parts: literal symbols and match headers. */
283 | 
284 | 		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
285 | 					   LZX_NUM_CHARS))
286 | 			return -1;
287 | 
288 | 		if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
289 | 					   d->num_main_syms - LZX_NUM_CHARS))
290 | 			return -1;
291 | 
292 | 
293 | 		/* Read the length codeword lengths. */
294 | 
295 | 		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
296 | 					   LZX_LENCODE_NUM_SYMBOLS))
297 | 			return -1;
298 | 
299 | 		break;
300 | 
301 | 	case LZX_BLOCKTYPE_UNCOMPRESSED:
302 | 		/*
303 | 		 * The header of an uncompressed block contains new values for
304 | 		 * the recent offsets queue, starting on the next 16-bit
305 | 		 * boundary in the bitstream.  Careful: if the stream is
306 | 		 * *already* aligned, the correct thing to do is to throw away
307 | 		 * the next 16 bits (this is probably a mistake in the format).
308 | 		 */
309 | 		bitstream_ensure_bits(is, 1);
310 | 		bitstream_align(is);
311 | 		recent_offsets[0] = bitstream_read_u32(is);
312 | 		recent_offsets[1] = bitstream_read_u32(is);
313 | 		recent_offsets[2] = bitstream_read_u32(is);
314 | 
315 | 		/* Offsets of 0 are invalid.  */
316 | 		if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
317 | 		    recent_offsets[2] == 0)
318 | 			return -1;
319 | 		break;
320 | 
321 | 	default:
322 | 		/* Unrecognized block type.  */
323 | 		return -1;
324 | 	}
325 | 
326 | 	*block_type_ret = block_type;
327 | 	*block_size_ret = block_size;
328 | 	return 0;
329 | }
330 | 
331 | /* Decompress one LZX block to @out_next; return 0, or -1 on invalid data. */
332 | static int
333 | lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
334 | 		     int block_type, u32 block_size,
335 | 		     u8 * const out_begin, u8 *out_next, u32 recent_offsets[])
336 | {
337 | 	u8 * const block_end = out_next + block_size;
338 | 	unsigned min_aligned_offset_slot;
339 | 
340 | 	/*
341 | 	 * Build the Huffman decode tables.  We always need to build the main
342 | 	 * and length decode tables.  For aligned blocks we additionally need to
343 | 	 * build the aligned offset decode table.
344 | 	 */
345 | 
346 | 	if (make_huffman_decode_table(d->maincode_decode_table,
347 | 				      d->num_main_syms,
348 | 				      LZX_MAINCODE_TABLEBITS,
349 | 				      d->maincode_lens,
350 | 				      LZX_MAX_MAIN_CODEWORD_LEN,
351 | 				      d->maincode_working_space))
352 | 		return -1;
353 | 
354 | 	if (make_huffman_decode_table(d->lencode_decode_table,
355 | 				      LZX_LENCODE_NUM_SYMBOLS,
356 | 				      LZX_LENCODE_TABLEBITS,
357 | 				      d->lencode_lens,
358 | 				      LZX_MAX_LEN_CODEWORD_LEN,
359 | 				      d->lencode_working_space))
360 | 		return -1;
361 | 
362 | 	if (block_type == LZX_BLOCKTYPE_ALIGNED) {
363 | 		if (make_huffman_decode_table(d->alignedcode_decode_table,
364 | 					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
365 | 					      LZX_ALIGNEDCODE_TABLEBITS,
366 | 					      d->alignedcode_lens,
367 | 					      LZX_MAX_ALIGNED_CODEWORD_LEN,
368 | 					      d->alignedcode_working_space))
369 | 			return -1;
370 | 		min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
371 | 		memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
372 | 		       sizeof(lzx_extra_offset_bits));
373 | 	} else {
374 | 		min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
375 | 		memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
376 | 		       sizeof(lzx_extra_offset_bits));
377 | 	}
378 | 
379 | 	/* Decode the literals and matches. */
380 | 
381 | 	do {
382 | 		unsigned mainsym;
383 | 		unsigned length;
384 | 		u32 offset;
385 | 		unsigned offset_slot;
386 | 
387 | 		mainsym = read_mainsym(d, is);
388 | 		if (mainsym < LZX_NUM_CHARS) {
389 | 			/* Literal */
390 | 			*out_next++ = mainsym;
391 | 			continue;
392 | 		}
393 | 
394 | 		/* Match */
395 | 
396 | 		/* Decode the length header and offset slot.  */
397 | 		STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
398 | 		length = mainsym % LZX_NUM_LEN_HEADERS;
399 | 		offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
400 | 
401 | 		/* If needed, read a length symbol to decode the full length. */
402 | 		if (length == LZX_NUM_PRIMARY_LENS)
403 | 			length += read_lensym(d, is);
404 | 		length += LZX_MIN_MATCH_LEN;
405 | 
406 | 		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
407 | 			/* Repeat offset  */
408 | 
409 | 			/* Note: This isn't a real LRU queue, since using the R2
410 | 			 * offset doesn't bump the R1 offset down to R2. */
411 | 			offset = recent_offsets[offset_slot];
412 | 			recent_offsets[offset_slot] = recent_offsets[0];
413 | 		} else {
414 | 			/* Explicit offset  */
415 | 			offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
416 | 			if (offset_slot >= min_aligned_offset_slot) {
417 | 				offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
418 | 					 read_alignedsym(d, is);
419 | 			}
420 | 			offset += lzx_offset_slot_base[offset_slot];
421 | 
422 | 			/* Update the match offset LRU queue.  */
423 | 			STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
424 | 			recent_offsets[2] = recent_offsets[1];
425 | 			recent_offsets[1] = recent_offsets[0];
426 | 		}
427 | 		recent_offsets[0] = offset;
428 | 
429 | 		/* Validate the match and copy it to the current position.  */
430 | 		if (unlikely(lz_copy(length, offset, out_begin,
431 | 				     out_next, block_end, LZX_MIN_MATCH_LEN)))
432 | 			return -1;
433 | 		out_next += length;
434 | 	} while (out_next != block_end);
435 | 
436 | 	return 0;
437 | }
438 | 
439 | int /* 0 on success; -1 if a block header, block size or block body is rejected */
440 | lzx_decompress(struct lzx_decompressor *d,
441 | 	       const void *compressed_data, size_t compressed_size,
442 | 	       void *uncompressed_data, size_t uncompressed_size)
443 | {
444 | 	u8 * const out_begin = uncompressed_data;
445 | 	u8 *out_next = out_begin;
446 | 	u8 * const out_end = out_begin + uncompressed_size;
447 | 	struct input_bitstream is;
448 | 	STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
449 | 	u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1}; /* one queue shared by every block of this call */
450 | 	unsigned may_have_e8_byte = 0; /* becomes nonzero once postprocessing is required */
451 | 
452 | 	init_input_bitstream(&is, compressed_data, compressed_size);
453 | 
454 | 	/* Codeword lengths begin as all 0's for delta encoding purposes. */
455 | 	memset(d->maincode_lens, 0, d->num_main_syms);
456 | 	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
457 | 
458 | 	/* Decompress blocks until exactly uncompressed_size bytes have been produced. */
459 | 
460 | 	while (out_next != out_end) {
461 | 		int block_type;
462 | 		u32 block_size;
463 | 
464 | 		if (lzx_read_block_header(d, &is, recent_offsets,
465 | 					  &block_type, &block_size))
466 | 			return -1;
467 | 
468 | 		if (block_size < 1 || block_size > out_end - out_next) /* empty, or larger than the remaining output */
469 | 			return -1;
470 | 
471 | 		if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
472 | 
473 | 			/* Compressed block */
474 | 			if (lzx_decompress_block(d, &is, block_type, block_size,
475 | 						 out_begin, out_next,
476 | 						 recent_offsets))
477 | 				return -1;
478 | 
479 | 			/* If the first E8 byte was in this block, then it must
480 | 			 * have been encoded as a literal using mainsym E8. */
481 | 			may_have_e8_byte |= d->maincode_lens[0xE8]; /* nonzero length: symbol E8 is present in the main code */
482 | 		} else {
483 | 
484 | 			/* Uncompressed block */
485 | 			if (bitstream_read_bytes(&is, out_next, block_size))
486 | 				return -1;
487 | 
488 | 			/* Re-align the bitstream if needed. */
489 | 			if (block_size & 1)
490 | 				bitstream_read_byte(&is); /* consume one more byte after an odd-sized block */
491 | 
492 | 			/* There may have been an E8 byte in the block. */
493 | 			may_have_e8_byte = 1;
494 | 		}
495 | 		out_next += block_size; /* both branches produce exactly block_size bytes */
496 | 	}
497 | 
498 | 	/* Postprocess the data unless it cannot possibly contain E8 bytes. */
499 | 	if (may_have_e8_byte)
500 | 		lzx_postprocess(uncompressed_data, uncompressed_size); /* applied once to the whole output, not per block */
501 | 
502 | 	return 0;
503 | }
504 | 
505 | struct lzx_decompressor * /* new decompressor, or NULL on failure (see the EINVAL case in the body) */
506 | lzx_allocate_decompressor(size_t max_block_size)
507 | {
508 | 	unsigned window_order;
509 | 	struct lzx_decompressor *d;
510 | 
511 | 	window_order = lzx_get_window_order(max_block_size);
512 | 	if (window_order == 0) { /* a result of 0 is treated as an invalid max_block_size */
513 | 		errno = EINVAL;
514 | 		return NULL;
515 | 	}
516 | 
517 | 	d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT);
518 | 	if (!d) /* allocation failed; errno is not touched here */
519 | 		return NULL;
520 | 
521 | 	d->window_order = window_order;
522 | 	d->num_main_syms = lzx_get_num_main_syms(window_order);
523 | 
524 | 	/* Initialize 'd->extra_offset_bits_minus_aligned'. */
525 | 	STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
526 | 		      sizeof(lzx_extra_offset_bits));
527 | 	STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
528 | 		      sizeof(lzx_extra_offset_bits));
529 | 	memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
530 | 	       sizeof(lzx_extra_offset_bits)); /* start from an unmodified copy of the table */
531 | 	for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
532 | 	     offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
533 | 	{
534 | 		d->extra_offset_bits_minus_aligned[offset_slot] -=
535 | 				LZX_NUM_ALIGNED_OFFSET_BITS; /* only slots >= LZX_MIN_ALIGNED_OFFSET_SLOT are reduced */
536 | 	}
537 | 
538 | 	return d; /* release with lzx_free_decompressor() */
539 | }
540 | 
541 | void /* Releases a decompressor returned by lzx_allocate_decompressor(). */
542 | lzx_free_decompressor(struct lzx_decompressor *d)
543 | {
544 | 	aligned_free(d); /* counterpart of the aligned_malloc() in lzx_allocate_decompressor() */
545 | }
546 | 


--------------------------------------------------------------------------------
/src/ebiggers/system_compression.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * system_compression.h - declarations for accessing System Compressed files
 3 |  *
 4 |  * Copyright (C) 2015 Eric Biggers
 5 |  *
 6 |  * This program is free software: you can redistribute it and/or modify it under
 7 |  * the terms of the GNU General Public License as published by the Free Software
 8 |  * Foundation, either version 2 of the License, or (at your option) any later
 9 |  * version.
10 |  *
11 |  * This program is distributed in the hope that it will be useful, but WITHOUT
12 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
14 |  * details.
15 |  *
16 |  * You should have received a copy of the GNU General Public License along with
17 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
18 |  */
19 | 
20 | #pragma once
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
26 | #include <stddef.h>
27 | #include <sys/types.h>
28 | 
29 | /* System compressed file access  */
30 | 
31 | struct ntfs_system_decompression_ctx; /* opaque: only forward-declared in this header */
32 | 
33 | extern void
34 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
35 | 
36 | /* XPRESS decompression (implemented in xpress_decompress.c)  */
37 | 
38 | struct xpress_decompressor;
39 | 
40 | extern struct xpress_decompressor *xpress_allocate_decompressor(void); /* free with xpress_free_decompressor() */
41 | 
42 | extern int xpress_decompress(struct xpress_decompressor *decompressor, /* returns 0 on success, -1 if the input is rejected */
43 | 		      const void *compressed_data, size_t compressed_size,
44 | 		      void *uncompressed_data, size_t uncompressed_size);
45 | 
46 | extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);
47 | 
48 | /* LZX decompression (implemented in lzx_decompress.c)  */
49 | 
50 | struct lzx_decompressor;
51 | 
52 | extern struct lzx_decompressor *
53 | lzx_allocate_decompressor(size_t max_block_size); /* NULL on failure; errno = EINVAL for a rejected max_block_size */
54 | 
55 | extern int lzx_decompress(struct lzx_decompressor *decompressor, /* returns 0 on success, -1 if the input is rejected */
56 | 			  const void *compressed_data, size_t compressed_size,
57 | 			  void *uncompressed_data, size_t uncompressed_size);
58 | 
59 | extern void lzx_free_decompressor(struct lzx_decompressor *decompressor);
60 | 
61 | #ifdef __cplusplus
62 | }
63 | #endif
64 | 


--------------------------------------------------------------------------------
/src/ebiggers/xpress_constants.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * xpress_constants.h
 3 |  *
 4 |  * Constants for the XPRESS compression format.
 5 |  */
 6 | 
 7 | #ifndef _XPRESS_CONSTANTS_H
 8 | #define _XPRESS_CONSTANTS_H
 9 | 
10 | #define XPRESS_NUM_CHARS	256	/* symbols below this value decode as literal bytes */
11 | #define XPRESS_NUM_SYMBOLS	512	/* size of the single Huffman alphabet (literals + matches) */
12 | #define XPRESS_MAX_CODEWORD_LEN	15	/* largest value a 4-bit codeword length can hold */
13 | 
14 | #define XPRESS_END_OF_DATA	256	/* symbol some implementations expect after the data */
15 | 
16 | #define XPRESS_MIN_OFFSET	1	/* (1 << 0) with no extra offset bits */
17 | #define XPRESS_MAX_OFFSET	65535	/* (1 << 15) | fifteen extra offset bits, all set */
18 | 
19 | #define XPRESS_MIN_MATCH_LEN	3	/* added to every decoded match length */
20 | #define XPRESS_MAX_MATCH_LEN	65538	/* 65535 + XPRESS_MIN_MATCH_LEN */
21 | 
22 | #endif /* _XPRESS_CONSTANTS_H */
23 | 


--------------------------------------------------------------------------------
/src/ebiggers/xpress_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * xpress_decompress.c
  3 |  *
  4 |  * A decompressor for the XPRESS compression format (Huffman variant).
  5 |  */
  6 | 
  7 | /*
  8 |  *
  9 |  * Copyright (C) 2012-2016 Eric Biggers
 10 |  *
 11 |  * This program is free software: you can redistribute it and/or modify it under
 12 |  * the terms of the GNU General Public License as published by the Free Software
 13 |  * Foundation, either version 2 of the License, or (at your option) any later
 14 |  * version.
 15 |  *
 16 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 17 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 18 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 19 |  * details.
 20 |  *
 21 |  * You should have received a copy of the GNU General Public License along with
 22 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |  */
 24 | 
 25 | 
 26 | /*
 27 |  * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
 28 |  * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
 29 |  * it is a little faster to compress and decompress.
 30 |  *
 31 |  * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
 32 |  * Xpress Compression Algorithm".  In the MSDN library, it can currently be
 33 |  * found under Open Specifications => Protocols => Windows Protocols => Windows
 34 |  * Server Protocols => [MS-XCA] Xpress Compression Algorithm.  The format in
 35 |  * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
 36 |  * (there apparently are some other versions of XPRESS as well).
 37 |  *
 38 |  * If you are already familiar with the LZ77 algorithm and Huffman coding, the
 39 |  * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
 40 |  * that contain 512 4-bit integers that are the lengths of the symbols in the
 41 |  * Huffman code used for match/literal headers.  In contrast with more
 42 |  * complicated formats such as DEFLATE and LZX, this is the only Huffman code
 43 |  * that is used for the entirety of the XPRESS compressed data, and the codeword
 44 |  * lengths are not encoded with a pretree.
 45 |  *
 46 |  * The rest of the compressed data is Huffman-encoded symbols.  Values 0 through
 47 |  * 255 represent the corresponding literal bytes.  Values 256 through 511
 48 |  * represent matches and may require extra bits or bytes to be read to get the
 49 |  * match offset and match length.
 50 |  *
 51 |  * The trickiest part is probably the way in which literal bytes for match
 52 |  * lengths are interleaved in the bitstream.
 53 |  *
 54 |  * Also, a caveat--- according to Microsoft's documentation for XPRESS,
 55 |  *
 56 |  *	"Some implementation of the decompression algorithm expect an extra
 57 |  *	symbol to mark the end of the data.  Specifically, some implementations
 58 |  *	fail during decompression if the Huffman symbol 256 is not found after
 59 |  *	the actual data."
 60 |  *
 61 |  * This is the case with Microsoft's implementation in WIMGAPI, for example.  So
 62 |  * although our implementation doesn't currently check for this extra symbol,
 63 |  * compressors would be wise to add it.
 64 |  */
 65 | 
 66 | #ifdef HAVE_CONFIG_H
 67 | #  include "config.h"
 68 | #endif
 69 | 
 70 | #include "decompress_common.h"
 71 | #include "system_compression.h"
 72 | #include "xpress_constants.h"
 73 | 
 74 | /* This value is chosen for fast decompression.  */
 75 | #define XPRESS_TABLEBITS 11
 76 | 
 77 | struct xpress_decompressor { /* scratch state; lens and the table are rebuilt on every xpress_decompress() call */
 78 | 	union { /* lens and decode_table share storage */
 79 | 		DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS,
 80 | 			     XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
 81 | 		u8 lens[XPRESS_NUM_SYMBOLS]; /* one codeword length per symbol; NOTE(review): overlap relies on make_huffman_decode_table() tolerating it - confirm */
 82 | 	};
 83 | 	DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS,
 84 | 				   XPRESS_MAX_CODEWORD_LEN);
 85 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); /* xpress_allocate_decompressor() requests the same alignment */
 86 | 
 87 | int /* 0 on success; -1 if the input is shorter than the header, the code is rejected, or a match is invalid */
 88 | xpress_decompress(struct xpress_decompressor * d,
 89 |                  const void *compressed_data, size_t compressed_size,
 90 |                  void *uncompressed_data, size_t uncompressed_size)
 91 | {
 92 | 	const u8 * const in_begin = compressed_data;
 93 | 	u8 * const out_begin = uncompressed_data;
 94 | 	u8 *out_next = out_begin;
 95 | 	u8 * const out_end = out_begin + uncompressed_size;
 96 | 	struct input_bitstream is;
 97 | 
 98 | 	/* Read the Huffman codeword lengths.  */
 99 | 	if (compressed_size < XPRESS_NUM_SYMBOLS / 2) /* header: two 4-bit lengths per byte */
100 | 		return -1;
101 | 	for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
102 | 		d->lens[2 * i + 0] = in_begin[i] & 0xf; /* low nibble: even-numbered symbol */
103 | 		d->lens[2 * i + 1] = in_begin[i] >> 4; /* high nibble: odd-numbered symbol */
104 | 	}
105 | 
106 | 	/* Build a decoding table for the Huffman code.  */
107 | 	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
108 | 				      XPRESS_TABLEBITS, d->lens,
109 | 				      XPRESS_MAX_CODEWORD_LEN,
110 | 				      d->working_space))
111 | 		return -1;
112 | 
113 | 	/* Decode the matches and literals; the bitstream starts right after the length header.  */
114 | 
115 | 	init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
116 | 			     compressed_size - XPRESS_NUM_SYMBOLS / 2);
117 | 
118 | 	while (out_next != out_end) { /* ends only when exactly uncompressed_size bytes are written */
119 | 		unsigned sym;
120 | 		unsigned log2_offset;
121 | 		u32 length;
122 | 		u32 offset;
123 | 
124 | 		sym = read_huffsym(&is, d->decode_table,
125 | 				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
126 | 		if (sym < XPRESS_NUM_CHARS) {
127 | 			/* Literal  */
128 | 			*out_next++ = sym;
129 | 		} else {
130 | 			/* Match  */
131 | 			length = sym & 0xf; /* low 4 bits: length header */
132 | 			log2_offset = (sym >> 4) & 0xf; /* next 4 bits: number of extra offset bits */
133 | 
134 | 			bitstream_ensure_bits(&is, 16);
135 | 
136 | 			offset = ((u32)1 << log2_offset) |
137 | 				 bitstream_pop_bits(&is, log2_offset);
138 | 
139 | 			if (length == 0xf) { /* escape: the length continues in a following byte */
140 | 				length += bitstream_read_byte(&is);
141 | 				if (length == 0xf + 0xff) /* second escape: a 16-bit value replaces the length */
142 | 					length = bitstream_read_u16(&is);
143 | 			}
144 | 			length += XPRESS_MIN_MATCH_LEN;
145 | 
146 | 			if (unlikely(lz_copy(length, offset, /* a nonzero result is treated as an invalid match */
147 | 					     out_begin, out_next, out_end,
148 | 					     XPRESS_MIN_MATCH_LEN)))
149 | 				return -1;
150 | 
151 | 			out_next += length;
152 | 		}
153 | 	}
154 | 	return 0;
155 | }
156 | 
157 | struct xpress_decompressor * /* Returns aligned_malloc()'s result unchanged; release with xpress_free_decompressor(). */
158 | xpress_allocate_decompressor(void)
159 | {
160 | 	return aligned_malloc(sizeof(struct xpress_decompressor),
161 | 			      DECODE_TABLE_ALIGNMENT); /* the struct is not initialised here */
162 | }
163 | 
164 | void /* Releases a decompressor returned by xpress_allocate_decompressor(). */
165 | xpress_free_decompressor(struct xpress_decompressor *d)
166 | {
167 | 	aligned_free(d); /* counterpart of the aligned_malloc() in xpress_allocate_decompressor() */
168 | }
169 | 


--------------------------------------------------------------------------------
/src/ntfs.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2020
  2 |  *
  3 |  * This file is part of ntfs2btrfs.
  4 |  *
  5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
 17 | 
 18 | #pragma once
 19 | 
 20 | #include "ntfs2btrfs.h"
 21 | #include <stdint.h>
 22 | #include <vector>
 23 | #include <string>
 24 | #include <list>
 25 | #include <functional>
 26 | #include <memory>
 27 | 
 28 | #ifdef _WIN32
 29 | #include <windows.h>
 30 | #else
 31 | #include <unistd.h>
 32 | #endif
 33 | 
 34 | #pragma pack(push,1)
 35 | 
 36 | typedef struct { // packed (#pragma pack(push,1) above); the fields total 84 bytes
 37 |     uint8_t Jmp[3];
 38 |     uint8_t FsName[8]; // same length as NTFS_FS_NAME without its terminator
 39 |     uint16_t BytesPerSector;
 40 |     uint8_t SectorsPerCluster;
 41 |     uint16_t ReservedSectors;
 42 |     uint8_t Unused1[5];
 43 |     uint8_t Media;
 44 |     uint8_t Unused2[2];
 45 |     uint16_t SectorsPerTrack;
 46 |     uint16_t Heads;
 47 |     uint32_t HiddenSectors;
 48 |     uint32_t Unused3;
 49 |     uint32_t Unknown;
 50 |     uint64_t TotalSectors;
 51 |     uint64_t MFT;
 52 |     uint64_t MFTMirr;
 53 |     int8_t ClustersPerMFTRecord; // NOTE(review): signed; presumably a negative value encodes a size as a power of two - confirm in ntfs.cpp
 54 |     uint8_t Padding1[3];
 55 |     int8_t ClustersPerIndexRecord; // signed as well; same caveat as ClustersPerMFTRecord
 56 |     uint8_t Padding2[3];
 57 |     uint64_t SerialNumber;
 58 |     uint32_t Checksum;
 59 | } NTFS_BOOT_SECTOR;
 60 | 
 61 | #define NTFS_FS_NAME "NTFS    "
 62 | 
 63 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header
 64 | #define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff
 65 | #define ATTRIBUTE_FLAG_SPARSE 0x8000
 66 | #define ATTRIBUTE_FLAG_ENCRYPTED 0x4000
 67 | 
 68 | enum class NTFS_ATTRIBUTE_FORM : uint8_t { // type of ATTRIBUTE_RECORD_HEADER::FormCode
 69 |     RESIDENT_FORM = 0, // presumably selects ATTRIBUTE_RECORD_HEADER::Form.Resident - confirm
 70 |     NONRESIDENT_FORM = 1 // presumably selects ATTRIBUTE_RECORD_HEADER::Form.Nonresident - confirm
 71 | };
 72 | 
 73 | enum class ntfs_attribute : uint32_t { // type of ATTRIBUTE_RECORD_HEADER::TypeCode and attribute_list_entry::type
 74 |     STANDARD_INFORMATION = 0x10, // enumerators are spaced 0x10 apart
 75 |     ATTRIBUTE_LIST = 0x20,
 76 |     FILE_NAME = 0x30,
 77 |     VOLUME_VERSION = 0x40,
 78 |     SECURITY_DESCRIPTOR = 0x50,
 79 |     VOLUME_NAME = 0x60,
 80 |     VOLUME_INFORMATION = 0x70,
 81 |     DATA = 0x80, // default attribute type of ntfs_file::read() and ntfs_file::read_mappings()
 82 |     INDEX_ROOT = 0x90,
 83 |     INDEX_ALLOCATION = 0xA0,
 84 |     BITMAP = 0xB0,
 85 |     REPARSE_POINT = 0xC0,
 86 |     EA_INFORMATION = 0xD0,
 87 |     EA = 0xE0,
 88 |     PROPERTY_SET = 0xF0,
 89 |     LOGGED_UTILITY_STREAM = 0x100,
 90 | };
 91 | 
 92 | template<>
 93 | struct fmt::formatter<enum ntfs_attribute> { // prints the enumerator name, or the raw value in hex when it has none
 94 |     constexpr auto parse(format_parse_context& ctx) {
 95 |         auto it = ctx.begin();
 96 | 
 97 |         if (it != ctx.end() && *it != '}') // any format specification is rejected; only "{}" is accepted
 98 |             throw format_error("invalid format");
 99 | 
100 |         return it;
101 |     }
102 | 
103 |     template<typename format_context>
104 |     auto format(enum ntfs_attribute att, format_context& ctx) const {
105 |         switch (att) { // each case writes the enumerator's identifier verbatim
106 |             case ntfs_attribute::STANDARD_INFORMATION:
107 |                 return fmt::format_to(ctx.out(), "STANDARD_INFORMATION");
108 | 
109 |             case ntfs_attribute::ATTRIBUTE_LIST:
110 |                 return fmt::format_to(ctx.out(), "ATTRIBUTE_LIST");
111 | 
112 |             case ntfs_attribute::FILE_NAME:
113 |                 return fmt::format_to(ctx.out(), "FILE_NAME");
114 | 
115 |             case ntfs_attribute::VOLUME_VERSION:
116 |                 return fmt::format_to(ctx.out(), "VOLUME_VERSION");
117 | 
118 |             case ntfs_attribute::SECURITY_DESCRIPTOR:
119 |                 return fmt::format_to(ctx.out(), "SECURITY_DESCRIPTOR");
120 | 
121 |             case ntfs_attribute::VOLUME_NAME:
122 |                 return fmt::format_to(ctx.out(), "VOLUME_NAME");
123 | 
124 |             case ntfs_attribute::VOLUME_INFORMATION:
125 |                 return fmt::format_to(ctx.out(), "VOLUME_INFORMATION");
126 | 
127 |             case ntfs_attribute::DATA:
128 |                 return fmt::format_to(ctx.out(), "DATA");
129 | 
130 |             case ntfs_attribute::INDEX_ROOT:
131 |                 return fmt::format_to(ctx.out(), "INDEX_ROOT");
132 | 
133 |             case ntfs_attribute::INDEX_ALLOCATION:
134 |                 return fmt::format_to(ctx.out(), "INDEX_ALLOCATION");
135 | 
136 |             case ntfs_attribute::BITMAP:
137 |                 return fmt::format_to(ctx.out(), "BITMAP");
138 | 
139 |             case ntfs_attribute::REPARSE_POINT:
140 |                 return fmt::format_to(ctx.out(), "REPARSE_POINT");
141 | 
142 |             case ntfs_attribute::EA_INFORMATION:
143 |                 return fmt::format_to(ctx.out(), "EA_INFORMATION");
144 | 
145 |             case ntfs_attribute::EA:
146 |                 return fmt::format_to(ctx.out(), "EA");
147 | 
148 |             case ntfs_attribute::PROPERTY_SET:
149 |                 return fmt::format_to(ctx.out(), "PROPERTY_SET");
150 | 
151 |             case ntfs_attribute::LOGGED_UTILITY_STREAM:
152 |                 return fmt::format_to(ctx.out(), "LOGGED_UTILITY_STREAM");
153 | 
154 |             default: // value without a matching enumerator: lower-case hex, no "0x" prefix
155 |                 return fmt::format_to(ctx.out(), "{:x}", (uint32_t)att);
156 |         }
157 |     }
158 | };
159 | 
160 | typedef struct _ATTRIBUTE_RECORD_HEADER { // packed; 16 bytes of common fields, then the Form union
161 |     enum ntfs_attribute TypeCode;
162 |     uint16_t RecordLength;
163 |     uint16_t Unknown;
164 |     NTFS_ATTRIBUTE_FORM FormCode;
165 |     uint8_t NameLength;
166 |     uint16_t NameOffset;
167 |     uint16_t Flags; // presumably the ATTRIBUTE_FLAG_* bits defined above - confirm
168 |     uint16_t Instance;
169 |     union {
170 |         struct {
171 |             uint32_t ValueLength;
172 |             uint16_t ValueOffset;
173 |             uint8_t Reserved[2];
174 |         } Resident; // 8 bytes
175 |         struct {
176 |             uint64_t LowestVcn;
177 |             uint64_t HighestVcn;
178 |             uint16_t MappingPairsOffset;
179 |             uint16_t CompressionUnit;
180 |             uint32_t Padding;
181 |             uint64_t AllocatedLength;
182 |             uint64_t FileSize;
183 |             uint64_t ValidDataLength;
184 |             uint64_t TotalAllocated;
185 |         } Nonresident; // 56 bytes
186 |     } Form; // as large as its biggest member, Nonresident
187 | } ATTRIBUTE_RECORD_HEADER;
188 | 
189 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header
190 | typedef struct { // packed, 8 bytes; first member of FILE_RECORD_SEGMENT_HEADER and index_record
191 |     uint32_t Signature;
192 |     uint16_t UpdateSequenceArrayOffset;
193 |     uint16_t UpdateSequenceArraySize;
194 | } MULTI_SECTOR_HEADER;
195 | 
196 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference
197 | typedef struct { // two uint64_t bit-fields, 48 + 16 = 64 bits in total
198 |     uint64_t SegmentNumber : 48;
199 |     uint64_t SequenceNumber : 16;
200 | } MFT_SEGMENT_REFERENCE;
201 | 
202 | // based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and
203 | // http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html
204 | typedef struct { // packed; the type ntfs_file::file_record points to
205 |     MULTI_SECTOR_HEADER MultiSectorHeader; // NOTE(review): Signature presumably equals NTFS_FILE_SIGNATURE - confirm
206 |     uint64_t LogFileSequenceNumber;
207 |     uint16_t SequenceNumber;
208 |     uint16_t HardLinkCount;
209 |     uint16_t FirstAttributeOffset;
210 |     uint16_t Flags; // tested against FILE_RECORD_IS_DIRECTORY by ntfs_file::is_directory()
211 |     uint32_t EntryUsedSize;
212 |     uint32_t EntryAllocatedSize;
213 |     MFT_SEGMENT_REFERENCE BaseFileRecordSegment;
214 |     uint16_t NextAttributeID;
215 | } FILE_RECORD_SEGMENT_HEADER;
216 | 
217 | #define FILE_RECORD_SEGMENT_IN_USE      1
218 | #define FILE_RECORD_IS_DIRECTORY        2
219 | 
220 | static const uint32_t NTFS_FILE_SIGNATURE = 0x454c4946; // "FILE"
221 | 
222 | #define NTFS_VOLUME_INODE       3
223 | #define NTFS_ROOT_DIR_INODE     5
224 | #define NTFS_BITMAP_INODE       6
225 | #define NTFS_SECURE_INODE       9
226 | 
227 | // https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html
228 | 
229 | typedef struct { // packed; the fields total 72 bytes
230 |     int64_t CreationTime; // the four timestamps are signed 64-bit values
231 |     int64_t LastAccessTime;
232 |     int64_t LastWriteTime;
233 |     int64_t ChangeTime;
234 |     uint32_t FileAttributes; // presumably FILE_ATTRIBUTE_* bits - confirm
235 |     uint32_t MaximumVersions;
236 |     uint32_t VersionNumber;
237 |     uint32_t ClassId;
238 |     uint32_t OwnerId;
239 |     uint32_t SecurityId;
240 |     uint64_t QuotaCharged;
241 |     uint64_t USN;
242 | } STANDARD_INFORMATION;
243 | 
244 | #define FILE_ATTRIBUTE_READONLY             0x00000001
245 | #define FILE_ATTRIBUTE_HIDDEN               0x00000002
246 | #define FILE_ATTRIBUTE_SYSTEM               0x00000004
247 | #define FILE_ATTRIBUTE_DIRECTORY            0x00000010
248 | #define FILE_ATTRIBUTE_ARCHIVE              0x00000020
249 | #define FILE_ATTRIBUTE_DEVICE               0x00000040
250 | #define FILE_ATTRIBUTE_NORMAL               0x00000080
251 | #define FILE_ATTRIBUTE_TEMPORARY            0x00000100
252 | #define FILE_ATTRIBUTE_SPARSE_FILE          0x00000200
253 | #define FILE_ATTRIBUTE_REPARSE_POINT        0x00000400
254 | #define FILE_ATTRIBUTE_COMPRESSED           0x00000800
255 | #define FILE_ATTRIBUTE_OFFLINE              0x00001000
256 | #define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED  0x00002000
257 | #define FILE_ATTRIBUTE_ENCRYPTED            0x00004000
258 | #define FILE_ATTRIBUTE_VIRTUAL              0x00010000
259 | 
260 | #define FILE_ATTRIBUTE_DIRECTORY_MFT        0x10000000
261 | 
262 | // https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html
263 | 
264 | enum class file_name_type : uint8_t { // type of FILE_NAME::Namespace
265 |     POSIX = 0,
266 |     WINDOWS = 1,
267 |     DOS = 2,
268 |     WINDOWS_AND_DOS = 3 // numerically WINDOWS | DOS
269 | };
270 | 
271 | typedef struct { // packed; FileName is the last member
272 |     MFT_SEGMENT_REFERENCE Parent;
273 |     int64_t CreationTime;
274 |     int64_t LastAccessTime;
275 |     int64_t LastWriteTime;
276 |     int64_t ChangeTime;
277 |     uint64_t AllocationSize;
278 |     uint64_t EndOfFile;
279 |     uint32_t FileAttributes; // presumably FILE_ATTRIBUTE_* bits - confirm
280 |     uint32_t EaSize;
281 |     uint8_t FileNameLength; // NOTE(review): presumably counts char16_t units of FileName - confirm
282 |     file_name_type Namespace;
283 |     char16_t FileName[1]; // declared with one element; presumably the start of a longer name - confirm
284 | } FILE_NAME;
285 | 
286 | // https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html
287 | 
288 | typedef struct { // packed, 16 bytes; embedded in index_root and index_record
289 |     uint32_t first_entry;
290 |     uint32_t total_size;
291 |     uint32_t allocated_size;
292 |     uint32_t flags;
293 | } index_node_header;
294 | 
295 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html
296 | 
297 | #define INDEX_ENTRY_SUBNODE     1
298 | #define INDEX_ENTRY_LAST        2
299 | 
300 | typedef struct { // packed, 16 bytes; element type of index_root::entries
301 |     MFT_SEGMENT_REFERENCE file_reference;
302 |     uint16_t entry_length;
303 |     uint16_t stream_length;
304 |     uint32_t flags; // presumably the INDEX_ENTRY_* bits defined above - confirm
305 | } index_entry;
306 | 
307 | // https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html
308 | 
309 | typedef struct { // packed; 16 bytes of fields, then node_header, then entries
310 |     uint32_t attribute_type;
311 |     uint32_t collation_rule;
312 |     uint32_t bytes_per_index_record;
313 |     uint8_t clusters_per_index_record; // unsigned here, unlike NTFS_BOOT_SECTOR::ClustersPerIndexRecord
314 |     uint8_t padding[3];
315 |     index_node_header node_header;
316 |     index_entry entries[1]; // declared with one element; NOTE(review): presumably more entries follow - confirm
317 | } index_root;
318 | 
319 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html
320 | 
321 | typedef struct { // packed; 8 + 8 + 8 + 16 + 2 = 42 bytes
322 |     MULTI_SECTOR_HEADER MultiSectorHeader; // NOTE(review): Signature presumably equals INDEX_RECORD_MAGIC - confirm
323 |     uint64_t sequence_number;
324 |     uint64_t vcn;
325 |     index_node_header header;
326 |     uint16_t update_sequence;
327 | } index_record;
328 | 
329 | #define INDEX_RECORD_MAGIC 0x58444e49 // "INDX"
330 | 
331 | // https://flatcap.org/linux-ntfs/ntfs/files/secure.html
332 | 
333 | typedef struct { // packed, 20 bytes
334 |     uint32_t hash;
335 |     uint32_t id; // NOTE(review): presumably the id looked up by ntfs::find_sd() - confirm
336 |     uint64_t offset;
337 |     uint32_t length;
338 | } sd_entry;
339 | 
340 | // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/ns-ntifs-_reparse_data_buffer
341 | 
342 | typedef struct { // packed; 8-byte header followed by an anonymous union of payload layouts
343 |     uint32_t ReparseTag; // presumably compared with the IO_REPARSE_TAG_* values - confirm
344 |     uint16_t ReparseDataLength;
345 |     uint16_t Reserved;
346 | 
347 |     union {
348 |         struct {
349 |             uint16_t SubstituteNameOffset;
350 |             uint16_t SubstituteNameLength;
351 |             uint16_t PrintNameOffset;
352 |             uint16_t PrintNameLength;
353 |             uint32_t Flags; // presumably SYMLINK_FLAG_RELATIVE is tested here - confirm
354 |             char16_t PathBuffer[1];
355 |         } SymbolicLinkReparseBuffer;
356 | 
357 |         struct { // same as SymbolicLinkReparseBuffer but without Flags
358 |             uint16_t SubstituteNameOffset;
359 |             uint16_t SubstituteNameLength;
360 |             uint16_t PrintNameOffset;
361 |             uint16_t PrintNameLength;
362 |             char16_t PathBuffer[1];
363 |         } MountPointReparseBuffer;
364 | 
365 |         struct {
366 |             uint8_t DataBuffer[1];
367 |         } GenericReparseBuffer;
368 | 
369 |         struct {
370 |             uint32_t unknown;
371 |             char name[1];
372 |         } LxSymlink; // undocumented
373 |     };
374 | } REPARSE_DATA_BUFFER;
375 | 
376 | typedef struct { // same members as REPARSE_DATA_BUFFER::LxSymlink, without the 8-byte header
377 |     uint32_t unknown;
378 |     char name[1];
379 | } REPARSE_DATA_BUFFER_LX_SYMLINK;
380 | 
381 | #ifndef IO_REPARSE_TAG_SYMLINK
382 | #define IO_REPARSE_TAG_SYMLINK      0xa000000c
383 | #endif
384 | 
385 | #define IO_REPARSE_TAG_LX_SYMLINK   0xa000001d
386 | 
387 | #ifndef IO_REPARSE_TAG_WOF
388 | #define IO_REPARSE_TAG_WOF          0x80000017
389 | #endif
390 | 
391 | #ifndef SYMLINK_FLAG_RELATIVE
392 | #define SYMLINK_FLAG_RELATIVE       0x00000001
393 | #endif
394 | 
395 | // https://flatcap.org/linux-ntfs/ntfs/attributes/volume_information.html
396 | 
397 | typedef struct { // packed, 16 bytes
398 |     uint64_t Unknown1;
399 |     uint8_t MajorVersion;
400 |     uint8_t MinorVersion;
401 |     uint16_t Flags; // presumably the NTFS_VOLUME_* bits defined below - confirm
402 |     uint32_t Unknown2;
403 | } VOLUME_INFORMATION;
404 | 
405 | #define NTFS_VOLUME_DIRTY               0x0001
406 | #define NTFS_VOLUME_RESIZE_JOURNAL      0x0002
407 | #define NTFS_VOLUME_UPGRADE_ON_MOUNT    0x0004
408 | #define NTFS_VOLUME_MOUNTED_ON_NT4      0x0008
409 | #define NTFS_VOLUME_DELETE_USN_UNDERWAY 0x0010
410 | #define NTFS_VOLUME_REPAIR_OBJECT_IDS   0x0020
411 | #define NTFS_VOLUME_MODIFIED_BY_CHKDSK  0x8000
412 | 
413 | // https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html
414 | 
415 | typedef struct { // packed, 26 bytes
416 |     enum ntfs_attribute type;
417 |     uint16_t record_length;
418 |     uint8_t name_length;
419 |     uint8_t name_offset; // 8-bit here, whereas ATTRIBUTE_RECORD_HEADER::NameOffset is 16-bit
420 |     uint64_t starting_vcn;
421 |     MFT_SEGMENT_REFERENCE file_reference;
422 |     uint16_t instance;
423 | } attribute_list_entry;
424 | 
425 | #define WOF_CURRENT_VERSION         1
426 | 
427 | #define WOF_PROVIDER_WIM            1
428 | #define WOF_PROVIDER_FILE           2
429 | 
430 | typedef struct { // packed; same first three fields as REPARSE_DATA_BUFFER
431 |     uint32_t ReparseTag;
432 |     uint16_t ReparseDataLength;
433 |     uint16_t Reserved;
434 |     uint8_t DataBuffer[1]; // takes the place of REPARSE_DATA_BUFFER's payload union
435 | } reparse_point_header; // edited form of REPARSE_DATA_BUFFER
436 | 
437 | typedef struct { // packed, 8 bytes
438 |     uint32_t Version; // presumably compared with WOF_CURRENT_VERSION - confirm
439 |     uint32_t Provider; // presumably WOF_PROVIDER_WIM or WOF_PROVIDER_FILE - confirm
440 | } wof_external_info; // WOF_EXTERNAL_INFO in winioctl.h
441 | 
442 | #define FILE_PROVIDER_CURRENT_VERSION           1
443 | 
444 | #define FILE_PROVIDER_COMPRESSION_XPRESS4K          0
445 | #define FILE_PROVIDER_COMPRESSION_LZX               1
446 | #define FILE_PROVIDER_COMPRESSION_XPRESS8K          2
447 | #define FILE_PROVIDER_COMPRESSION_XPRESS16K         3
448 | 
449 | typedef struct { // packed, 8 bytes
450 |     uint32_t Version; // presumably compared with FILE_PROVIDER_CURRENT_VERSION - confirm
451 |     uint32_t Algorithm; // presumably one of FILE_PROVIDER_COMPRESSION_* - confirm
452 | } file_provider_external_info_v0; // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h
453 | 
454 | // cf. https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_file_full_ea_information
455 | 
456 | typedef struct { // packed; 8-byte header, then EaName
457 |     uint32_t NextEntryOffset;
458 |     uint8_t Flags;
459 |     uint8_t EaNameLength;
460 |     uint16_t EaValueLength;
461 |     char EaName[1]; // declared with one element; NOTE(review): presumably name then value follow - confirm
462 | } ea_data;
463 | 
464 | typedef struct { // packed, 8 bytes; NOTE(review): presumably a device major/minor pair stored in an EA - confirm
465 |     uint32_t major;
466 |     uint32_t minor;
467 | } lxdev;
468 | 
469 | // https://dfir.ru/2019/01/19/ntfs-today/
470 | 
471 | typedef struct { // packed; the fields total 56 bytes
472 |     uint16_t format;
473 |     uint16_t version;
474 |     uint32_t mode;
475 |     uint32_t uid;
476 |     uint32_t gid;
477 |     uint32_t rdev;
478 |     uint32_t atime_ns; // the *_ns fields are declared before the 64-bit atime/mtime/ctime fields
479 |     uint32_t mtime_ns;
480 |     uint32_t ctime_ns;
481 |     uint64_t atime;
482 |     uint64_t mtime;
483 |     uint64_t ctime;
484 | } lxattrb;
485 | 
486 | #pragma pack(pop)
487 | 
488 | class ntfs;
489 | 
490 | struct mapping { // element type of the lists filled by read_mappings() / read_nonresident_mappings()
491 |     mapping(uint64_t lcn, uint64_t vcn, uint64_t length) : lcn(lcn), vcn(vcn), length(length) { } // argument order is lcn, vcn, length
492 | 
493 |     uint64_t lcn; // NOTE(review): presumably a logical cluster number - confirm
494 |     uint64_t vcn; // NOTE(review): presumably a virtual cluster number - confirm
495 |     uint64_t length; // NOTE(review): unit presumably clusters - confirm
496 | };
497 | 
498 | class ntfs_file { // one file record of the volume `dev`, selected by inode number
499 | public:
500 |     ntfs_file(ntfs& dev, uint64_t inode); // defined outside this header
501 |     buffer_t read(uint64_t offset = 0, uint32_t length = 0, enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u""); // NOTE(review): length 0 presumably means "to the end" - confirm
502 |     std::list<mapping> read_mappings(enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u""); // defaults select the unnamed DATA attribute
503 | 
504 |     bool is_directory() const { // true when FILE_RECORD_IS_DIRECTORY is set in the record's Flags
505 |         return file_record->Flags & FILE_RECORD_IS_DIRECTORY;
506 |     }
507 | 
508 |     void loop_through_atts(const std::function<bool(const ATTRIBUTE_RECORD_HEADER&, std::string_view, std::u16string_view)>& func); // NOTE(review): meaning of func's bool result is not visible here - confirm
509 |     std::string get_filename();
510 | 
511 |     FILE_RECORD_SEGMENT_HEADER* file_record; // NOTE(review): presumably points into file_record_buf - confirm
512 | 
513 | private:
514 |     buffer_t read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att);
515 | 
516 |     buffer_t file_record_buf;
517 |     ntfs& dev; // reference member: the ntfs object must outlive this ntfs_file
518 |     uint64_t inode;
519 | };
520 | 
521 | class ntfs { // holds the OS handle (HANDLE h on Windows, int fd elsewhere) and closes it on destruction
522 | public:
523 |     ntfs(const std::string& fn); // defined outside this header
524 | 
525 |     ~ntfs() { // closes the handle unconditionally; the result of the close call is ignored
526 | #ifdef _WIN32
527 |         CloseHandle(h);
528 | #else
529 |         close(fd);
530 | #endif
531 |     }
532 | 
533 |     void seek(uint64_t pos);
534 |     void read(uint8_t* buf, size_t length);
535 |     void write(const uint8_t* buf, size_t length);
536 |     std::string_view find_sd(uint32_t id, ntfs_file& secure); // NOTE(review): the returned view presumably refers to storage in sd_list - confirm
537 | 
538 |     std::unique_ptr<ntfs_file> mft; // unique_ptr member makes this class non-copyable
539 |     buffer_t boot_sector_buf;
540 |     NTFS_BOOT_SECTOR* boot_sector; // NOTE(review): presumably points into boot_sector_buf - confirm
541 |     uint64_t file_record_size;
542 |     std::map<uint32_t, buffer_t> sd_list; // NOTE(review): presumably keyed by the id passed to find_sd() - confirm
543 | 
544 | #ifdef _WIN32
545 |     HANDLE h;
546 | #else
547 |     int fd;
548 | #endif
549 | };
550 | 
// ntfs.cpp
// NOTE(review): the descriptions below are inferred from the signatures only -
// confirm against the definitions in ntfs.cpp.

// Presumably decodes the run list of the non-resident attribute `att` into
// `mappings`; `vdl` presumably is the attribute's valid data length.
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, std::list<mapping>& mappings,
                               uint32_t cluster_size, uint64_t vdl);

// Presumably fills `skiplist` with inode numbers related to `inode` that the
// caller should skip.
void populate_skip_list(ntfs& dev, uint64_t inode, std::list<uint64_t>& skiplist);
555 | 


--------------------------------------------------------------------------------
/src/ntfs2btrfs.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2020
  2 |  *
  3 |  * This file is part of ntfs2btrfs.
  4 |  *
  5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
 17 | 
 18 | #pragma once
 19 | 
 20 | #include "btrfs.h"
 21 | #include "config.h"
 22 | #include <string.h>
 23 | #include <map>
 24 | #include <list>
 25 | #include <string>
 26 | #include <vector>
 27 | #include <optional>
 28 | 
 29 | #ifdef _WIN32
 30 | #include <windows.h>
 31 | #endif
 32 | 
 33 | #pragma warning(push)
 34 | #pragma warning(disable : 26495 26451 26437 26812)
 35 | #include <fmt/format.h>
 36 | #include <fmt/compile.h>
 37 | #pragma warning(pop)
 38 | 
 39 | #ifdef _MSC_VER
 40 | 
 41 | #ifdef _M_IX86
 42 | #define __i386__
 43 | #elif defined(_M_X64)
 44 | #define __x86_64__
 45 | #endif
 46 | 
 47 | #endif
 48 | 
 49 | #ifdef _WIN32
 50 | class last_error : public std::exception {
 51 | public:
 52 |     last_error(std::string_view function, int le) {
 53 |         std::string nice_msg;
 54 | 
 55 |         {
 56 |             char* fm;
 57 | 
 58 |             if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr,
 59 |                                le, 0, reinterpret_cast<LPSTR>(&fm), 0, nullptr)) {
 60 |                 try {
 61 |                     std::string_view s = fm;
 62 | 
 63 |                     while (!s.empty() && (s[s.length() - 1] == u'\r' || s[s.length() - 1] == u'\n')) {
 64 |                         s.remove_suffix(1);
 65 |                     }
 66 | 
 67 |                     nice_msg = s;
 68 |                 } catch (...) {
 69 |                     LocalFree(fm);
 70 |                     throw;
 71 |                 }
 72 | 
 73 |                 LocalFree(fm);
 74 |             }
 75 |         }
 76 | 
 77 |         msg = std::string(function) + " failed (error " + std::to_string(le) + (!nice_msg.empty() ? (", " + nice_msg) : "") + ").";
 78 |     }
 79 | 
 80 |     const char* what() const noexcept {
 81 |         return msg.c_str();
 82 |     }
 83 | 
 84 | private:
 85 |     std::string msg;
 86 | };
 87 | 
 88 | class handle_closer {
 89 | public:
 90 |     typedef HANDLE pointer;
 91 | 
 92 |     void operator()(HANDLE h) {
 93 |         if (h == INVALID_HANDLE_VALUE)
 94 |             return;
 95 | 
 96 |         CloseHandle(h);
 97 |     }
 98 | };
 99 | 
100 | typedef std::unique_ptr<HANDLE, handle_closer> unique_handle;
101 | #endif
102 | 
103 | class _formatted_error : public std::exception {
104 | public:
105 |     template<typename T, typename... Args>
106 |     _formatted_error(const T& s, Args&&... args) {
107 |         msg = fmt::format(s, std::forward<Args>(args)...);
108 |     }
109 | 
110 |     const char* what() const noexcept {
111 |         return msg.c_str();
112 |     }
113 | 
114 | private:
115 |     std::string msg;
116 | };
117 | 
118 | #define formatted_error(s, ...) _formatted_error(FMT_COMPILE(s), ##__VA_ARGS__)
119 | 
// An (offset, length) range. Used as the element type of chunk::space_list.
// NOTE(review): presumably a free range inside a chunk, in bytes - confirm at
// the use sites.
struct space {
    // Stores both values unchanged.
    space(uint64_t offset, uint64_t length) : offset(offset), length(length) { }

    uint64_t offset;
    uint64_t length;
};
126 | 
// Bookkeeping record for one chunk. The constructor sets the four address/type
// fields; space_list starts empty, `added` false and `used` zero.
// NOTE(review): field meanings below are inferred from names - confirm in
// ntfs2btrfs.cpp.
struct chunk {
    chunk(uint64_t offset, uint64_t length, uint64_t disk_start, uint64_t type) : offset(offset), length(length), disk_start(disk_start), type(type) { }

    uint64_t offset;             // presumably the chunk's logical start address
    uint64_t length;
    uint64_t disk_start;         // presumably the physical offset backing `offset`
    uint64_t type;               // presumably BLOCK_FLAG_* bits
    std::list<space> space_list;
    bool added = false;
    uint64_t used = 0;
};
138 | 
// Describes one allocated range. Only offset and length are mandatory; inode
// and file_offset default to 0 and both flags default to false.
// NOTE(review): field meanings are inferred from names - confirm in
// ntfs2btrfs.cpp.
struct data_alloc {
    data_alloc(uint64_t offset, uint64_t length, uint64_t inode = 0, uint64_t file_offset = 0, bool relocated = false, bool not_in_img = false) :
    offset(offset), length(length), inode(inode), file_offset(file_offset), relocated(relocated), not_in_img(not_in_img) { }

    uint64_t offset;
    uint64_t length;
    uint64_t inode;       // presumably the owning inode, 0 when there is none
    uint64_t file_offset; // presumably the range's position within that inode's data
    bool relocated;
    bool not_in_img;
};
150 | 
// Allocator adaptor that changes how a container constructs elements when it is
// given no constructor arguments: the element is default-initialised
// ("new U") instead of going through the wrapped allocator's construct().
// Everything else is inherited from A.
template<typename T, typename A = std::allocator<T>>
class default_init_allocator : public A {
public:
    typedef std::allocator_traits<A> a_t;

    // Rebinding must yield another default_init_allocator, otherwise the
    // container would fall back to the plain wrapped allocator.
    template<typename U>
    struct rebind {
        using other = default_init_allocator<U, typename a_t::template rebind_alloc<U>>;
    };

    using A::A;

    // No-argument construction: default-initialise in place. For trivial types
    // such as uint8_t this leaves the storage unwritten.
    template<typename U>
    void construct(U* ptr) noexcept(std::is_nothrow_default_constructible<U>::value) {
        ::new(static_cast<void*>(ptr)) U;
    }

    // Construction with arguments is forwarded to the wrapped allocator.
    template<typename U, typename...Args>
    void construct(U* ptr, Args&&... args) {
        a_t::construct(static_cast<A&>(*this), ptr, std::forward<Args>(args)...);
    }
};

// Byte buffer built on the adaptor above: sizing it with buffer_t(n) or
// resize(n) does not write the new bytes, so they must be filled before being
// read.
using buffer_t = std::vector<uint8_t, default_init_allocator<uint8_t>>;
175 | 
176 | static bool inline operator<(const KEY& a, const KEY& b) {
177 |     if (a.obj_id < b.obj_id)
178 |         return true;
179 |     else if (a.obj_id > b.obj_id)
180 |         return false;
181 | 
182 |     if (a.obj_type < b.obj_type)
183 |         return true;
184 |     else if (a.obj_type > b.obj_type)
185 |         return false;
186 | 
187 |     if (a.offset < b.offset)
188 |         return true;
189 | 
190 |     return false;
191 | }
192 | 
193 | class ntfs;
194 | 
195 | class root {
196 | public:
197 |     root(uint64_t id) : id(id) { }
198 | 
199 |     void create_trees(root& extent_root, enum btrfs_csum_type csum_type);
200 |     void write_trees(ntfs& dev);
201 | 
202 |     uint64_t id;
203 |     std::map<KEY, buffer_t> items;
204 |     std::list<buffer_t> trees;
205 |     uint64_t tree_addr;
206 |     uint8_t level;
207 |     uint64_t metadata_size = 0;
208 |     std::list<std::pair<uint64_t, uint8_t>> addresses, old_addresses;
209 |     bool allocations_done = false;
210 |     bool readonly = false;
211 |     std::map<uint64_t, uint64_t> dir_seqs;
212 |     std::map<uint64_t, uint64_t> dir_size;
213 | };
214 | 
// from sys/stat.h
// File-type bits of a POSIX mode value. These double-underscore names are
// defined unconditionally, unlike the S_* macros further down.
#define __S_IFMT        0170000 /* These bits determine file type.  */
#define __S_IFDIR       0040000 /* Directory.  */
#define __S_IFCHR       0020000 /* Character device.  */
#define __S_IFBLK       0060000 /* Block device.  */
#define __S_IFREG       0100000 /* Regular file.  */
#define __S_IFIFO       0010000 /* FIFO.  */
#define __S_IFLNK       0120000 /* Symbolic link.  */
#define __S_IFSOCK      0140000 /* Socket.  */
#define __S_ISTYPE(mode, mask)  (((mode) & __S_IFMT) == (mask))

// Everything below is a fallback: each macro is only defined when the platform
// headers have not already provided it.
#ifndef S_ISDIR
#define S_ISDIR(mode)    __S_ISTYPE((mode), __S_IFDIR)
#endif

// Owner permission bits.
#ifndef S_IRUSR
#define S_IRUSR 0000400
#endif

#ifndef S_IWUSR
#define S_IWUSR 0000200
#endif

#ifndef S_IXUSR
#define S_IXUSR 0000100
#endif

// Group permission bits: the owner bits shifted right by three.
#ifndef S_IRGRP
#define S_IRGRP (S_IRUSR >> 3)
#endif

#ifndef S_IWGRP
#define S_IWGRP (S_IWUSR >> 3)
#endif

#ifndef S_IXGRP
#define S_IXGRP (S_IXUSR >> 3)
#endif

// "Other" permission bits: the group bits shifted right by three.
#ifndef S_IROTH
#define S_IROTH (S_IRGRP >> 3)
#endif

#ifndef S_IWOTH
#define S_IWOTH (S_IWGRP >> 3)
#endif

#ifndef S_IXOTH
#define S_IXOTH (S_IXGRP >> 3)
#endif

// Set-user-ID, set-group-ID and sticky bits.
#ifndef S_ISUID
#define S_ISUID 0004000
#endif

#ifndef S_ISGID
#define S_ISGID 0002000
#endif

#ifndef S_ISVTX
#define S_ISVTX 0001000
#endif
277 | 
// Composite item layouts. They are declared with 1-byte packing so that the
// members follow each other with no padding; do not reorder the fields.
#pragma pack(push,1)

// A CHUNK_ITEM immediately followed by exactly one stripe.
typedef struct {
    CHUNK_ITEM chunk_item;
    CHUNK_ITEM_STRIPE stripe;
} chunk_item_one_stripe;

// An EXTENT_ITEM followed by one inline reference: a key-type tag and a
// TREE_BLOCK_REF.
typedef struct {
    EXTENT_ITEM extent_item;
    btrfs_key_type type;
    TREE_BLOCK_REF tbr;
} metadata_item;

// An EXTENT_ITEM followed by one inline reference: a key-type tag and an
// EXTENT_DATA_REF.
typedef struct {
    EXTENT_ITEM extent_item;
    btrfs_key_type type;
    EXTENT_DATA_REF edr;
} data_item;

// As data_item, but with two tagged EXTENT_DATA_REF entries.
typedef struct {
    EXTENT_ITEM extent_item;
    btrfs_key_type type1;
    EXTENT_DATA_REF edr1;
    btrfs_key_type type2;
    EXTENT_DATA_REF edr2;
} data_item2;

#pragma pack(pop)
306 | 
// Records that `length` units starting at old_start now live at new_start.
// NOTE(review): presumably byte addresses on the device - confirm at the use
// sites.
struct relocation {
    // Stores the three values unchanged.
    relocation(uint64_t old_start, uint64_t length, uint64_t new_start) : old_start(old_start), length(length), new_start(new_start) { }

    uint64_t old_start;
    uint64_t length;
    uint64_t new_start;
};
314 | 
// Rounds v up to the next multiple of s (v itself when already a multiple).
// s must be non-zero.
static inline uint64_t sector_align(uint64_t v, uint64_t s) {
    // Move past the next boundary, then drop the remainder to land on it.
    const uint64_t bumped = v + s - 1;

    return bumped - (bumped % s);
}
318 | 
319 | template<>
320 | struct fmt::formatter<enum btrfs_key_type> {
321 |     constexpr auto parse(format_parse_context& ctx) {
322 |         auto it = ctx.begin();
323 | 
324 |         if (it != ctx.end() && *it != '}')
325 |             throw format_error("invalid format");
326 | 
327 |         return it;
328 |     }
329 | 
330 |     template<typename format_context>
331 |     auto format(enum btrfs_key_type k, format_context& ctx) const {
332 |         switch (k) {
333 |             case btrfs_key_type::INODE_ITEM:
334 |                 return fmt::format_to(ctx.out(), "INODE_ITEM");
335 |             case btrfs_key_type::INODE_REF:
336 |                 return fmt::format_to(ctx.out(), "INODE_REF");
337 |             case btrfs_key_type::INODE_EXTREF:
338 |                 return fmt::format_to(ctx.out(), "INODE_EXTREF");
339 |             case btrfs_key_type::XATTR_ITEM:
340 |                 return fmt::format_to(ctx.out(), "XATTR_ITEM");
341 |             case btrfs_key_type::ORPHAN_INODE:
342 |                 return fmt::format_to(ctx.out(), "ORPHAN_INODE");
343 |             case btrfs_key_type::DIR_ITEM:
344 |                 return fmt::format_to(ctx.out(), "DIR_ITEM");
345 |             case btrfs_key_type::DIR_INDEX:
346 |                 return fmt::format_to(ctx.out(), "DIR_INDEX");
347 |             case btrfs_key_type::EXTENT_DATA:
348 |                 return fmt::format_to(ctx.out(), "EXTENT_DATA");
349 |             case btrfs_key_type::EXTENT_CSUM:
350 |                 return fmt::format_to(ctx.out(), "EXTENT_CSUM");
351 |             case btrfs_key_type::ROOT_ITEM:
352 |                 return fmt::format_to(ctx.out(), "ROOT_ITEM");
353 |             case btrfs_key_type::ROOT_BACKREF:
354 |                 return fmt::format_to(ctx.out(), "ROOT_BACKREF");
355 |             case btrfs_key_type::ROOT_REF:
356 |                 return fmt::format_to(ctx.out(), "ROOT_REF");
357 |             case btrfs_key_type::EXTENT_ITEM:
358 |                 return fmt::format_to(ctx.out(), "EXTENT_ITEM");
359 |             case btrfs_key_type::METADATA_ITEM:
360 |                 return fmt::format_to(ctx.out(), "METADATA_ITEM");
361 |             case btrfs_key_type::TREE_BLOCK_REF:
362 |                 return fmt::format_to(ctx.out(), "TREE_BLOCK_REF");
363 |             case btrfs_key_type::EXTENT_DATA_REF:
364 |                 return fmt::format_to(ctx.out(), "EXTENT_DATA_REF");
365 |             case btrfs_key_type::EXTENT_REF_V0:
366 |                 return fmt::format_to(ctx.out(), "EXTENT_REF_V0");
367 |             case btrfs_key_type::SHARED_BLOCK_REF:
368 |                 return fmt::format_to(ctx.out(), "SHARED_BLOCK_REF");
369 |             case btrfs_key_type::SHARED_DATA_REF:
370 |                 return fmt::format_to(ctx.out(), "SHARED_DATA_REF");
371 |             case btrfs_key_type::BLOCK_GROUP_ITEM:
372 |                 return fmt::format_to(ctx.out(), "BLOCK_GROUP_ITEM");
373 |             case btrfs_key_type::FREE_SPACE_INFO:
374 |                 return fmt::format_to(ctx.out(), "FREE_SPACE_INFO");
375 |             case btrfs_key_type::FREE_SPACE_EXTENT:
376 |                 return fmt::format_to(ctx.out(), "FREE_SPACE_EXTENT");
377 |             case btrfs_key_type::FREE_SPACE_BITMAP:
378 |                 return fmt::format_to(ctx.out(), "FREE_SPACE_BITMAP");
379 |             case btrfs_key_type::DEV_EXTENT:
380 |                 return fmt::format_to(ctx.out(), "DEV_EXTENT");
381 |             case btrfs_key_type::DEV_ITEM:
382 |                 return fmt::format_to(ctx.out(), "DEV_ITEM");
383 |             case btrfs_key_type::CHUNK_ITEM:
384 |                 return fmt::format_to(ctx.out(), "CHUNK_ITEM");
385 |             case btrfs_key_type::TEMP_ITEM:
386 |                 return fmt::format_to(ctx.out(), "TEMP_ITEM");
387 |             case btrfs_key_type::DEV_STATS:
388 |                 return fmt::format_to(ctx.out(), "DEV_STATS");
389 |             case btrfs_key_type::SUBVOL_UUID:
390 |                 return fmt::format_to(ctx.out(), "SUBVOL_UUID");
391 |             case btrfs_key_type::SUBVOL_REC_UUID:
392 |                 return fmt::format_to(ctx.out(), "SUBVOL_REC_UUID");
393 |             default:
394 |                 return fmt::format_to(ctx.out(), "{:x}", (uint8_t)k);
395 |         }
396 |     }
397 | };
398 | 
// Subvolume id and file name that rollback() looks up to locate the saved
// NTFS image.
static const uint64_t image_subvol_id = 0x100;
static const char image_filename[] = "ntfs.img";

// decomp.cpp
// NOTE(review): `size` presumably is the expected decompressed length - confirm
// in decomp.cpp.
buffer_t lznt1_decompress(std::string_view compdata, uint32_t size);
buffer_t do_lzx_decompress(std::string_view compdata, uint32_t size);
buffer_t do_xpress_decompress(std::string_view compdata, uint32_t size, uint32_t chunk_size);

// compress.cpp
// Each compressor is only declared when its WITH_* option is defined.
// NOTE(review): an empty optional presumably means "not worth compressing" -
// confirm in compress.cpp.
#ifdef WITH_ZLIB
std::optional<buffer_t> zlib_compress(std::string_view data, uint32_t cluster_size);
#endif
#ifdef WITH_LZO
std::optional<buffer_t> lzo_compress(std::string_view data, uint32_t cluster_size);
#endif
#ifdef WITH_ZSTD
std::optional<buffer_t> zstd_compress(std::string_view data, uint32_t cluster_size);
#endif

// sha256.c
// C linkage: implemented in a C translation unit.
extern "C" void calc_sha256(uint8_t* hash, const void* input, size_t len);

// blake2b-ref.c
// C linkage: implemented in a C translation unit.
extern "C" void blake2b(void *out, size_t outlen, const void* in, size_t inlen);

// rollback.cpp
// Restores the NTFS volume stored in image file/device fn; throws on failure.
void rollback(const std::string& fn);

// ntfs2btrfs.cpp
std::string utf16_to_utf8(std::u16string_view sv);
429 | 


--------------------------------------------------------------------------------
/src/rollback.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2021
  2 |  *
  3 |  * This file is part of ntfs2btrfs.
  4 |  *
  5 |  * Ntfs2btrfs is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * Ntfs2btrfs is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with Ntfs2btrfs. If not, see <https://www.gnu.org/licenses/>. */
 17 | 
 18 | #include "ntfs2btrfs.h"
 19 | #include "crc32c.h"
 20 | #include <iostream>
 21 | #include <fstream>
 22 | #include <functional>
 23 | #include <codecvt>
 24 | 
 25 | using namespace std;
 26 | 
// Chunk table: key is the chunk's starting logical address, value is the raw
// CHUNK_ITEM bytes together with the stripe entries that follow it.
using chunks_t = map<uint64_t, buffer_t>;

// Incompat feature bits that rollback tolerates. read_superblock() rejects a
// volume that has any bit set outside this mask.
#define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \
                            BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \
                            BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES | \
                            BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD | BTRFS_INCOMPAT_FLAGS_METADATA_UUID | BTRFS_INCOMPAT_FLAGS_RAID1C34)
 33 | 
// Minimal Btrfs accessor used by rollback(): opens the device read/write,
// loads the superblock and chunk table, and can walk trees and do raw
// physical I/O.
class btrfs {
public:
    // Opens fn, reads the superblock, then loads the chunk table. Throws on
    // any failure.
    btrfs(const string& fn);

    // Returns the tree block address stored in the ROOT_ITEM of `root`; throws
    // if the root tree has no such item.
    uint64_t find_root_addr(uint64_t root);

    // Visits the items of the tree rooted at logical address addr, calling
    // func(key, item data) for each. Stops as soon as func returns false and
    // then returns false; returns true if the whole tree was visited.
    bool walk_tree(uint64_t addr, const function<bool(const KEY&, string_view)>& func);

    // Returns the chunk table entry whose range contains logical address addr;
    // throws if none does.
    const pair<const uint64_t, buffer_t>& find_chunk(uint64_t addr);

    // Read/write at a physical device offset, bypassing the chunk mapping.
    buffer_t raw_read(uint64_t phys_addr, uint32_t len);
    void raw_write(uint64_t phys_addr, const buffer_t& buf);

private:
    superblock read_superblock();
    void read_chunks();

    // Reads len bytes at a logical address by translating it through the chunk
    // table; only SINGLE-profile chunks on this device are handled.
    buffer_t read(uint64_t addr, uint32_t len);

#ifdef _WIN32
    unique_handle h;
    bool drive = false; // true when fn named a drive letter rather than a file
#else
    fstream f;
#endif
    superblock sb;
    chunks_t chunks;
};
 57 | 
 58 | 
 59 | btrfs::btrfs(const string& fn) {
 60 | #ifdef _WIN32
 61 |     DWORD ret;
 62 |     wstring_convert<codecvt_utf8_utf16<char16_t>, char16_t> convert;
 63 |     u16string namew;
 64 | 
 65 |     if ((fn.length() == 2 || fn.length() == 3) && fn[0] >= 'A' && fn[0] <= 'Z' && fn[1] == ':' && (fn.length() == 2 || fn[2] == '\\')) {
 66 |         namew = u"\\\\.\\X:";
 67 |         namew[4] = fn[0];
 68 |         drive = true;
 69 |     } else
 70 |         namew = convert.from_bytes(fn.data(), fn.data() + fn.length());
 71 | 
 72 |     h.reset(CreateFileW((WCHAR*)namew.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE,
 73 |                         nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr));
 74 | 
 75 |     if (h.get() == INVALID_HANDLE_VALUE)
 76 |         throw last_error("CreateFile", GetLastError());
 77 | 
 78 |     if (drive) {
 79 |         if (!DeviceIoControl(h.get(), FSCTL_LOCK_VOLUME, nullptr, 0, nullptr, 0, &ret, nullptr))
 80 |             throw last_error("FSCTL_LOCK_VOLUME", GetLastError());
 81 |     }
 82 | #else
 83 |     f = fstream(fn, ios_base::in | ios_base::out | ios::binary);
 84 | 
 85 |     if (!f.good())
 86 |         throw formatted_error("Failed to open {}.", fn);
 87 | #endif
 88 | 
 89 |     sb = read_superblock();
 90 | 
 91 |     read_chunks();
 92 | }
 93 | 
 94 | superblock btrfs::read_superblock() {
 95 |     optional<superblock> sb;
 96 |     uint64_t device_size;
 97 | 
 98 |     // find length of volume
 99 | 
100 | #ifdef _WIN32
101 |     if (drive) {
102 |         GET_LENGTH_INFORMATION gli;
103 |         DWORD ret;
104 | 
105 |         if (!DeviceIoControl(h.get(), IOCTL_DISK_GET_LENGTH_INFO, nullptr, 0, &gli, sizeof(gli), &ret, nullptr))
106 |             throw last_error("IOCTL_DISK_GET_LENGTH_INFO", GetLastError());
107 | 
108 |         device_size = gli.Length.QuadPart;
109 |     } else {
110 |         LARGE_INTEGER li;
111 | 
112 |         if (!GetFileSizeEx(h.get(), &li))
113 |             throw last_error("GetFileSizeEx", GetLastError());
114 | 
115 |         device_size = li.QuadPart;
116 |     }
117 | #else
118 |     f.seekg(0, ios::end);
119 | 
120 |     if (f.fail())
121 |         throw runtime_error("Error seeking to end of device.");
122 | 
123 |     device_size = f.tellg();
124 | #endif
125 | 
126 |     unsigned int i = 0;
127 |     while (superblock_addrs[i] != 0 && superblock_addrs[i] + sizeof(superblock) < device_size) {
128 |         auto buf = raw_read(superblock_addrs[i], sizeof(superblock));
129 | 
130 |         const auto& sb2 = *(superblock*)buf.data();
131 | 
132 |         if (sb2.magic != BTRFS_MAGIC) {
133 |             i++;
134 |             continue;
135 |         }
136 | 
137 |         // FIXME - check checksum
138 | 
139 |         if (!sb.has_value() || sb2.generation > sb.value().generation)
140 |             sb = sb2;
141 | 
142 |         i++;
143 |     }
144 | 
145 |     if (!sb.has_value())
146 |         throw runtime_error("Not a Btrfs volume.");
147 | 
148 |     if (sb.value().incompat_flags & ~INCOMPAT_SUPPORTED)
149 |         throw formatted_error("Unsupported incompat flags {:x}.", sb.value().incompat_flags & ~INCOMPAT_SUPPORTED);
150 | 
151 |     return sb.value();
152 | }
153 | 
154 | const pair<const uint64_t, buffer_t>& btrfs::find_chunk(uint64_t addr) {
155 |     for (const auto& c : chunks) {
156 |         if (addr < c.first)
157 |             continue;
158 | 
159 |         const auto& ci = *(CHUNK_ITEM*)c.second.data();
160 | 
161 |         if (addr < c.first + ci.size)
162 |             return c;
163 |     }
164 | 
165 |     throw formatted_error("Could not find chunk for virtual address {:x}.", addr);
166 | }
167 | 
168 | buffer_t btrfs::raw_read(uint64_t phys_addr, uint32_t len) {
169 | #ifdef _WIN32
170 |     LARGE_INTEGER posli;
171 | 
172 |     posli.QuadPart = phys_addr;
173 | 
174 |     if (!SetFilePointerEx(h.get(), posli, nullptr, FILE_BEGIN))
175 |         throw last_error("SetFilePointerEx", GetLastError());
176 | #else
177 |     f.seekg(phys_addr);
178 | 
179 |     if (f.fail())
180 |         throw formatted_error("Error seeking to {:x}.", phys_addr);
181 | #endif
182 | 
183 |     buffer_t ret(len);
184 | 
185 | #ifdef _WIN32
186 |     DWORD read;
187 | 
188 |     if (!ReadFile(h.get(), ret.data(), (DWORD)len, &read, nullptr))
189 |         throw last_error("ReadFile", GetLastError());
190 | #else
191 |     f.read((char*)ret.data(), ret.size());
192 | 
193 |     if (f.fail())
194 |         throw formatted_error("Error reading {:x} bytes at {:x}.", ret.size(), phys_addr);
195 | #endif
196 | 
197 |     return ret;
198 | }
199 | 
200 | void btrfs::raw_write(uint64_t phys_addr, const buffer_t& buf) {
201 | #ifdef _WIN32
202 |     LARGE_INTEGER posli;
203 | 
204 |     posli.QuadPart = phys_addr;
205 | 
206 |     if (!SetFilePointerEx(h.get(), posli, nullptr, FILE_BEGIN))
207 |         throw last_error("SetFilePointerEx", GetLastError());
208 | #else
209 |     f.seekg(phys_addr);
210 | 
211 |     if (f.fail())
212 |         throw formatted_error("Error seeking to {:x}.", phys_addr);
213 | #endif
214 | 
215 | #ifdef _WIN32
216 |     DWORD written;
217 | 
218 |     if (!WriteFile(h.get(), buf.data(), (DWORD)buf.size(), &written, nullptr))
219 |         throw last_error("WriteFile", GetLastError());
220 | #else
221 |     f.write((char*)buf.data(), buf.size());
222 | 
223 |     if (f.fail())
224 |         throw formatted_error("Error writing {:x} bytes at {:x}.", buf.size(), phys_addr);
225 | #endif
226 | }
227 | 
228 | buffer_t btrfs::read(uint64_t addr, uint32_t len) {
229 |     const auto& cp = find_chunk(addr);
230 |     const auto& c = *(CHUNK_ITEM*)cp.second.data();
231 | 
232 |     if (c.type & BLOCK_FLAG_RAID0)
233 |         throw runtime_error("FIXME - RAID 0");
234 |     else if (c.type & BLOCK_FLAG_RAID1)
235 |         throw runtime_error("FIXME - RAID 1");
236 |     else if (c.type & BLOCK_FLAG_DUPLICATE)
237 |         throw runtime_error("FIXME - DUPLICATE");
238 |     else if (c.type & BLOCK_FLAG_RAID10)
239 |         throw runtime_error("FIXME - RAID10");
240 |     else if (c.type & BLOCK_FLAG_RAID5)
241 |         throw runtime_error("FIXME - RAID5");
242 |     else if (c.type & BLOCK_FLAG_RAID6)
243 |         throw runtime_error("FIXME - RAID6");
244 |     else if (c.type & BLOCK_FLAG_RAID1C3)
245 |         throw runtime_error("FIXME - RAID1C3");
246 |     else if (c.type & BLOCK_FLAG_RAID1C4)
247 |         throw runtime_error("FIXME - RAID1C4");
248 | 
249 |     // SINGLE
250 | 
251 |     if (c.num_stripes == 0)
252 |         throw runtime_error("CHUNK_ITEM had num_stripes == 0");
253 | 
254 |     auto* cis = (CHUNK_ITEM_STRIPE*)(&c + 1);
255 | 
256 |     if (cis[0].dev_id != sb.dev_item.dev_id)
257 |         throw runtime_error("Reading from other device not implemented.");
258 | 
259 |     return raw_read(addr - cp.first + cis[0].offset, len);
260 | }
261 | 
262 | bool btrfs::walk_tree(uint64_t addr, const function<bool(const KEY&, string_view)>& func) {
263 |     auto tree = read(addr, sb.node_size);
264 | 
265 |     // FIXME - check checksum
266 | 
267 |     auto& th = *(tree_header*)tree.data();
268 | 
269 |     // if root is not 0, recurse
270 |     if (th.level != 0) {
271 |         auto nodes = (internal_node*)(&th + 1);
272 | 
273 |         for (unsigned int i = 0; i < th.num_items; i++) {
274 |             auto ret = walk_tree(nodes[i].address, func);
275 | 
276 |             if (!ret)
277 |                 return false;
278 |         }
279 | 
280 |         return true;
281 |     }
282 | 
283 |     auto nodes = (leaf_node*)(&th + 1);
284 | 
285 |     for (unsigned int i = 0; i < th.num_items; i++) {
286 |         const auto& n = nodes[i];
287 |         bool b;
288 | 
289 |         if (n.size == 0)
290 |             b = func(n.key, {});
291 |         else
292 |             b = func(n.key, { (char*)&th + sizeof(tree_header) + n.offset, n.size });
293 | 
294 |         if (!b)
295 |             return false;
296 |     }
297 | 
298 |     return true;
299 | }
300 | 
301 | void btrfs::read_chunks() {
302 |     auto ptr = (uint8_t*)&sb.sys_chunk_array;
303 | 
304 |     do {
305 |         auto& key = *(KEY*)ptr;
306 | 
307 |         if (key.obj_type != btrfs_key_type::CHUNK_ITEM)
308 |             break;
309 | 
310 |         auto& ci = *(CHUNK_ITEM*)(ptr + sizeof(key));
311 | 
312 |         basic_string_view<uint8_t> chunk_item{ptr + sizeof(key), sizeof(ci) + (ci.num_stripes * sizeof(CHUNK_ITEM_STRIPE))};
313 | 
314 |         chunks.emplace(key.offset, buffer_t{chunk_item.data(), chunk_item.data() + chunk_item.size()});
315 | 
316 |         ptr += sizeof(key) + chunk_item.size();
317 |     } while (ptr < &sb.sys_chunk_array[SYS_CHUNK_ARRAY_SIZE]);
318 | 
319 | #if 0
320 |     for (const auto& c : chunks) {
321 |         fmt::print("{:x}\n", c.first);
322 | 
323 |         const auto& ci = *(CHUNK_ITEM*)c.second.data();
324 | 
325 |         fmt::print("  size {:x}, root_id {:x}, stripe_length {:x}, type {:x}, opt_io_alignment {:x}, opt_io_width {:x}, sector_size {:x}, num_stripes {:x}, sub_stripes {:x}\n",
326 |                    ci.size, ci.root_id, ci.stripe_length, ci.type, ci.opt_io_alignment, ci.opt_io_width, ci.sector_size, ci.num_stripes, ci.sub_stripes);
327 | 
328 |         auto* cis = (CHUNK_ITEM_STRIPE*)(&ci + 1);
329 | 
330 |         for (unsigned int i = 0; i < ci.num_stripes; i++) {
331 |             fmt::print("  dev_id {:x}, offset {:x}\n", cis[i].dev_id, cis[i].offset);
332 |         }
333 |     }
334 | #endif
335 | 
336 |     chunks_t chunks2;
337 | 
338 |     walk_tree(sb.chunk_tree_addr, [&](const KEY& key, string_view data) {
339 |         if (key.obj_type != btrfs_key_type::CHUNK_ITEM)
340 |             return true;
341 | 
342 |         chunks2.emplace(key.offset, buffer_t{data.data(), data.data() + data.size()});
343 | 
344 |         return true;
345 |     });
346 | 
347 |     chunks.swap(chunks2);
348 | }
349 | 
350 | uint64_t btrfs::find_root_addr(uint64_t root) {
351 |     optional<uint64_t> ret;
352 | 
353 |     walk_tree(sb.root_tree_addr, [&](const KEY& key, string_view data) {
354 |         if (key.obj_id != root || key.obj_type != btrfs_key_type::ROOT_ITEM)
355 |             return true;
356 | 
357 |         const auto& ri = *(ROOT_ITEM*)data.data();
358 | 
359 |         ret = ri.block_number;
360 | 
361 |         return false;
362 |     });
363 | 
364 |     if (!ret.has_value())
365 |         throw formatted_error("Could not find address for root {:x}.", root);
366 | 
367 |     return ret.value();
368 | }
369 | 
370 | void rollback(const string& fn) {
371 |     btrfs b(fn);
372 | 
373 |     auto img_root_addr = b.find_root_addr(image_subvol_id);
374 | 
375 |     // find file called ntfs.img
376 | 
377 |     uint64_t inode = 0;
378 |     uint32_t hash = calc_crc32c(0xfffffffe, (const uint8_t*)image_filename, sizeof(image_filename) - 1);
379 | 
380 |     b.walk_tree(img_root_addr, [&](const KEY& key, string_view data) {
381 |         if (key.obj_id > SUBVOL_ROOT_INODE || (key.obj_id == SUBVOL_ROOT_INODE && key.obj_type > btrfs_key_type::DIR_ITEM))
382 |             return false;
383 | 
384 |         if (key.obj_id == SUBVOL_ROOT_INODE && key.obj_type == btrfs_key_type::DIR_ITEM && key.offset == hash) {
385 |             auto& di = *(DIR_ITEM*)data.data();
386 | 
387 |             // FIXME - handle hash collisions
388 | 
389 |             if (di.n == sizeof(image_filename) - 1 && !memcmp(di.name, image_filename, di.n)) {
390 |                 if (di.key.obj_type != btrfs_key_type::INODE_ITEM)
391 |                     throw formatted_error("DIR_ITEM for {} pointed to object type {}, expected INODE_ITEM.",
392 |                                           string_view(di.name, di.n), di.key.obj_type);
393 | 
394 |                 inode = di.key.obj_id;
395 |             }
396 | 
397 |             return false;
398 |         }
399 | 
400 |         return true;
401 |     });
402 | 
403 |     if (inode == 0)
404 |         throw formatted_error("Could not find {} in subvol {:x}.", image_filename, image_subvol_id);
405 | 
406 |     // parse extent data
407 | 
408 |     map<uint64_t, pair<uint64_t, uint64_t>> extents;
409 | 
410 |     b.walk_tree(img_root_addr, [&](const KEY& key, string_view data) {
411 |         if (key.obj_id > inode || (key.obj_id == inode && key.obj_type > btrfs_key_type::EXTENT_DATA))
412 |             return false;
413 | 
414 |         if (key.obj_id != inode || key.obj_type != btrfs_key_type::EXTENT_DATA)
415 |             return true;
416 | 
417 |         const auto& ed = *(EXTENT_DATA*)data.data();
418 | 
419 |         if (ed.compression != btrfs_compression::none)
420 |             throw runtime_error("NTFS image has been compressed, cannot process.");
421 | 
422 |         if (ed.type == btrfs_extent_type::prealloc)
423 |             return true; // treat as if sparse
424 | 
425 |         if (ed.type == btrfs_extent_type::inline_extent)
426 |             throw runtime_error("NTFS image has inline extents, cannot process.");
427 | 
428 |         if (ed.type != btrfs_extent_type::regular)
429 |             throw formatted_error("Unknown extent type {}.", (unsigned int)ed.type);
430 | 
431 |         const auto& ed2 = *(EXTENT_DATA2*)ed.data;
432 | 
433 |         if (ed2.address == 0 && ed2.size == 0)
434 |             return true; // sparse, skip
435 | 
436 |         extents.emplace(key.offset, make_pair(ed2.address, ed2.size));
437 | 
438 |         return true;
439 |     });
440 | 
441 |     // resolve logical addresses to physical
442 | 
443 |     map<uint64_t, buffer_t> relocs;
444 | 
445 |     for (const auto& e : extents) {
446 |         auto off = e.first;
447 |         auto addr = e.second.first;
448 |         auto len = e.second.second;
449 | 
450 |         auto& c = b.find_chunk(addr);
451 |         auto& ci = *(CHUNK_ITEM*)c.second.data();
452 | 
453 |         if (ci.type & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE |
454 |                        BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 |
455 |                        BLOCK_FLAG_RAID1C3 | BLOCK_FLAG_RAID1C4)) {
456 |             throw formatted_error("Data chunk {:x} was not SINGLE, cannot process.",
457 |                                   c.first);
458 |         }
459 | 
460 |         auto* cis = (CHUNK_ITEM_STRIPE*)(&ci + 1);
461 | 
462 |         auto physoff = addr - c.first + cis[0].offset;
463 | 
464 |         if (off == physoff) // identity map
465 |             continue;
466 | 
467 |         relocs.emplace(off, buffer_t{});
468 | 
469 |         auto& r = relocs.at(off);
470 |         auto buf = b.raw_read(physoff, (uint32_t)len); // FIXME - check csum?
471 | 
472 |         r.swap(buf);
473 |     }
474 | 
475 |     for (const auto& r : relocs) {
476 |         b.raw_write(r.first, r.second);
477 |     }
478 | 
479 |     // FIXME - TRIM?
480 | 
481 |     fmt::print("Device successfully rolled back to NTFS.\n");
482 | }
483 | 


--------------------------------------------------------------------------------
/src/sha256.c:
--------------------------------------------------------------------------------
  1 | #include <stdint.h>
  2 | #include <string.h>
  3 | 
  4 | // Public domain code from https://github.com/amosnier/sha-2
  5 | 
  6 | // FIXME - x86 SHA extensions
  7 | 
  8 | #define CHUNK_SIZE 64
  9 | #define TOTAL_LEN_LEN 8
 10 | 
 11 | /*
 12 |  * ABOUT bool: this file does not use bool in order to be as pre-C99 compatible as possible.
 13 |  */
 14 | 
 15 | /*
 16 |  * Comments from pseudo-code at https://en.wikipedia.org/wiki/SHA-2 are reproduced here.
 17 |  * When useful for clarification, portions of the pseudo-code are reproduced here too.
 18 |  */
 19 | 
 20 | /*
 21 |  * Initialize array of round constants:
 22 |  * (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311):
 23 |  */
 24 | static const uint32_t k[] = {
 25 | 	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 26 | 	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 27 | 	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
 28 | 	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
 29 | 	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
 30 | 	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
 31 | 	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
 32 | 	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 33 | };
 34 | 
 35 | struct buffer_state { /* read cursor and padding progress shared by init_buf_state() and calc_chunk() */
 36 | 	const uint8_t * p; /* next input byte that has not been copied into a chunk yet */
 37 | 	size_t len; /* number of input bytes still to be consumed, starting at p */
 38 | 	size_t total_len; /* length of the whole message in bytes; emitted (times 8) as the final padding field */
 39 | 	int single_one_delivered; /* bool: the 0x80 padding byte has been emitted */
 40 | 	int total_len_delivered; /* bool: the length field has been emitted, no further chunks follow */
 41 | };
 42 | 
 43 | static inline uint32_t right_rot(uint32_t value, unsigned int count)
 44 | {
 45 | 	/* Rotate right: the low `count` bits wrap around to the top of the word.
 46 | 	 * Both shifts are defined behaviour in standard C only while
 47 | 	 * 0 < count < 32, which is all that is needed here. */
 48 | 	const uint32_t wrapped = value << (32 - count);
 49 | 	return (value >> count) | wrapped;
 50 | }
 51 | 
 52 | static void init_buf_state(struct buffer_state * state, const void * input, size_t len)
 53 | {
 54 | 	/* Clearing the whole state resets both "delivered" flags to 0 (false). */
 55 | 	memset(state, 0, sizeof *state);
 56 | 
 57 | 	state->p = input;
 58 | 	state->len = state->total_len = len;	/* len is consumed by calc_chunk(), total_len is kept for the padding */
 59 | }
 60 | 
 61 | /* Fill chunk with the next 64 bytes to compress: input while it lasts, then the
 62 |  * padding (0x80, zeroes, 64-bit big-endian bit count). Return value: bool. */
 63 | static int calc_chunk(uint8_t chunk[CHUNK_SIZE], struct buffer_state * state)
 64 | {
 65 | 	size_t space_in_chunk;
 66 | 
 67 | 	if (state->total_len_delivered)
 68 | 		return 0;
 69 | 
 70 | 	if (state->len >= CHUNK_SIZE) {
 71 | 		memcpy(chunk, state->p, CHUNK_SIZE);
 72 | 		state->p += CHUNK_SIZE;
 73 | 		state->len -= CHUNK_SIZE;
 74 | 		return 1;
 75 | 	}
 76 | 
 77 | 	/* Skip the copy for an empty tail: memcpy() and pointer arithmetic on a
 78 | 	 * null input pointer are undefined even when the length is zero. */
 79 | 	space_in_chunk = CHUNK_SIZE - state->len;
 80 | 	if (state->len > 0) {
 81 | 		memcpy(chunk, state->p, state->len);
 82 | 		chunk += state->len;
 83 | 		state->p += state->len;
 84 | 		state->len = 0;
 85 | 	}
 86 | 
 87 | 	/* If we are here, space_in_chunk is one at minimum. */
 88 | 	if (!state->single_one_delivered) {
 89 | 		*chunk++ = 0x80;
 90 | 		space_in_chunk -= 1;
 91 | 		state->single_one_delivered = 1;
 92 | 	}
 93 | 
 94 | 	/* Conclude now if the total length still fits; otherwise zero-pad the rest
 95 | 	 * of this chunk and conclude at the next invocation of this function. */
 96 | 	if (space_in_chunk >= TOTAL_LEN_LEN) {
 97 | 		const size_t left = space_in_chunk - TOTAL_LEN_LEN;
 98 | 		size_t len = state->total_len;
 99 | 		int i;
100 | 		memset(chunk, 0x00, left);
101 | 		chunk += left;
102 | 
103 | 		/* Storing of len * 8 as a big endian 64-bit without overflow. */
104 | 		chunk[7] = (uint8_t) (len << 3);
105 | 		len >>= 5;
106 | 		for (i = 6; i >= 0; i--) {
107 | 			chunk[i] = (uint8_t) len;
108 | 			len >>= 8;
109 | 		}
110 | 		state->total_len_delivered = 1;
111 | 	} else {
112 | 		memset(chunk, 0x00, space_in_chunk);
113 | 	}
114 | 
115 | 	return 1;
116 | }
117 | 
118 | /*
119 |  * Limitations:
120 |  * - Since input is a pointer in RAM, the data to hash should be in RAM, which could be a problem
121 |  *   for large data sizes.
122 |  * - SHA algorithms theoretically operate on bit strings. However, this implementation has no support
123 |  *   for bit string lengths that are not multiples of eight, and it really operates on arrays of bytes.
124 |  *   In particular, the len parameter is a number of bytes.
125 |  */
126 | void calc_sha256(uint8_t* hash, const void* input, size_t len)
127 | {
128 | 	/* Computes the SHA-256 of the len bytes at input and writes the 32-byte digest to hash.
129 | 	 * Note 1: All integers (except indexes) are 32-bit unsigned integers and addition is calculated modulo 2^32.
130 | 	 * Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 <= i <= 63
131 | 	 * Note 3: The compression function uses 8 working variables, a through h
132 | 	 * Note 4: Big-endian convention is used when expressing the constants in this pseudocode,
133 | 	 *     and when parsing message block data from bytes to words, for example,
134 | 	 *     the first word of the input message "abc" after padding is 0x61626380
135 | 	 */
136 | 
137 | 	/*
138 | 	 * Initialize hash values:
139 | 	 * (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
140 | 	 */
141 | 	uint32_t h[] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
142 | 	unsigned i, j;
143 | 
144 | 	/* 512-bit chunks is what we will operate on. */
145 | 	uint8_t chunk[64];
146 | 
147 | 	struct buffer_state state;
148 | 
149 | 	init_buf_state(&state, input, len);
150 | 
151 | 	while (calc_chunk(chunk, &state)) { /* one iteration per 64-byte block, padding blocks included */
152 | 		uint32_t ah[8];
153 | 
154 | 		const uint8_t *p = chunk;
155 | 
156 | 		/* Initialize working variables to current hash value: */
157 | 		for (i = 0; i < 8; i++)
158 | 			ah[i] = h[i];
159 | 
160 | 		/* Compression function main loop: */
161 | 		for (i = 0; i < 4; i++) { /* four passes of 16 rounds each, 64 rounds in total */
162 | 			/*
163 | 			 * The w-array is really w[64], but since we only need
164 | 			 * 16 of them at a time, we save stack by calculating
165 | 			 * 16 at a time.
166 | 			 *
167 | 			 * This optimization was not there initially and the
168 | 			 * rest of the comments about w[64] are kept in their
169 | 			 * initial state.
170 | 			 */
171 | 
172 | 			/*
173 | 			 * create a 64-entry message schedule array w[0..63] of 32-bit words
174 | 			 * (The initial values in w[0..63] don't matter, so many implementations zero them here)
175 | 			 * copy chunk into first 16 words w[0..15] of the message schedule array
176 | 			 */
177 | 			uint32_t w[16];
178 | 
179 | 			for (j = 0; j < 16; j++) {
180 | 				if (i == 0) {
181 | 					w[j] = (uint32_t) p[0] << 24 | (uint32_t) p[1] << 16 |
182 | 						(uint32_t) p[2] << 8 | (uint32_t) p[3];
183 | 					p += 4;
184 | 				} else {
185 | 					/* Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array. With t = 16 * i + j, slot j still holds w[t-16], slot (j + 1) & 0xf holds w[t-15], slot (j + 9) & 0xf holds w[t-7] and slot (j + 14) & 0xf holds w[t-2]: */
186 | 					const uint32_t s0 = right_rot(w[(j + 1) & 0xf], 7) ^ right_rot(w[(j + 1) & 0xf], 18) ^ (w[(j + 1) & 0xf] >> 3);
187 | 					const uint32_t s1 = right_rot(w[(j + 14) & 0xf], 17) ^ right_rot(w[(j + 14) & 0xf], 19) ^ (w[(j + 14) & 0xf] >> 10);
188 | 					w[j] = w[j] + s0 + w[(j + 9) & 0xf] + s1;
189 | 				}
190 | 				const uint32_t s1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25);
191 | 				const uint32_t ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]);
192 | 				const uint32_t temp1 = ah[7] + s1 + ch + k[i << 4 | j] + w[j]; /* i << 4 | j is the round number 16 * i + j */
193 | 				const uint32_t s0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22);
194 | 				const uint32_t maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]);
195 | 				const uint32_t temp2 = s0 + maj;
196 | 
197 | 				ah[7] = ah[6];
198 | 				ah[6] = ah[5];
199 | 				ah[5] = ah[4];
200 | 				ah[4] = ah[3] + temp1;
201 | 				ah[3] = ah[2];
202 | 				ah[2] = ah[1];
203 | 				ah[1] = ah[0];
204 | 				ah[0] = temp1 + temp2;
205 | 			}
206 | 		}
207 | 
208 | 		/* Add the compressed chunk to the current hash value: */
209 | 		for (i = 0; i < 8; i++)
210 | 			h[i] += ah[i];
211 | 	}
212 | 
213 | 	/* Produce the final hash value (big-endian): */
214 | 	for (i = 0, j = 0; i < 8; i++)
215 | 	{
216 | 		hash[j++] = (uint8_t) (h[i] >> 24);
217 | 		hash[j++] = (uint8_t) (h[i] >> 16);
218 | 		hash[j++] = (uint8_t) (h[i] >> 8);
219 | 		hash[j++] = (uint8_t) h[i];
220 | 	}
221 | }
222 | 


--------------------------------------------------------------------------------
/src/xxhash.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |    xxHash - Extremely Fast Hash algorithm
  3 |    Header File
  4 |    Copyright (C) 2012-2016, Yann Collet.
  5 | 
  6 |    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  7 | 
  8 |    Redistribution and use in source and binary forms, with or without
  9 |    modification, are permitted provided that the following conditions are
 10 |    met:
 11 | 
 12 |        * Redistributions of source code must retain the above copyright
 13 |    notice, this list of conditions and the following disclaimer.
 14 |        * Redistributions in binary form must reproduce the above
 15 |    copyright notice, this list of conditions and the following disclaimer
 16 |    in the documentation and/or other materials provided with the
 17 |    distribution.
 18 | 
 19 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23 |    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24 |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25 |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26 |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27 |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28 |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29 |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30 | 
 31 |    You can contact the author at :
 32 |    - xxHash source repository : https://github.com/Cyan4973/xxHash
 33 | */
 34 | 
 35 | /* Notice extracted from xxHash homepage :
 36 | 
 37 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
 38 | It also successfully passes all tests from the SMHasher suite.
 39 | 
 40 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
 41 | 
 42 | Name            Speed       Q.Score   Author
 43 | xxHash          5.4 GB/s     10
 44 | CrapWow         3.2 GB/s      2       Andrew
 45 | MurmurHash 3a   2.7 GB/s     10       Austin Appleby
 46 | SpookyHash      2.0 GB/s     10       Bob Jenkins
 47 | SBox            1.4 GB/s      9       Bret Mulvey
 48 | Lookup3         1.2 GB/s      9       Bob Jenkins
 49 | SuperFastHash   1.2 GB/s      1       Paul Hsieh
 50 | CityHash64      1.05 GB/s    10       Pike & Alakuijala
 51 | FNV             0.55 GB/s     5       Fowler, Noll, Vo
 52 | CRC32           0.43 GB/s     9
 53 | MD5-32          0.33 GB/s    10       Ronald L. Rivest
 54 | SHA1-32         0.28 GB/s    10
 55 | 
 56 | Q.Score is a measure of quality of the hash function.
 57 | It depends on successfully passing SMHasher test set.
 58 | 10 is a perfect score.
 59 | 
 60 | A 64-bits version, named XXH64, is available since r35.
 61 | It offers much better speed, but for 64-bits applications only.
 62 | Name     Speed on 64 bits    Speed on 32 bits
 63 | XXH64       13.8 GB/s            1.9 GB/s
 64 | XXH32        6.8 GB/s            6.0 GB/s
 65 | */
 66 | 
 67 | #if defined (__cplusplus)
 68 | extern "C" {
 69 | #endif
 70 | 
 71 | #ifndef XXHASH_H_5627135585666179
 72 | #define XXHASH_H_5627135585666179 1
 73 | 
 74 | 
 75 | /* ****************************
 76 | *  Definitions
 77 | ******************************/
 78 | #include <stddef.h>   /* size_t */
 79 | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 80 | 
 81 | 
 82 | /* ****************************
 83 | *  API modifier
 84 | ******************************/
 85 | /** XXH_PRIVATE_API
 86 | *   This is useful if you want to include xxhash functions in `static` mode
 87 | *   in order to inline them, and remove their symbol from the public list.
 88 | *   Methodology :
 89 | *     #define XXH_PRIVATE_API
 90 | *     #include "xxhash.h"
 91 | *   `xxhash.c` is automatically included.
 92 | *   It's not useful to compile and link it as a separate module anymore.
 93 | */
 94 | #ifdef XXH_PRIVATE_API
 95 | #  ifndef XXH_STATIC_LINKING_ONLY
 96 | #    define XXH_STATIC_LINKING_ONLY
 97 | #  endif
 98 | #  if defined(__GNUC__)
 99 | #    define XXH_PUBLIC_API static __inline __attribute__((unused))
100 | #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
101 | #    define XXH_PUBLIC_API static inline
102 | #  elif defined(_MSC_VER)
103 | #    define XXH_PUBLIC_API static __inline
104 | #  else
105 | #    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
106 | #  endif
107 | #else
108 | #  define XXH_PUBLIC_API   /* do nothing */
109 | #endif /* XXH_PRIVATE_API */
110 | 
111 | /*!XXH_NAMESPACE, aka Namespace Emulation :
112 | 
113 | If you want to include _and expose_ xxHash functions from within your own library,
114 | but also want to avoid symbol collisions with another library which also includes xxHash,
115 | 
116 | you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
117 | with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
118 | 
119 | Note that no change is required within the calling program as long as it includes `xxhash.h` :
120 | regular symbol name will be automatically translated by this header.
121 | */
122 | #ifdef XXH_NAMESPACE
123 | #  define XXH_CAT(A,B) A##B
124 | #  define XXH_NAME2(A,B) XXH_CAT(A,B)
125 | #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
126 | #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
127 | #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
128 | #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
129 | #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
130 | #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
131 | #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
132 | #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
133 | #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
134 | #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
135 | #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
136 | #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
137 | #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
138 | #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
139 | #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
140 | #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
141 | #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
142 | #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
143 | #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
144 | #endif
145 | 
146 | 
147 | /* *************************************
148 | *  Version
149 | ***************************************/
150 | #define XXH_VERSION_MAJOR    0
151 | #define XXH_VERSION_MINOR    6
152 | #define XXH_VERSION_RELEASE  2
153 | #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
154 | XXH_PUBLIC_API unsigned XXH_versionNumber (void);
155 | 
156 | 
157 | /* ****************************
158 | *  Simple Hash Functions
159 | ******************************/
160 | typedef unsigned int       XXH32_hash_t;
161 | typedef unsigned long long XXH64_hash_t;
162 | 
163 | XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
164 | XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
165 | 
166 | /*!
167 | XXH32() :
168 |     Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
169 |     The memory between input & input+length must be valid (allocated and read-accessible).
170 |     "seed" can be used to alter the result predictably.
171 |     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
172 | XXH64() :
173 |     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
174 |     "seed" can be used to alter the result predictably.
175 |     This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
176 | */
177 | 
178 | 
179 | /* ****************************
180 | *  Streaming Hash Functions
181 | ******************************/
182 | typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
183 | typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
184 | 
185 | /*! State allocation, compatible with dynamic libraries */
186 | 
187 | XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
188 | XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
189 | 
190 | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
191 | XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
192 | 
193 | 
194 | /* hash streaming */
195 | 
196 | XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
197 | XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
198 | XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
199 | 
200 | XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
201 | XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
202 | XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
203 | 
204 | /*
205 | These functions generate the xxHash of an input provided in multiple segments.
206 | Note that, for small input, they are slower than single-call functions, due to state management.
207 | For small input, prefer `XXH32()` and `XXH64()` .
208 | 
209 | XXH state must first be allocated, using XXH*_createState() .
210 | 
211 | Start a new hash by initializing state with a seed, using XXH*_reset().
212 | 
213 | Then, feed the hash state by calling XXH*_update() as many times as necessary.
214 | Obviously, input must be allocated and read accessible.
215 | The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
216 | 
217 | Finally, a hash value can be produced anytime, by using XXH*_digest().
218 | This function returns the nn-bits hash as an int or long long.
219 | 
220 | It's still possible to continue inserting input into the hash state after a digest,
221 | and generate some new hashes later on, by calling again XXH*_digest().
222 | 
223 | When done, free XXH state space if it was allocated dynamically.
224 | */
225 | 
226 | 
227 | /* **************************
228 | *  Utils
229 | ****************************/
230 | #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 */
231 | #  define restrict   /* disable restrict */
232 | #endif
233 | 
234 | XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
235 | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
236 | 
237 | 
238 | /* **************************
239 | *  Canonical representation
240 | ****************************/
241 | /* The default result types of the XXH functions are primitive unsigned 32-bit and 64-bit integers.
242 | *  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
243 | *  These functions allow transformation of hash result into and from its canonical format.
244 | *  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
245 | */
246 | typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
247 | typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
248 | 
249 | XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
250 | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
251 | 
252 | XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
253 | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
254 | 
255 | #endif /* XXHASH_H_5627135585666179 */
256 | 
257 | 
258 | 
259 | /* ================================================================================================
260 |    This section contains definitions which are not guaranteed to remain stable.
261 |    They may change in future versions, becoming incompatible with a different version of the library.
262 |    They shall only be used with static linking.
263 |    Never use these definitions in association with dynamic linking !
264 | =================================================================================================== */
265 | #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
266 | #define XXH_STATIC_H_3543687687345
267 | 
268 | /* These definitions are only meant to allow allocation of XXH state
269 |    statically, on stack, or in a struct for example.
270 |    Do not use members directly. */
271 | 
272 |    struct XXH32_state_s {   /* complete layout behind the incomplete XXH32_state_t; only visible under XXH_STATIC_LINKING_ONLY */
273 |        unsigned total_len_32;
274 |        unsigned large_len;
275 |        unsigned v1;
276 |        unsigned v2;
277 |        unsigned v3;
278 |        unsigned v4;
279 |        unsigned mem32[4];   /* buffer defined as U32 for alignment */
280 |        unsigned memsize;    /* NOTE(review): presumably the number of bytes currently held in mem32 - confirm against xxhash.c */
281 |        unsigned reserved;   /* never read nor written, will be removed in a future version */
282 |    };   /* typedef'd to XXH32_state_t */
284 |    struct XXH64_state_s {
285 |        unsigned long long total_len;
286 |        unsigned long long v1;
287 |        unsigned long long v2;
288 |        unsigned long long v3;
289 |        unsigned long long v4;
290 |        unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
291 |        unsigned memsize;
292 |        unsigned reserved[2];          /* never read nor write, will be removed in a future version */
293 |    };   /* typedef'd to XXH64_state_t */
294 | 
295 | 
296 | #  ifdef XXH_PRIVATE_API
297 | #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
298 | #  endif
299 | 
300 | #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
301 | 
302 | 
303 | #if defined (__cplusplus)
304 | }
305 | #endif
306 | 


--------------------------------------------------------------------------------