├── .github └── workflows │ └── build.yml ├── CMakeLists.txt ├── LICENCE ├── README.md ├── ntfs2btrfs.8.in └── src ├── blake2-impl.h ├── blake2b-ref.c ├── btrfs.h ├── compress.cpp ├── config.h.in ├── crc32c-gas.S ├── crc32c-masm.asm ├── crc32c.c ├── crc32c.h ├── decomp.cpp ├── ebiggers ├── aligned_malloc.c ├── common_defs.h ├── decompress_common.c ├── decompress_common.h ├── lzx_common.c ├── lzx_common.h ├── lzx_constants.h ├── lzx_decompress.c ├── system_compression.h ├── xpress_constants.h └── xpress_decompress.c ├── ntfs.cpp ├── ntfs.h ├── ntfs2btrfs.cpp ├── ntfs2btrfs.h ├── rollback.cpp ├── sha256.c ├── xxhash.c └── xxhash.h /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build x86_64-pc-linux-gnu 2 | on: [push] 3 | jobs: 4 | x86_64-pc-linux-gnu: 5 | runs-on: ubuntu-rolling 6 | steps: 7 | - run: apt-get update 8 | - run: apt-get install -y g++ git cmake nodejs pkg-config libfmt-dev liblzo2-dev libzstd-dev zlib1g-dev 9 | - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV 10 | - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA} 11 | - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }} 12 | - run: mkdir -p debug-work 13 | - run: mkdir -p release-work 14 | - run: | 15 | cmake -DCMAKE_BUILD_TYPE=Debug \ 16 | -DCMAKE_INSTALL_PREFIX=${PWD}/install/debug \ 17 | -DCMAKE_INSTALL_INCLUDEDIR=../include \ 18 | -DWITH_OPENSSL=ON -DENABLE_KRB5=ON \ 19 | -S ${SHORT_SHA} -B debug-work && \ 20 | cmake --build debug-work --parallel `nproc` && \ 21 | cmake --install debug-work 22 | - run: | 23 | cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ 24 | -DCMAKE_INSTALL_PREFIX=${PWD}/install \ 25 | -DWITH_OPENSSL=ON -DENABLE_KRB5=ON \ 26 | -S ${SHORT_SHA} -B release-work && \ 27 | cmake --build release-work --parallel `nproc` && \ 28 | cmake --install release-work 29 | - uses: actions/upload-artifact@v3 30 | with: 31 | name: ${{ github.sha }} 32 | overwrite: true 33 
| path: | 34 | install 35 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14.3) 2 | # CMP0091 NEW is required for CMAKE_MSVC_RUNTIME_LIBRARY to take effect. 3 | cmake_policy(SET CMP0091 NEW) 4 | set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") 5 | 6 | project(ntfs2btrfs VERSION 20240115) 7 | 8 | include(GNUInstallDirs) 9 | 10 | option(WITH_ZLIB "Include zlib support" ON) 11 | option(WITH_LZO "Include lzo support" ON) 12 | option(WITH_ZSTD "Include zstd support" ON) 13 | 14 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) 15 | configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ntfs2btrfs.8.in ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8) 16 | 17 | set(CMAKE_CXX_STANDARD 20) 18 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 19 | 20 | find_package(fmt REQUIRED) 21 | find_package(PkgConfig REQUIRED) 22 | 23 | if(WITH_ZLIB) 24 | find_package(ZLIB REQUIRED) 25 | endif() 26 | 27 | if(WITH_LZO) 28 | pkg_check_modules(LZO REQUIRED lzo2) 29 | endif() 30 | 31 | if(WITH_ZSTD) 32 | pkg_check_modules(ZSTD REQUIRED libzstd) 33 | endif() 34 | 35 | set(SRC_FILES src/ntfs2btrfs.cpp 36 | src/ntfs.cpp 37 | src/decomp.cpp 38 | src/compress.cpp 39 | src/rollback.cpp 40 | src/crc32c.c 41 | src/xxhash.c 42 | src/sha256.c 43 | src/blake2b-ref.c 44 | src/ebiggers/lzx_decompress.c 45 | src/ebiggers/lzx_common.c 46 | src/ebiggers/aligned_malloc.c 47 | src/ebiggers/decompress_common.c 48 | src/ebiggers/xpress_decompress.c) 49 | 50 | if(MSVC) 51 | enable_language(ASM_MASM) 52 | set(SRC_FILES ${SRC_FILES} src/crc32c-masm.asm) 53 | else() 54 | enable_language(ASM) 55 | set(SRC_FILES ${SRC_FILES} src/crc32c-gas.S) 56 | endif() 57 | 58 | add_executable(ntfs2btrfs ${SRC_FILES}) 59 | # Define _GLIBCXX_DEBUG in Debug configurations only; the genex also works with multi-config generators. 60 | target_compile_definitions(ntfs2btrfs PRIVATE 61 | $<$<CONFIG:Debug>:_GLIBCXX_DEBUG> 62 | ) 63 | 64 | target_link_libraries(ntfs2btrfs PRIVATE fmt::fmt-header-only) 65 | 66 | if(WITH_ZLIB) 67 | 
target_link_libraries(ntfs2btrfs PRIVATE ZLIB::ZLIB) 68 | endif() 69 | 70 | if(WITH_LZO) 71 | target_link_libraries(ntfs2btrfs PRIVATE ${LZO_LINK_LIBRARIES}) 72 | endif() 73 | 74 | if(WITH_ZSTD) 75 | target_link_libraries(ntfs2btrfs PRIVATE ${ZSTD_LINK_LIBRARIES}) 76 | endif() 77 | # config.h is generated into the build directory by configure_file() above. 78 | target_include_directories(ntfs2btrfs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") 79 | 80 | # Work around bug in MSVC version of cmake - see https://gitlab.kitware.com/cmake/cmake/-/merge_requests/4257 81 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreaded "") 82 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDLL "") 83 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebug "") 84 | set(CMAKE_ASM_MASM_COMPILE_OPTIONS_MSVC_RUNTIME_LIBRARY_MultiThreadedDebugDLL "") 85 | 86 | if(MSVC) 87 | target_compile_options(ntfs2btrfs PRIVATE /W4) 88 | else() 89 | target_compile_options(ntfs2btrfs PRIVATE -Wall -Wextra -Wno-address-of-packed-member -Wconversion -Wno-unknown-pragmas -Werror=pointer-arith) 90 | endif() 91 | 92 | install(TARGETS ntfs2btrfs DESTINATION ${CMAKE_INSTALL_SBINDIR}) 93 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ntfs2btrfs.8 DESTINATION ${CMAKE_INSTALL_MANDIR}/man8) 94 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. 
This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 
49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. 
You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 
113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Ntfs2btrfs 2 | ========== 3 | 4 | Ntfs2btrfs is a tool which does in-place conversion of Microsoft's NTFS 5 | filesystem to the open-source filesystem Btrfs, much as `btrfs-convert` 6 | does for ext2. The original image is saved as a reflink copy at 7 | `image/ntfs.img`, and if you want to keep the conversion you can delete 8 | this to free up space. 9 | 10 | Although I believe this tool to be stable, please note that I take no 11 | responsibility if something goes awry! 12 | 13 | You're probably also interested in [WinBtrfs](https://github.com/maharmstone/btrfs), 14 | which is a Btrfs filesystem driver for Windows. 15 | 16 | Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's 17 | "WOF compressed data", and whose code I've used here. 18 | 19 | Usage 20 | ----- 21 | 22 | On Windows, from an Administrator command prompt: 23 | 24 | `ntfs2btrfs.exe D:\` 25 | 26 | Bear in mind that it won't work with your boot drive or a drive containing a 27 | pagefile that's currently in use. 28 | 29 | If you are using WinBtrfs, you will need to clear the readonly flag on the 30 | `image` subvolume before you can delete it. 
31 | 32 | On Linux, as root: 33 | 34 | `ntfs2btrfs /dev/sda1` 35 | 36 | Installation 37 | ------------ 38 | 39 | On Windows, go to the [Releases page](https://github.com/maharmstone/ntfs2btrfs/releases) and 40 | download the latest Zip file, or use [Scoop](https://github.com/ScoopInstaller/Main/blob/master/bucket/ntfs2btrfs.json). 41 | 42 | For Linux: 43 | * [Arch](https://aur.archlinux.org/packages/ntfs2btrfs) 44 | * [Fedora](https://src.fedoraproject.org/rpms/ntfs2btrfs) (thanks to [Conan-Kudo](https://github.com/Conan-Kudo)) 45 | * Gentoo - available as sys-fs/ntfs2btrfs in the guru repository 46 | * [Debian](https://packages.debian.org/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko)) 47 | * [Ubuntu](https://packages.ubuntu.com/ntfs2btrfs) (thanks to [alexmyczko](https://github.com/alexmyczko)) 48 | * [openSUSE](https://build.opensuse.org/package/show/filesystems/ntfs2btrfs) (thanks to David Sterba) 49 | 50 | For other distributions or operating systems, you will need to compile it yourself - see 51 | below. 
52 | 53 | Changelog 54 | --------- 55 | 56 | * 20240115 57 | * Fixed compilation on GCC 14 (`-Werror=incompatible-pointer-types` now enabled by default) 58 | 59 | * 20230501 60 | * Fixed inline extent items being written out of order (not diagnosed by `btrfs check`) 61 | * Fixed metadata items being written with wrong level value (not diagnosed by `btrfs check`) 62 | * ADSes with overly-long names now get skipped 63 | 64 | * 20220812 65 | * Added --no-datasum option, to skip calculating checksums 66 | * LXSS / WSL metadata is now preserved 67 | * Fixed lowercase drive letters not being recognized 68 | * Fixed crash due to iterator invalidation (thanks to nyanpasu64) 69 | * Fixed corruption when NTFS places file in last megabyte of disk 70 | 71 | * 20210923 72 | * Added (Btrfs) compression support (zlib, lzo, and zstd) 73 | * Added support for other hash algorithms: xxhash, sha256, and blake2 74 | * Added support for rolling back to NTFS 75 | * Added support for NT4-style security descriptors 76 | * Increased conversion speed for volume with many inodes 77 | * Fixed bug when fragmented file was in superblock location 78 | * Fixed buffer overflow when reading security descriptors 79 | * Fixed bug where filesystems would be corrupted in a way that `btrfs check` doesn't pick up 80 | 81 | * 20210523 82 | * Improved handling of large compressed files 83 | 84 | * 20210402 (source code only release) 85 | * Fixes for compilation on non-amd64 architectures 86 | 87 | * 20210105 88 | * Added support for NTFS compression 89 | * Added support for "WOF compressed data" 90 | * Fixed problems caused by sparse files 91 | * Miscellaneous bug fixes 92 | 93 | * 20201108 94 | * Improved error handling 95 | * Added better message if NTFS is corrupted or unclean 96 | * Better handling of relocations 97 | 98 | * 20200330 99 | * Initial release 100 | 101 | Compilation 102 | ----------- 103 | 104 | On Windows, open the source directory in a recent version of MSVC, right-click 105 | on 
CMakeLists.txt, and click Compile. 106 | 107 | On Linux: 108 | 109 | mkdir build 110 | cd build 111 | cmake .. 112 | make 113 | 114 | You'll also need [libfmt](https://github.com/fmtlib/fmt) installed - it should be 115 | in your package manager. 116 | 117 | Compression support requires zlib, lzo, and/or zstd - again, they will be in your 118 | package manager. See also the cmake options WITH_ZLIB, WITH_LZO, and WITH_ZSTD, 119 | if you want to disable this. 120 | 121 | What works 122 | ---------- 123 | 124 | * Files 125 | * Directories 126 | * Symlinks 127 | * Other reparse points 128 | * Security descriptors 129 | * Alternate data streams 130 | * DOS attributes (hidden, system, etc.) 131 | * Rollback to original NTFS image 132 | * Preservation of LXSS metadata 133 | 134 | What doesn't work 135 | ----------------- 136 | 137 | * Windows' old extended attributes (you're not using these) 138 | * Large (i.e >16KB) ADSes (you're not using these either) 139 | * Preservation of the case-sensitivity flag 140 | * Unusual cluster sizes (i.e. not 4 KB) 141 | * Encrypted files 142 | 143 | Can I boot Windows from Btrfs with this? 144 | ---------------------------------------- 145 | 146 | Yes, if the stars are right. See [Quibble](https://github.com/maharmstone/quibble). 147 | -------------------------------------------------------------------------------- /ntfs2btrfs.8.in: -------------------------------------------------------------------------------- 1 | .TH NTFS2BTRFS "8" "January 2024" "ntfs2btrfs @PROJECT_VERSION@" "System Administration" 2 | .SH NAME 3 | ntfs2btrfs \- convert ntfs filesystem to btrfs filesystem 4 | .SH SYNOPSIS 5 | \fBntfs2btrfs\fR [options] \fIdevice\fR 6 | .SH DESCRIPTION 7 | This is a tool which does in-place conversion of Microsoft's NTFS filesystem 8 | to the open-source filesystem Btrfs, much as \fBbtrfs\-convert\fR does for ext2. 
9 | .SH OPTIONS 10 | .PP 11 | -c \fI<ALGO>\fR, --compress=\fI<ALGO>\fR 12 | .RS 4 13 | Uses the specified algorithm to recompress files that are compressed on the 14 | NTFS volume; valid choices are \fIzstd\fR, \fIlzo\fR, \fIzlib\fR, or \fInone\fR. 15 | If you don't specify any value, \fIzstd\fR will be used, assuming it's been 16 | compiled in. Note that this will be ignored if you also select --no-datasum (see 17 | below). 18 | .RE 19 | .PP 20 | -h \fI<ALGO>\fR, --hash=\fI<ALGO>\fR 21 | .RS 4 22 | Uses the specified checksumming algorithm; valid choices are \fIcrc32c\fR, 23 | \fIxxhash\fR, \fIsha256\fR, and \fIblake2\fR. The first of these will be used by 24 | default, and should be fine for most purposes. 25 | .RE 26 | .PP 27 | -r, --rollback 28 | .RS 4 29 | Tries to restore the original NTFS filesystem. See \fBROLLBACK\fR below. 30 | .RE 31 | .PP 32 | -d, --no-datasum 33 | .RS 4 34 | Skips calculating checksums for existing data. Don't choose this unless you're 35 | sure it's what you want. 36 | .RE 37 | .SH ROLLBACK 38 | The original filesystem image is saved as \fIimage/ntfs.img\fR as a reflink copy. You 39 | can restore this at any time by using the rollback option, provided that you've 40 | not moved the data by doing a balance. Bear in mind that this restores the volume 41 | to how it was when you did the conversion, meaning that any changes you've made 42 | since will be lost. 43 | .PP 44 | If you decide to keep the conversion, you can remove the \fIimage\fR subvolume at 45 | any point to free up space. 46 | .SH XATTRS 47 | Various bits of NTFS-specific data are stored as Btrfs xattrs, in a manner that 48 | the Windows btrfs driver understands (\fBhttps://github.com/maharmstone/btrfs\fR). Some 49 | should also be understood by tools such as Wine and Samba, but YMMV. 50 | .IP \[bu] 2 51 | The NTFS attribute value is stored as a hex string at \fIuser.DOSATTRIB\fR. 52 | .IP \[bu] 2 53 | The reparse points on directories are stored at \fIuser.reparse\fR. 
NTFS symlinks should 54 | be converted into POSIX symlinks. The data for other reparse points will be stored as 55 | the contents of the files. 56 | .IP \[bu] 2 57 | The NT security descriptor is stored as \fIsecurity.NTACL\fR. 58 | .IP \[bu] 2 59 | Alternate data streams on files are stored in the \fIuser\fR namespace, e.g. \fI:Zone.Identifier\fR 60 | becomes \fIuser.Zone.Identifier\fR. 61 | .SH SEE ALSO 62 | .BR btrfs (8), 63 | .BR mkfs.btrfs (8). 64 | .SH AUTHOR 65 | Written by Mark Harmstone (\fBmark@harmstone.com\fR). 66 | 67 | .SH WEB 68 | .IP https://github.com/maharmstone/ntfs2btrfs 69 | -------------------------------------------------------------------------------- /src/blake2-impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - reference C implementations 3 | 4 | Copyright 2012, Samuel Neves . You may use this under the 5 | terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at 6 | your option. The terms of these licenses can be found at: 7 | 8 | - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 9 | - OpenSSL license : https://www.openssl.org/source/license.html 10 | - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | More information about the BLAKE2 hash function can be found at 13 | https://blake2.net. 
14 | */ 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | #define NATIVE_LITTLE_ENDIAN 21 | 22 | #if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L) 23 | #if defined(_MSC_VER) 24 | #define BLAKE2_INLINE __inline 25 | #elif defined(__GNUC__) 26 | #define BLAKE2_INLINE __inline__ 27 | #else 28 | #define BLAKE2_INLINE 29 | #endif 30 | #else 31 | #define BLAKE2_INLINE inline 32 | #endif 33 | 34 | static BLAKE2_INLINE uint32_t load32( const void *src ) 35 | { 36 | #if defined(NATIVE_LITTLE_ENDIAN) 37 | uint32_t w; 38 | memcpy(&w, src, sizeof w); 39 | return w; 40 | #else 41 | const uint8_t *p = ( const uint8_t * )src; 42 | return (( uint32_t )( p[0] ) << 0) | 43 | (( uint32_t )( p[1] ) << 8) | 44 | (( uint32_t )( p[2] ) << 16) | 45 | (( uint32_t )( p[3] ) << 24) ; 46 | #endif 47 | } 48 | 49 | static BLAKE2_INLINE uint64_t load64( const void *src ) 50 | { 51 | #if defined(NATIVE_LITTLE_ENDIAN) 52 | uint64_t w; 53 | memcpy(&w, src, sizeof w); 54 | return w; 55 | #else 56 | const uint8_t *p = ( const uint8_t * )src; 57 | return (( uint64_t )( p[0] ) << 0) | 58 | (( uint64_t )( p[1] ) << 8) | 59 | (( uint64_t )( p[2] ) << 16) | 60 | (( uint64_t )( p[3] ) << 24) | 61 | (( uint64_t )( p[4] ) << 32) | 62 | (( uint64_t )( p[5] ) << 40) | 63 | (( uint64_t )( p[6] ) << 48) | 64 | (( uint64_t )( p[7] ) << 56) ; 65 | #endif 66 | } 67 | 68 | static BLAKE2_INLINE uint16_t load16( const void *src ) 69 | { 70 | #if defined(NATIVE_LITTLE_ENDIAN) 71 | uint16_t w; 72 | memcpy(&w, src, sizeof w); 73 | return w; 74 | #else 75 | const uint8_t *p = ( const uint8_t * )src; 76 | return ( uint16_t )((( uint32_t )( p[0] ) << 0) | 77 | (( uint32_t )( p[1] ) << 8)); 78 | #endif 79 | } 80 | 81 | static BLAKE2_INLINE void store16( void *dst, uint16_t w ) 82 | { 83 | #if defined(NATIVE_LITTLE_ENDIAN) 84 | memcpy(dst, &w, sizeof w); 85 | #else 86 | uint8_t *p = ( uint8_t * )dst; 87 | *p++ = ( uint8_t )w; w >>= 8; 88 | *p++ = ( uint8_t )w; 89 | #endif 90 | 
} 91 | 92 | static BLAKE2_INLINE void store32( void *dst, uint32_t w ) 93 | { 94 | #if defined(NATIVE_LITTLE_ENDIAN) 95 | memcpy(dst, &w, sizeof w); 96 | #else 97 | uint8_t *p = ( uint8_t * )dst; 98 | p[0] = (uint8_t)(w >> 0); 99 | p[1] = (uint8_t)(w >> 8); 100 | p[2] = (uint8_t)(w >> 16); 101 | p[3] = (uint8_t)(w >> 24); 102 | #endif 103 | } 104 | 105 | static BLAKE2_INLINE void store64( void *dst, uint64_t w ) 106 | { 107 | #if defined(NATIVE_LITTLE_ENDIAN) 108 | memcpy(dst, &w, sizeof w); 109 | #else 110 | uint8_t *p = ( uint8_t * )dst; 111 | p[0] = (uint8_t)(w >> 0); 112 | p[1] = (uint8_t)(w >> 8); 113 | p[2] = (uint8_t)(w >> 16); 114 | p[3] = (uint8_t)(w >> 24); 115 | p[4] = (uint8_t)(w >> 32); 116 | p[5] = (uint8_t)(w >> 40); 117 | p[6] = (uint8_t)(w >> 48); 118 | p[7] = (uint8_t)(w >> 56); 119 | #endif 120 | } 121 | 122 | static BLAKE2_INLINE uint64_t load48( const void *src ) 123 | { 124 | const uint8_t *p = ( const uint8_t * )src; 125 | return (( uint64_t )( p[0] ) << 0) | 126 | (( uint64_t )( p[1] ) << 8) | 127 | (( uint64_t )( p[2] ) << 16) | 128 | (( uint64_t )( p[3] ) << 24) | 129 | (( uint64_t )( p[4] ) << 32) | 130 | (( uint64_t )( p[5] ) << 40) ; 131 | } 132 | 133 | static BLAKE2_INLINE void store48( void *dst, uint64_t w ) 134 | { 135 | uint8_t *p = ( uint8_t * )dst; 136 | p[0] = (uint8_t)(w >> 0); 137 | p[1] = (uint8_t)(w >> 8); 138 | p[2] = (uint8_t)(w >> 16); 139 | p[3] = (uint8_t)(w >> 24); 140 | p[4] = (uint8_t)(w >> 32); 141 | p[5] = (uint8_t)(w >> 40); 142 | } 143 | 144 | static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c ) 145 | { 146 | return ( w >> c ) | ( w << ( 32 - c ) ); 147 | } 148 | 149 | static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c ) 150 | { 151 | return ( w >> c ) | ( w << ( 64 - c ) ); 152 | } 153 | 154 | #if defined(_MSC_VER) 155 | #define BLAKE2_PACKED(x) __pragma(pack(push, 1)) x __pragma(pack(pop)) 156 | #else 157 | #define BLAKE2_PACKED(x) x __attribute__((packed)) 158 | 
#endif 159 | 160 | enum blake2b_constant 161 | { 162 | BLAKE2B_BLOCKBYTES = 128, 163 | BLAKE2B_OUTBYTES = 64, 164 | BLAKE2B_KEYBYTES = 64, 165 | BLAKE2B_SALTBYTES = 16, 166 | BLAKE2B_PERSONALBYTES = 16 167 | }; 168 | 169 | typedef struct blake2b_state__ 170 | { 171 | uint64_t h[8]; 172 | uint64_t t[2]; 173 | uint64_t f[2]; 174 | uint8_t buf[BLAKE2B_BLOCKBYTES]; 175 | size_t buflen; 176 | size_t outlen; 177 | uint8_t last_node; 178 | } blake2b_state; 179 | 180 | BLAKE2_PACKED(struct blake2b_param__ 181 | { 182 | uint8_t digest_length; /* 1 */ 183 | uint8_t key_length; /* 2 */ 184 | uint8_t fanout; /* 3 */ 185 | uint8_t depth; /* 4 */ 186 | uint32_t leaf_length; /* 8 */ 187 | uint32_t node_offset; /* 12 */ 188 | uint32_t xof_length; /* 16 */ 189 | uint8_t node_depth; /* 17 */ 190 | uint8_t inner_length; /* 18 */ 191 | uint8_t reserved[14]; /* 32 */ 192 | uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */ 193 | uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */ 194 | }); 195 | 196 | typedef struct blake2b_param__ blake2b_param; 197 | -------------------------------------------------------------------------------- /src/blake2b-ref.c: -------------------------------------------------------------------------------- 1 | /* 2 | BLAKE2 reference source code package - reference C implementations 3 | 4 | Copyright 2012, Samuel Neves . You may use this under the 5 | terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at 6 | your option. The terms of these licenses can be found at: 7 | 8 | - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 9 | - OpenSSL license : https://www.openssl.org/source/license.html 10 | - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | More information about the BLAKE2 hash function can be found at 13 | https://blake2.net. 
14 | */ 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | #include "blake2-impl.h" 21 | 22 | static const uint64_t blake2b_IV[8] = 23 | { 24 | 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 25 | 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, 26 | 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, 27 | 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL 28 | }; 29 | 30 | static const uint8_t blake2b_sigma[12][16] = 31 | { 32 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 33 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , 34 | { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , 35 | { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , 36 | { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , 37 | { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , 38 | { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , 39 | { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , 40 | { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , 41 | { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , 42 | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , 43 | { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } 44 | }; 45 | 46 | static int blake2b_update(blake2b_state* S, const void* in, size_t inlen); 47 | 48 | static void blake2b_set_lastnode( blake2b_state *S ) 49 | { 50 | S->f[1] = (uint64_t)-1; 51 | } 52 | 53 | /* Some helper functions, not necessarily useful */ 54 | static int blake2b_is_lastblock( const blake2b_state *S ) 55 | { 56 | return S->f[0] != 0; 57 | } 58 | 59 | static void blake2b_set_lastblock( blake2b_state *S ) 60 | { 61 | if( S->last_node ) blake2b_set_lastnode( S ); 62 | 63 | S->f[0] = (uint64_t)-1; 64 | } 65 | 66 | static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc ) 67 | { 68 | S->t[0] += inc; 69 | S->t[1] += ( S->t[0] < inc ); 70 | } 71 | 72 | static void blake2b_init0( blake2b_state *S ) 73 | { 74 | size_t i; 75 | memset( S, 0, sizeof( blake2b_state ) ); 76 | 
77 | for( i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i]; 78 | } 79 | 80 | /* init xors IV with input parameter block */ 81 | static void blake2b_init_param( blake2b_state *S, const blake2b_param *P ) 82 | { 83 | const uint8_t *p = ( const uint8_t * )( P ); 84 | size_t i; 85 | 86 | blake2b_init0( S ); 87 | 88 | /* IV XOR ParamBlock */ 89 | for( i = 0; i < 8; ++i ) 90 | S->h[i] ^= load64( p + sizeof( S->h[i] ) * i ); 91 | 92 | S->outlen = P->digest_length; 93 | } 94 | 95 | 96 | 97 | static void blake2b_init( blake2b_state *S, size_t outlen ) 98 | { 99 | blake2b_param P[1]; 100 | 101 | P->digest_length = (uint8_t)outlen; 102 | P->key_length = 0; 103 | P->fanout = 1; 104 | P->depth = 1; 105 | store32( &P->leaf_length, 0 ); 106 | store32( &P->node_offset, 0 ); 107 | store32( &P->xof_length, 0 ); 108 | P->node_depth = 0; 109 | P->inner_length = 0; 110 | memset( P->reserved, 0, sizeof( P->reserved ) ); 111 | memset( P->salt, 0, sizeof( P->salt ) ); 112 | memset( P->personal, 0, sizeof( P->personal ) ); 113 | 114 | blake2b_init_param( S, P ); 115 | } 116 | 117 | #define G(r,i,a,b,c,d) \ 118 | do { \ 119 | a = a + b + m[blake2b_sigma[r][2*i+0]]; \ 120 | d = rotr64(d ^ a, 32); \ 121 | c = c + d; \ 122 | b = rotr64(b ^ c, 24); \ 123 | a = a + b + m[blake2b_sigma[r][2*i+1]]; \ 124 | d = rotr64(d ^ a, 16); \ 125 | c = c + d; \ 126 | b = rotr64(b ^ c, 63); \ 127 | } while(0) 128 | 129 | #define ROUND(r) \ 130 | do { \ 131 | G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ 132 | G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ 133 | G(r,2,v[ 2],v[ 6],v[10],v[14]); \ 134 | G(r,3,v[ 3],v[ 7],v[11],v[15]); \ 135 | G(r,4,v[ 0],v[ 5],v[10],v[15]); \ 136 | G(r,5,v[ 1],v[ 6],v[11],v[12]); \ 137 | G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ 138 | G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ 139 | } while(0) 140 | 141 | static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] ) 142 | { 143 | uint64_t m[16]; 144 | uint64_t v[16]; 145 | size_t i; 146 | 147 | for( i = 0; i < 16; ++i ) { 148 | m[i] = load64( block 
+ i * sizeof( m[i] ) ); 149 | } 150 | 151 | for( i = 0; i < 8; ++i ) { 152 | v[i] = S->h[i]; 153 | } 154 | 155 | v[ 8] = blake2b_IV[0]; 156 | v[ 9] = blake2b_IV[1]; 157 | v[10] = blake2b_IV[2]; 158 | v[11] = blake2b_IV[3]; 159 | v[12] = blake2b_IV[4] ^ S->t[0]; 160 | v[13] = blake2b_IV[5] ^ S->t[1]; 161 | v[14] = blake2b_IV[6] ^ S->f[0]; 162 | v[15] = blake2b_IV[7] ^ S->f[1]; 163 | 164 | ROUND( 0 ); 165 | ROUND( 1 ); 166 | ROUND( 2 ); 167 | ROUND( 3 ); 168 | ROUND( 4 ); 169 | ROUND( 5 ); 170 | ROUND( 6 ); 171 | ROUND( 7 ); 172 | ROUND( 8 ); 173 | ROUND( 9 ); 174 | ROUND( 10 ); 175 | ROUND( 11 ); 176 | 177 | for( i = 0; i < 8; ++i ) { 178 | S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; 179 | } 180 | } 181 | 182 | #undef G 183 | #undef ROUND 184 | 185 | static int blake2b_update( blake2b_state *S, const void *pin, size_t inlen ) 186 | { 187 | const unsigned char * in = (const unsigned char *)pin; 188 | if( inlen > 0 ) 189 | { 190 | size_t left = S->buflen; 191 | size_t fill = BLAKE2B_BLOCKBYTES - left; 192 | if( inlen > fill ) 193 | { 194 | S->buflen = 0; 195 | memcpy( S->buf + left, in, fill ); /* Fill buffer */ 196 | blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES ); 197 | blake2b_compress( S, S->buf ); /* Compress */ 198 | in += fill; inlen -= fill; 199 | while(inlen > BLAKE2B_BLOCKBYTES) { 200 | blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); 201 | blake2b_compress( S, in ); 202 | in += BLAKE2B_BLOCKBYTES; 203 | inlen -= BLAKE2B_BLOCKBYTES; 204 | } 205 | } 206 | memcpy( S->buf + S->buflen, in, inlen ); 207 | S->buflen += inlen; 208 | } 209 | return 0; 210 | } 211 | 212 | static int blake2b_final( blake2b_state *S, void *out, size_t outlen ) 213 | { 214 | uint8_t buffer[BLAKE2B_OUTBYTES] = {0}; 215 | size_t i; 216 | 217 | if( out == NULL || outlen < S->outlen ) 218 | return -1; 219 | 220 | if( blake2b_is_lastblock( S ) ) 221 | return -1; 222 | 223 | blake2b_increment_counter( S, S->buflen ); 224 | blake2b_set_lastblock( S ); 225 | memset( S->buf + S->buflen, 0, 
BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */ 226 | blake2b_compress( S, S->buf ); 227 | 228 | for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */ 229 | store64( buffer + sizeof( S->h[i] ) * i, S->h[i] ); 230 | 231 | memcpy( out, buffer, S->outlen ); 232 | 233 | return 0; 234 | } 235 | 236 | /* inlen, at least, should be uint64_t. Others can be size_t. */ 237 | void blake2b( void *out, size_t outlen, const void *in, size_t inlen ) 238 | { 239 | blake2b_state S[1]; 240 | 241 | blake2b_init( S, outlen ); 242 | 243 | blake2b_update( S, ( const uint8_t * )in, inlen ); 244 | blake2b_final( S, out, outlen ); 245 | } 246 | -------------------------------------------------------------------------------- /src/btrfs.h: -------------------------------------------------------------------------------- 1 | /* btrfs.h 2 | * Generic btrfs header file. Thanks to whoever it was who wrote 3 | * https://btrfs.wiki.kernel.org/index.php/On-disk_Format - you saved me a lot of time! 4 | * 5 | * I release this file, and this file only, into the public domain - do whatever 6 | * you want with it. You don't have to, but I'd appreciate if you let me know if you 7 | * use it anything cool - mark@harmstone.com. 
*/ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | static const uint64_t superblock_addrs[] = { 0x10000, 0x4000000, 0x4000000000, 0x4000000000000, 0 }; 14 | 15 | #define BTRFS_MAGIC 0x4d5f53665248425f 16 | #define MAX_LABEL_SIZE 0x100 17 | #define SUBVOL_ROOT_INODE 0x100 18 | 19 | enum class btrfs_key_type : uint8_t { 20 | INODE_ITEM = 0x01, 21 | INODE_REF = 0x0C, 22 | INODE_EXTREF = 0x0D, 23 | XATTR_ITEM = 0x18, 24 | ORPHAN_INODE = 0x30, 25 | DIR_ITEM = 0x54, 26 | DIR_INDEX = 0x60, 27 | EXTENT_DATA = 0x6C, 28 | EXTENT_CSUM = 0x80, 29 | ROOT_ITEM = 0x84, 30 | ROOT_BACKREF = 0x90, 31 | ROOT_REF = 0x9C, 32 | EXTENT_ITEM = 0xA8, 33 | METADATA_ITEM = 0xA9, 34 | TREE_BLOCK_REF = 0xB0, 35 | EXTENT_DATA_REF = 0xB2, 36 | EXTENT_REF_V0 = 0xB4, 37 | SHARED_BLOCK_REF = 0xB6, 38 | SHARED_DATA_REF = 0xB8, 39 | BLOCK_GROUP_ITEM = 0xC0, 40 | FREE_SPACE_INFO = 0xC6, 41 | FREE_SPACE_EXTENT = 0xC7, 42 | FREE_SPACE_BITMAP = 0xC8, 43 | DEV_EXTENT = 0xCC, 44 | DEV_ITEM = 0xD8, 45 | CHUNK_ITEM = 0xE4, 46 | TEMP_ITEM = 0xF8, 47 | DEV_STATS = 0xF9, 48 | SUBVOL_UUID = 0xFB, 49 | SUBVOL_REC_UUID = 0xFC 50 | }; 51 | 52 | #define BTRFS_ROOT_ROOT 1 53 | #define BTRFS_ROOT_EXTENT 2 54 | #define BTRFS_ROOT_CHUNK 3 55 | #define BTRFS_ROOT_DEVTREE 4 56 | #define BTRFS_ROOT_FSTREE 5 57 | #define BTRFS_ROOT_TREEDIR 6 58 | #define BTRFS_ROOT_CHECKSUM 7 59 | #define BTRFS_ROOT_UUID 9 60 | #define BTRFS_ROOT_FREE_SPACE 0xa 61 | #define BTRFS_ROOT_DATA_RELOC 0xFFFFFFFFFFFFFFF7 62 | 63 | enum class btrfs_compression : uint8_t { 64 | none = 0, 65 | zlib = 1, 66 | lzo = 2, 67 | zstd = 3 68 | }; 69 | 70 | #define BTRFS_ENCRYPTION_NONE 0 71 | 72 | #define BTRFS_ENCODING_NONE 0 73 | 74 | enum class btrfs_extent_type : uint8_t { 75 | inline_extent = 0, 76 | regular = 1, 77 | prealloc = 2 78 | }; 79 | 80 | #define BLOCK_FLAG_DATA 0x001 81 | #define BLOCK_FLAG_SYSTEM 0x002 82 | #define BLOCK_FLAG_METADATA 0x004 83 | #define BLOCK_FLAG_RAID0 0x008 84 | #define BLOCK_FLAG_RAID1 0x010 85 | #define 
BLOCK_FLAG_DUPLICATE 0x020 86 | #define BLOCK_FLAG_RAID10 0x040 87 | #define BLOCK_FLAG_RAID5 0x080 88 | #define BLOCK_FLAG_RAID6 0x100 89 | #define BLOCK_FLAG_RAID1C3 0x200 90 | #define BLOCK_FLAG_RAID1C4 0x400 91 | 92 | #define FREE_SPACE_CACHE_ID 0xFFFFFFFFFFFFFFF5 93 | #define EXTENT_CSUM_ID 0xFFFFFFFFFFFFFFF6 94 | #define BALANCE_ITEM_ID 0xFFFFFFFFFFFFFFFC 95 | 96 | #define BTRFS_INODE_NODATASUM 0x001 97 | #define BTRFS_INODE_NODATACOW 0x002 98 | #define BTRFS_INODE_READONLY 0x004 99 | #define BTRFS_INODE_NOCOMPRESS 0x008 100 | #define BTRFS_INODE_PREALLOC 0x010 101 | #define BTRFS_INODE_SYNC 0x020 102 | #define BTRFS_INODE_IMMUTABLE 0x040 103 | #define BTRFS_INODE_APPEND 0x080 104 | #define BTRFS_INODE_NODUMP 0x100 105 | #define BTRFS_INODE_NOATIME 0x200 106 | #define BTRFS_INODE_DIRSYNC 0x400 107 | #define BTRFS_INODE_COMPRESS 0x800 108 | 109 | #define BTRFS_SUBVOL_READONLY 0x1 110 | 111 | #define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE 0x1 112 | #define BTRFS_COMPAT_RO_FLAGS_FREE_SPACE_CACHE_VALID 0x2 113 | 114 | #define BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF 0x0001 115 | #define BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL 0x0002 116 | #define BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS 0x0004 117 | #define BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO 0x0008 118 | #define BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD 0x0010 119 | #define BTRFS_INCOMPAT_FLAGS_BIG_METADATA 0x0020 120 | #define BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF 0x0040 121 | #define BTRFS_INCOMPAT_FLAGS_RAID56 0x0080 122 | #define BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA 0x0100 123 | #define BTRFS_INCOMPAT_FLAGS_NO_HOLES 0x0200 124 | #define BTRFS_INCOMPAT_FLAGS_METADATA_UUID 0x0400 125 | #define BTRFS_INCOMPAT_FLAGS_RAID1C34 0x0800 126 | 127 | #define BTRFS_SUPERBLOCK_FLAGS_SEEDING 0x100000000 128 | 129 | #define BTRFS_ORPHAN_INODE_OBJID 0xFFFFFFFFFFFFFFFB 130 | 131 | enum class btrfs_csum_type : uint16_t { 132 | crc32c = 0, 133 | xxhash = 1, 134 | sha256 = 2, 135 | blake2 = 3 136 | }; 137 | 138 | #pragma pack(push, 1) 139 | 140 | typedef 
struct { 141 | uint8_t uuid[16]; 142 | } BTRFS_UUID; 143 | 144 | typedef struct { 145 | uint64_t obj_id; 146 | btrfs_key_type obj_type; 147 | uint64_t offset; 148 | } KEY; 149 | 150 | #define HEADER_FLAG_WRITTEN 0x000000000000001 151 | #define HEADER_FLAG_SHARED_BACKREF 0x000000000000002 152 | #define HEADER_FLAG_MIXED_BACKREF 0x100000000000000 153 | 154 | typedef struct { 155 | uint8_t csum[32]; 156 | BTRFS_UUID fs_uuid; 157 | uint64_t address; 158 | uint64_t flags; 159 | BTRFS_UUID chunk_tree_uuid; 160 | uint64_t generation; 161 | uint64_t tree_id; 162 | uint32_t num_items; 163 | uint8_t level; 164 | } tree_header; 165 | 166 | typedef struct { 167 | KEY key; 168 | uint32_t offset; 169 | uint32_t size; 170 | } leaf_node; 171 | 172 | typedef struct { 173 | KEY key; 174 | uint64_t address; 175 | uint64_t generation; 176 | } internal_node; 177 | 178 | typedef struct { 179 | uint64_t dev_id; 180 | uint64_t num_bytes; 181 | uint64_t bytes_used; 182 | uint32_t optimal_io_align; 183 | uint32_t optimal_io_width; 184 | uint32_t minimal_io_size; 185 | uint64_t type; 186 | uint64_t generation; 187 | uint64_t start_offset; 188 | uint32_t dev_group; 189 | uint8_t seek_speed; 190 | uint8_t bandwidth; 191 | BTRFS_UUID device_uuid; 192 | BTRFS_UUID fs_uuid; 193 | } DEV_ITEM; 194 | 195 | #define SYS_CHUNK_ARRAY_SIZE 0x800 196 | #define BTRFS_NUM_BACKUP_ROOTS 4 197 | 198 | typedef struct { 199 | uint64_t root_tree_addr; 200 | uint64_t root_tree_generation; 201 | uint64_t chunk_tree_addr; 202 | uint64_t chunk_tree_generation; 203 | uint64_t extent_tree_addr; 204 | uint64_t extent_tree_generation; 205 | uint64_t fs_tree_addr; 206 | uint64_t fs_tree_generation; 207 | uint64_t dev_root_addr; 208 | uint64_t dev_root_generation; 209 | uint64_t csum_root_addr; 210 | uint64_t csum_root_generation; 211 | uint64_t total_bytes; 212 | uint64_t bytes_used; 213 | uint64_t num_devices; 214 | uint64_t reserved[4]; 215 | uint8_t root_level; 216 | uint8_t chunk_root_level; 217 | uint8_t 
extent_root_level; 218 | uint8_t fs_root_level; 219 | uint8_t dev_root_level; 220 | uint8_t csum_root_level; 221 | uint8_t reserved2[10]; 222 | } superblock_backup; 223 | 224 | typedef struct { 225 | uint8_t checksum[32]; 226 | BTRFS_UUID uuid; 227 | uint64_t sb_phys_addr; 228 | uint64_t flags; 229 | uint64_t magic; 230 | uint64_t generation; 231 | uint64_t root_tree_addr; 232 | uint64_t chunk_tree_addr; 233 | uint64_t log_tree_addr; 234 | uint64_t log_root_transid; 235 | uint64_t total_bytes; 236 | uint64_t bytes_used; 237 | uint64_t root_dir_objectid; 238 | uint64_t num_devices; 239 | uint32_t sector_size; 240 | uint32_t node_size; 241 | uint32_t leaf_size; 242 | uint32_t stripe_size; 243 | uint32_t n; 244 | uint64_t chunk_root_generation; 245 | uint64_t compat_flags; 246 | uint64_t compat_ro_flags; 247 | uint64_t incompat_flags; 248 | enum btrfs_csum_type csum_type; 249 | uint8_t root_level; 250 | uint8_t chunk_root_level; 251 | uint8_t log_root_level; 252 | DEV_ITEM dev_item; 253 | char label[MAX_LABEL_SIZE]; 254 | uint64_t cache_generation; 255 | uint64_t uuid_tree_generation; 256 | uint64_t reserved[30]; 257 | uint8_t sys_chunk_array[SYS_CHUNK_ARRAY_SIZE]; 258 | superblock_backup backup[BTRFS_NUM_BACKUP_ROOTS]; 259 | uint8_t reserved2[565]; 260 | } superblock; 261 | 262 | enum class btrfs_inode_type : uint8_t { 263 | unknown = 0, 264 | file = 1, 265 | directory = 2, 266 | chardev = 3, 267 | blockdev = 4, 268 | fifo = 5, 269 | socket = 6, 270 | symlink = 7, 271 | ea = 8 272 | }; 273 | 274 | typedef struct { 275 | KEY key; 276 | uint64_t transid; 277 | uint16_t m; 278 | uint16_t n; 279 | enum btrfs_inode_type type; 280 | char name[1]; 281 | } DIR_ITEM; 282 | 283 | typedef struct { 284 | uint64_t seconds; 285 | uint32_t nanoseconds; 286 | } BTRFS_TIME; 287 | 288 | typedef struct { 289 | uint64_t generation; 290 | uint64_t transid; 291 | uint64_t st_size; 292 | uint64_t st_blocks; 293 | uint64_t block_group; 294 | uint32_t st_nlink; 295 | uint32_t st_uid; 296 | 
uint32_t st_gid; 297 | uint32_t st_mode; 298 | uint64_t st_rdev; 299 | uint64_t flags; 300 | uint64_t sequence; 301 | uint8_t reserved[32]; 302 | BTRFS_TIME st_atime; 303 | BTRFS_TIME st_ctime; 304 | BTRFS_TIME st_mtime; 305 | BTRFS_TIME otime; 306 | } INODE_ITEM; 307 | 308 | typedef struct { 309 | INODE_ITEM inode; 310 | uint64_t generation; 311 | uint64_t objid; 312 | uint64_t block_number; 313 | uint64_t byte_limit; 314 | uint64_t bytes_used; 315 | uint64_t last_snapshot_generation; 316 | uint64_t flags; 317 | uint32_t num_references; 318 | KEY drop_progress; 319 | uint8_t drop_level; 320 | uint8_t root_level; 321 | uint64_t generation2; 322 | BTRFS_UUID uuid; 323 | BTRFS_UUID parent_uuid; 324 | BTRFS_UUID received_uuid; 325 | uint64_t ctransid; 326 | uint64_t otransid; 327 | uint64_t stransid; 328 | uint64_t rtransid; 329 | BTRFS_TIME ctime; 330 | BTRFS_TIME otime; 331 | BTRFS_TIME stime; 332 | BTRFS_TIME rtime; 333 | uint64_t reserved[8]; 334 | } ROOT_ITEM; 335 | 336 | typedef struct { 337 | uint64_t size; 338 | uint64_t root_id; 339 | uint64_t stripe_length; 340 | uint64_t type; 341 | uint32_t opt_io_alignment; 342 | uint32_t opt_io_width; 343 | uint32_t sector_size; 344 | uint16_t num_stripes; 345 | uint16_t sub_stripes; 346 | } CHUNK_ITEM; 347 | 348 | typedef struct { 349 | uint64_t dev_id; 350 | uint64_t offset; 351 | BTRFS_UUID dev_uuid; 352 | } CHUNK_ITEM_STRIPE; 353 | 354 | typedef struct { 355 | uint64_t generation; 356 | uint64_t decoded_size; 357 | enum btrfs_compression compression; 358 | uint8_t encryption; 359 | uint16_t encoding; 360 | enum btrfs_extent_type type; 361 | uint8_t data[1]; 362 | } EXTENT_DATA; 363 | 364 | typedef struct { 365 | uint64_t address; 366 | uint64_t size; 367 | uint64_t offset; 368 | uint64_t num_bytes; 369 | } EXTENT_DATA2; 370 | 371 | typedef struct { 372 | uint64_t index; 373 | uint16_t n; 374 | char name[1]; 375 | } INODE_REF; 376 | 377 | typedef struct { 378 | uint64_t dir; 379 | uint64_t index; 380 | uint16_t n; 381 
| char name[1]; 382 | } INODE_EXTREF; 383 | 384 | #define EXTENT_ITEM_DATA 0x001 385 | #define EXTENT_ITEM_TREE_BLOCK 0x002 386 | #define EXTENT_ITEM_SHARED_BACKREFS 0x100 387 | 388 | typedef struct { 389 | uint64_t refcount; 390 | uint64_t generation; 391 | uint64_t flags; 392 | } EXTENT_ITEM; 393 | 394 | typedef struct { 395 | KEY firstitem; 396 | uint8_t level; 397 | } EXTENT_ITEM2; 398 | 399 | typedef struct { 400 | uint32_t refcount; 401 | } EXTENT_ITEM_V0; 402 | 403 | typedef struct { 404 | EXTENT_ITEM extent_item; 405 | KEY firstitem; 406 | uint8_t level; 407 | } EXTENT_ITEM_TREE; 408 | 409 | typedef struct { 410 | uint64_t offset; 411 | } TREE_BLOCK_REF; 412 | 413 | typedef struct { 414 | uint64_t root; 415 | uint64_t objid; 416 | uint64_t offset; 417 | uint32_t count; 418 | } EXTENT_DATA_REF; 419 | 420 | typedef struct { 421 | uint64_t used; 422 | uint64_t chunk_tree; 423 | uint64_t flags; 424 | } BLOCK_GROUP_ITEM; 425 | 426 | typedef struct { 427 | uint64_t root; 428 | uint64_t gen; 429 | uint64_t objid; 430 | uint32_t count; 431 | } EXTENT_REF_V0; 432 | 433 | typedef struct { 434 | uint64_t offset; 435 | } SHARED_BLOCK_REF; 436 | 437 | typedef struct { 438 | uint64_t offset; 439 | uint32_t count; 440 | } SHARED_DATA_REF; 441 | 442 | static const uint8_t FREE_SPACE_EXTENT = 1; 443 | static const uint8_t FREE_SPACE_BITMAP = 2; 444 | 445 | typedef struct { 446 | uint64_t offset; 447 | uint64_t size; 448 | uint8_t type; 449 | } FREE_SPACE_ENTRY; 450 | 451 | typedef struct { 452 | KEY key; 453 | uint64_t generation; 454 | uint64_t num_entries; 455 | uint64_t num_bitmaps; 456 | } FREE_SPACE_ITEM; 457 | 458 | typedef struct { 459 | uint64_t dir; 460 | uint64_t index; 461 | uint16_t n; 462 | char name[1]; 463 | } ROOT_REF; 464 | 465 | typedef struct { 466 | uint64_t chunktree; 467 | uint64_t objid; 468 | uint64_t address; 469 | uint64_t length; 470 | BTRFS_UUID chunktree_uuid; 471 | } DEV_EXTENT; 472 | 473 | #define BALANCE_FLAGS_DATA 0x1 474 | #define 
BALANCE_FLAGS_SYSTEM 0x2 475 | #define BALANCE_FLAGS_METADATA 0x4 476 | 477 | #define BALANCE_ARGS_FLAGS_PROFILES 0x001 478 | #define BALANCE_ARGS_FLAGS_USAGE 0x002 479 | #define BALANCE_ARGS_FLAGS_DEVID 0x004 480 | #define BALANCE_ARGS_FLAGS_DRANGE 0x008 481 | #define BALANCE_ARGS_FLAGS_VRANGE 0x010 482 | #define BALANCE_ARGS_FLAGS_LIMIT 0x020 483 | #define BALANCE_ARGS_FLAGS_LIMIT_RANGE 0x040 484 | #define BALANCE_ARGS_FLAGS_STRIPES_RANGE 0x080 485 | #define BALANCE_ARGS_FLAGS_CONVERT 0x100 486 | #define BALANCE_ARGS_FLAGS_SOFT 0x200 487 | #define BALANCE_ARGS_FLAGS_USAGE_RANGE 0x400 488 | 489 | typedef struct { 490 | uint64_t profiles; 491 | 492 | union { 493 | uint64_t usage; 494 | struct { 495 | uint32_t usage_start; 496 | uint32_t usage_end; 497 | } s; 498 | } u1; 499 | 500 | uint64_t devid; 501 | uint64_t drange_start; 502 | uint64_t drange_end; 503 | uint64_t vrange_start; 504 | uint64_t vrange_end; 505 | uint64_t convert; 506 | uint64_t flags; 507 | 508 | union { 509 | uint64_t limit; 510 | struct { 511 | uint32_t limit_start; 512 | uint32_t limit_end; 513 | } s; 514 | } u2; 515 | 516 | uint32_t stripes_start; 517 | uint32_t stripes_end; 518 | uint8_t reserved[48]; 519 | } BALANCE_ARGS; 520 | 521 | typedef struct { 522 | uint64_t flags; 523 | BALANCE_ARGS data; 524 | BALANCE_ARGS metadata; 525 | BALANCE_ARGS system; 526 | uint8_t reserved[32]; 527 | } BALANCE_ITEM; 528 | 529 | #define BTRFS_FREE_SPACE_USING_BITMAPS 1 530 | 531 | typedef struct { 532 | uint32_t count; 533 | uint32_t flags; 534 | } FREE_SPACE_INFO; 535 | 536 | #define BTRFS_DEV_STAT_WRITE_ERRORS 0 537 | #define BTRFS_DEV_STAT_READ_ERRORS 1 538 | #define BTRFS_DEV_STAT_FLUSH_ERRORS 2 539 | #define BTRFS_DEV_STAT_CORRUPTION_ERRORS 3 540 | #define BTRFS_DEV_STAT_GENERATION_ERRORS 4 541 | 542 | #define BTRFS_SEND_CMD_SUBVOL 1 543 | #define BTRFS_SEND_CMD_SNAPSHOT 2 544 | #define BTRFS_SEND_CMD_MKFILE 3 545 | #define BTRFS_SEND_CMD_MKDIR 4 546 | #define BTRFS_SEND_CMD_MKNOD 5 547 | #define 
BTRFS_SEND_CMD_MKFIFO 6 548 | #define BTRFS_SEND_CMD_MKSOCK 7 549 | #define BTRFS_SEND_CMD_SYMLINK 8 550 | #define BTRFS_SEND_CMD_RENAME 9 551 | #define BTRFS_SEND_CMD_LINK 10 552 | #define BTRFS_SEND_CMD_UNLINK 11 553 | #define BTRFS_SEND_CMD_RMDIR 12 554 | #define BTRFS_SEND_CMD_SET_XATTR 13 555 | #define BTRFS_SEND_CMD_REMOVE_XATTR 14 556 | #define BTRFS_SEND_CMD_WRITE 15 557 | #define BTRFS_SEND_CMD_CLONE 16 558 | #define BTRFS_SEND_CMD_TRUNCATE 17 559 | #define BTRFS_SEND_CMD_CHMOD 18 560 | #define BTRFS_SEND_CMD_CHOWN 19 561 | #define BTRFS_SEND_CMD_UTIMES 20 562 | #define BTRFS_SEND_CMD_END 21 563 | #define BTRFS_SEND_CMD_UPDATE_EXTENT 22 564 | 565 | #define BTRFS_SEND_TLV_UUID 1 566 | #define BTRFS_SEND_TLV_TRANSID 2 567 | #define BTRFS_SEND_TLV_INODE 3 568 | #define BTRFS_SEND_TLV_SIZE 4 569 | #define BTRFS_SEND_TLV_MODE 5 570 | #define BTRFS_SEND_TLV_UID 6 571 | #define BTRFS_SEND_TLV_GID 7 572 | #define BTRFS_SEND_TLV_RDEV 8 573 | #define BTRFS_SEND_TLV_CTIME 9 574 | #define BTRFS_SEND_TLV_MTIME 10 575 | #define BTRFS_SEND_TLV_ATIME 11 576 | #define BTRFS_SEND_TLV_OTIME 12 577 | #define BTRFS_SEND_TLV_XATTR_NAME 13 578 | #define BTRFS_SEND_TLV_XATTR_DATA 14 579 | #define BTRFS_SEND_TLV_PATH 15 580 | #define BTRFS_SEND_TLV_PATH_TO 16 581 | #define BTRFS_SEND_TLV_PATH_LINK 17 582 | #define BTRFS_SEND_TLV_OFFSET 18 583 | #define BTRFS_SEND_TLV_DATA 19 584 | #define BTRFS_SEND_TLV_CLONE_UUID 20 585 | #define BTRFS_SEND_TLV_CLONE_CTRANSID 21 586 | #define BTRFS_SEND_TLV_CLONE_PATH 22 587 | #define BTRFS_SEND_TLV_CLONE_OFFSET 23 588 | #define BTRFS_SEND_TLV_CLONE_LENGTH 24 589 | 590 | #define BTRFS_SEND_MAGIC "btrfs-stream" 591 | 592 | typedef struct { 593 | uint8_t magic[13]; 594 | uint32_t version; 595 | } btrfs_send_header; 596 | 597 | typedef struct { 598 | uint32_t length; 599 | uint16_t cmd; 600 | uint32_t csum; 601 | } btrfs_send_command; 602 | 603 | typedef struct { 604 | uint16_t type; 605 | uint16_t length; 606 | } btrfs_send_tlv; 607 | 608 | #pragma 
pack(pop) 609 | -------------------------------------------------------------------------------- /src/compress.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2021 2 | * 3 | * This file is part of ntfs2btrfs. 4 | * 5 | * Ntfs2btrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * Ntfs2btrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with Ntfs2btrfs. If not, see . */ 17 | 18 | #include "ntfs2btrfs.h" 19 | 20 | #ifdef WITH_ZLIB 21 | #include 22 | #endif 23 | 24 | #ifdef WITH_LZO 25 | #include 26 | #endif 27 | 28 | #ifdef WITH_ZSTD 29 | #include 30 | #endif 31 | 32 | using namespace std; 33 | 34 | #ifdef WITH_ZLIB 35 | optional zlib_compress(string_view data, uint32_t cluster_size) { 36 | z_stream c_stream; 37 | int ret; 38 | buffer_t out(data.length()); 39 | 40 | c_stream.zalloc = Z_NULL; 41 | c_stream.zfree = Z_NULL; 42 | c_stream.opaque = (voidpf)0; 43 | 44 | ret = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); 45 | 46 | if (ret != Z_OK) 47 | throw formatted_error("deflateInit returned {}", ret); 48 | 49 | c_stream.next_in = (uint8_t*)data.data(); 50 | c_stream.avail_in = (unsigned int)data.length(); 51 | 52 | c_stream.next_out = (uint8_t*)out.data(); 53 | c_stream.avail_out = (unsigned int)out.size(); 54 | 55 | do { 56 | ret = deflate(&c_stream, Z_FINISH); 57 | 58 | if (ret != Z_OK && ret != Z_STREAM_END) { 59 | deflateEnd(&c_stream); 60 | throw formatted_error("deflate returned {}", ret); 61 | } 62 | 
63 | if (c_stream.avail_in == 0 || c_stream.avail_out == 0) 64 | break; 65 | } while (ret != Z_STREAM_END); 66 | 67 | deflateEnd(&c_stream); 68 | 69 | if (c_stream.avail_in > 0) // compressed version would be longer than uncompressed 70 | return nullopt; 71 | 72 | if (c_stream.total_out > data.length() - cluster_size) // space saving less than one sector 73 | return nullopt; 74 | 75 | // round to sector, and zero end 76 | out.resize((c_stream.total_out + cluster_size - 1) & ~(cluster_size - 1), 0); 77 | 78 | return out; 79 | } 80 | #endif 81 | 82 | #ifdef WITH_LZO 83 | static __inline size_t lzo_max_outlen(size_t inlen) { 84 | return inlen + (inlen / 16) + 64 + 3; // formula comes from LZO.FAQ 85 | } 86 | 87 | optional lzo_compress(string_view data, uint32_t cluster_size) { 88 | size_t num_pages; 89 | 90 | num_pages = data.length() / cluster_size; 91 | 92 | // Four-byte overall header 93 | // Another four-byte header page 94 | // Each page has a maximum size of lzo_max_outlen(cluster_size) 95 | // Plus another four bytes for possible padding 96 | buffer_t outbuf(sizeof(uint32_t) + ((lzo_max_outlen(cluster_size) + (2 * sizeof(uint32_t))) * num_pages)); 97 | buffer_t wrkmem(LZO1X_MEM_COMPRESS); 98 | 99 | auto out_size = (uint32_t*)outbuf.data(); 100 | *out_size = sizeof(uint32_t); 101 | 102 | auto in = (lzo_bytep)data.data(); 103 | auto out = (lzo_bytep)(outbuf.data() + (2 * sizeof(uint32_t))); 104 | 105 | for (unsigned int i = 0; i < num_pages; i++) { 106 | auto pagelen = (uint32_t*)(out - sizeof(uint32_t)); 107 | lzo_uint outlen; 108 | 109 | auto ret = lzo1x_1_compress(in, cluster_size, out, &outlen, wrkmem.data()); 110 | if (ret != LZO_E_OK) 111 | throw formatted_error("lzo1x_1_compress returned {}", ret); 112 | 113 | *pagelen = (uint32_t)outlen; 114 | *out_size += (uint32_t)(outlen + sizeof(uint32_t)); 115 | 116 | in += cluster_size; 117 | out += outlen + sizeof(uint32_t); 118 | 119 | // new page needs to start at a 32-bit boundary 120 | if (cluster_size - 
(*out_size % cluster_size) < sizeof(uint32_t)) { 121 | memset(out, 0, cluster_size - (*out_size % cluster_size)); 122 | out += cluster_size - (*out_size % cluster_size); 123 | *out_size += cluster_size - (*out_size % cluster_size); 124 | } 125 | 126 | if (*out_size >= data.length()) 127 | return nullopt; 128 | } 129 | 130 | outbuf.resize(*out_size); 131 | 132 | if (outbuf.size() > data.length() - cluster_size) 133 | return nullopt; 134 | 135 | outbuf.resize((outbuf.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0); 136 | 137 | return outbuf; 138 | } 139 | #endif 140 | 141 | #ifdef WITH_ZSTD 142 | optional zstd_compress(string_view data, uint32_t cluster_size) { 143 | buffer_t out(ZSTD_compressBound(data.length())); 144 | 145 | auto ret = ZSTD_compress(out.data(), out.size(), data.data(), data.length(), 1); 146 | if (ZSTD_isError(ret)) 147 | throw formatted_error("ZSTD_compress returned {}", ret); 148 | 149 | if (ret > data.length() - cluster_size) 150 | return nullopt; 151 | 152 | out.resize(ret); 153 | out.resize((out.size() + cluster_size - 1) & ~((uint64_t)cluster_size - 1), 0); 154 | 155 | return out; 156 | } 157 | #endif 158 | -------------------------------------------------------------------------------- /src/config.h.in: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define PROJECT_VER "@PROJECT_VERSION@" 4 | #cmakedefine WITH_ZLIB 1 5 | #cmakedefine WITH_LZO 1 6 | #cmakedefine WITH_ZSTD 1 7 | -------------------------------------------------------------------------------- /src/crc32c-gas.S: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2020 2 | * 3 | * This file is part of WinBtrfs. 
4 | * 5 | * WinBtrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Lesser General Public Licence as published by 7 | * the Free Software Foundation, either version 3 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * WinBtrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Lesser General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public Licence 16 | * along with WinBtrfs. If not, see . */

#ifdef __i386__

.intel_syntax noprefix

#ifdef __MINGW32__
.extern _crctable
.global _calc_crc32c_sw@12
.global _calc_crc32c_hw@12
#else
.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw
#endif

/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
/* Table-driven CRC32C, one byte per iteration. Arguments are on the stack
 * and the callee pops them (ret 12). */

#ifdef __MINGW32__
_calc_crc32c_sw@12:
#else
calc_crc32c_sw:
#endif

    push ebp
    mov ebp, esp

    push esi
    push ebx

    mov eax, [ebp+8]
    mov edx, [ebp+12]
    mov ebx, [ebp+16]

/* eax = crc / seed
 * ebx = len
 * esi = tmp
 * edx = buf
 * ecx = tmp2 */

crcloop:
    test ebx, ebx
    jz crcend

    mov esi, eax
    shr esi, 8
    mov cl, byte ptr [edx]
    xor al, cl
    and eax, 255
    shl eax, 2

#ifdef __MINGW32__
    mov eax, [_crctable + eax]
#else
    mov eax, [crctable + eax]
#endif

    xor eax, esi

    inc edx
    dec ebx

    jmp crcloop

crcend:
    pop ebx
    pop esi

    pop ebp

    ret 12

/****************************************************/

/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
/* Same contract as calc_crc32c_sw, using the crc32 instruction on
 * dword / word / byte pieces. */

#ifdef __MINGW32__
_calc_crc32c_hw@12:
#else
calc_crc32c_hw:
#endif

    push ebp
    mov ebp, esp

    mov eax, [ebp+8]
    mov edx, [ebp+12]
    mov ecx, [ebp+16]

/* eax = crc / seed
 * ecx = len
 * edx = buf */

crchw_loop:
    cmp ecx, 4
    jl crchw_stragglers

    crc32 eax, dword ptr [edx]

    add edx, 4
    sub ecx, 4
    jmp crchw_loop

crchw_stragglers:
    cmp ecx, 2
    jl crchw_stragglers2

    crc32 eax, word ptr [edx]

    add edx, 2
    sub ecx, 2

crchw_stragglers2:
    test ecx, ecx
    jz crchw_end

    crc32 eax, byte ptr [edx]
    inc edx
    dec ecx
    jmp crchw_stragglers2

crchw_end:
    pop ebp

    ret 12

#elif defined(__x86_64__)

.intel_syntax noprefix

.extern crctable
.global calc_crc32c_sw
.global calc_crc32c_hw

/* uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
/* Arguments arrive in rcx (seed), rdx (msg) and r8 (msglen). seed and msglen
 * are 32-bit values, so only ecx and r8d are meaningful: both are
 * zero-extended before any 64-bit use. */

calc_crc32c_sw:

/* rax = crc / seed
 * rdx = buf
 * r8 = len
 * rcx = tmp
 * r10 = tmp2
 * r11 = crctable */

    lea r11, [rip + crctable]
    mov eax, ecx            /* writing eax clears the upper half of rax */
    mov r8d, r8d            /* likewise for the length */

crcloop:
    test r8, r8
    jz crcend

    mov rcx, rax
    shr rcx, 8
    mov r10b, byte ptr [rdx]
    xor al, r10b
    and rax, 255
    shl rax, 2
    mov eax, [r11 + rax]
    xor rax, rcx

    inc rdx
    dec r8

    jmp crcloop

crcend:
    ret

/****************************************************/

/* uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen); */
/* Same register contract as calc_crc32c_sw above. */

calc_crc32c_hw:

/* rax = crc / seed
 * rdx = buf
 * r8 = len */

    mov eax, ecx            /* zero-extend the 32-bit seed */
    mov r8d, r8d            /* zero-extend the 32-bit length */

crchw_loop:
    cmp r8, 8
    jl crchw_stragglers

    crc32 rax, qword ptr [rdx]

    add rdx, 8
    sub r8, 8
    jmp crchw_loop

crchw_stragglers:
    cmp r8, 4
    jl crchw_stragglers2

    crc32 eax, dword ptr [rdx]

    add rdx, 4
    sub r8, 4

crchw_stragglers2:
    cmp r8, 2
    jl crchw_stragglers3

    crc32 eax, word ptr [rdx]

    add rdx, 2
    sub r8, 2

crchw_stragglers3:
    test r8, r8
    jz crchw_end

    crc32 eax, byte ptr [rdx]
    inc rdx
    dec r8
    jmp crchw_stragglers3

crchw_end:
    ret

#endif

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
-------------------------------------------------------------------------------- /src/crc32c-masm.asm: -------------------------------------------------------------------------------- 1 | ; Copyright (c) Mark Harmstone 2020 2 | ; 3 | ; This file is part of WinBtrfs. 4 | ; 5 | ; WinBtrfs is free software: you can redistribute it and/or modify 6 | ; it under the terms of the GNU Lesser General Public Licence as published by 7 | ; the Free Software Foundation, either version 3 of the Licence, or 8 | ; (at your option) any later version. 9 | ; 10 | ; WinBtrfs is distributed in the hope that it will be useful, 11 | ; but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | ; GNU Lesser General Public Licence for more details. 14 | ; 15 | ; You should have received a copy of the GNU Lesser General Public Licence 16 | ; along with WinBtrfs. If not, see .

IFDEF RAX
ELSE
.686P
ENDIF

_TEXT SEGMENT

IFDEF RAX

EXTERN crctable:qword

PUBLIC calc_crc32c_sw

; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
;
; Arguments arrive in rcx (seed), rdx (msg) and r8 (msglen). seed and msglen
; are 32-bit values, so only ecx and r8d are meaningful: both are
; zero-extended before any 64-bit use.

calc_crc32c_sw:

; rax = crc / seed
; rdx = buf
; r8 = len
; rcx = tmp
; r10 = tmp2

    mov eax, ecx            ; writing eax clears the upper half of rax
    mov r8d, r8d            ; likewise for the length

crcloop:
    test r8, r8
    jz crcend

    mov rcx, rax
    shr rcx, 8
    mov r10b, byte ptr [rdx]
    xor al, r10b
    and rax, 255
    shl rax, 2
    mov r10, offset crctable
    mov eax, dword ptr [r10 + rax]
    xor rax, rcx

    inc rdx
    dec r8

    jmp crcloop

crcend:
    ret

; ****************************************************

; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);
;
; Same register contract as calc_crc32c_sw above.

PUBLIC calc_crc32c_hw

calc_crc32c_hw:

; rax = crc / seed
; rdx = buf
; r8 = len

    mov eax, ecx            ; zero-extend the 32-bit seed
    mov r8d, r8d            ; zero-extend the 32-bit length

crchw_loop:
    cmp r8, 8
    jl crchw_stragglers

    crc32 rax, qword ptr [rdx]

    add rdx, 8
    sub r8, 8
    jmp crchw_loop

crchw_stragglers:
    cmp r8, 4
    jl crchw_stragglers2

    crc32 eax, dword ptr [rdx]

    add rdx, 4
    sub r8, 4

crchw_stragglers2:
    cmp r8, 2
    jl crchw_stragglers3

    crc32 eax, word ptr [rdx]

    add rdx, 2
    sub r8, 2

crchw_stragglers3:
    test r8, r8
    jz crchw_end

    crc32 eax, byte ptr [rdx]
    inc rdx
    dec r8
    jmp crchw_stragglers3

crchw_end:
    ret

ELSE

EXTERN _crctable:ABS

; uint32_t __stdcall calc_crc32c_sw(uint32_t seed, uint8_t* msg, uint32_t msglen);
;
; 32-bit stdcall: arguments are on the stack and the callee pops them (ret 12).

PUBLIC _calc_crc32c_sw@12

_calc_crc32c_sw@12:

    push ebp
    mov ebp, esp

    push esi
    push ebx

    mov eax, [ebp+8]
    mov edx, [ebp+12]
    mov ebx, [ebp+16]

; eax = crc / seed
; ebx = len
; esi = tmp
; edx = buf
; ecx = tmp2

crcloop:
    test ebx, ebx
    jz crcend

    mov esi, eax
    shr esi, 8
    mov cl, byte ptr [edx]
    xor al, cl
    and eax, 255
    shl eax, 2
    mov eax, [_crctable + eax]
    xor eax, esi

    inc edx
    dec ebx

    jmp crcloop

crcend:
    pop ebx
    pop esi

    pop ebp

    ret 12

; ****************************************************

; uint32_t __stdcall calc_crc32c_hw(uint32_t seed, uint8_t* msg, uint32_t msglen);

PUBLIC _calc_crc32c_hw@12

_calc_crc32c_hw@12:

    push ebp
    mov ebp, esp

    mov eax, [ebp+8]
    mov edx, [ebp+12]
    mov ecx, [ebp+16]

; eax = crc / seed
; ecx = len
; edx = buf

crchw_loop:
    cmp ecx, 4
    jl crchw_stragglers

    crc32 eax, dword ptr [edx]

    add edx, 4
    sub ecx, 4
    jmp crchw_loop

crchw_stragglers:
    cmp ecx, 2
    jl crchw_stragglers2

    crc32 eax, word ptr [edx]

    add edx, 2
    sub ecx, 2

crchw_stragglers2:
    test ecx, ecx
    jz crchw_end

    crc32 eax, byte ptr [edx]
    inc edx
    dec ecx
    jmp crchw_stragglers2

crchw_end:
    pop ebp

    ret 12

ENDIF

_TEXT ENDS

end
-------------------------------------------------------------------------------- /src/crc32c.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2016-17 2 | * 3 | * This file is part of WinBtrfs.
4 | * 5 | * WinBtrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU Lesser General Public Licence as published by 7 | * the Free Software Foundation, either version 3 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * WinBtrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU Lesser General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU Lesser General Public Licence 16 | * along with WinBtrfs. If not, see . */ 17 | 18 | #include "crc32c.h" 19 | #include 20 | #include 21 | 22 | crc_func calc_crc32c = calc_crc32c_sw; 23 | 24 | #ifdef __cplusplus 25 | extern "C" 26 | { 27 | #endif 28 | 29 | const uint32_t crctable[] = { 30 | 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, 31 | 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, 32 | 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, 33 | 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, 34 | 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, 35 | 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, 36 | 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, 37 | 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, 38 | 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, 39 | 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, 40 | 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, 41 
| 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, 42 | 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, 43 | 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, 44 | 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, 45 | 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, 46 | 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, 47 | 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, 48 | 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, 49 | 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, 50 | 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, 51 | 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, 52 | 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, 53 | 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, 54 | 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, 55 | 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, 56 | 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, 57 | 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, 58 | 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, 59 | 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, 60 | 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 
0xd5cf889d, 0x27a40b9e, 61 | 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, 62 | }; 63 | 64 | // x86 and amd64 versions live in asm files 65 | #if !defined(__i386__) && !defined(__x86_64__) && !defined(_M_IX86) && !defined(_M_X64) 66 | uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen) { 67 | uint32_t rem = seed; 68 | 69 | for (uint32_t i = 0; i < msglen; i++) { 70 | rem = crctable[(rem ^ msg[i]) & 0xff] ^ (rem >> 8); 71 | } 72 | 73 | return rem; 74 | } 75 | #endif 76 | 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | -------------------------------------------------------------------------------- /src/crc32c.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2020 2 | * 3 | * This file is part of ntfs2btrfs. 4 | * 5 | * Ntfs2btrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * Ntfs2btrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with Ntfs2btrfs. If not, see . 
*/ 17 | 18 | #pragma once 19 | 20 | #include 21 | 22 | #ifndef _WIN32 23 | #ifdef __i386__ 24 | #define __stdcall __attribute__((stdcall)) 25 | #elif defined(__x86_64__) 26 | #define __stdcall __attribute__((ms_abi)) 27 | #else 28 | #define __stdcall 29 | #endif 30 | #endif 31 | 32 | #ifdef __cplusplus 33 | extern "C" 34 | { 35 | #endif 36 | 37 | #if defined(__i386__) || defined(__x86_64__) 38 | uint32_t __stdcall calc_crc32c_hw(uint32_t seed, const uint8_t* msg, uint32_t msglen); 39 | #endif 40 | 41 | uint32_t __stdcall calc_crc32c_sw(uint32_t seed, const uint8_t* msg, uint32_t msglen); 42 | 43 | typedef uint32_t (__stdcall *crc_func)(uint32_t seed, const uint8_t* msg, uint32_t msglen); 44 | 45 | extern crc_func calc_crc32c; 46 | 47 | #ifdef __cplusplus 48 | } 49 | #endif 50 | -------------------------------------------------------------------------------- /src/decomp.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2020 2 | * 3 | * This file is part of ntfs2btrfs. 4 | * 5 | * Ntfs2btrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * Ntfs2btrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with Ntfs2btrfs. If not, see . 
*/ 17 | 18 | #include "ntfs2btrfs.h" 19 | #include "ebiggers/system_compression.h" 20 | 21 | #define LZX_CHUNK_SIZE 32768 22 | 23 | using namespace std; 24 | 25 | static buffer_t lznt1_decompress_chunk(string_view data) { 26 | buffer_t s; 27 | 28 | while (!data.empty()) { 29 | auto fg = (uint8_t)data[0]; 30 | 31 | data = data.substr(1); 32 | 33 | if (fg == 0) { 34 | if (data.length() < 8) { 35 | s.insert(s.end(), data.begin(), data.end()); 36 | 37 | return s; 38 | } else { 39 | s.insert(s.end(), data.begin(), data.begin() + 8); 40 | data = data.substr(8); 41 | } 42 | } else { 43 | for (unsigned int i = 0; i < 8; i++) { 44 | if (data.empty()) 45 | return s; 46 | 47 | if (!(fg & 1)) { 48 | s.insert(s.end(), data.begin(), data.begin() + 1); 49 | data = data.substr(1); 50 | } else { 51 | if (data.length() < sizeof(uint16_t)) 52 | throw formatted_error("Compressed chunk was {} bytes, expected at least 2.", data.length()); 53 | 54 | // See https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-xca/90fc6a28-f627-4ee5-82ce-445a6cf98b22 55 | 56 | auto v = *(uint16_t*)data.data(); 57 | 58 | data = data.substr(2); 59 | 60 | // Shamelessly stolen from https://github.com/you0708/lznt1 - thank you! 
61 | 62 | uint64_t u = s.size() - 1; 63 | uint64_t lm = 0xfff; 64 | uint64_t os = 12; 65 | 66 | while (u >= 0x10) { 67 | lm >>= 1; 68 | os--; 69 | u >>= 1; 70 | } 71 | 72 | auto l = (v & lm) + 3; 73 | auto d = (v >> os) + 1; 74 | 75 | s.reserve((uint32_t)(s.size() + l)); 76 | 77 | while (l > 0) { 78 | s.resize(s.size() + 1); 79 | s[s.size() - 1] = s[s.size() - d - 1]; 80 | l--; 81 | } 82 | } 83 | 84 | fg >>= 1; 85 | } 86 | } 87 | } 88 | 89 | return s; 90 | } 91 | 92 | buffer_t lznt1_decompress(string_view compdata, uint32_t size) { 93 | buffer_t ret(size); 94 | uint8_t* ptr; 95 | 96 | memset(ret.data(), 0, ret.size()); 97 | 98 | ptr = ret.data(); 99 | 100 | while (true) { 101 | if (compdata.length() < sizeof(uint16_t)) 102 | throw formatted_error("compdata was {} bytes, expected at least 2.", compdata.length()); 103 | 104 | auto h = *(uint16_t*)compdata.data(); 105 | 106 | if (h == 0) 107 | return ret; 108 | 109 | compdata = compdata.substr(2); 110 | 111 | auto sig = (h & 0x7000) >> 12; 112 | 113 | if (sig != 3) 114 | throw formatted_error("Compression signature was {}, expected 3.", sig); 115 | 116 | auto len = (uint32_t)(((uint64_t)h & 0xfff) + 1); 117 | 118 | if (compdata.length() < len) 119 | throw formatted_error("compdata was {} bytes, expected at least {}.", compdata.length(), len); 120 | 121 | auto data = string_view(compdata.data(), len); 122 | 123 | compdata = compdata.substr(len); 124 | 125 | if (h & 0x8000) { 126 | auto c = lznt1_decompress_chunk(data); 127 | 128 | if (ptr + c.size() >= ret.data() + size) { 129 | memcpy(ptr, c.data(), size - (ptr - ret.data())); 130 | 131 | return ret; 132 | } else { 133 | memcpy(ptr, c.data(), c.size()); 134 | ptr += c.size(); 135 | } 136 | } else { 137 | if (ptr + data.length() >= ret.data() + size) { 138 | memcpy(ptr, data.data(), size - (ptr - ret.data())); 139 | 140 | return ret; 141 | } else { 142 | memcpy(ptr, data.data(), data.length()); 143 | ptr += data.length(); 144 | } 145 | } 146 | } 147 | 148 | return ret; 
149 | } 150 | 151 | buffer_t do_lzx_decompress(string_view compdata, uint32_t size) { 152 | auto ctx = lzx_allocate_decompressor(LZX_CHUNK_SIZE); 153 | 154 | if (!ctx) 155 | throw formatted_error("lzx_allocate_decompressor returned NULL."); 156 | 157 | uint64_t num_chunks = (size + LZX_CHUNK_SIZE - 1) / LZX_CHUNK_SIZE; 158 | auto offsets = (uint32_t*)compdata.data(); 159 | 160 | buffer_t ret(size); 161 | 162 | auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)), 163 | (uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t)))); 164 | 165 | for (uint64_t i = 0; i < num_chunks; i++) { 166 | uint64_t off = i == 0 ? 0 : offsets[i - 1]; 167 | uint32_t complen; 168 | 169 | if (i == 0) 170 | complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length(); 171 | else if (i == num_chunks - 1) 172 | complen = (uint32_t)data.length() - offsets[i - 1]; 173 | else 174 | complen = offsets[i] - offsets[i - 1]; 175 | 176 | if (complen == (i == num_chunks - 1 ? (ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE)) { 177 | // stored uncompressed 178 | memcpy(ret.data() + (i * LZX_CHUNK_SIZE), data.data() + off, complen); 179 | } else { 180 | auto err = lzx_decompress(ctx, data.data() + off, complen, ret.data() + (i * LZX_CHUNK_SIZE), 181 | (uint32_t)(i == num_chunks - 1 ? 
(ret.size() - (i * LZX_CHUNK_SIZE)) : LZX_CHUNK_SIZE)); 182 | 183 | if (err != 0) { 184 | lzx_free_decompressor(ctx); 185 | throw formatted_error("lzx_decompress returned {}.", err); 186 | } 187 | } 188 | } 189 | 190 | lzx_free_decompressor(ctx); 191 | 192 | return ret; 193 | } 194 | 195 | buffer_t do_xpress_decompress(string_view compdata, uint32_t size, uint32_t chunk_size) { 196 | auto ctx = xpress_allocate_decompressor(); 197 | 198 | if (!ctx) 199 | throw formatted_error("xpress_allocate_decompressor returned NULL."); 200 | 201 | uint64_t num_chunks = (size + chunk_size - 1) / chunk_size; 202 | auto offsets = (uint32_t*)compdata.data(); 203 | 204 | buffer_t ret(size); 205 | 206 | auto data = string_view(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)), 207 | (uint32_t)(compdata.length() - ((num_chunks - 1) * sizeof(uint32_t)))); 208 | 209 | for (uint64_t i = 0; i < num_chunks; i++) { 210 | uint64_t off = i == 0 ? 0 : offsets[i - 1]; 211 | uint32_t complen; 212 | 213 | if (i == 0) 214 | complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.length(); 215 | else if (i == num_chunks - 1) 216 | complen = (uint32_t)data.length() - offsets[i - 1]; 217 | else 218 | complen = offsets[i] - offsets[i - 1]; 219 | 220 | if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) { 221 | // stored uncompressed 222 | memcpy(ret.data() + (i * chunk_size), data.data() + off, complen); 223 | } else { 224 | auto err = xpress_decompress(ctx, data.data() + off, complen, ret.data() + (i * chunk_size), 225 | (size_t)(i == num_chunks - 1 ? 
(ret.size() - (i * chunk_size)) : chunk_size)); 226 | 227 | if (err != 0) { 228 | xpress_free_decompressor(ctx); 229 | throw formatted_error("xpress_decompress returned {}.", err); 230 | } 231 | } 232 | } 233 | 234 | xpress_free_decompressor(ctx); 235 | 236 | return ret; 237 | } 238 | -------------------------------------------------------------------------------- /src/ebiggers/aligned_malloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * aligned_malloc.c - aligned memory allocation 3 | * 4 | * This file provides portable aligned memory allocation functions that only use 5 | * malloc() and free(). This avoids portability problems with posix_memalign(), 6 | * aligned_alloc(), etc. 7 | */ 8 | 9 | #include 10 | 11 | #include "common_defs.h" 12 | 13 | void * 14 | aligned_malloc(size_t size, size_t alignment) 15 | { 16 | const uintptr_t mask = alignment - 1; 17 | char *ptr = NULL; 18 | char *raw_ptr; 19 | 20 | raw_ptr = malloc(mask + sizeof(size_t) + size); 21 | if (raw_ptr) { 22 | ptr = (char *)raw_ptr + sizeof(size_t); 23 | ptr = (void *)(((uintptr_t)ptr + mask) & ~mask); 24 | *((size_t *)ptr - 1) = ptr - raw_ptr; 25 | } 26 | return ptr; 27 | } 28 | 29 | void 30 | aligned_free(void *ptr) 31 | { 32 | if (ptr) 33 | free((char *)ptr - *((size_t *)ptr - 1)); 34 | } 35 | -------------------------------------------------------------------------------- /src/ebiggers/common_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_DEFS_H 2 | #define _COMMON_DEFS_H 3 | 4 | // #include 5 | // #include 6 | #include 7 | 8 | typedef uint8_t u8; 9 | typedef uint16_t u16; 10 | typedef uint32_t u32; 11 | typedef uint64_t u64; 12 | typedef int32_t s32; 13 | 14 | /* ========================================================================== */ 15 | /* Type definitions */ 16 | /* ========================================================================== */ 17 | 18 | /* 19 | * Type of a 
machine word. 'unsigned long' would be logical, but that is only 20 | * 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best 21 | * we can do without a bunch of #ifdefs appears to be 'size_t'. 22 | */ 23 | typedef size_t machine_word_t; 24 | 25 | #define WORDBYTES sizeof(machine_word_t) 26 | #define WORDBITS (8 * WORDBYTES) 27 | 28 | /* ========================================================================== */ 29 | /* Compiler-specific definitions */ 30 | /* ========================================================================== */ 31 | 32 | #ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */ 33 | # define forceinline inline __attribute__((always_inline)) 34 | # define likely(expr) __builtin_expect(!!(expr), 1) 35 | # define unlikely(expr) __builtin_expect(!!(expr), 0) 36 | # define _aligned_attribute(n) __attribute__((aligned(n))) 37 | # define bsr32(n) (31 - __builtin_clz(n)) 38 | # define bsr64(n) (63 - __builtin_clzll(n)) 39 | # define bsf32(n) __builtin_ctz(n) 40 | # define bsf64(n) __builtin_ctzll(n) 41 | # ifndef min 42 | # define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \ 43 | (_a < _b) ? _a : _b; }) 44 | # endif 45 | # ifndef max 46 | # define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \ 47 | (_a > _b) ? _a : _b; }) 48 | # endif 49 | 50 | # define DEFINE_UNALIGNED_TYPE(type) \ 51 | struct type##_unaligned { \ 52 | type v; \ 53 | } __attribute__((packed)); \ 54 | \ 55 | static inline type \ 56 | load_##type##_unaligned(const void *p) \ 57 | { \ 58 | return ((const struct type##_unaligned *)p)->v; \ 59 | } \ 60 | \ 61 | static inline void \ 62 | store_##type##_unaligned(type val, void *p) \ 63 | { \ 64 | ((struct type##_unaligned *)p)->v = val; \ 65 | } 66 | 67 | #endif /* __GNUC__ */ 68 | 69 | /* Declare that the annotated function should always be inlined. This might be 70 | * desirable in highly tuned code, e.g. 
compression codecs */ 71 | #ifndef forceinline 72 | # define forceinline inline 73 | #endif 74 | 75 | /* Hint that the expression is usually true */ 76 | #ifndef likely 77 | # define likely(expr) (expr) 78 | #endif 79 | 80 | /* Hint that the expression is usually false */ 81 | #ifndef unlikely 82 | # define unlikely(expr) (expr) 83 | #endif 84 | 85 | /* Declare that the annotated variable, or variables of the annotated type, are 86 | * to be aligned on n-byte boundaries */ 87 | #ifndef _aligned_attribute 88 | # define _aligned_attribute(n) 89 | #endif 90 | 91 | /* min() and max() macros */ 92 | #ifndef min 93 | # define min(a, b) ((a) < (b) ? (a) : (b)) 94 | #endif 95 | #ifndef max 96 | # define max(a, b) ((a) > (b) ? (a) : (b)) 97 | #endif 98 | 99 | /* STATIC_ASSERT() - verify the truth of an expression at compilation time */ 100 | #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) 101 | 102 | /* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time 103 | * and also produce a result of value '0' to be used in constant expressions */ 104 | #define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[-!(expr)])) 105 | 106 | /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses 107 | * can be performed efficiently on the target platform. */ 108 | #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED) 109 | # define UNALIGNED_ACCESS_IS_FAST 1 110 | #else 111 | # define UNALIGNED_ACCESS_IS_FAST 0 112 | #endif 113 | 114 | /* 115 | * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type', 116 | * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions 117 | * which load and store variables of type 'type' from/to unaligned memory 118 | * addresses. 119 | */ 120 | #ifndef DEFINE_UNALIGNED_TYPE 121 | 122 | #include 123 | /* 124 | * Although memcpy() may seem inefficient, it *usually* gets optimized 125 | * appropriately by modern compilers. 
It's portable and may be the best we can 126 | * do for a fallback... 127 | */ 128 | #define DEFINE_UNALIGNED_TYPE(type) \ 129 | \ 130 | static forceinline type \ 131 | load_##type##_unaligned(const void *p) \ 132 | { \ 133 | type v; \ 134 | memcpy(&v, p, sizeof(v)); \ 135 | return v; \ 136 | } \ 137 | \ 138 | static forceinline void \ 139 | store_##type##_unaligned(type v, void *p) \ 140 | { \ 141 | memcpy(p, &v, sizeof(v)); \ 142 | } 143 | 144 | #endif /* !DEFINE_UNALIGNED_TYPE */ 145 | 146 | 147 | /* ========================================================================== */ 148 | /* Unaligned memory accesses */ 149 | /* ========================================================================== */ 150 | 151 | #define load_word_unaligned load_machine_word_t_unaligned 152 | #define store_word_unaligned store_machine_word_t_unaligned 153 | 154 | /* ========================================================================== */ 155 | /* Bit scan functions */ 156 | /* ========================================================================== */ 157 | 158 | /* 159 | * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least 160 | * significant end) of the *most* significant 1 bit in the input value. The 161 | * input value must be nonzero! 
162 | */ 163 | 164 | #ifndef bsr32 165 | static forceinline unsigned 166 | bsr32(u32 v) 167 | { 168 | unsigned bit = 0; 169 | while ((v >>= 1) != 0) 170 | bit++; 171 | return bit; 172 | } 173 | #endif 174 | 175 | #ifndef bsr64 176 | static forceinline unsigned 177 | bsr64(u64 v) 178 | { 179 | unsigned bit = 0; 180 | while ((v >>= 1) != 0) 181 | bit++; 182 | return bit; 183 | } 184 | #endif 185 | 186 | static forceinline unsigned 187 | bsrw(machine_word_t v) 188 | { 189 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 190 | if (WORDBITS == 32) 191 | return bsr32(v); 192 | else 193 | return bsr64(v); 194 | } 195 | 196 | /* 197 | * Bit Scan Forward (BSF) - find the 0-based index (relative to the least 198 | * significant end) of the *least* significant 1 bit in the input value. The 199 | * input value must be nonzero! 200 | */ 201 | 202 | #ifndef bsf32 203 | static forceinline unsigned 204 | bsf32(u32 v) 205 | { 206 | unsigned bit; 207 | for (bit = 0; !(v & 1); bit++, v >>= 1) 208 | ; 209 | return bit; 210 | } 211 | #endif 212 | 213 | #ifndef bsf64 214 | static forceinline unsigned 215 | bsf64(u64 v) 216 | { 217 | unsigned bit; 218 | for (bit = 0; !(v & 1); bit++, v >>= 1) 219 | ; 220 | return bit; 221 | } 222 | #endif 223 | 224 | static forceinline unsigned 225 | bsfw(machine_word_t v) 226 | { 227 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 228 | if (WORDBITS == 32) 229 | return bsf32(v); 230 | else 231 | return bsf64(v); 232 | } 233 | 234 | /* Return the log base 2 of 'n', rounded up to the nearest integer. 
*/ 235 | static forceinline unsigned 236 | ilog2_ceil(size_t n) 237 | { 238 | if (n <= 1) 239 | return 0; 240 | return 1 + bsrw(n - 1); 241 | } 242 | 243 | /* ========================================================================== */ 244 | /* Aligned memory allocation */ 245 | /* ========================================================================== */ 246 | 247 | extern void *aligned_malloc(size_t size, size_t alignment); 248 | extern void aligned_free(void *ptr); 249 | 250 | #endif /* _COMMON_DEFS_H */ 251 | -------------------------------------------------------------------------------- /src/ebiggers/decompress_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * decompress_common.c 3 | * 4 | * Code for decompression shared among multiple compression formats. 5 | * 6 | * The following copying information applies to this specific source code file: 7 | * 8 | * Written in 2012-2016 by Eric Biggers 9 | * 10 | * To the extent possible under law, the author(s) have dedicated all copyright 11 | * and related and neighboring rights to this software to the public domain 12 | * worldwide via the Creative Commons Zero 1.0 Universal Public Domain 13 | * Dedication (the "CC0"). 14 | * 15 | * This software is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the CC0 for more details. 18 | * 19 | * You should have received a copy of the CC0 along with this software; if not 20 | * see . 
21 | */ 22 | 23 | #ifdef HAVE_CONFIG_H 24 | # include "config.h" 25 | #endif 26 | 27 | #include 28 | 29 | #ifdef __SSE2__ 30 | # include 31 | #endif 32 | 33 | #include "decompress_common.h" 34 | 35 | /* 36 | * make_huffman_decode_table() - 37 | * 38 | * Given an alphabet of symbols and the length of each symbol's codeword in a 39 | * canonical prefix code, build a table for quickly decoding symbols that were 40 | * encoded with that code. 41 | * 42 | * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols 43 | * such that no whole codeword is a prefix of any other. A prefix code might be 44 | * a _Huffman code_, which means that it is an optimum prefix code for a given 45 | * list of symbol frequencies and was generated by the Huffman algorithm. 46 | * Although the prefix codes processed here will ordinarily be "Huffman codes", 47 | * strictly speaking the decoder cannot know whether a given code was actually 48 | * generated by the Huffman algorithm or not. 49 | * 50 | * A prefix code is _canonical_ if and only if a longer codeword never 51 | * lexicographically precedes a shorter codeword, and the lexicographic ordering 52 | * of codewords of equal length is the same as the lexicographic ordering of the 53 | * corresponding symbols. The advantage of using a canonical prefix code is 54 | * that the codewords can be reconstructed from only the symbol => codeword 55 | * length mapping. This eliminates the need to transmit the codewords 56 | * explicitly. Instead, they can be enumerated in lexicographic order after 57 | * sorting the symbols primarily by increasing codeword length and secondarily 58 | * by increasing symbol value. 59 | * 60 | * However, the decoder's real goal is to decode symbols with the code, not just 61 | * generate the list of codewords. Consequently, this function directly builds 62 | * a table for efficiently decoding symbols using the code. 
The basic idea is 63 | * that given the next 'max_codeword_len' bits of input, the decoder can look up 64 | * the next decoded symbol by indexing a table containing '2^max_codeword_len' 65 | * entries. A codeword with length 'max_codeword_len' will have exactly one 66 | * entry in this table, whereas a codeword shorter than 'max_codeword_len' will 67 | * have multiple entries in this table. Precisely, a codeword of length 'n' 68 | * will have '2^(max_codeword_len - n)' entries. The index of each such entry, 69 | * considered as a bitstring of length 'max_codeword_len', will contain the 70 | * corresponding codeword as a prefix. 71 | * 72 | * That's the basic idea, but we extend it in two ways: 73 | * 74 | * - Often the maximum codeword length is too long for it to be efficient to 75 | * build the full decode table whenever a new code is used. Instead, we build 76 | * a "root" table using only '2^table_bits' entries, where 'table_bits <= 77 | * max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a 78 | * symbol directly (for codewords not longer than 'table_bits'), or the index 79 | * of a subtable which must be indexed with additional bits of input to fully 80 | * decode the symbol (for codewords longer than 'table_bits'). 81 | * 82 | * - Whenever the decoder decodes a symbol, it needs to know the codeword length 83 | * so that it can remove the appropriate number of input bits. The obvious 84 | * solution would be to simply retain the codeword lengths array and use the 85 | * decoded symbol as an index into it. However, that would require two array 86 | * accesses when decoding each symbol. Our strategy is to instead store the 87 | * codeword length directly in the decode table entry along with the symbol. 88 | * 89 | * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table 90 | * entries, and see read_huffsym() for full details on how symbols are decoded. 
91 | * 92 | * @decode_table: 93 | * The array in which to build the decode table. This must have been 94 | * declared by the DECODE_TABLE() macro. This may alias @lens, since all 95 | * @lens are consumed before the decode table is written to. 96 | * 97 | * @num_syms: 98 | * The number of symbols in the alphabet. 99 | * 100 | * @table_bits: 101 | * The log base 2 of the number of entries in the root table. 102 | * 103 | * @lens: 104 | * An array of length @num_syms, indexed by symbol, that gives the length 105 | * of the codeword, in bits, for each symbol. The length can be 0, which 106 | * means that the symbol does not have a codeword assigned. In addition, 107 | * @lens may alias @decode_table, as noted above. 108 | * 109 | * @max_codeword_len: 110 | * The maximum codeword length permitted for this code. All entries in 111 | * 'lens' must be less than or equal to this value. 112 | * 113 | * @working_space 114 | * A temporary array that was declared with DECODE_TABLE_WORKING_SPACE(). 115 | * 116 | * Returns 0 on success, or -1 if the lengths do not form a valid prefix code. 117 | */ 118 | int 119 | make_huffman_decode_table(u16 decode_table[], unsigned num_syms, 120 | unsigned table_bits, const u8 lens[], 121 | unsigned max_codeword_len, u16 working_space[]) 122 | { 123 | u16 * const len_counts = &working_space[0]; 124 | u16 * const offsets = &working_space[1 * (max_codeword_len + 1)]; 125 | u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; 126 | s32 remainder = 1; 127 | uint8_t *entry_ptr = (uint8_t *)decode_table; 128 | unsigned codeword_len = 1; 129 | unsigned sym_idx; 130 | unsigned codeword; 131 | unsigned subtable_pos; 132 | unsigned subtable_bits; 133 | unsigned subtable_prefix; 134 | 135 | /* Count how many codewords have each length, including 0. 
*/ 136 | for (unsigned len = 0; len <= max_codeword_len; len++) 137 | len_counts[len] = 0; 138 | for (unsigned sym = 0; sym < num_syms; sym++) 139 | len_counts[lens[sym]]++; 140 | 141 | /* It is already guaranteed that all lengths are <= max_codeword_len, 142 | * but it cannot be assumed they form a complete prefix code. A 143 | * codeword of length n should require a proportion of the codespace 144 | * equaling (1/2)^n. The code is complete if and only if, by this 145 | * measure, the codespace is exactly filled by the lengths. */ 146 | for (unsigned len = 1; len <= max_codeword_len; len++) { 147 | remainder = (remainder << 1) - len_counts[len]; 148 | /* Do the lengths overflow the codespace? */ 149 | if (unlikely(remainder < 0)) 150 | return -1; 151 | } 152 | 153 | if (remainder != 0) { 154 | /* The lengths do not fill the codespace; that is, they form an 155 | * incomplete code. This is permitted only if the code is empty 156 | * (contains no symbols). */ 157 | 158 | if (unlikely(remainder != 1U << max_codeword_len)) 159 | return -1; 160 | 161 | /* The code is empty. When processing a well-formed stream, the 162 | * decode table need not be initialized in this case. However, 163 | * we cannot assume the stream is well-formed, so we must 164 | * initialize the decode table anyway. Setting all entries to 0 165 | * makes the decode table always produce symbol '0' without 166 | * consuming any bits, which is good enough. */ 167 | memset(decode_table, 0, sizeof(decode_table[0]) << table_bits); 168 | return 0; 169 | } 170 | 171 | /* Sort the symbols primarily by increasing codeword length and 172 | * secondarily by increasing symbol value. */ 173 | 174 | /* Initialize 'offsets' so that 'offsets[len]' is the number of 175 | * codewords shorter than 'len' bits, including length 0. 
*/ 176 | offsets[0] = 0; 177 | for (unsigned len = 0; len < max_codeword_len; len++) 178 | offsets[len + 1] = offsets[len] + len_counts[len]; 179 | 180 | /* Use the 'offsets' array to sort the symbols. */ 181 | for (unsigned sym = 0; sym < num_syms; sym++) 182 | sorted_syms[offsets[lens[sym]]++] = sym; 183 | 184 | /* 185 | * Fill the root table entries for codewords no longer than table_bits. 186 | * 187 | * The table will start with entries for the shortest codeword(s), which 188 | * will have the most entries. From there, the number of entries per 189 | * codeword will decrease. As an optimization, we may begin filling 190 | * entries with SSE2 vector accesses (8 entries/store), then change to 191 | * word accesses (2 or 4 entries/store), then change to 16-bit accesses 192 | * (1 entry/store). 193 | */ 194 | sym_idx = offsets[0]; 195 | 196 | #ifdef __SSE2__ 197 | /* Fill entries one 128-bit vector (8 entries) at a time. */ 198 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 199 | (sizeof(__m128i) / sizeof(decode_table[0])); 200 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 201 | { 202 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 203 | for (; sym_idx < end_sym_idx; sym_idx++) { 204 | /* Note: unlike in the "word" version below, the __m128i 205 | * type already has __attribute__((may_alias)), so using 206 | * it to access an array of u16 will not violate strict 207 | * aliasing. */ 208 | __m128i v = _mm_set1_epi16( 209 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 210 | codeword_len)); 211 | unsigned n = stores_per_loop; 212 | do { 213 | *(__m128i *)entry_ptr = v; 214 | entry_ptr += sizeof(v); 215 | } while (--n); 216 | } 217 | } 218 | #endif /* __SSE2__ */ 219 | 220 | #ifdef __GNUC__ 221 | /* Fill entries one word (2 or 4 entries) at a time. 
*/ 222 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 223 | (WORDBYTES / sizeof(decode_table[0])); 224 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 225 | { 226 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 227 | for (; sym_idx < end_sym_idx; sym_idx++) { 228 | 229 | /* Accessing the array of u16 as u32 or u64 would 230 | * violate strict aliasing and would require compiling 231 | * the code with -fno-strict-aliasing to guarantee 232 | * correctness. To work around this problem, use the 233 | * gcc 'may_alias' extension. */ 234 | typedef machine_word_t 235 | __attribute__((may_alias)) aliased_word_t; 236 | aliased_word_t v = repeat_u16( 237 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 238 | codeword_len)); 239 | unsigned n = stores_per_loop; 240 | do { 241 | *(aliased_word_t *)entry_ptr = v; 242 | entry_ptr += sizeof(v); 243 | } while (--n); 244 | } 245 | } 246 | #endif /* __GNUC__ */ 247 | 248 | /* Fill entries one at a time. */ 249 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)); 250 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 251 | { 252 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 253 | for (; sym_idx < end_sym_idx; sym_idx++) { 254 | u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 255 | codeword_len); 256 | unsigned n = stores_per_loop; 257 | do { 258 | *(u16 *)entry_ptr = v; 259 | entry_ptr += sizeof(v); 260 | } while (--n); 261 | } 262 | } 263 | 264 | /* If all symbols were processed, then no subtables are required. */ 265 | if (sym_idx == num_syms) 266 | return 0; 267 | 268 | /* At least one subtable is required. Process the remaining symbols. 
*/ 269 | codeword = ((u16 *)entry_ptr - decode_table) << 1; 270 | subtable_pos = 1U << table_bits; 271 | subtable_bits = table_bits; 272 | subtable_prefix = -1; 273 | do { 274 | while (len_counts[codeword_len] == 0) { 275 | codeword_len++; 276 | codeword <<= 1; 277 | } 278 | 279 | unsigned prefix = codeword >> (codeword_len - table_bits); 280 | 281 | /* Start a new subtable if the first 'table_bits' bits of the 282 | * codeword don't match the prefix for the previous subtable, or 283 | * if this will be the first subtable. */ 284 | if (prefix != subtable_prefix) { 285 | 286 | subtable_prefix = prefix; 287 | 288 | /* 289 | * Calculate the subtable length. If the codeword 290 | * length exceeds 'table_bits' by n, then the subtable 291 | * needs at least 2^n entries. But it may need more; if 292 | * there are fewer than 2^n codewords of length 293 | * 'table_bits + n' remaining, then n will need to be 294 | * incremented to bring in longer codewords until the 295 | * subtable can be filled completely. Note that it 296 | * always will, eventually, be possible to fill the 297 | * subtable, since it was previously verified that the 298 | * code is complete. 299 | */ 300 | subtable_bits = codeword_len - table_bits; 301 | remainder = (s32)1 << subtable_bits; 302 | for (;;) { 303 | remainder -= len_counts[table_bits + 304 | subtable_bits]; 305 | if (remainder <= 0) 306 | break; 307 | subtable_bits++; 308 | remainder <<= 1; 309 | } 310 | 311 | /* Create the entry that points from the root table to 312 | * the subtable. This entry contains the index of the 313 | * start of the subtable and the number of bits with 314 | * which the subtable is indexed (the log base 2 of the 315 | * number of entries it contains). */ 316 | decode_table[subtable_prefix] = 317 | MAKE_DECODE_TABLE_ENTRY(subtable_pos, 318 | subtable_bits); 319 | } 320 | 321 | /* Fill the subtable entries for this symbol. 
*/ 322 | u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 323 | codeword_len - table_bits); 324 | unsigned n = 1U << (subtable_bits - (codeword_len - 325 | table_bits)); 326 | do { 327 | decode_table[subtable_pos++] = entry; 328 | } while (--n); 329 | 330 | len_counts[codeword_len]--; 331 | codeword++; 332 | } while (++sym_idx < num_syms); 333 | 334 | return 0; 335 | } 336 | -------------------------------------------------------------------------------- /src/ebiggers/lzx_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_common.c - Common code for LZX compression and decompression. 3 | */ 4 | 5 | /* 6 | * Copyright (C) 2012-2016 Eric Biggers 7 | * 8 | * This program is free software: you can redistribute it and/or modify it under 9 | * the terms of the GNU General Public License as published by the Free Software 10 | * Foundation, either version 2 of the License, or (at your option) any later 11 | * version. 12 | * 13 | * This program is distributed in the hope that it will be useful, but WITHOUT 14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 | * details. 17 | * 18 | * You should have received a copy of the GNU General Public License along with 19 | * this program. If not, see . 20 | */ 21 | 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif 25 | 26 | #include 27 | 28 | #ifdef __SSE2__ 29 | # include 30 | #endif 31 | 32 | #ifdef __AVX2__ 33 | # include 34 | #endif 35 | 36 | #include "common_defs.h" 37 | #include "lzx_common.h" 38 | 39 | /* Mapping: offset slot => first match offset that uses that offset slot. 40 | * The offset slots for repeat offsets map to "fake" offsets < 1. 
*/ 41 | const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = { 42 | -2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */ 43 | 4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */ 44 | 30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */ 45 | 190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */ 46 | 1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */ 47 | 6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */ 48 | 32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */ 49 | 196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */ 50 | 786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */ 51 | 1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */ 52 | 2097150 /* extra */ 53 | }; 54 | 55 | /* Mapping: offset slot => how many extra bits must be read and added to the 56 | * corresponding offset slot base to decode the match offset. */ 57 | const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = { 58 | 0 , 0 , 0 , 0 , 1 , 59 | 1 , 2 , 2 , 3 , 3 , 60 | 4 , 4 , 5 , 5 , 6 , 61 | 6 , 7 , 7 , 8 , 8 , 62 | 9 , 9 , 10, 10, 11, 63 | 11, 12, 12, 13, 13, 64 | 14, 14, 15, 15, 16, 65 | 16, 17, 17, 17, 17, 66 | 17, 17, 17, 17, 17, 67 | 17, 17, 17, 17, 17, 68 | }; 69 | 70 | /* Round the specified buffer size up to the next valid LZX window size, and 71 | * return its order (log2). Or, if the buffer size is 0 or greater than the 72 | * largest valid LZX window size, return 0. */ 73 | unsigned 74 | lzx_get_window_order(size_t max_bufsize) 75 | { 76 | if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE) 77 | return 0; 78 | 79 | return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER); 80 | } 81 | 82 | /* Given a valid LZX window order, return the number of symbols that will exist 83 | * in the main Huffman code. */ 84 | unsigned 85 | lzx_get_num_main_syms(unsigned window_order) 86 | { 87 | /* Note: one would expect that the maximum match offset would be 88 | * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two 89 | * bytes were to match the last two bytes. 
However, the format 90 | * disallows this case. This reduces the number of needed offset slots 91 | * by 1. */ 92 | u32 window_size = (u32)1 << window_order; 93 | u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1; 94 | unsigned num_offset_slots = 30; 95 | while (max_offset >= lzx_offset_slot_base[num_offset_slots]) 96 | num_offset_slots++; 97 | 98 | return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS); 99 | } 100 | 101 | static void 102 | do_translate_target(void *target, s32 input_pos) 103 | { 104 | s32 abs_offset, rel_offset; 105 | 106 | rel_offset = *(int32_t*)target; 107 | if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) { 108 | if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) { 109 | /* "good translation" */ 110 | abs_offset = rel_offset + input_pos; 111 | } else { 112 | /* "compensating translation" */ 113 | abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE; 114 | } 115 | *(uint32_t*)target = abs_offset; 116 | } 117 | } 118 | 119 | static void 120 | undo_translate_target(void *target, s32 input_pos) 121 | { 122 | s32 abs_offset, rel_offset; 123 | 124 | abs_offset = *(int32_t*)target; 125 | if (abs_offset >= 0) { 126 | if (abs_offset < LZX_WIM_MAGIC_FILESIZE) { 127 | /* "good translation" */ 128 | rel_offset = abs_offset - input_pos; 129 | *(uint32_t*)target = rel_offset; 130 | } 131 | } else { 132 | if (abs_offset >= -input_pos) { 133 | /* "compensating translation" */ 134 | rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE; 135 | *(uint32_t*)target = rel_offset; 136 | } 137 | } 138 | } 139 | 140 | /* 141 | * Do or undo the 'E8' preprocessing used in LZX. Before compression, the 142 | * uncompressed data is preprocessed by changing the targets of x86 CALL 143 | * instructions from relative offsets to absolute offsets. After decompression, 144 | * the translation is undone by changing the targets of x86 CALL instructions 145 | * from absolute offsets to relative offsets. 
*
 * Note that despite its intent, E8 preprocessing can be done on any data even
 * if it is not actually x86 machine code.  In fact, E8 preprocessing appears to
 * always be used in LZX-compressed resources in WIM files; there is no bit to
 * indicate whether it is used or not, unlike in the LZX compressed format as
 * used in cabinet files, where a bit is reserved for that purpose.
 *
 * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
 * which really means the 5-byte call instruction cannot start in the last 10
 * bytes of the uncompressed data.  This is one of the errors in the LZX
 * documentation.
 *
 * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
 * WIM resource, which apparently is another difference from the LZX compression
 * used in cabinet files.
 *
 * E8 processing is supposed to take the file size as a parameter, as it is used
 * in calculating the translated jump targets.  But in WIM files, this file size
 * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
 *
 * @data:		buffer to filter in place.
 * @size:		number of bytes in @data; buffers of 10 bytes or fewer
 *			are left untouched.
 * @process_target:	callback invoked for each E8 byte that is processed; it
 *			receives the address of the byte following the E8 and
 *			the offset of the E8 byte from the start of @data.
 */
static void
lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32))
{

#if !defined(__SSE2__) && !defined(__AVX2__)
	/*
	 * A worthwhile optimization is to push the end-of-buffer check into the
	 * relatively rare E8 case.  This is possible if we replace the last six
	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
	 * before reaching end-of-buffer.  In addition, this scheme guarantees
	 * that no translation can begin following an E8 byte in the last 10
	 * bytes because a 4-byte offset containing E8 as its high byte is a
	 * large negative number that is not valid for translation.  That is
	 * exactly what we need.
	 */
	u8 *tail;
	u8 saved_bytes[6];
	u8 *p;

	if (size <= 10)
		return;

	/* Temporarily overwrite the last six bytes with E8 sentinels; the
	 * original bytes are restored after the scan. */
	tail = &data[size - 6];
	memcpy(saved_bytes, tail, 6);
	memset(tail, 0xE8, 6);
	p = data;
	for (;;) {
		while (*p != 0xE8)
			p++;
		if (p >= tail)
			break;
		(*process_target)(p + 1, p - data);
		/* Skip the E8 byte and its 4-byte operand. */
		p += 5;
	}
	memcpy(tail, saved_bytes, 6);
#else
	/* SSE2 or AVX-2 optimized version for x86_64  */

	u8 *p = data;
	/* Bit i of valid_mask says whether an E8 byte at p[i] may be
	 * processed; bits are cleared for the 5 bytes covered by a processed
	 * E8 byte and its operand, so those bytes are not processed again. */
	u64 valid_mask = ~0;

	if (size <= 10)
		return;
#ifdef __AVX2__
#  define ALIGNMENT_REQUIRED 32
#else
#  define ALIGNMENT_REQUIRED 16
#endif

	/* Process one byte at a time until the pointer is properly aligned.  */
	while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
		if (p >= data + size - 10)
			return;
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			valid_mask &= ~0x1F;
		}
		p++;
		/* Slide the mask along with p; the newly exposed top bit is
		 * valid. */
		valid_mask >>= 1;
		valid_mask |= (u64)1 << 63;
	}

	if (data + size - p >= 64) {

		/* Vectorized processing  */

		/* Note: we use a "trap" E8 byte to eliminate the need to check
		 * for end-of-buffer in the inner loop.  This byte is carefully
		 * positioned so that it will never be changed by a previous
		 * translation before it is detected.  */

		u8 *trap = p + ((data + size - p) & ~31) - 32 + 4;
		u8 saved_byte = *trap;
		*trap = 0xE8;

		for (;;) {
			u32 e8_mask;
			u8 *orig_p = p;
#ifdef __AVX2__
			const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
			for (;;) {
				__m256i bytes = *(const __m256i *)p;
				__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
				e8_mask = _mm256_movemask_epi8(cmpresult);
				if (e8_mask)
					break;
				p += 32;
			}
#else
			const __m128i e8_bytes = _mm_set1_epi8(0xE8);
			for (;;) {
				/* Read the next 32 bytes of data and test them
				 * for E8 bytes.  */
				__m128i bytes1 = *(const __m128i *)p;
				__m128i bytes2 = *(const __m128i *)(p + 16);
				__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
				__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
				u32 mask1 = _mm_movemask_epi8(cmpresult1);
				u32 mask2 = _mm_movemask_epi8(cmpresult2);
				/* The masks have a bit set for each E8 byte.
				 * We stay in this fast inner loop as long as
				 * there are no E8 bytes.  */
				if (mask1 | mask2) {
					e8_mask = mask1 | (mask2 << 16);
					break;
				}
				p += 32;
			}
#endif

			/* Did we pass over data with no E8 bytes?  */
			if (p != orig_p)
				valid_mask = ~0;

			/* Are we nearing end-of-buffer?  (The trap byte sits 4
			 * bytes into its 32-byte group.)  */
			if (p == trap - 4)
				break;

			/* Process the E8 bytes.  However, the AND with
			 * 'valid_mask' ensures we never process an E8 byte that
			 * was itself part of a translation target.  */
			while ((e8_mask &= valid_mask)) {
				unsigned bit = bsf32(e8_mask);
				(*process_target)(p + bit + 1, p + bit - data);
				valid_mask &= ~((u64)0x1F << bit);
			}

			/* Advance to the next 32-byte group; mask bits carried
			 * over from this group move down, the rest are valid. */
			valid_mask >>= 32;
			valid_mask |= 0xFFFFFFFF00000000;
			p += 32;
		}

		*trap = saved_byte;
	}

	/* Approaching the end of the buffer; process one byte at a time.  */
	while (p < data + size - 10) {
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			valid_mask &= ~0x1F;
		}
		p++;
		valid_mask >>= 1;
		valid_mask |= (u64)1 << 63;
	}
#endif /* __SSE2__ || __AVX2__ */
}

/* Apply the E8 filter in the forward direction (relative => absolute CALL
 * targets) to @size bytes at @data, in place. */
void
lzx_preprocess(u8 *data, u32 size)
{
	lzx_e8_filter(data, size, do_translate_target);
}

/* Apply the E8 filter in the reverse direction (absolute => relative CALL
 * targets) to @size bytes at @data, in place. */
void
lzx_postprocess(u8 *data, u32 size)
{
	lzx_e8_filter(data, size, undo_translate_target);
}

--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.h:
--------------------------------------------------------------------------------
/*
 * lzx_common.h
 *
 * Declarations shared between LZX compression and decompression.
 */

#ifndef _LZX_COMMON_H
#define _LZX_COMMON_H

#include "lzx_constants.h"
#include "common_defs.h"

/* Offset slot => first match offset using that slot (one extra trailing
 * entry). */
extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];

/* Offset slot => number of extra offset bits to read for that slot. */
extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];

extern unsigned
lzx_get_window_order(size_t max_bufsize);

extern unsigned
lzx_get_num_main_syms(unsigned window_order);

extern void
lzx_preprocess(u8 *data, u32 size);

extern void
lzx_postprocess(u8 *data, u32 size);

#endif /* _LZX_COMMON_H */

--------------------------------------------------------------------------------
/src/ebiggers/lzx_constants.h:
--------------------------------------------------------------------------------
/*
 * lzx_constants.h
 *
 * Constants for the LZX compression format.
 */

#ifndef _LZX_CONSTANTS_H
#define _LZX_CONSTANTS_H

/* Number of literal byte values.  */
#define LZX_NUM_CHARS	256

/* The smallest and largest allowed match lengths.
*/ 14 | #define LZX_MIN_MATCH_LEN 2 15 | #define LZX_MAX_MATCH_LEN 257 16 | 17 | /* Number of distinct match lengths that can be represented. */ 18 | #define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) 19 | 20 | /* Number of match lengths for which no length symbol is required. */ 21 | #define LZX_NUM_PRIMARY_LENS 7 22 | #define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1) 23 | 24 | /* Valid values of the 3-bit block type field. */ 25 | #define LZX_BLOCKTYPE_VERBATIM 1 26 | #define LZX_BLOCKTYPE_ALIGNED 2 27 | #define LZX_BLOCKTYPE_UNCOMPRESSED 3 28 | 29 | /* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum 30 | * sizes of the sliding window. */ 31 | #define LZX_MIN_WINDOW_ORDER 15 32 | #define LZX_MAX_WINDOW_ORDER 21 33 | #define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */ 34 | #define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */ 35 | 36 | /* Maximum number of offset slots. (The actual number of offset slots depends 37 | * on the window size.) */ 38 | #define LZX_MAX_OFFSET_SLOTS 50 39 | 40 | /* Maximum number of symbols in the main code. (The actual number of symbols in 41 | * the main code depends on the window size.) */ 42 | #define LZX_MAINCODE_MAX_NUM_SYMBOLS \ 43 | (LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS)) 44 | 45 | /* Number of symbols in the length code. */ 46 | #define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS) 47 | 48 | /* Number of symbols in the pre-code. */ 49 | #define LZX_PRECODE_NUM_SYMBOLS 20 50 | 51 | /* Number of bits in which each pre-code codeword length is represented. */ 52 | #define LZX_PRECODE_ELEMENT_SIZE 4 53 | 54 | /* Number of low-order bits of each match offset that are entropy-encoded in 55 | * aligned offset blocks. */ 56 | #define LZX_NUM_ALIGNED_OFFSET_BITS 3 57 | 58 | /* Number of symbols in the aligned offset code. 
*/ 59 | #define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS) 60 | 61 | /* Mask for the match offset bits that are entropy-encoded in aligned offset 62 | * blocks. */ 63 | #define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1) 64 | 65 | /* Number of bits in which each aligned offset codeword length is represented. */ 66 | #define LZX_ALIGNEDCODE_ELEMENT_SIZE 3 67 | 68 | /* The first offset slot which requires an aligned offset symbol in aligned 69 | * offset blocks. */ 70 | #define LZX_MIN_ALIGNED_OFFSET_SLOT 8 71 | 72 | /* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */ 73 | #define LZX_MIN_ALIGNED_OFFSET 14 74 | 75 | /* The maximum number of extra offset bits in verbatim blocks. (One would need 76 | * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset 77 | * bits in *aligned* blocks.) */ 78 | #define LZX_MAX_NUM_EXTRA_BITS 17 79 | 80 | /* Maximum lengths (in bits) for length-limited Huffman code construction. */ 81 | #define LZX_MAX_MAIN_CODEWORD_LEN 16 82 | #define LZX_MAX_LEN_CODEWORD_LEN 16 83 | #define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1) 84 | #define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1) 85 | 86 | /* For LZX-compressed blocks in WIM resources, this value is always used as the 87 | * filesize parameter for the call instruction (0xe8 byte) preprocessing, even 88 | * though the blocks themselves are not this size, and the size of the actual 89 | * file resource in the WIM file is very likely to be something entirely 90 | * different as well. */ 91 | #define LZX_WIM_MAGIC_FILESIZE 12000000 92 | 93 | /* Assumed LZX block size when the encoded block size begins with a 0 bit. 94 | * This is probably WIM-specific. */ 95 | #define LZX_DEFAULT_BLOCK_SIZE 32768 96 | 97 | /* Number of offsets in the recent (or "repeat") offsets queue. 
*/ 98 | #define LZX_NUM_RECENT_OFFSETS 3 99 | 100 | /* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */ 101 | #define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1) 102 | 103 | #endif /* _LZX_CONSTANTS_H */ 104 | -------------------------------------------------------------------------------- /src/ebiggers/lzx_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_decompress.c 3 | * 4 | * A decompressor for the LZX compression format, as used in WIM files. 5 | */ 6 | 7 | /* 8 | * Copyright (C) 2012-2016 Eric Biggers 9 | * 10 | * This program is free software: you can redistribute it and/or modify it under 11 | * the terms of the GNU General Public License as published by the Free Software 12 | * Foundation, either version 2 of the License, or (at your option) any later 13 | * version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 18 | * details. 19 | * 20 | * You should have received a copy of the GNU General Public License along with 21 | * this program. If not, see <http://www.gnu.org/licenses/>. 22 | */ 23 | 24 | /* 25 | * LZX is an LZ77 and Huffman-code based compression format that has many 26 | * similarities to DEFLATE (the format used by zlib/gzip). The compression 27 | * ratio is as good as or better than DEFLATE. See lzx_compress.c for a format 28 | * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a 29 | * historical overview. Here I make some pragmatic notes. 30 | * 31 | * The old specification for LZX is the document "Microsoft LZX Data Compression 32 | * Format" (1997). It defines the LZX format as used in cabinet files. Allowed 33 | * window sizes are 2^n where 15 <= n <= 21. However, this document contains 34 | * several errors, so don't read too much into it... 
35 | * 36 | * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA 37 | * Compression and Decompression" (2014). It defines the LZX format as used by 38 | * Microsoft's binary patcher. It corrects several errors in the 1997 document 39 | * and extends the format in several ways --- namely, optional reference data, 40 | * up to 2^25 byte windows, and longer match lengths. 41 | * 42 | * WIM files use a more restricted form of LZX. No LZX DELTA extensions are 43 | * present, the window is not "sliding", E8 preprocessing is done 44 | * unconditionally with a fixed file size, and the maximum window size is always 45 | * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource). 46 | * This code is primarily intended to implement this form of LZX. But although 47 | * not compatible with WIMGAPI, this code also supports maximum window sizes up 48 | * to 2^21 bytes. 49 | * 50 | * TODO: Add support for window sizes up to 2^25 bytes. 51 | */ 52 | 53 | #ifdef HAVE_CONFIG_H 54 | # include "config.h" 55 | #endif 56 | 57 | #include 58 | 59 | #include "decompress_common.h" 60 | #include "lzx_common.h" 61 | #include "system_compression.h" 62 | 63 | /* These values are chosen for fast decompression. 
*/ 64 | #define LZX_MAINCODE_TABLEBITS 11 65 | #define LZX_LENCODE_TABLEBITS 9 66 | #define LZX_PRECODE_TABLEBITS 6 67 | #define LZX_ALIGNEDCODE_TABLEBITS 7 68 | /* Slack appended to the main and length codeword-length arrays: lzx_read_codeword_lens() can write up to this many entries past the count it was asked for. */ 69 | #define LZX_READ_LENS_MAX_OVERRUN 50 70 | /* Decompressor state: decode tables, the codeword lengths they are built from, and parameters fixed at allocation time. */ 71 | struct lzx_decompressor { 72 | /* Main code. The lengths are kept between blocks because new lengths are coded as differences from the old ones. */ 73 | DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS, 74 | LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); 75 | u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 76 | /* Length code; its lengths persist between blocks in the same way. */ 77 | DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS, 78 | LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); 79 | u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 80 | /* Aligned offset code. The lengths overlay the decode table that is built from them. */ 81 | union { 82 | DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS, 83 | LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN); 84 | u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; 85 | }; 86 | /* Precode table and lengths, overlaid with extra_offset_bits[], which lzx_decompress_block() refills for every compressed block after the block header has been read. */ 87 | union { 88 | DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS, 89 | LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); 90 | u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; 91 | u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; 92 | }; 93 | /* Scratch buffers passed to make_huffman_decode_table(), one view per code, all sharing the same storage. */ 94 | union { 95 | DECODE_TABLE_WORKING_SPACE(maincode_working_space, 96 | LZX_MAINCODE_MAX_NUM_SYMBOLS, 97 | LZX_MAX_MAIN_CODEWORD_LEN); 98 | DECODE_TABLE_WORKING_SPACE(lencode_working_space, 99 | LZX_LENCODE_NUM_SYMBOLS, 100 | LZX_MAX_LEN_CODEWORD_LEN); 101 | DECODE_TABLE_WORKING_SPACE(alignedcode_working_space, 102 | LZX_ALIGNEDCODE_NUM_SYMBOLS, 103 | LZX_MAX_ALIGNED_CODEWORD_LEN); 104 | DECODE_TABLE_WORKING_SPACE(precode_working_space, 105 | LZX_PRECODE_NUM_SYMBOLS, 106 | LZX_MAX_PRE_CODEWORD_LEN); 107 | }; 108 | /* Set once by lzx_allocate_decompressor(). */ 109 | unsigned window_order; 110 | unsigned num_main_syms; 111 | 112 | /* Like lzx_extra_offset_bits[], but does not include the entropy-coded 113 | * bits of aligned offset blocks */ 114 | u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS]; 115 | 116 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); 117 | 118 | /* Read a Huffman-encoded symbol using
the precode. */ 119 | static forceinline unsigned 120 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) 121 | { 122 | return read_huffsym(is, d->precode_decode_table, 123 | LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); 124 | } 125 | 126 | /* Read a Huffman-encoded symbol using the main code. */ 127 | static forceinline unsigned 128 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) 129 | { 130 | return read_huffsym(is, d->maincode_decode_table, 131 | LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); 132 | } 133 | 134 | /* Read a Huffman-encoded symbol using the length code. */ 135 | static forceinline unsigned 136 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) 137 | { 138 | return read_huffsym(is, d->lencode_decode_table, 139 | LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); 140 | } 141 | 142 | /* Read a Huffman-encoded symbol using the aligned offset code. */ 143 | static forceinline unsigned 144 | read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) 145 | { 146 | return read_huffsym(is, d->alignedcode_decode_table, 147 | LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN); 148 | } 149 | 150 | /* 151 | * Read a precode from the compressed input bitstream, then use it to decode 152 | * @num_lens codeword length values and write them to @lens. Each value is decoded as a difference (modulo 17) from the length already stored in @lens, so @lens must hold the previous lengths on entry. A final run may write past @lens + @num_lens by up to LZX_READ_LENS_MAX_OVERRUN entries. Returns 0 on success, or -1 if the precode table cannot be built or a run-of-identical-lengths symbol is followed by a presym above 17. 153 | */ 154 | static int 155 | lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is, 156 | u8 *lens, unsigned num_lens) 157 | { 158 | u8 *len_ptr = lens; 159 | u8 *lens_end = lens + num_lens; 160 | 161 | /* Read the lengths of the precode codewords. These are stored 162 | * explicitly. */ 163 | for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) { 164 | d->precode_lens[i] = 165 | bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE); 166 | } 167 | 168 | /* Build the decoding table for the precode.
*/ 169 | if (make_huffman_decode_table(d->precode_decode_table, 170 | LZX_PRECODE_NUM_SYMBOLS, 171 | LZX_PRECODE_TABLEBITS, 172 | d->precode_lens, 173 | LZX_MAX_PRE_CODEWORD_LEN, 174 | d->precode_working_space)) 175 | return -1; 176 | 177 | /* Decode the codeword lengths. */ 178 | do { 179 | unsigned presym; 180 | u8 len; 181 | 182 | /* Read the next precode symbol. */ 183 | presym = read_presym(d, is); 184 | if (presym < 17) { 185 | /* Difference from old length, taken modulo 17 */ 186 | len = *len_ptr - presym; 187 | if ((int8_t)len < 0) 188 | len += 17; 189 | *len_ptr++ = len; 190 | } else { 191 | /* Special RLE values */ 192 | 193 | unsigned run_len; 194 | 195 | if (presym == 17) { 196 | /* Run of 0's */ 197 | run_len = 4 + bitstream_read_bits(is, 4); 198 | len = 0; 199 | } else if (presym == 18) { 200 | /* Longer run of 0's */ 201 | run_len = 20 + bitstream_read_bits(is, 5); 202 | len = 0; 203 | } else { 204 | /* Run of identical lengths */ 205 | run_len = 4 + bitstream_read_bits(is, 1); 206 | presym = read_presym(d, is); 207 | if (unlikely(presym > 17)) 208 | return -1; /* presym == 17 is accepted: for an old length in 0..16 the subtraction below wraps back to the old length */ 209 | len = *len_ptr - presym; 210 | if ((int8_t)len < 0) 211 | len += 17; 212 | } 213 | 214 | do { 215 | *len_ptr++ = len; 216 | } while (--run_len); 217 | /* 218 | * The worst case overrun is when presym == 18, 219 | * run_len == 20 + 31, and only 1 length was remaining. 220 | * So LZX_READ_LENS_MAX_OVERRUN == 50. 221 | * 222 | * Overrun while reading the first half of maincode_lens 223 | * can corrupt the previous values in the second half. 224 | * This doesn't really matter because the resulting 225 | * lengths will still be in range, and data that 226 | * generates overruns is invalid anyway. 227 | */ 228 | } 229 | } while (len_ptr < lens_end); 230 | 231 | return 0; 232 | } 233 | 234 | /* 235 | * Read the header of an LZX block. For all block types, the block type and 236 | * size is saved in *block_type_ret and *block_size_ret, respectively.
For 237 | * compressed blocks, the codeword lengths are also saved. For uncompressed 238 | * blocks, the recent offsets queue is also updated. 239 | */ /* Returns 0 on success, or -1 if the block type is unrecognized, a codeword-length table cannot be read, or an uncompressed block header carries a zero recent offset. The two output parameters are written only on success. */ 240 | static int 241 | lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is, 242 | u32 recent_offsets[], int *block_type_ret, 243 | u32 *block_size_ret) 244 | { 245 | int block_type; 246 | u32 block_size; 247 | 248 | bitstream_ensure_bits(is, 4); 249 | 250 | /* Read the block type. */ 251 | block_type = bitstream_pop_bits(is, 3); 252 | 253 | /* Read the block size. */ 254 | if (bitstream_pop_bits(is, 1)) { 255 | block_size = LZX_DEFAULT_BLOCK_SIZE; 256 | } else { 257 | block_size = bitstream_read_bits(is, 16); 258 | if (d->window_order >= 16) { /* window orders of 16 and above carry 8 more size bits, 24 in total */ 259 | block_size <<= 8; 260 | block_size |= bitstream_read_bits(is, 8); 261 | } 262 | } 263 | 264 | switch (block_type) { 265 | 266 | case LZX_BLOCKTYPE_ALIGNED: 267 | 268 | /* Read the aligned offset codeword lengths. */ 269 | 270 | for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { 271 | d->alignedcode_lens[i] = 272 | bitstream_read_bits(is, 273 | LZX_ALIGNEDCODE_ELEMENT_SIZE); 274 | } 275 | 276 | /* Fall through, since the rest of the header for aligned offset 277 | * blocks is the same as that for verbatim blocks. */ 278 | 279 | case LZX_BLOCKTYPE_VERBATIM: 280 | 281 | /* Read the main codeword lengths, which are divided into two 282 | * parts: literal symbols and match headers. */ 283 | 284 | if (lzx_read_codeword_lens(d, is, d->maincode_lens, 285 | LZX_NUM_CHARS)) 286 | return -1; 287 | 288 | if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS, 289 | d->num_main_syms - LZX_NUM_CHARS)) 290 | return -1; 291 | 292 | 293 | /* Read the length codeword lengths.
*/ 294 | 295 | if (lzx_read_codeword_lens(d, is, d->lencode_lens, 296 | LZX_LENCODE_NUM_SYMBOLS)) 297 | return -1; 298 | 299 | break; 300 | 301 | case LZX_BLOCKTYPE_UNCOMPRESSED: 302 | /* 303 | * The header of an uncompressed block contains new values for 304 | * the recent offsets queue, starting on the next 16-bit 305 | * boundary in the bitstream. Careful: if the stream is 306 | * *already* aligned, the correct thing to do is to throw away 307 | * the next 16 bits (this is probably a mistake in the format). 308 | */ 309 | bitstream_ensure_bits(is, 1); 310 | bitstream_align(is); 311 | recent_offsets[0] = bitstream_read_u32(is); 312 | recent_offsets[1] = bitstream_read_u32(is); 313 | recent_offsets[2] = bitstream_read_u32(is); 314 | 315 | /* Offsets of 0 are invalid. */ 316 | if (recent_offsets[0] == 0 || recent_offsets[1] == 0 || 317 | recent_offsets[2] == 0) 318 | return -1; 319 | break; 320 | 321 | default: 322 | /* Unrecognized block type. */ 323 | return -1; 324 | } 325 | 326 | *block_type_ret = block_type; 327 | *block_size_ret = block_size; 328 | return 0; 329 | } 330 | 331 | /* Decompress a block of LZX-compressed data. The block's output occupies [out_next, out_next + block_size); out_begin is the start of the whole output buffer and is handed to lz_copy() when matches are validated. recent_offsets[] is read and updated in place. Returns 0 on success, or -1 if a decode table cannot be built or lz_copy() rejects a match. */ 332 | static int 333 | lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, 334 | int block_type, u32 block_size, 335 | u8 * const out_begin, u8 *out_next, u32 recent_offsets[]) 336 | { 337 | u8 * const block_end = out_next + block_size; 338 | unsigned min_aligned_offset_slot; 339 | 340 | /* 341 | * Build the Huffman decode tables. We always need to build the main 342 | * and length decode tables. For aligned blocks we additionally need to 343 | * build the aligned offset decode table. 
344 | */ 345 | 346 | if (make_huffman_decode_table(d->maincode_decode_table, 347 | d->num_main_syms, 348 | LZX_MAINCODE_TABLEBITS, 349 | d->maincode_lens, 350 | LZX_MAX_MAIN_CODEWORD_LEN, 351 | d->maincode_working_space)) 352 | return -1; 353 | 354 | if (make_huffman_decode_table(d->lencode_decode_table, 355 | LZX_LENCODE_NUM_SYMBOLS, 356 | LZX_LENCODE_TABLEBITS, 357 | d->lencode_lens, 358 | LZX_MAX_LEN_CODEWORD_LEN, 359 | d->lencode_working_space)) 360 | return -1; 361 | 362 | if (block_type == LZX_BLOCKTYPE_ALIGNED) { 363 | if (make_huffman_decode_table(d->alignedcode_decode_table, 364 | LZX_ALIGNEDCODE_NUM_SYMBOLS, 365 | LZX_ALIGNEDCODE_TABLEBITS, 366 | d->alignedcode_lens, 367 | LZX_MAX_ALIGNED_CODEWORD_LEN, 368 | d->alignedcode_working_space)) 369 | return -1; 370 | min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; 371 | memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned, 372 | sizeof(lzx_extra_offset_bits)); 373 | } else { /* Verbatim block: LZX_MAX_OFFSET_SLOTS acts as a threshold that presumably no decoded offset slot reaches, so the aligned code is never consulted. */ 374 | min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS; 375 | memcpy(d->extra_offset_bits, lzx_extra_offset_bits, 376 | sizeof(lzx_extra_offset_bits)); 377 | } 378 | 379 | /* Decode the literals and matches. */ 380 | 381 | do { 382 | unsigned mainsym; 383 | unsigned length; 384 | u32 offset; 385 | unsigned offset_slot; 386 | 387 | mainsym = read_mainsym(d, is); 388 | if (mainsym < LZX_NUM_CHARS) { 389 | /* Literal */ 390 | *out_next++ = mainsym; 391 | continue; 392 | } 393 | 394 | /* Match */ 395 | 396 | /* Decode the length header and offset slot. Because LZX_NUM_CHARS is a multiple of LZX_NUM_LEN_HEADERS, the remainder can be taken from mainsym directly. */ 397 | STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0); 398 | length = mainsym % LZX_NUM_LEN_HEADERS; 399 | offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS; 400 | 401 | /* If needed, read a length symbol to decode the full length.
*/ 402 | if (length == LZX_NUM_PRIMARY_LENS) 403 | length += read_lensym(d, is); 404 | length += LZX_MIN_MATCH_LEN; 405 | 406 | if (offset_slot < LZX_NUM_RECENT_OFFSETS) { 407 | /* Repeat offset */ 408 | 409 | /* Note: This isn't a real LRU queue, since using the R2 410 | * offset doesn't bump the R1 offset down to R2. */ 411 | offset = recent_offsets[offset_slot]; 412 | recent_offsets[offset_slot] = recent_offsets[0]; 413 | } else { 414 | /* Explicit offset */ 415 | offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]); 416 | if (offset_slot >= min_aligned_offset_slot) { /* the low LZX_NUM_ALIGNED_OFFSET_BITS bits come from the aligned code */ 417 | offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) | 418 | read_alignedsym(d, is); 419 | } 420 | offset += lzx_offset_slot_base[offset_slot]; 421 | 422 | /* Update the match offset LRU queue. */ 423 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3); 424 | recent_offsets[2] = recent_offsets[1]; 425 | recent_offsets[1] = recent_offsets[0]; 426 | } 427 | recent_offsets[0] = offset; 428 | 429 | /* Validate the match and copy it to the current position. */ 430 | if (unlikely(lz_copy(length, offset, out_begin, 431 | out_next, block_end, LZX_MIN_MATCH_LEN))) 432 | return -1; 433 | out_next += length; 434 | } while (out_next != block_end); 435 | 436 | return 0; 437 | } 438 | /* Decompress one LZX-compressed buffer. Blocks are decoded until exactly uncompressed_size bytes have been produced; lzx_postprocess() is then run over the output unless no block could have contained an 0xE8 byte. Returns 0 on success, or -1 if a block header is invalid, a block size is zero or larger than the remaining output space, or a block fails to decode. */ 439 | int 440 | lzx_decompress(struct lzx_decompressor *d, 441 | const void *compressed_data, size_t compressed_size, 442 | void *uncompressed_data, size_t uncompressed_size) 443 | { 444 | u8 * const out_begin = uncompressed_data; 445 | u8 *out_next = out_begin; 446 | u8 * const out_end = out_begin + uncompressed_size; 447 | struct input_bitstream is; 448 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3); 449 | u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1}; 450 | unsigned may_have_e8_byte = 0; 451 | 452 | init_input_bitstream(&is, compressed_data, compressed_size); 453 | 454 | /* Codeword lengths begin as all 0's for delta encoding purposes.
*/ 455 | memset(d->maincode_lens, 0, d->num_main_syms); 456 | memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS); 457 | 458 | /* Decompress blocks until we have all the uncompressed data. */ 459 | 460 | while (out_next != out_end) { 461 | int block_type; 462 | u32 block_size; 463 | 464 | if (lzx_read_block_header(d, &is, recent_offsets, 465 | &block_type, &block_size)) 466 | return -1; 467 | /* Reject empty blocks and blocks larger than the remaining output space. */ 468 | if (block_size < 1 || block_size > out_end - out_next) 469 | return -1; 470 | 471 | if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) { 472 | 473 | /* Compressed block */ 474 | if (lzx_decompress_block(d, &is, block_type, block_size, 475 | out_begin, out_next, 476 | recent_offsets)) 477 | return -1; 478 | 479 | /* If the first E8 byte was in this block, then it must 480 | * have been encoded as a literal using mainsym E8. */ 481 | may_have_e8_byte |= d->maincode_lens[0xE8]; 482 | } else { 483 | 484 | /* Uncompressed block */ 485 | if (bitstream_read_bytes(&is, out_next, block_size)) 486 | return -1; 487 | 488 | /* Re-align the bitstream if needed: an odd-sized block is followed by one byte that is read and discarded. */ 489 | if (block_size & 1) 490 | bitstream_read_byte(&is); 491 | 492 | /* There may have been an E8 byte in the block. */ 493 | may_have_e8_byte = 1; 494 | } 495 | out_next += block_size; 496 | } 497 | 498 | /* Postprocess the data unless it cannot possibly contain E8 bytes.
*/ 499 | if (may_have_e8_byte) 500 | lzx_postprocess(uncompressed_data, uncompressed_size); 501 | 502 | return 0; 503 | } 504 | /* Allocate a decompressor for buffers of up to max_block_size bytes. Returns NULL with errno set to EINVAL when lzx_get_window_order() returns 0 for that size, and NULL when aligned_malloc() fails. Release the result with lzx_free_decompressor(). */ 505 | struct lzx_decompressor * 506 | lzx_allocate_decompressor(size_t max_block_size) 507 | { 508 | unsigned window_order; 509 | struct lzx_decompressor *d; 510 | 511 | window_order = lzx_get_window_order(max_block_size); 512 | if (window_order == 0) { 513 | errno = EINVAL; 514 | return NULL; 515 | } 516 | 517 | d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT); 518 | if (!d) 519 | return NULL; 520 | 521 | d->window_order = window_order; 522 | d->num_main_syms = lzx_get_num_main_syms(window_order); 523 | 524 | /* Initialize 'd->extra_offset_bits_minus_aligned'. */ 525 | STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) == 526 | sizeof(lzx_extra_offset_bits)); 527 | STATIC_ASSERT(sizeof(d->extra_offset_bits) == 528 | sizeof(lzx_extra_offset_bits)); 529 | memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits, 530 | sizeof(lzx_extra_offset_bits)); /* Slots from LZX_MIN_ALIGNED_OFFSET_SLOT upward get LZX_NUM_ALIGNED_OFFSET_BITS of their offset bits from the aligned code, so that many fewer raw bits are read for them. */ 531 | for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; 532 | offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++) 533 | { 534 | d->extra_offset_bits_minus_aligned[offset_slot] -= 535 | LZX_NUM_ALIGNED_OFFSET_BITS; 536 | } 537 | 538 | return d; 539 | } 540 | /* Release a decompressor obtained from lzx_allocate_decompressor(). */ 541 | void 542 | lzx_free_decompressor(struct lzx_decompressor *d) 543 | { 544 | aligned_free(d); 545 | } 546 | -------------------------------------------------------------------------------- /src/ebiggers/system_compression.h: -------------------------------------------------------------------------------- 1 | /* 2 | * system_compression.h - declarations for accessing System Compressed files 3 | * 4 | * Copyright (C) 2015 Eric Biggers 5 | * 6 | * This program is free software: you can redistribute it and/or modify it under 7 | * the terms of the GNU General Public License as published by the Free Software 8 | * Foundation, either version 2 of the License, or (at your option) any later 9 | * version. 
10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 14 | * details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program. If not, see . 18 | */ 19 | 20 | #pragma once 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | #include 27 | #include 28 | 29 | /* System compressed file access */ 30 | 31 | struct ntfs_system_decompression_ctx; 32 | 33 | extern void 34 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx); 35 | 36 | /* XPRESS decompression */ 37 | 38 | struct xpress_decompressor; 39 | /* Allocate an XPRESS decompressor; release it with xpress_free_decompressor(). */ 40 | extern struct xpress_decompressor *xpress_allocate_decompressor(void); 41 | /* Decompress into exactly uncompressed_size bytes. Returns 0 on success and -1 on invalid input. */ 42 | extern int xpress_decompress(struct xpress_decompressor *decompressor, 43 | const void *compressed_data, size_t compressed_size, 44 | void *uncompressed_data, size_t uncompressed_size); 45 | 46 | extern void xpress_free_decompressor(struct xpress_decompressor *decompressor); 47 | 48 | /* LZX decompression */ 49 | 50 | struct lzx_decompressor; 51 | /* Allocate an LZX decompressor for buffers of up to max_block_size bytes; returns NULL on failure (errno is EINVAL for an unsupported size). */ 52 | extern struct lzx_decompressor * 53 | lzx_allocate_decompressor(size_t max_block_size); 54 | /* Decompress into exactly uncompressed_size bytes. Returns 0 on success and -1 on invalid input. */ 55 | extern int lzx_decompress(struct lzx_decompressor *decompressor, 56 | const void *compressed_data, size_t compressed_size, 57 | void *uncompressed_data, size_t uncompressed_size); 58 | 59 | extern void lzx_free_decompressor(struct lzx_decompressor *decompressor); 60 | 61 | #ifdef __cplusplus 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /src/ebiggers/xpress_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * xpress_constants.h 3 | * 4 | * Constants for the XPRESS compression format.
5 | */ 6 | 7 | #ifndef _XPRESS_CONSTANTS_H 8 | #define _XPRESS_CONSTANTS_H 9 | /* Symbols below XPRESS_NUM_CHARS decode as literal bytes; the remaining symbols, up to XPRESS_NUM_SYMBOLS, encode matches. */ 10 | #define XPRESS_NUM_CHARS 256 11 | #define XPRESS_NUM_SYMBOLS 512 12 | #define XPRESS_MAX_CODEWORD_LEN 15 13 | 14 | #define XPRESS_END_OF_DATA 256 15 | 16 | #define XPRESS_MIN_OFFSET 1 17 | #define XPRESS_MAX_OFFSET 65535 18 | /* XPRESS_MIN_MATCH_LEN is added to the decoded length value of every match. */ 19 | #define XPRESS_MIN_MATCH_LEN 3 20 | #define XPRESS_MAX_MATCH_LEN 65538 21 | 22 | #endif /* _XPRESS_CONSTANTS_H */ 23 | -------------------------------------------------------------------------------- /src/ebiggers/xpress_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * xpress_decompress.c 3 | * 4 | * A decompressor for the XPRESS compression format (Huffman variant). 5 | */ 6 | 7 | /* 8 | * 9 | * Copyright (C) 2012-2016 Eric Biggers 10 | * 11 | * This program is free software: you can redistribute it and/or modify it under 12 | * the terms of the GNU General Public License as published by the Free Software 13 | * Foundation, either version 2 of the License, or (at your option) any later 14 | * version. 15 | * 16 | * This program is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 19 | * details. 20 | * 21 | * You should have received a copy of the GNU General Public License along with 22 | * this program. If not, see <http://www.gnu.org/licenses/>. 23 | */ 24 | 25 | 26 | /* 27 | * The XPRESS compression format is an LZ77 and Huffman-code based algorithm. 28 | * That means it is fairly similar to LZX compression, but XPRESS is simpler, so 29 | * it is a little faster to compress and decompress. 30 | * 31 | * The XPRESS compression format is mostly documented in a file called "[MS-XCA] 32 | * Xpress Compression Algorithm". 
In the MSDN library, it can currently be 33 | * found under Open Specifications => Protocols => Windows Protocols => Windows 34 | * Server Protocols => [MS-XCA] Xpress Compression Algorithm. The format in 35 | * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm" 36 | * (there apparently are some other versions of XPRESS as well). 37 | * 38 | * If you are already familiar with the LZ77 algorithm and Huffman coding, the 39 | * XPRESS format is fairly simple. The compressed data begins with 256 bytes 40 | * that contain 512 4-bit integers that are the lengths of the symbols in the 41 | * Huffman code used for match/literal headers. In contrast with more 42 | * complicated formats such as DEFLATE and LZX, this is the only Huffman code 43 | * that is used for the entirety of the XPRESS compressed data, and the codeword 44 | * lengths are not encoded with a pretree. 45 | * 46 | * The rest of the compressed data is Huffman-encoded symbols. Values 0 through 47 | * 255 represent the corresponding literal bytes. Values 256 through 511 48 | * represent matches and may require extra bits or bytes to be read to get the 49 | * match offset and match length. 50 | * 51 | * The trickiest part is probably the way in which literal bytes for match 52 | * lengths are interleaved in the bitstream. 53 | * 54 | * Also, a caveat--- according to Microsoft's documentation for XPRESS, 55 | * 56 | * "Some implementation of the decompression algorithm expect an extra 57 | * symbol to mark the end of the data. Specifically, some implementations 58 | * fail during decompression if the Huffman symbol 256 is not found after 59 | * the actual data." 60 | * 61 | * This is the case with Microsoft's implementation in WIMGAPI, for example. So 62 | * although our implementation doesn't currently check for this extra symbol, 63 | * compressors would be wise to add it. 
64 | */ 65 | 66 | #ifdef HAVE_CONFIG_H 67 | # include "config.h" 68 | #endif 69 | 70 | #include "decompress_common.h" 71 | #include "system_compression.h" 72 | #include "xpress_constants.h" 73 | 74 | /* This value is chosen for fast decompression. */ 75 | #define XPRESS_TABLEBITS 11 76 | /* Decompressor state. lens[] overlays the decode table that is built from it; working_space is scratch for make_huffman_decode_table(). */ 77 | struct xpress_decompressor { 78 | union { 79 | DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS, 80 | XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); 81 | u8 lens[XPRESS_NUM_SYMBOLS]; 82 | }; 83 | DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS, 84 | XPRESS_MAX_CODEWORD_LEN); 85 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); 86 | /* Decompress compressed_size bytes of XPRESS data into exactly uncompressed_size bytes. The input must begin with the XPRESS_NUM_SYMBOLS / 2 byte table of 4-bit codeword lengths. Returns 0 on success, or -1 if the input is too short for that table, the decode table cannot be built, or lz_copy() rejects a match. */ 87 | int 88 | xpress_decompress(struct xpress_decompressor * d, 89 | const void *compressed_data, size_t compressed_size, 90 | void *uncompressed_data, size_t uncompressed_size) 91 | { 92 | const u8 * const in_begin = compressed_data; 93 | u8 * const out_begin = uncompressed_data; 94 | u8 *out_next = out_begin; 95 | u8 * const out_end = out_begin + uncompressed_size; 96 | struct input_bitstream is; 97 | 98 | /* Read the Huffman codeword lengths: two per input byte, low nibble first. */ 99 | if (compressed_size < XPRESS_NUM_SYMBOLS / 2) 100 | return -1; 101 | for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { 102 | d->lens[2 * i + 0] = in_begin[i] & 0xf; 103 | d->lens[2 * i + 1] = in_begin[i] >> 4; 104 | } 105 | 106 | /* Build a decoding table for the Huffman code. */ 107 | if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, 108 | XPRESS_TABLEBITS, d->lens, 109 | XPRESS_MAX_CODEWORD_LEN, 110 | d->working_space)) 111 | return -1; 112 | 113 | /* Decode the matches and literals.
*/ 114 | 115 | init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, 116 | compressed_size - XPRESS_NUM_SYMBOLS / 2); 117 | 118 | while (out_next != out_end) { 119 | unsigned sym; 120 | unsigned log2_offset; 121 | u32 length; 122 | u32 offset; 123 | 124 | sym = read_huffsym(&is, d->decode_table, 125 | XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); 126 | if (sym < XPRESS_NUM_CHARS) { 127 | /* Literal */ 128 | *out_next++ = sym; 129 | } else { 130 | /* Match: the low 4 bits of the symbol are the length header, the next 4 bits are log2 of the offset */ 131 | length = sym & 0xf; 132 | log2_offset = (sym >> 4) & 0xf; 133 | 134 | bitstream_ensure_bits(&is, 16); 135 | /* The offset is an implicit leading 1 bit followed by log2_offset bits from the stream. */ 136 | offset = ((u32)1 << log2_offset) | 137 | bitstream_pop_bits(&is, log2_offset); 138 | /* A length header of 0xf is an escape: one more byte is added, and if that byte is 0xff the length is replaced by a 16-bit value. */ 139 | if (length == 0xf) { 140 | length += bitstream_read_byte(&is); 141 | if (length == 0xf + 0xff) 142 | length = bitstream_read_u16(&is); 143 | } 144 | length += XPRESS_MIN_MATCH_LEN; 145 | 146 | if (unlikely(lz_copy(length, offset, 147 | out_begin, out_next, out_end, 148 | XPRESS_MIN_MATCH_LEN))) 149 | return -1; 150 | 151 | out_next += length; 152 | } 153 | } 154 | return 0; 155 | } 156 | /* Allocate an XPRESS decompressor with DECODE_TABLE_ALIGNMENT alignment; returns whatever aligned_malloc() returns. Release it with xpress_free_decompressor(). */ 157 | struct xpress_decompressor * 158 | xpress_allocate_decompressor(void) 159 | { 160 | return aligned_malloc(sizeof(struct xpress_decompressor), 161 | DECODE_TABLE_ALIGNMENT); 162 | } 163 | /* Release a decompressor obtained from xpress_allocate_decompressor(). */ 164 | void 165 | xpress_free_decompressor(struct xpress_decompressor *d) 166 | { 167 | aligned_free(d); 168 | } 169 | -------------------------------------------------------------------------------- /src/ntfs.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2020 2 | * 3 | * This file is part of ntfs2btrfs. 4 | * 5 | * Ntfs2btrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version.
9 | * 10 | * Ntfs2btrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with Ntfs2btrfs. If not, see . */ 17 | 18 | #pragma once 19 | 20 | #include "ntfs2btrfs.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #ifdef _WIN32 29 | #include 30 | #else 31 | #include 32 | #endif 33 | 34 | #pragma pack(push,1) 35 | /* NTFS boot sector fields; byte-packed by the enclosing #pragma pack(push,1). */ 36 | typedef struct { 37 | uint8_t Jmp[3]; 38 | uint8_t FsName[8]; 39 | uint16_t BytesPerSector; 40 | uint8_t SectorsPerCluster; 41 | uint16_t ReservedSectors; 42 | uint8_t Unused1[5]; 43 | uint8_t Media; 44 | uint8_t Unused2[2]; 45 | uint16_t SectorsPerTrack; 46 | uint16_t Heads; 47 | uint32_t HiddenSectors; 48 | uint32_t Unused3; 49 | uint32_t Unknown; 50 | uint64_t TotalSectors; 51 | uint64_t MFT; 52 | uint64_t MFTMirr; 53 | int8_t ClustersPerMFTRecord; 54 | uint8_t Padding1[3]; 55 | int8_t ClustersPerIndexRecord; 56 | uint8_t Padding2[3]; 57 | uint64_t SerialNumber; 58 | uint32_t Checksum; 59 | } NTFS_BOOT_SECTOR; 60 | /* NOTE(review): presumably the expected contents of NTFS_BOOT_SECTOR::FsName - confirm against the caller. */ 61 | #define NTFS_FS_NAME "NTFS " 62 | /* NOTE(review): the ATTRIBUTE_FLAG_* masks presumably apply to ATTRIBUTE_RECORD_HEADER::Flags - confirm against the linked page. */ 63 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header 64 | #define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff 65 | #define ATTRIBUTE_FLAG_SPARSE 0x8000 66 | #define ATTRIBUTE_FLAG_ENCRYPTED 0x4000 67 | /* Type of ATTRIBUTE_RECORD_HEADER::FormCode. */ 68 | enum class NTFS_ATTRIBUTE_FORM : uint8_t { 69 | RESIDENT_FORM = 0, 70 | NONRESIDENT_FORM = 1 71 | }; 72 | /* Type of ATTRIBUTE_RECORD_HEADER::TypeCode. */ 73 | enum class ntfs_attribute : uint32_t { 74 | STANDARD_INFORMATION = 0x10, 75 | ATTRIBUTE_LIST = 0x20, 76 | FILE_NAME = 0x30, 77 | VOLUME_VERSION = 0x40, 78 | SECURITY_DESCRIPTOR = 0x50, 79 | VOLUME_NAME = 0x60, 80 | VOLUME_INFORMATION = 0x70, 81 | DATA = 0x80, 82 | INDEX_ROOT = 0x90, 83 | INDEX_ALLOCATION = 0xA0, 84 | BITMAP = 0xB0, 85 
| REPARSE_POINT = 0xC0, 86 | EA_INFORMATION = 0xD0, 87 | EA = 0xE0, 88 | PROPERTY_SET = 0xF0, 89 | LOGGED_UTILITY_STREAM = 0x100, 90 | }; 91 | 92 | template<> 93 | struct fmt::formatter { 94 | constexpr auto parse(format_parse_context& ctx) { 95 | auto it = ctx.begin(); 96 | 97 | if (it != ctx.end() && *it != '}') 98 | throw format_error("invalid format"); 99 | 100 | return it; 101 | } 102 | 103 | template 104 | auto format(enum ntfs_attribute att, format_context& ctx) const { 105 | switch (att) { 106 | case ntfs_attribute::STANDARD_INFORMATION: 107 | return fmt::format_to(ctx.out(), "STANDARD_INFORMATION"); 108 | 109 | case ntfs_attribute::ATTRIBUTE_LIST: 110 | return fmt::format_to(ctx.out(), "ATTRIBUTE_LIST"); 111 | 112 | case ntfs_attribute::FILE_NAME: 113 | return fmt::format_to(ctx.out(), "FILE_NAME"); 114 | 115 | case ntfs_attribute::VOLUME_VERSION: 116 | return fmt::format_to(ctx.out(), "VOLUME_VERSION"); 117 | 118 | case ntfs_attribute::SECURITY_DESCRIPTOR: 119 | return fmt::format_to(ctx.out(), "SECURITY_DESCRIPTOR"); 120 | 121 | case ntfs_attribute::VOLUME_NAME: 122 | return fmt::format_to(ctx.out(), "VOLUME_NAME"); 123 | 124 | case ntfs_attribute::VOLUME_INFORMATION: 125 | return fmt::format_to(ctx.out(), "VOLUME_INFORMATION"); 126 | 127 | case ntfs_attribute::DATA: 128 | return fmt::format_to(ctx.out(), "DATA"); 129 | 130 | case ntfs_attribute::INDEX_ROOT: 131 | return fmt::format_to(ctx.out(), "INDEX_ROOT"); 132 | 133 | case ntfs_attribute::INDEX_ALLOCATION: 134 | return fmt::format_to(ctx.out(), "INDEX_ALLOCATION"); 135 | 136 | case ntfs_attribute::BITMAP: 137 | return fmt::format_to(ctx.out(), "BITMAP"); 138 | 139 | case ntfs_attribute::REPARSE_POINT: 140 | return fmt::format_to(ctx.out(), "REPARSE_POINT"); 141 | 142 | case ntfs_attribute::EA_INFORMATION: 143 | return fmt::format_to(ctx.out(), "EA_INFORMATION"); 144 | 145 | case ntfs_attribute::EA: 146 | return fmt::format_to(ctx.out(), "EA"); 147 | 148 | case ntfs_attribute::PROPERTY_SET: 149 | 
return fmt::format_to(ctx.out(), "PROPERTY_SET"); 150 | 151 | case ntfs_attribute::LOGGED_UTILITY_STREAM: 152 | return fmt::format_to(ctx.out(), "LOGGED_UTILITY_STREAM"); 153 | 154 | default: 155 | return fmt::format_to(ctx.out(), "{:x}", (uint32_t)att); 156 | } 157 | } 158 | }; 159 | 160 | typedef struct _ATTRIBUTE_RECORD_HEADER { 161 | enum ntfs_attribute TypeCode; 162 | uint16_t RecordLength; 163 | uint16_t Unknown; 164 | NTFS_ATTRIBUTE_FORM FormCode; 165 | uint8_t NameLength; 166 | uint16_t NameOffset; 167 | uint16_t Flags; 168 | uint16_t Instance; 169 | union { 170 | struct { 171 | uint32_t ValueLength; 172 | uint16_t ValueOffset; 173 | uint8_t Reserved[2]; 174 | } Resident; 175 | struct { 176 | uint64_t LowestVcn; 177 | uint64_t HighestVcn; 178 | uint16_t MappingPairsOffset; 179 | uint16_t CompressionUnit; 180 | uint32_t Padding; 181 | uint64_t AllocatedLength; 182 | uint64_t FileSize; 183 | uint64_t ValidDataLength; 184 | uint64_t TotalAllocated; 185 | } Nonresident; 186 | } Form; 187 | } ATTRIBUTE_RECORD_HEADER; 188 | 189 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header 190 | typedef struct { 191 | uint32_t Signature; 192 | uint16_t UpdateSequenceArrayOffset; 193 | uint16_t UpdateSequenceArraySize; 194 | } MULTI_SECTOR_HEADER; 195 | 196 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference 197 | typedef struct { 198 | uint64_t SegmentNumber : 48; 199 | uint64_t SequenceNumber : 16; 200 | } MFT_SEGMENT_REFERENCE; 201 | 202 | // based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and 203 | // http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html 204 | typedef struct { 205 | MULTI_SECTOR_HEADER MultiSectorHeader; 206 | uint64_t LogFileSequenceNumber; 207 | uint16_t SequenceNumber; 208 | uint16_t HardLinkCount; 209 | uint16_t FirstAttributeOffset; 210 | uint16_t Flags; 211 | uint32_t EntryUsedSize; 212 | uint32_t EntryAllocatedSize; 213 | 
MFT_SEGMENT_REFERENCE BaseFileRecordSegment; 214 | uint16_t NextAttributeID; 215 | } FILE_RECORD_SEGMENT_HEADER; 216 | 217 | #define FILE_RECORD_SEGMENT_IN_USE 1 218 | #define FILE_RECORD_IS_DIRECTORY 2 219 | 220 | static const uint32_t NTFS_FILE_SIGNATURE = 0x454c4946; // "FILE" 221 | 222 | #define NTFS_VOLUME_INODE 3 223 | #define NTFS_ROOT_DIR_INODE 5 224 | #define NTFS_BITMAP_INODE 6 225 | #define NTFS_SECURE_INODE 9 226 | 227 | // https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html 228 | 229 | typedef struct { 230 | int64_t CreationTime; 231 | int64_t LastAccessTime; 232 | int64_t LastWriteTime; 233 | int64_t ChangeTime; 234 | uint32_t FileAttributes; 235 | uint32_t MaximumVersions; 236 | uint32_t VersionNumber; 237 | uint32_t ClassId; 238 | uint32_t OwnerId; 239 | uint32_t SecurityId; 240 | uint64_t QuotaCharged; 241 | uint64_t USN; 242 | } STANDARD_INFORMATION; 243 | 244 | #define FILE_ATTRIBUTE_READONLY 0x00000001 245 | #define FILE_ATTRIBUTE_HIDDEN 0x00000002 246 | #define FILE_ATTRIBUTE_SYSTEM 0x00000004 247 | #define FILE_ATTRIBUTE_DIRECTORY 0x00000010 248 | #define FILE_ATTRIBUTE_ARCHIVE 0x00000020 249 | #define FILE_ATTRIBUTE_DEVICE 0x00000040 250 | #define FILE_ATTRIBUTE_NORMAL 0x00000080 251 | #define FILE_ATTRIBUTE_TEMPORARY 0x00000100 252 | #define FILE_ATTRIBUTE_SPARSE_FILE 0x00000200 253 | #define FILE_ATTRIBUTE_REPARSE_POINT 0x00000400 254 | #define FILE_ATTRIBUTE_COMPRESSED 0x00000800 255 | #define FILE_ATTRIBUTE_OFFLINE 0x00001000 256 | #define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x00002000 257 | #define FILE_ATTRIBUTE_ENCRYPTED 0x00004000 258 | #define FILE_ATTRIBUTE_VIRTUAL 0x00010000 259 | 260 | #define FILE_ATTRIBUTE_DIRECTORY_MFT 0x10000000 261 | 262 | // https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html 263 | 264 | enum class file_name_type : uint8_t { 265 | POSIX = 0, 266 | WINDOWS = 1, 267 | DOS = 2, 268 | WINDOWS_AND_DOS = 3 269 | }; 270 | 271 | typedef struct { 272 | MFT_SEGMENT_REFERENCE Parent; 273 | 
int64_t CreationTime; 274 | int64_t LastAccessTime; 275 | int64_t LastWriteTime; 276 | int64_t ChangeTime; 277 | uint64_t AllocationSize; 278 | uint64_t EndOfFile; 279 | uint32_t FileAttributes; 280 | uint32_t EaSize; 281 | uint8_t FileNameLength; 282 | file_name_type Namespace; 283 | char16_t FileName[1]; 284 | } FILE_NAME; 285 | 286 | // https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html 287 | 288 | typedef struct { 289 | uint32_t first_entry; 290 | uint32_t total_size; 291 | uint32_t allocated_size; 292 | uint32_t flags; 293 | } index_node_header; 294 | 295 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html 296 | 297 | #define INDEX_ENTRY_SUBNODE 1 298 | #define INDEX_ENTRY_LAST 2 299 | 300 | typedef struct { 301 | MFT_SEGMENT_REFERENCE file_reference; 302 | uint16_t entry_length; 303 | uint16_t stream_length; 304 | uint32_t flags; 305 | } index_entry; 306 | 307 | // https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html 308 | 309 | typedef struct { 310 | uint32_t attribute_type; 311 | uint32_t collation_rule; 312 | uint32_t bytes_per_index_record; 313 | uint8_t clusters_per_index_record; 314 | uint8_t padding[3]; 315 | index_node_header node_header; 316 | index_entry entries[1]; 317 | } index_root; 318 | 319 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html 320 | 321 | typedef struct { 322 | MULTI_SECTOR_HEADER MultiSectorHeader; 323 | uint64_t sequence_number; 324 | uint64_t vcn; 325 | index_node_header header; 326 | uint16_t update_sequence; 327 | } index_record; 328 | 329 | #define INDEX_RECORD_MAGIC 0x58444e49 // "INDX" 330 | 331 | // https://flatcap.org/linux-ntfs/ntfs/files/secure.html 332 | 333 | typedef struct { 334 | uint32_t hash; 335 | uint32_t id; 336 | uint64_t offset; 337 | uint32_t length; 338 | } sd_entry; 339 | 340 | // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/ns-ntifs-_reparse_data_buffer 341 | 342 | typedef struct { 343 | uint32_t ReparseTag; 344 | uint16_t 
ReparseDataLength; 345 | uint16_t Reserved; 346 | 347 | union { 348 | struct { 349 | uint16_t SubstituteNameOffset; 350 | uint16_t SubstituteNameLength; 351 | uint16_t PrintNameOffset; 352 | uint16_t PrintNameLength; 353 | uint32_t Flags; 354 | char16_t PathBuffer[1]; 355 | } SymbolicLinkReparseBuffer; 356 | 357 | struct { 358 | uint16_t SubstituteNameOffset; 359 | uint16_t SubstituteNameLength; 360 | uint16_t PrintNameOffset; 361 | uint16_t PrintNameLength; 362 | char16_t PathBuffer[1]; 363 | } MountPointReparseBuffer; 364 | 365 | struct { 366 | uint8_t DataBuffer[1]; 367 | } GenericReparseBuffer; 368 | 369 | struct { 370 | uint32_t unknown; 371 | char name[1]; 372 | } LxSymlink; // undocumented 373 | }; 374 | } REPARSE_DATA_BUFFER; 375 | 376 | typedef struct { 377 | uint32_t unknown; 378 | char name[1]; 379 | } REPARSE_DATA_BUFFER_LX_SYMLINK; 380 | 381 | #ifndef IO_REPARSE_TAG_SYMLINK 382 | #define IO_REPARSE_TAG_SYMLINK 0xa000000c 383 | #endif 384 | 385 | #define IO_REPARSE_TAG_LX_SYMLINK 0xa000001d 386 | 387 | #ifndef IO_REPARSE_TAG_WOF 388 | #define IO_REPARSE_TAG_WOF 0x80000017 389 | #endif 390 | 391 | #ifndef SYMLINK_FLAG_RELATIVE 392 | #define SYMLINK_FLAG_RELATIVE 0x00000001 393 | #endif 394 | 395 | // https://flatcap.org/linux-ntfs/ntfs/attributes/volume_information.html 396 | 397 | typedef struct { 398 | uint64_t Unknown1; 399 | uint8_t MajorVersion; 400 | uint8_t MinorVersion; 401 | uint16_t Flags; 402 | uint32_t Unknown2; 403 | } VOLUME_INFORMATION; 404 | 405 | #define NTFS_VOLUME_DIRTY 0x0001 406 | #define NTFS_VOLUME_RESIZE_JOURNAL 0x0002 407 | #define NTFS_VOLUME_UPGRADE_ON_MOUNT 0x0004 408 | #define NTFS_VOLUME_MOUNTED_ON_NT4 0x0008 409 | #define NTFS_VOLUME_DELETE_USN_UNDERWAY 0x0010 410 | #define NTFS_VOLUME_REPAIR_OBJECT_IDS 0x0020 411 | #define NTFS_VOLUME_MODIFIED_BY_CHKDSK 0x8000 412 | 413 | // https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html 414 | 415 | typedef struct { 416 | enum ntfs_attribute type; 417 | uint16_t 
record_length; 418 | uint8_t name_length; 419 | uint8_t name_offset; 420 | uint64_t starting_vcn; 421 | MFT_SEGMENT_REFERENCE file_reference; 422 | uint16_t instance; 423 | } attribute_list_entry; 424 | 425 | #define WOF_CURRENT_VERSION 1 426 | 427 | #define WOF_PROVIDER_WIM 1 428 | #define WOF_PROVIDER_FILE 2 429 | 430 | typedef struct { 431 | uint32_t ReparseTag; 432 | uint16_t ReparseDataLength; 433 | uint16_t Reserved; 434 | uint8_t DataBuffer[1]; 435 | } reparse_point_header; // edited form of REPARSE_DATA_BUFFER 436 | 437 | typedef struct { 438 | uint32_t Version; 439 | uint32_t Provider; 440 | } wof_external_info; // WOF_EXTERNAL_INFO in winioctl.h 441 | 442 | #define FILE_PROVIDER_CURRENT_VERSION 1 443 | 444 | #define FILE_PROVIDER_COMPRESSION_XPRESS4K 0 445 | #define FILE_PROVIDER_COMPRESSION_LZX 1 446 | #define FILE_PROVIDER_COMPRESSION_XPRESS8K 2 447 | #define FILE_PROVIDER_COMPRESSION_XPRESS16K 3 448 | 449 | typedef struct { 450 | uint32_t Version; 451 | uint32_t Algorithm; 452 | } file_provider_external_info_v0; // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h 453 | 454 | // cf. 
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_file_full_ea_information

// Header of one extended-attribute entry. EaName is a variable-length tail
// whose length is given by EaNameLength.
typedef struct {
    uint32_t NextEntryOffset;
    uint8_t Flags;
    uint8_t EaNameLength;
    uint16_t EaValueLength;
    char EaName[1];
} ea_data;

// Pair of device numbers (major, minor).
typedef struct {
    uint32_t major;
    uint32_t minor;
} lxdev;

// https://dfir.ru/2019/01/19/ntfs-today/

// POSIX-style metadata record: mode, owner ids, device number and timestamps
// with separate nanosecond parts.
typedef struct {
    uint16_t format;
    uint16_t version;
    uint32_t mode;
    uint32_t uid;
    uint32_t gid;
    uint32_t rdev;
    uint32_t atime_ns;
    uint32_t mtime_ns;
    uint32_t ctime_ns;
    uint64_t atime;
    uint64_t mtime;
    uint64_t ctime;
} lxattrb;

#pragma pack(pop)

class ntfs;

// One run of a non-resident attribute: `length` units starting at virtual
// cluster `vcn`, located at logical cluster `lcn`.
// NOTE(review): units are presumably clusters - confirm against ntfs.cpp.
struct mapping {
    mapping(uint64_t lcn, uint64_t vcn, uint64_t length) : lcn(lcn), vcn(vcn), length(length) { }

    uint64_t lcn;
    uint64_t vcn;
    uint64_t length;
};

// A single file on an NTFS volume, identified by inode number. Member
// functions are implemented outside this header.
// NOTE(review): template arguments (std::list, std::function, ...) are missing
// in this copy of the file.
class ntfs_file {
public:
    ntfs_file(ntfs& dev, uint64_t inode);
    // Reads from the attribute of the given type and name; defaults select the
    // unnamed DATA attribute.
    buffer_t read(uint64_t offset = 0, uint32_t length = 0, enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");
    std::list read_mappings(enum ntfs_attribute type = ntfs_attribute::DATA, std::u16string_view name = u"");

    // True if the file record has FILE_RECORD_IS_DIRECTORY set in its flags.
    bool is_directory() const {
        return file_record->Flags & FILE_RECORD_IS_DIRECTORY;
    }

    void loop_through_atts(const std::function& func);
    std::string get_filename();

    // NOTE(review): presumably points into file_record_buf - confirm in ntfs.cpp.
    FILE_RECORD_SEGMENT_HEADER* file_record;

private:
    buffer_t read_nonresident_attribute(uint64_t offset, uint32_t length, const ATTRIBUTE_RECORD_HEADER* att);

    buffer_t file_record_buf;
    ntfs& dev;
    uint64_t inode;
};

// An NTFS volume. Holds the OS handle (HANDLE on Windows, file descriptor
// elsewhere), which the destructor closes.
class ntfs {
public:
    ntfs(const std::string& fn);

    ~ntfs() {
#ifdef _WIN32
        CloseHandle(h);
#else
        close(fd);
#endif
    }

    void seek(uint64_t pos);
    void read(uint8_t* buf, size_t length);
    void write(const uint8_t* buf, size_t length);
    std::string_view find_sd(uint32_t id, ntfs_file& secure);

    std::unique_ptr mft;
    buffer_t boot_sector_buf;
    // NOTE(review): presumably points into boot_sector_buf - confirm in ntfs.cpp.
    NTFS_BOOT_SECTOR* boot_sector;
    uint64_t file_record_size;
    std::map sd_list;

#ifdef _WIN32
    HANDLE h;
#else
    int fd;
#endif
};

// ntfs.cpp
void read_nonresident_mappings(const ATTRIBUTE_RECORD_HEADER& att, std::list& mappings,
                               uint32_t cluster_size, uint64_t vdl);
void populate_skip_list(ntfs& dev, uint64_t inode, std::list& skiplist);
--------------------------------------------------------------------------------
/src/ntfs2btrfs.h:
--------------------------------------------------------------------------------
/* Copyright (c) Mark Harmstone 2020
 *
 * This file is part of ntfs2btrfs.
 *
 * Ntfs2btrfs is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public Licence as published by
 * the Free Software Foundation, either version 2 of the Licence, or
 * (at your option) any later version.
 *
 * Ntfs2btrfs is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public Licence for more details.
 *
 * You should have received a copy of the GNU General Public Licence
 * along with Ntfs2btrfs. If not, see .
*/ 17 | 18 | #pragma once 19 | 20 | #include "btrfs.h" 21 | #include "config.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #ifdef _WIN32 30 | #include 31 | #endif 32 | 33 | #pragma warning(push) 34 | #pragma warning(disable : 26495 26451 26437 26812) 35 | #include 36 | #include 37 | #pragma warning(pop) 38 | 39 | #ifdef _MSC_VER 40 | 41 | #ifdef _M_IX86 42 | #define __i386__ 43 | #elif defined(_M_X64) 44 | #define __x86_64__ 45 | #endif 46 | 47 | #endif 48 | 49 | #ifdef _WIN32 50 | class last_error : public std::exception { 51 | public: 52 | last_error(std::string_view function, int le) { 53 | std::string nice_msg; 54 | 55 | { 56 | char* fm; 57 | 58 | if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, 59 | le, 0, reinterpret_cast(&fm), 0, nullptr)) { 60 | try { 61 | std::string_view s = fm; 62 | 63 | while (!s.empty() && (s[s.length() - 1] == u'\r' || s[s.length() - 1] == u'\n')) { 64 | s.remove_suffix(1); 65 | } 66 | 67 | nice_msg = s; 68 | } catch (...) { 69 | LocalFree(fm); 70 | throw; 71 | } 72 | 73 | LocalFree(fm); 74 | } 75 | } 76 | 77 | msg = std::string(function) + " failed (error " + std::to_string(le) + (!nice_msg.empty() ? (", " + nice_msg) : "") + ")."; 78 | } 79 | 80 | const char* what() const noexcept { 81 | return msg.c_str(); 82 | } 83 | 84 | private: 85 | std::string msg; 86 | }; 87 | 88 | class handle_closer { 89 | public: 90 | typedef HANDLE pointer; 91 | 92 | void operator()(HANDLE h) { 93 | if (h == INVALID_HANDLE_VALUE) 94 | return; 95 | 96 | CloseHandle(h); 97 | } 98 | }; 99 | 100 | typedef std::unique_ptr unique_handle; 101 | #endif 102 | 103 | class _formatted_error : public std::exception { 104 | public: 105 | template 106 | _formatted_error(const T& s, Args&&... 
args) { 107 | msg = fmt::format(s, std::forward(args)...); 108 | } 109 | 110 | const char* what() const noexcept { 111 | return msg.c_str(); 112 | } 113 | 114 | private: 115 | std::string msg; 116 | }; 117 | 118 | #define formatted_error(s, ...) _formatted_error(FMT_COMPILE(s), ##__VA_ARGS__) 119 | 120 | struct space { 121 | space(uint64_t offset, uint64_t length) : offset(offset), length(length) { } 122 | 123 | uint64_t offset; 124 | uint64_t length; 125 | }; 126 | 127 | struct chunk { 128 | chunk(uint64_t offset, uint64_t length, uint64_t disk_start, uint64_t type) : offset(offset), length(length), disk_start(disk_start), type(type) { } 129 | 130 | uint64_t offset; 131 | uint64_t length; 132 | uint64_t disk_start; 133 | uint64_t type; 134 | std::list space_list; 135 | bool added = false; 136 | uint64_t used = 0; 137 | }; 138 | 139 | struct data_alloc { 140 | data_alloc(uint64_t offset, uint64_t length, uint64_t inode = 0, uint64_t file_offset = 0, bool relocated = false, bool not_in_img = false) : 141 | offset(offset), length(length), inode(inode), file_offset(file_offset), relocated(relocated), not_in_img(not_in_img) { } 142 | 143 | uint64_t offset; 144 | uint64_t length; 145 | uint64_t inode; 146 | uint64_t file_offset; 147 | bool relocated; 148 | bool not_in_img; 149 | }; 150 | 151 | template> 152 | class default_init_allocator : public A { 153 | public: 154 | typedef std::allocator_traits a_t; 155 | 156 | template 157 | struct rebind { 158 | using other = default_init_allocator>; 159 | }; 160 | 161 | using A::A; 162 | 163 | template 164 | void construct(U* ptr) noexcept(std::is_nothrow_default_constructible::value) { 165 | ::new(static_cast(ptr)) U; 166 | } 167 | 168 | template 169 | void construct(U* ptr, Args&&... 
args) { 170 | a_t::construct(static_cast(*this), ptr, std::forward(args)...); 171 | } 172 | }; 173 | 174 | using buffer_t = std::vector>; 175 | 176 | static bool inline operator<(const KEY& a, const KEY& b) { 177 | if (a.obj_id < b.obj_id) 178 | return true; 179 | else if (a.obj_id > b.obj_id) 180 | return false; 181 | 182 | if (a.obj_type < b.obj_type) 183 | return true; 184 | else if (a.obj_type > b.obj_type) 185 | return false; 186 | 187 | if (a.offset < b.offset) 188 | return true; 189 | 190 | return false; 191 | } 192 | 193 | class ntfs; 194 | 195 | class root { 196 | public: 197 | root(uint64_t id) : id(id) { } 198 | 199 | void create_trees(root& extent_root, enum btrfs_csum_type csum_type); 200 | void write_trees(ntfs& dev); 201 | 202 | uint64_t id; 203 | std::map items; 204 | std::list trees; 205 | uint64_t tree_addr; 206 | uint8_t level; 207 | uint64_t metadata_size = 0; 208 | std::list> addresses, old_addresses; 209 | bool allocations_done = false; 210 | bool readonly = false; 211 | std::map dir_seqs; 212 | std::map dir_size; 213 | }; 214 | 215 | // from sys/stat.h 216 | #define __S_IFMT 0170000 /* These bits determine file type. */ 217 | #define __S_IFDIR 0040000 /* Directory. */ 218 | #define __S_IFCHR 0020000 /* Character device. */ 219 | #define __S_IFBLK 0060000 /* Block device. */ 220 | #define __S_IFREG 0100000 /* Regular file. */ 221 | #define __S_IFIFO 0010000 /* FIFO. */ 222 | #define __S_IFLNK 0120000 /* Symbolic link. */ 223 | #define __S_IFSOCK 0140000 /* Socket. 
*/ 224 | #define __S_ISTYPE(mode, mask) (((mode) & __S_IFMT) == (mask)) 225 | 226 | #ifndef S_ISDIR 227 | #define S_ISDIR(mode) __S_ISTYPE((mode), __S_IFDIR) 228 | #endif 229 | 230 | #ifndef S_IRUSR 231 | #define S_IRUSR 0000400 232 | #endif 233 | 234 | #ifndef S_IWUSR 235 | #define S_IWUSR 0000200 236 | #endif 237 | 238 | #ifndef S_IXUSR 239 | #define S_IXUSR 0000100 240 | #endif 241 | 242 | #ifndef S_IRGRP 243 | #define S_IRGRP (S_IRUSR >> 3) 244 | #endif 245 | 246 | #ifndef S_IWGRP 247 | #define S_IWGRP (S_IWUSR >> 3) 248 | #endif 249 | 250 | #ifndef S_IXGRP 251 | #define S_IXGRP (S_IXUSR >> 3) 252 | #endif 253 | 254 | #ifndef S_IROTH 255 | #define S_IROTH (S_IRGRP >> 3) 256 | #endif 257 | 258 | #ifndef S_IWOTH 259 | #define S_IWOTH (S_IWGRP >> 3) 260 | #endif 261 | 262 | #ifndef S_IXOTH 263 | #define S_IXOTH (S_IXGRP >> 3) 264 | #endif 265 | 266 | #ifndef S_ISUID 267 | #define S_ISUID 0004000 268 | #endif 269 | 270 | #ifndef S_ISGID 271 | #define S_ISGID 0002000 272 | #endif 273 | 274 | #ifndef S_ISVTX 275 | #define S_ISVTX 0001000 276 | #endif 277 | 278 | #pragma pack(push,1) 279 | 280 | typedef struct { 281 | CHUNK_ITEM chunk_item; 282 | CHUNK_ITEM_STRIPE stripe; 283 | } chunk_item_one_stripe; 284 | 285 | typedef struct { 286 | EXTENT_ITEM extent_item; 287 | btrfs_key_type type; 288 | TREE_BLOCK_REF tbr; 289 | } metadata_item; 290 | 291 | typedef struct { 292 | EXTENT_ITEM extent_item; 293 | btrfs_key_type type; 294 | EXTENT_DATA_REF edr; 295 | } data_item; 296 | 297 | typedef struct { 298 | EXTENT_ITEM extent_item; 299 | btrfs_key_type type1; 300 | EXTENT_DATA_REF edr1; 301 | btrfs_key_type type2; 302 | EXTENT_DATA_REF edr2; 303 | } data_item2; 304 | 305 | #pragma pack(pop) 306 | 307 | struct relocation { 308 | relocation(uint64_t old_start, uint64_t length, uint64_t new_start) : old_start(old_start), length(length), new_start(new_start) { } 309 | 310 | uint64_t old_start; 311 | uint64_t length; 312 | uint64_t new_start; 313 | }; 314 | 315 | static inline 
uint64_t sector_align(uint64_t v, uint64_t s) { 316 | return ((v + s - 1) / s) * s; 317 | } 318 | 319 | template<> 320 | struct fmt::formatter { 321 | constexpr auto parse(format_parse_context& ctx) { 322 | auto it = ctx.begin(); 323 | 324 | if (it != ctx.end() && *it != '}') 325 | throw format_error("invalid format"); 326 | 327 | return it; 328 | } 329 | 330 | template 331 | auto format(enum btrfs_key_type k, format_context& ctx) const { 332 | switch (k) { 333 | case btrfs_key_type::INODE_ITEM: 334 | return fmt::format_to(ctx.out(), "INODE_ITEM"); 335 | case btrfs_key_type::INODE_REF: 336 | return fmt::format_to(ctx.out(), "INODE_REF"); 337 | case btrfs_key_type::INODE_EXTREF: 338 | return fmt::format_to(ctx.out(), "INODE_EXTREF"); 339 | case btrfs_key_type::XATTR_ITEM: 340 | return fmt::format_to(ctx.out(), "XATTR_ITEM"); 341 | case btrfs_key_type::ORPHAN_INODE: 342 | return fmt::format_to(ctx.out(), "ORPHAN_INODE"); 343 | case btrfs_key_type::DIR_ITEM: 344 | return fmt::format_to(ctx.out(), "DIR_ITEM"); 345 | case btrfs_key_type::DIR_INDEX: 346 | return fmt::format_to(ctx.out(), "DIR_INDEX"); 347 | case btrfs_key_type::EXTENT_DATA: 348 | return fmt::format_to(ctx.out(), "EXTENT_DATA"); 349 | case btrfs_key_type::EXTENT_CSUM: 350 | return fmt::format_to(ctx.out(), "EXTENT_CSUM"); 351 | case btrfs_key_type::ROOT_ITEM: 352 | return fmt::format_to(ctx.out(), "ROOT_ITEM"); 353 | case btrfs_key_type::ROOT_BACKREF: 354 | return fmt::format_to(ctx.out(), "ROOT_BACKREF"); 355 | case btrfs_key_type::ROOT_REF: 356 | return fmt::format_to(ctx.out(), "ROOT_REF"); 357 | case btrfs_key_type::EXTENT_ITEM: 358 | return fmt::format_to(ctx.out(), "EXTENT_ITEM"); 359 | case btrfs_key_type::METADATA_ITEM: 360 | return fmt::format_to(ctx.out(), "METADATA_ITEM"); 361 | case btrfs_key_type::TREE_BLOCK_REF: 362 | return fmt::format_to(ctx.out(), "TREE_BLOCK_REF"); 363 | case btrfs_key_type::EXTENT_DATA_REF: 364 | return fmt::format_to(ctx.out(), "EXTENT_DATA_REF"); 365 | case 
btrfs_key_type::EXTENT_REF_V0: 366 | return fmt::format_to(ctx.out(), "EXTENT_REF_V0"); 367 | case btrfs_key_type::SHARED_BLOCK_REF: 368 | return fmt::format_to(ctx.out(), "SHARED_BLOCK_REF"); 369 | case btrfs_key_type::SHARED_DATA_REF: 370 | return fmt::format_to(ctx.out(), "SHARED_DATA_REF"); 371 | case btrfs_key_type::BLOCK_GROUP_ITEM: 372 | return fmt::format_to(ctx.out(), "BLOCK_GROUP_ITEM"); 373 | case btrfs_key_type::FREE_SPACE_INFO: 374 | return fmt::format_to(ctx.out(), "FREE_SPACE_INFO"); 375 | case btrfs_key_type::FREE_SPACE_EXTENT: 376 | return fmt::format_to(ctx.out(), "FREE_SPACE_EXTENT"); 377 | case btrfs_key_type::FREE_SPACE_BITMAP: 378 | return fmt::format_to(ctx.out(), "FREE_SPACE_BITMAP"); 379 | case btrfs_key_type::DEV_EXTENT: 380 | return fmt::format_to(ctx.out(), "DEV_EXTENT"); 381 | case btrfs_key_type::DEV_ITEM: 382 | return fmt::format_to(ctx.out(), "DEV_ITEM"); 383 | case btrfs_key_type::CHUNK_ITEM: 384 | return fmt::format_to(ctx.out(), "CHUNK_ITEM"); 385 | case btrfs_key_type::TEMP_ITEM: 386 | return fmt::format_to(ctx.out(), "TEMP_ITEM"); 387 | case btrfs_key_type::DEV_STATS: 388 | return fmt::format_to(ctx.out(), "DEV_STATS"); 389 | case btrfs_key_type::SUBVOL_UUID: 390 | return fmt::format_to(ctx.out(), "SUBVOL_UUID"); 391 | case btrfs_key_type::SUBVOL_REC_UUID: 392 | return fmt::format_to(ctx.out(), "SUBVOL_REC_UUID"); 393 | default: 394 | return fmt::format_to(ctx.out(), "{:x}", (uint8_t)k); 395 | } 396 | } 397 | }; 398 | 399 | static const uint64_t image_subvol_id = 0x100; 400 | static const char image_filename[] = "ntfs.img"; 401 | 402 | // decomp.cpp 403 | buffer_t lznt1_decompress(std::string_view compdata, uint32_t size); 404 | buffer_t do_lzx_decompress(std::string_view compdata, uint32_t size); 405 | buffer_t do_xpress_decompress(std::string_view compdata, uint32_t size, uint32_t chunk_size); 406 | 407 | // compress.cpp 408 | #ifdef WITH_ZLIB 409 | std::optional zlib_compress(std::string_view data, uint32_t cluster_size); 410 
| #endif 411 | #ifdef WITH_LZO 412 | std::optional lzo_compress(std::string_view data, uint32_t cluster_size); 413 | #endif 414 | #ifdef WITH_ZSTD 415 | std::optional zstd_compress(std::string_view data, uint32_t cluster_size); 416 | #endif 417 | 418 | // sha256.c 419 | extern "C" void calc_sha256(uint8_t* hash, const void* input, size_t len); 420 | 421 | // blake2b-ref.c 422 | extern "C" void blake2b(void *out, size_t outlen, const void* in, size_t inlen); 423 | 424 | // rollback.cpp 425 | void rollback(const std::string& fn); 426 | 427 | // ntfs2btrfs.cpp 428 | std::string utf16_to_utf8(std::u16string_view sv); 429 | -------------------------------------------------------------------------------- /src/rollback.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2021 2 | * 3 | * This file is part of ntfs2btrfs. 4 | * 5 | * Ntfs2btrfs is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * Ntfs2btrfs is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with Ntfs2btrfs. If not, see . 
*/ 17 | 18 | #include "ntfs2btrfs.h" 19 | #include "crc32c.h" 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | using chunks_t = map; 28 | 29 | #define INCOMPAT_SUPPORTED (BTRFS_INCOMPAT_FLAGS_MIXED_BACKREF | BTRFS_INCOMPAT_FLAGS_DEFAULT_SUBVOL | BTRFS_INCOMPAT_FLAGS_MIXED_GROUPS | \ 30 | BTRFS_INCOMPAT_FLAGS_COMPRESS_LZO | BTRFS_INCOMPAT_FLAGS_BIG_METADATA | BTRFS_INCOMPAT_FLAGS_RAID56 | \ 31 | BTRFS_INCOMPAT_FLAGS_EXTENDED_IREF | BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA | BTRFS_INCOMPAT_FLAGS_NO_HOLES | \ 32 | BTRFS_INCOMPAT_FLAGS_COMPRESS_ZSTD | BTRFS_INCOMPAT_FLAGS_METADATA_UUID | BTRFS_INCOMPAT_FLAGS_RAID1C34) 33 | 34 | class btrfs { 35 | public: 36 | btrfs(const string& fn); 37 | uint64_t find_root_addr(uint64_t root); 38 | bool walk_tree(uint64_t addr, const function& func); 39 | const pair& find_chunk(uint64_t addr); 40 | buffer_t raw_read(uint64_t phys_addr, uint32_t len); 41 | void raw_write(uint64_t phys_addr, const buffer_t& buf); 42 | 43 | private: 44 | superblock read_superblock(); 45 | void read_chunks(); 46 | buffer_t read(uint64_t addr, uint32_t len); 47 | 48 | #ifdef _WIN32 49 | unique_handle h; 50 | bool drive = false; 51 | #else 52 | fstream f; 53 | #endif 54 | superblock sb; 55 | chunks_t chunks; 56 | }; 57 | 58 | 59 | btrfs::btrfs(const string& fn) { 60 | #ifdef _WIN32 61 | DWORD ret; 62 | wstring_convert, char16_t> convert; 63 | u16string namew; 64 | 65 | if ((fn.length() == 2 || fn.length() == 3) && fn[0] >= 'A' && fn[0] <= 'Z' && fn[1] == ':' && (fn.length() == 2 || fn[2] == '\\')) { 66 | namew = u"\\\\.\\X:"; 67 | namew[4] = fn[0]; 68 | drive = true; 69 | } else 70 | namew = convert.from_bytes(fn.data(), fn.data() + fn.length()); 71 | 72 | h.reset(CreateFileW((WCHAR*)namew.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, 73 | nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr)); 74 | 75 | if (h.get() == INVALID_HANDLE_VALUE) 76 | throw last_error("CreateFile", 
GetLastError()); 77 | 78 | if (drive) { 79 | if (!DeviceIoControl(h.get(), FSCTL_LOCK_VOLUME, nullptr, 0, nullptr, 0, &ret, nullptr)) 80 | throw last_error("FSCTL_LOCK_VOLUME", GetLastError()); 81 | } 82 | #else 83 | f = fstream(fn, ios_base::in | ios_base::out | ios::binary); 84 | 85 | if (!f.good()) 86 | throw formatted_error("Failed to open {}.", fn); 87 | #endif 88 | 89 | sb = read_superblock(); 90 | 91 | read_chunks(); 92 | } 93 | 94 | superblock btrfs::read_superblock() { 95 | optional sb; 96 | uint64_t device_size; 97 | 98 | // find length of volume 99 | 100 | #ifdef _WIN32 101 | if (drive) { 102 | GET_LENGTH_INFORMATION gli; 103 | DWORD ret; 104 | 105 | if (!DeviceIoControl(h.get(), IOCTL_DISK_GET_LENGTH_INFO, nullptr, 0, &gli, sizeof(gli), &ret, nullptr)) 106 | throw last_error("IOCTL_DISK_GET_LENGTH_INFO", GetLastError()); 107 | 108 | device_size = gli.Length.QuadPart; 109 | } else { 110 | LARGE_INTEGER li; 111 | 112 | if (!GetFileSizeEx(h.get(), &li)) 113 | throw last_error("GetFileSizeEx", GetLastError()); 114 | 115 | device_size = li.QuadPart; 116 | } 117 | #else 118 | f.seekg(0, ios::end); 119 | 120 | if (f.fail()) 121 | throw runtime_error("Error seeking to end of device."); 122 | 123 | device_size = f.tellg(); 124 | #endif 125 | 126 | unsigned int i = 0; 127 | while (superblock_addrs[i] != 0 && superblock_addrs[i] + sizeof(superblock) < device_size) { 128 | auto buf = raw_read(superblock_addrs[i], sizeof(superblock)); 129 | 130 | const auto& sb2 = *(superblock*)buf.data(); 131 | 132 | if (sb2.magic != BTRFS_MAGIC) { 133 | i++; 134 | continue; 135 | } 136 | 137 | // FIXME - check checksum 138 | 139 | if (!sb.has_value() || sb2.generation > sb.value().generation) 140 | sb = sb2; 141 | 142 | i++; 143 | } 144 | 145 | if (!sb.has_value()) 146 | throw runtime_error("Not a Btrfs volume."); 147 | 148 | if (sb.value().incompat_flags & ~INCOMPAT_SUPPORTED) 149 | throw formatted_error("Unsupported incompat flags {:x}.", sb.value().incompat_flags & 
~INCOMPAT_SUPPORTED); 150 | 151 | return sb.value(); 152 | } 153 | 154 | const pair& btrfs::find_chunk(uint64_t addr) { 155 | for (const auto& c : chunks) { 156 | if (addr < c.first) 157 | continue; 158 | 159 | const auto& ci = *(CHUNK_ITEM*)c.second.data(); 160 | 161 | if (addr < c.first + ci.size) 162 | return c; 163 | } 164 | 165 | throw formatted_error("Could not find chunk for virtual address {:x}.", addr); 166 | } 167 | 168 | buffer_t btrfs::raw_read(uint64_t phys_addr, uint32_t len) { 169 | #ifdef _WIN32 170 | LARGE_INTEGER posli; 171 | 172 | posli.QuadPart = phys_addr; 173 | 174 | if (!SetFilePointerEx(h.get(), posli, nullptr, FILE_BEGIN)) 175 | throw last_error("SetFilePointerEx", GetLastError()); 176 | #else 177 | f.seekg(phys_addr); 178 | 179 | if (f.fail()) 180 | throw formatted_error("Error seeking to {:x}.", phys_addr); 181 | #endif 182 | 183 | buffer_t ret(len); 184 | 185 | #ifdef _WIN32 186 | DWORD read; 187 | 188 | if (!ReadFile(h.get(), ret.data(), (DWORD)len, &read, nullptr)) 189 | throw last_error("ReadFile", GetLastError()); 190 | #else 191 | f.read((char*)ret.data(), ret.size()); 192 | 193 | if (f.fail()) 194 | throw formatted_error("Error reading {:x} bytes at {:x}.", ret.size(), phys_addr); 195 | #endif 196 | 197 | return ret; 198 | } 199 | 200 | void btrfs::raw_write(uint64_t phys_addr, const buffer_t& buf) { 201 | #ifdef _WIN32 202 | LARGE_INTEGER posli; 203 | 204 | posli.QuadPart = phys_addr; 205 | 206 | if (!SetFilePointerEx(h.get(), posli, nullptr, FILE_BEGIN)) 207 | throw last_error("SetFilePointerEx", GetLastError()); 208 | #else 209 | f.seekg(phys_addr); 210 | 211 | if (f.fail()) 212 | throw formatted_error("Error seeking to {:x}.", phys_addr); 213 | #endif 214 | 215 | #ifdef _WIN32 216 | DWORD written; 217 | 218 | if (!WriteFile(h.get(), buf.data(), (DWORD)buf.size(), &written, nullptr)) 219 | throw last_error("WriteFile", GetLastError()); 220 | #else 221 | f.write((char*)buf.data(), buf.size()); 222 | 223 | if (f.fail()) 224 | 
throw formatted_error("Error writing {:x} bytes at {:x}.", buf.size(), phys_addr); 225 | #endif 226 | } 227 | 228 | buffer_t btrfs::read(uint64_t addr, uint32_t len) { 229 | const auto& cp = find_chunk(addr); 230 | const auto& c = *(CHUNK_ITEM*)cp.second.data(); 231 | 232 | if (c.type & BLOCK_FLAG_RAID0) 233 | throw runtime_error("FIXME - RAID 0"); 234 | else if (c.type & BLOCK_FLAG_RAID1) 235 | throw runtime_error("FIXME - RAID 1"); 236 | else if (c.type & BLOCK_FLAG_DUPLICATE) 237 | throw runtime_error("FIXME - DUPLICATE"); 238 | else if (c.type & BLOCK_FLAG_RAID10) 239 | throw runtime_error("FIXME - RAID10"); 240 | else if (c.type & BLOCK_FLAG_RAID5) 241 | throw runtime_error("FIXME - RAID5"); 242 | else if (c.type & BLOCK_FLAG_RAID6) 243 | throw runtime_error("FIXME - RAID6"); 244 | else if (c.type & BLOCK_FLAG_RAID1C3) 245 | throw runtime_error("FIXME - RAID1C3"); 246 | else if (c.type & BLOCK_FLAG_RAID1C4) 247 | throw runtime_error("FIXME - RAID1C4"); 248 | 249 | // SINGLE 250 | 251 | if (c.num_stripes == 0) 252 | throw runtime_error("CHUNK_ITEM had num_stripes == 0"); 253 | 254 | auto* cis = (CHUNK_ITEM_STRIPE*)(&c + 1); 255 | 256 | if (cis[0].dev_id != sb.dev_item.dev_id) 257 | throw runtime_error("Reading from other device not implemented."); 258 | 259 | return raw_read(addr - cp.first + cis[0].offset, len); 260 | } 261 | 262 | bool btrfs::walk_tree(uint64_t addr, const function& func) { 263 | auto tree = read(addr, sb.node_size); 264 | 265 | // FIXME - check checksum 266 | 267 | auto& th = *(tree_header*)tree.data(); 268 | 269 | // if root is not 0, recurse 270 | if (th.level != 0) { 271 | auto nodes = (internal_node*)(&th + 1); 272 | 273 | for (unsigned int i = 0; i < th.num_items; i++) { 274 | auto ret = walk_tree(nodes[i].address, func); 275 | 276 | if (!ret) 277 | return false; 278 | } 279 | 280 | return true; 281 | } 282 | 283 | auto nodes = (leaf_node*)(&th + 1); 284 | 285 | for (unsigned int i = 0; i < th.num_items; i++) { 286 | const auto& n = 
nodes[i]; 287 | bool b; 288 | 289 | if (n.size == 0) 290 | b = func(n.key, {}); 291 | else 292 | b = func(n.key, { (char*)&th + sizeof(tree_header) + n.offset, n.size }); 293 | 294 | if (!b) 295 | return false; 296 | } 297 | 298 | return true; 299 | } 300 | 301 | void btrfs::read_chunks() { 302 | auto ptr = (uint8_t*)&sb.sys_chunk_array; 303 | 304 | do { 305 | auto& key = *(KEY*)ptr; 306 | 307 | if (key.obj_type != btrfs_key_type::CHUNK_ITEM) 308 | break; 309 | 310 | auto& ci = *(CHUNK_ITEM*)(ptr + sizeof(key)); 311 | 312 | basic_string_view chunk_item{ptr + sizeof(key), sizeof(ci) + (ci.num_stripes * sizeof(CHUNK_ITEM_STRIPE))}; 313 | 314 | chunks.emplace(key.offset, buffer_t{chunk_item.data(), chunk_item.data() + chunk_item.size()}); 315 | 316 | ptr += sizeof(key) + chunk_item.size(); 317 | } while (ptr < &sb.sys_chunk_array[SYS_CHUNK_ARRAY_SIZE]); 318 | 319 | #if 0 320 | for (const auto& c : chunks) { 321 | fmt::print("{:x}\n", c.first); 322 | 323 | const auto& ci = *(CHUNK_ITEM*)c.second.data(); 324 | 325 | fmt::print(" size {:x}, root_id {:x}, stripe_length {:x}, type {:x}, opt_io_alignment {:x}, opt_io_width {:x}, sector_size {:x}, num_stripes {:x}, sub_stripes {:x}\n", 326 | ci.size, ci.root_id, ci.stripe_length, ci.type, ci.opt_io_alignment, ci.opt_io_width, ci.sector_size, ci.num_stripes, ci.sub_stripes); 327 | 328 | auto* cis = (CHUNK_ITEM_STRIPE*)(&ci + 1); 329 | 330 | for (unsigned int i = 0; i < ci.num_stripes; i++) { 331 | fmt::print(" dev_id {:x}, offset {:x}\n", cis[i].dev_id, cis[i].offset); 332 | } 333 | } 334 | #endif 335 | 336 | chunks_t chunks2; 337 | 338 | walk_tree(sb.chunk_tree_addr, [&](const KEY& key, string_view data) { 339 | if (key.obj_type != btrfs_key_type::CHUNK_ITEM) 340 | return true; 341 | 342 | chunks2.emplace(key.offset, buffer_t{data.data(), data.data() + data.size()}); 343 | 344 | return true; 345 | }); 346 | 347 | chunks.swap(chunks2); 348 | } 349 | 350 | uint64_t btrfs::find_root_addr(uint64_t root) { 351 | optional ret; 352 
| 353 | walk_tree(sb.root_tree_addr, [&](const KEY& key, string_view data) { 354 | if (key.obj_id != root || key.obj_type != btrfs_key_type::ROOT_ITEM) 355 | return true; 356 | 357 | const auto& ri = *(ROOT_ITEM*)data.data(); 358 | 359 | ret = ri.block_number; 360 | 361 | return false; 362 | }); 363 | 364 | if (!ret.has_value()) 365 | throw formatted_error("Could not find address for root {:x}.", root); 366 | 367 | return ret.value(); 368 | } 369 | 370 | void rollback(const string& fn) { 371 | btrfs b(fn); 372 | 373 | auto img_root_addr = b.find_root_addr(image_subvol_id); 374 | 375 | // find file called ntfs.img 376 | 377 | uint64_t inode = 0; 378 | uint32_t hash = calc_crc32c(0xfffffffe, (const uint8_t*)image_filename, sizeof(image_filename) - 1); 379 | 380 | b.walk_tree(img_root_addr, [&](const KEY& key, string_view data) { 381 | if (key.obj_id > SUBVOL_ROOT_INODE || (key.obj_id == SUBVOL_ROOT_INODE && key.obj_type > btrfs_key_type::DIR_ITEM)) 382 | return false; 383 | 384 | if (key.obj_id == SUBVOL_ROOT_INODE && key.obj_type == btrfs_key_type::DIR_ITEM && key.offset == hash) { 385 | auto& di = *(DIR_ITEM*)data.data(); 386 | 387 | // FIXME - handle hash collisions 388 | 389 | if (di.n == sizeof(image_filename) - 1 && !memcmp(di.name, image_filename, di.n)) { 390 | if (di.key.obj_type != btrfs_key_type::INODE_ITEM) 391 | throw formatted_error("DIR_ITEM for {} pointed to object type {}, expected INODE_ITEM.", 392 | string_view(di.name, di.n), di.key.obj_type); 393 | 394 | inode = di.key.obj_id; 395 | } 396 | 397 | return false; 398 | } 399 | 400 | return true; 401 | }); 402 | 403 | if (inode == 0) 404 | throw formatted_error("Could not find {} in subvol {:x}.", image_filename, image_subvol_id); 405 | 406 | // parse extent data 407 | 408 | map> extents; 409 | 410 | b.walk_tree(img_root_addr, [&](const KEY& key, string_view data) { 411 | if (key.obj_id > inode || (key.obj_id == inode && key.obj_type > btrfs_key_type::EXTENT_DATA)) 412 | return false; 413 | 414 | if 
(key.obj_id != inode || key.obj_type != btrfs_key_type::EXTENT_DATA) 415 | return true; 416 | 417 | const auto& ed = *(EXTENT_DATA*)data.data(); 418 | 419 | if (ed.compression != btrfs_compression::none) 420 | throw runtime_error("NTFS image has been compressed, cannot process."); 421 | 422 | if (ed.type == btrfs_extent_type::prealloc) 423 | return true; // treat as if sparse 424 | 425 | if (ed.type == btrfs_extent_type::inline_extent) 426 | throw runtime_error("NTFS image has inline extents, cannot process."); 427 | 428 | if (ed.type != btrfs_extent_type::regular) 429 | throw formatted_error("Unknown extent type {}.", (unsigned int)ed.type); 430 | 431 | const auto& ed2 = *(EXTENT_DATA2*)ed.data; 432 | 433 | if (ed2.address == 0 && ed2.size == 0) 434 | return true; // sparse, skip 435 | 436 | extents.emplace(key.offset, make_pair(ed2.address, ed2.size)); 437 | 438 | return true; 439 | }); 440 | 441 | // resolve logical addresses to physical 442 | 443 | map relocs; 444 | 445 | for (const auto& e : extents) { 446 | auto off = e.first; 447 | auto addr = e.second.first; 448 | auto len = e.second.second; 449 | 450 | auto& c = b.find_chunk(addr); 451 | auto& ci = *(CHUNK_ITEM*)c.second.data(); 452 | 453 | if (ci.type & (BLOCK_FLAG_RAID0 | BLOCK_FLAG_RAID1 | BLOCK_FLAG_DUPLICATE | 454 | BLOCK_FLAG_RAID10 | BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6 | 455 | BLOCK_FLAG_RAID1C3 | BLOCK_FLAG_RAID1C4)) { 456 | throw formatted_error("Data chunk {:x} was not SINGLE, cannot process.", 457 | c.first); 458 | } 459 | 460 | auto* cis = (CHUNK_ITEM_STRIPE*)(&ci + 1); 461 | 462 | auto physoff = addr - c.first + cis[0].offset; 463 | 464 | if (off == physoff) // identity map 465 | continue; 466 | 467 | relocs.emplace(off, buffer_t{}); 468 | 469 | auto& r = relocs.at(off); 470 | auto buf = b.raw_read(physoff, (uint32_t)len); // FIXME - check csum? 471 | 472 | r.swap(buf); 473 | } 474 | 475 | for (const auto& r : relocs) { 476 | b.raw_write(r.first, r.second); 477 | } 478 | 479 | // FIXME - TRIM? 
480 | 481 | fmt::print("Device successfully rolled back to NTFS.\n"); 482 | } 483 | -------------------------------------------------------------------------------- /src/sha256.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // Public domain code from https://github.com/amosnier/sha-2 5 | 6 | // FIXME - x86 SHA extensions 7 | 8 | #define CHUNK_SIZE 64 9 | #define TOTAL_LEN_LEN 8 10 | 11 | /* 12 | * ABOUT bool: this file does not use bool in order to be as pre-C99 compatible as possible. 13 | */ 14 | 15 | /* 16 | * Comments from pseudo-code at https://en.wikipedia.org/wiki/SHA-2 are reproduced here. 17 | * When useful for clarification, portions of the pseudo-code are reproduced here too. 18 | */ 19 | 20 | /* 21 | * Initialize array of round constants: 22 | * (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311): 23 | */ 24 | static const uint32_t k[] = { 25 | 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 26 | 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 27 | 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 28 | 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 29 | 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 30 | 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 31 | 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, 32 | 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 33 | }; 34 | 35 | struct buffer_state { 36 | const uint8_t * p; 37 | size_t len; 38 | size_t total_len; 39 | int single_one_delivered; /* bool */ 40 | int total_len_delivered; /* bool */ 41 | }; 42 | 43 | static inline 
uint32_t right_rot(uint32_t value, unsigned int count) 44 | { 45 | /* 46 | * Defined behaviour in standard C for all count where 0 < count < 32, 47 | * which is what we need here. 48 | */ 49 | return value >> count | value << (32 - count); 50 | } 51 |

/*
 * Prepare `state` for feeding the `len` bytes at `input` to calc_chunk():
 * the read cursor points at the start of the input, the full length is both
 * the remaining count and the total recorded for the padding, and neither
 * the 0x80 marker byte nor the length field has been delivered yet.
 */
static void init_buf_state(struct buffer_state * state, const void * input, size_t len)
{
	/* Padding progress flags (used as booleans). */
	state->total_len_delivered = 0;
	state->single_one_delivered = 0;

	/* Everything is still unread; total_len is kept for the trailing length field. */
	state->total_len = len;
	state->len = len;
	state->p = input;
}

61 | /* Return value: bool */ 62 | static int calc_chunk(uint8_t chunk[CHUNK_SIZE], struct buffer_state * state) 63 | { 64 | size_t space_in_chunk; 65 | 66 | if (state->total_len_delivered) { 67 | return 0; 68 | } 69 | 70 | if (state->len >= CHUNK_SIZE) { 71 | memcpy(chunk, state->p, CHUNK_SIZE); 72 | state->p += CHUNK_SIZE; 73 | state->len -= CHUNK_SIZE; 74 | return 1; 75 | } 76 | 77 | memcpy(chunk, state->p, state->len); 78 | chunk += state->len; 79 | space_in_chunk = CHUNK_SIZE - state->len; 80 | state->p += state->len; 81 | state->len = 0; 82 | 83 | /* If we are here, space_in_chunk is one at minimum. */ 84 | if (!state->single_one_delivered) { 85 | *chunk++ = 0x80; 86 | space_in_chunk -= 1; 87 | state->single_one_delivered = 1; 88 | } 89 | 90 | /* 91 | * Now: 92 | * - either there is enough space left for the total length, and we can conclude, 93 | * - or there is too little space left, and we have to pad the rest of this chunk with zeroes. 94 | * In the latter case, we will conclude at the next invokation of this function. 95 | */ 96 | if (space_in_chunk >= TOTAL_LEN_LEN) { 97 | const size_t left = space_in_chunk - TOTAL_LEN_LEN; 98 | size_t len = state->total_len; 99 | int i; 100 | memset(chunk, 0x00, left); 101 | chunk += left; 102 | 103 | /* Storing of len * 8 as a big endian 64-bit without overflow.
*/ 104 | chunk[7] = (uint8_t) (len << 3); 105 | len >>= 5; 106 | for (i = 6; i >= 0; i--) { 107 | chunk[i] = (uint8_t) len; 108 | len >>= 8; 109 | } 110 | state->total_len_delivered = 1; 111 | } else { 112 | memset(chunk, 0x00, space_in_chunk); 113 | } 114 | 115 | return 1; 116 | } 117 |

/*
 * Compute the SHA-256 digest of the `len` bytes at `input` and store it,
 * big-endian, in the 32 bytes starting at `hash`.
 *
 * Limitations:
 * - Since input is a pointer in RAM, the data to hash should be in RAM, which could be a problem
 *   for large data sizes.
 * - SHA algorithms theoretically operate on bit strings. However, this implementation has no support
 *   for bit string lengths that are not multiples of eight, and it really operates on arrays of bytes.
 *   In particular, the len parameter is a number of bytes.
 */
void calc_sha256(uint8_t* hash, const void* input, size_t len)
{
	/*
	 * Note 1: All integers (except indexes) are 32-bit unsigned integers and addition is calculated modulo 2^32.
	 * Note 2: For each round, there is one round constant k[i] and one entry in the message schedule array w[i], 0 <= i <= 63
	 * Note 3: The compression function uses 8 working variables, a through h
	 * Note 4: Big-endian convention is used when expressing the constants in this pseudocode,
	 *     and when parsing message block data from bytes to words, for example,
	 *     the first word of the input message "abc" after padding is 0x61626380
	 */

	/*
	 * Initialize hash values:
	 * (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
	 */
	uint32_t h[] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
	unsigned i, j;

	/* 512-bit chunks is what we will operate on. */
	uint8_t chunk[64];

	struct buffer_state state;

	init_buf_state(&state, input, len);

	/* calc_chunk() returns 0 once the padded final chunk has been handed out. */
	while (calc_chunk(chunk, &state)) {
		uint32_t ah[8];

		const uint8_t *p = chunk;

		/* Initialize working variables to current hash value: */
		for (i = 0; i < 8; i++)
			ah[i] = h[i];

		/* Compression function main loop: 4 passes of 16 rounds each = 64 rounds. */
		for (i = 0; i < 4; i++) {
			/*
			 * The w-array is really w[64], but since we only need
			 * 16 of them at a time, we save stack by calculating
			 * 16 at a time.
			 *
			 * This optimization was not there initially and the
			 * rest of the comments about w[64] are kept in their
			 * initial state.
			 */

			/*
			 * create a 64-entry message schedule array w[0..63] of 32-bit words
			 * (The initial values in w[0..63] don't matter, so many implementations zero them here)
			 * copy chunk into first 16 words w[0..15] of the message schedule array
			 */
			uint32_t w[16];

			for (j = 0; j < 16; j++) {
				if (i == 0) {
					/* First pass: load the next big-endian 32-bit word of the chunk. */
					w[j] = (uint32_t) p[0] << 24 | (uint32_t) p[1] << 16 |
						(uint32_t) p[2] << 8 | (uint32_t) p[3];
					p += 4;
				} else {
					/* Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array: */
					/* w is used as a 16-entry ring, hence the "& 0xf" on every index. */
					const uint32_t s0 = right_rot(w[(j + 1) & 0xf], 7) ^ right_rot(w[(j + 1) & 0xf], 18) ^ (w[(j + 1) & 0xf] >> 3);
					const uint32_t s1 = right_rot(w[(j + 14) & 0xf], 17) ^ right_rot(w[(j + 14) & 0xf], 19) ^ (w[(j + 14) & 0xf] >> 10);
					w[j] = w[j] + s0 + w[(j + 9) & 0xf] + s1;
				}
				/* One round; i << 4 | j is the round number 0..63 used to pick the constant. */
				const uint32_t s1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25);
				const uint32_t ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]);
				const uint32_t temp1 = ah[7] + s1 + ch + k[i << 4 | j] + w[j];
				const uint32_t s0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22);
				const uint32_t maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]);
				const uint32_t temp2 = s0 + maj;

				/* Shift the working variables down by one and insert the new values. */
				ah[7] = ah[6];
				ah[6] = ah[5];
				ah[5] = ah[4];
				ah[4] = ah[3] + temp1;
				ah[3] = ah[2];
				ah[2] = ah[1];
				ah[1] = ah[0];
				ah[0] = temp1 + temp2;
			}
		}

		/* Add the compressed chunk to the current hash value: */
		for (i = 0; i < 8; i++)
			h[i] += ah[i];
	}

	/* Produce the final hash value (big-endian): */
	for (i = 0, j = 0; i < 8; i++)
	{
		hash[j++] = (uint8_t) (h[i] >> 24);
		hash[j++] = (uint8_t) (h[i] >> 16);
		hash[j++] = (uint8_t) (h[i] >> 8);
		hash[j++] = (uint8_t) h[i];
	}
}

222 | -------------------------------------------------------------------------------- /src/xxhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Extremely Fast Hash algorithm 3 | Header File 4 | Copyright (C) 2012-2016, Yann Collet. 5 | 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are 10 | met: 11 | 12 | * Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above 15 | copyright notice, this list of conditions and the following disclaimer 16 | in the documentation and/or other materials provided with the 17 | distribution. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | You can contact the author at : 32 | - xxHash source repository : https://github.com/Cyan4973/xxHash 33 | */ 34 | 35 | /* Notice extracted from xxHash homepage : 36 | 37 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 38 | It also successfully passes all tests from the SMHasher suite. 39 | 40 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 41 | 42 | Name Speed Q.Score Author 43 | xxHash 5.4 GB/s 10 44 | CrapWow 3.2 GB/s 2 Andrew 45 | MumurHash 3a 2.7 GB/s 10 Austin Appleby 46 | SpookyHash 2.0 GB/s 10 Bob Jenkins 47 | SBox 1.4 GB/s 9 Bret Mulvey 48 | Lookup3 1.2 GB/s 9 Bob Jenkins 49 | SuperFastHash 1.2 GB/s 1 Paul Hsieh 50 | CityHash64 1.05 GB/s 10 Pike & Alakuijala 51 | FNV 0.55 GB/s 5 Fowler, Noll, Vo 52 | CRC32 0.43 GB/s 9 53 | MD5-32 0.33 GB/s 10 Ronald L. Rivest 54 | SHA1-32 0.28 GB/s 10 55 | 56 | Q.Score is a measure of quality of the hash function. 57 | It depends on successfully passing SMHasher test set. 58 | 10 is a perfect score. 59 | 60 | A 64-bits version, named XXH64, is available since r35. 61 | It offers much better speed, but for 64-bits applications only. 
62 | Name Speed on 64 bits Speed on 32 bits 63 | XXH64 13.8 GB/s 1.9 GB/s 64 | XXH32 6.8 GB/s 6.0 GB/s 65 | */ 66 | 67 | #if defined (__cplusplus) 68 | extern "C" { 69 | #endif 70 | 71 | #ifndef XXHASH_H_5627135585666179 72 | #define XXHASH_H_5627135585666179 1 73 | 74 | 75 | /* **************************** 76 | * Definitions 77 | ******************************/ 78 | #include /* size_t */ 79 | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 80 | 81 | 82 | /* **************************** 83 | * API modifier 84 | ******************************/ 85 | /** XXH_PRIVATE_API 86 | * This is useful if you want to include xxhash functions in `static` mode 87 | * in order to inline them, and remove their symbol from the public list. 88 | * Methodology : 89 | * #define XXH_PRIVATE_API 90 | * #include "xxhash.h" 91 | * `xxhash.c` is automatically included. 92 | * It's not useful to compile and link it as a separate module anymore. 93 | */ 94 | #ifdef XXH_PRIVATE_API 95 | # ifndef XXH_STATIC_LINKING_ONLY 96 | # define XXH_STATIC_LINKING_ONLY 97 | # endif 98 | # if defined(__GNUC__) 99 | # define XXH_PUBLIC_API static __inline __attribute__((unused)) 100 | # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 101 | # define XXH_PUBLIC_API static inline 102 | # elif defined(_MSC_VER) 103 | # define XXH_PUBLIC_API static __inline 104 | # else 105 | # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 106 | # endif 107 | #else 108 | # define XXH_PUBLIC_API /* do nothing */ 109 | #endif /* XXH_PRIVATE_API */ 110 | 111 | /*!XXH_NAMESPACE, aka Namespace Emulation : 112 | 113 | If you want to include _and expose_ xxHash functions from within your own library, 114 | but also want to avoid symbol collisions with another library which also includes xxHash, 115 | 116 | you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library 117 | 
with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 118 | 119 | Note that no change is required within the calling program as long as it includes `xxhash.h` : 120 | regular symbol name will be automatically translated by this header. 121 | */ 122 | #ifdef XXH_NAMESPACE 123 | # define XXH_CAT(A,B) A##B 124 | # define XXH_NAME2(A,B) XXH_CAT(A,B) 125 | # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 126 | # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 127 | # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 128 | # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 129 | # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 130 | # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 131 | # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 132 | # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 133 | # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 134 | # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 135 | # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 136 | # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 137 | # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 138 | # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) 139 | # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) 140 | # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) 141 | # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) 142 | # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) 143 | # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) 144 | #endif 145 | 146 | 147 | /* ************************************* 148 | * Version 149 | ***************************************/ 150 | #define XXH_VERSION_MAJOR 0 151 | #define XXH_VERSION_MINOR 6 152 | #define XXH_VERSION_RELEASE 
2 153 | #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 154 | XXH_PUBLIC_API unsigned XXH_versionNumber (void); 155 | 156 | 157 | /* **************************** 158 | * Simple Hash Functions 159 | ******************************/ 160 | typedef unsigned int XXH32_hash_t; 161 | typedef unsigned long long XXH64_hash_t; 162 | 163 | XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 164 | XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 165 | 166 | /*! 167 | XXH32() : 168 | Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 169 | The memory between input & input+length must be valid (allocated and read-accessible). 170 | "seed" can be used to alter the result predictably. 171 | Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 172 | XXH64() : 173 | Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 174 | "seed" can be used to alter the result predictably. 175 | This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 176 | */ 177 | 178 | 179 | /* **************************** 180 | * Streaming Hash Functions 181 | ******************************/ 182 | typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 183 | typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 184 | 185 | /*! 
State allocation, compatible with dynamic libraries */ 186 | 187 | XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 188 | XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 189 | 190 | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 191 | XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 192 | 193 | 194 | /* hash streaming */ 195 | 196 | XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 197 | XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 198 | XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 199 | 200 | XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 201 | XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 202 | XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 203 | 204 | /* 205 | These functions generate the xxHash of an input provided in multiple segments. 206 | Note that, for small input, they are slower than single-call functions, due to state management. 207 | For small input, prefer `XXH32()` and `XXH64()` . 208 | 209 | XXH state must first be allocated, using XXH*_createState() . 210 | 211 | Start a new hash by initializing state with a seed, using XXH*_reset(). 212 | 213 | Then, feed the hash state by calling XXH*_update() as many times as necessary. 214 | Obviously, input must be allocated and read accessible. 215 | The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 216 | 217 | Finally, a hash value can be produced anytime, by using XXH*_digest(). 218 | This function returns the nn-bits hash as an int or long long. 219 | 220 | It's still possible to continue inserting input into the hash state after a digest, 221 | and generate some new hashes later on, by calling again XXH*_digest(). 
222 | 223 | When done, free XXH state space if it was allocated dynamically. 224 | */ 225 | 226 | 227 | /* ************************** 228 | * Utils 229 | ****************************/ 230 | #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ 231 | # define restrict /* disable restrict */ 232 | #endif 233 | 234 | XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); 235 | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); 236 | 237 | 238 | /* ************************** 239 | * Canonical representation 240 | ****************************/ 241 | /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. 242 | * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 243 | * These functions allow transformation of hash result into and from its canonical format. 244 | * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 245 | */ 246 | typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 247 | typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 248 | 249 | XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 250 | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 251 | 252 | XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 253 | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 254 | 255 | #endif /* XXHASH_H_5627135585666179 */ 256 | 257 | 258 | 259 | /* ================================================================================================ 260 | This section contains definitions which are not guaranteed to remain stable. 261 | They may change in future versions, becoming incompatible with a different version of the library. 
262 | They shall only be used with static linking. 263 | Never use these definitions in association with dynamic linking ! 264 | =================================================================================================== */ 265 | #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) 266 | #define XXH_STATIC_H_3543687687345 267 | 268 | /* These definitions are only meant to allow allocation of XXH state 269 | statically, on stack, or in a struct for example. 270 | Do not use members directly. */ 271 | 272 | struct XXH32_state_s { 273 | unsigned total_len_32; 274 | unsigned large_len; 275 | unsigned v1; 276 | unsigned v2; 277 | unsigned v3; 278 | unsigned v4; 279 | unsigned mem32[4]; /* buffer defined as U32 for alignment */ 280 | unsigned memsize; 281 | unsigned reserved; /* never read nor write, will be removed in a future version */ 282 | }; /* typedef'd to XXH32_state_t */ 283 | 284 | struct XXH64_state_s { 285 | unsigned long long total_len; 286 | unsigned long long v1; 287 | unsigned long long v2; 288 | unsigned long long v3; 289 | unsigned long long v4; 290 | unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 291 | unsigned memsize; 292 | unsigned reserved[2]; /* never read nor write, will be removed in a future version */ 293 | }; /* typedef'd to XXH64_state_t */ 294 | 295 | 296 | # ifdef XXH_PRIVATE_API 297 | # include "xxhash.c" /* include xxhash functions as `static`, for inlining */ 298 | # endif 299 | 300 | #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ 301 | 302 | 303 | #if defined (__cplusplus) 304 | } 305 | #endif 306 | --------------------------------------------------------------------------------