├── .github └── workflows │ ├── build-amd64.yml │ └── build-x86.yml ├── CMakeLists.txt ├── LICENCE ├── README.md ├── mingw-amd64.cmake ├── mingw-x86.cmake └── src ├── ebiggers ├── common_defs.h ├── decompress_common.c ├── decompress_common.h ├── lzx_common.c ├── lzx_common.h ├── lzx_constants.h ├── lzx_decompress.c ├── system_compression.h └── xpress_decompress.c ├── misc.cpp ├── misc.h ├── ntfs.cpp ├── ntfs.h └── quibbleproto.h /.github/workflows/build-amd64.yml: -------------------------------------------------------------------------------- 1 | name: build amd64 2 | on: [push] 3 | jobs: 4 | amd64: 5 | runs-on: ubuntu-rolling 6 | steps: 7 | - run: apt-get update 8 | - run: apt-get install -y g++ git cmake nodejs g++-mingw-w64-x86-64 gnu-efi 9 | - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV 10 | - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA} 11 | - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }} 12 | - run: mkdir -p install/debug 13 | - run: | 14 | cmake -DCMAKE_BUILD_TYPE=Debug \ 15 | -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \ 16 | -S ${SHORT_SHA} -B debug-work && \ 17 | cmake --build debug-work --parallel `nproc` && \ 18 | cp debug-work/ntfs.efi install/debug/ 19 | - run: | 20 | cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ 21 | -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \ 22 | -S ${SHORT_SHA} -B release-work && \ 23 | cmake --build release-work --parallel `nproc` && \ 24 | cp release-work/ntfs.efi install/ 25 | - uses: actions/upload-artifact@v3 26 | with: 27 | name: ${{ github.sha }} 28 | overwrite: true 29 | path: | 30 | install 31 | -------------------------------------------------------------------------------- /.github/workflows/build-x86.yml: -------------------------------------------------------------------------------- 1 | name: build x86 2 | on: [push] 3 | jobs: 4 | x86: 5 | runs-on: ubuntu-rolling 6 | steps: 7 | - run: apt-get update 8 | - run: apt-get install -y g++ git cmake nodejs 
g++-mingw-w64-i686 gnu-efi 9 | - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV 10 | - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA} 11 | - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }} 12 | - run: mkdir -p install/debug 13 | - run: | 14 | cmake -DCMAKE_BUILD_TYPE=Debug \ 15 | -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \ 16 | -S ${SHORT_SHA} -B debug-work && \ 17 | cmake --build debug-work --parallel `nproc` && \ 18 | cp debug-work/ntfs.efi install/debug/ 19 | - run: | 20 | cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ 21 | -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \ 22 | -S ${SHORT_SHA} -B release-work && \ 23 | cmake --build release-work --parallel `nproc` && \ 24 | cp release-work/ntfs.efi install/ 25 | - uses: actions/upload-artifact@v3 26 | with: 27 | name: ${{ github.sha }} 28 | overwrite: true 29 | path: | 30 | install 31 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 |
 3 | # Builds ntfs.efi from the C and C++ sources under src/.
 4 | project(ntfs-uefi LANGUAGES C CXX)
 5 |
 6 | set(CMAKE_CXX_STANDARD 20)
 7 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 8 |
 9 | # Root of the EFI headers. The default is the path this file used to hard-code;
10 | # pass -DNTFS_EFI_INCLUDE_DIR=<dir> when the headers live elsewhere.
11 | set(NTFS_EFI_INCLUDE_DIR "/usr/include/efi" CACHE PATH
12 |     "Directory holding the EFI headers and their x86_64/ and ia32/ subdirectories")
13 |
14 | add_executable(ntfs
15 |     src/ntfs.cpp
16 |     src/misc.cpp
17 |     src/ebiggers/decompress_common.c
18 |     src/ebiggers/lzx_common.c
19 |     src/ebiggers/lzx_decompress.c
20 |     src/ebiggers/xpress_decompress.c
21 | )
22 |
23 | set_target_properties(ntfs PROPERTIES SUFFIX ".efi")
24 |
25 | # TRUE when the C compiler identifies itself as GNU or Clang.
26 | set(ntfs_gnu_like FALSE)
27 | if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
28 |     set(ntfs_gnu_like TRUE)
29 | endif()
30 |
31 | if(MSVC)
32 |     # Remove /RTC1, /RTCs, /RTCu and /RTCsu from the default flags of both
33 |     # languages: the target compiles .c and .cpp files, so stripping only the C
34 |     # variables would leave the switches active for the C++ sources.
35 |     # NOTE(review): presumably the run-time checks need CRT support that is not
36 |     # available to this image - confirm.
37 |     foreach(flags_var IN ITEMS
38 |             CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG
39 |             CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG)
40 |         string(REGEX REPLACE "/RTC(su|[1su])" "" ${flags_var} "${${flags_var}}")
41 |     endforeach()
42 |
43 |     # work around bug in Visual Studio
44 |     # (quoted so that an undefined MSVC_CXX_ARCHITECTURE_ID is not a syntax error)
45 |     if("${MSVC_CXX_ARCHITECTURE_ID}" STREQUAL "X86")
46 |         set(CMAKE_SYSTEM_PROCESSOR "X86")
47 |     endif()
48 |
49 |     target_compile_options(ntfs PRIVATE "/GS-")
50 |     target_link_options(ntfs PRIVATE "/ENTRY:efi_main")
51 | endif()
52 |
53 | # Stack checking/probing is switched off for every non-MSVC compiler and for
54 | # any GNU or Clang C compiler.
55 | if(ntfs_gnu_like OR NOT MSVC)
56 |     target_compile_options(ntfs PRIVATE
57 |         -fno-stack-check
58 |         -fno-stack-protector
59 |         -mno-stack-arg-probe
60 |     )
61 | endif()
62 |
63 | # Header search path: the common directory plus the per-architecture one.
64 | # The comparisons are quoted so that an empty CMAKE_SYSTEM_PROCESSOR cannot
65 | # turn the condition into a syntax error.
66 | target_include_directories(ntfs PRIVATE "${NTFS_EFI_INCLUDE_DIR}")
67 | if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
68 |     target_include_directories(ntfs PRIVATE "${NTFS_EFI_INCLUDE_DIR}/x86_64")
69 | elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
70 |     target_include_directories(ntfs PRIVATE "${NTFS_EFI_INCLUDE_DIR}/ia32")
71 | else()
72 |     message(WARNING
73 |         "CMAKE_SYSTEM_PROCESSOR is '${CMAKE_SYSTEM_PROCESSOR}'; expected AMD64, x86_64 or X86. "
74 |         "No architecture-specific EFI include directory was added.")
75 | endif()
76 |
77 | if(ntfs_gnu_like)
78 |     target_compile_options(ntfs PRIVATE "-ffreestanding")
79 |     target_link_options(ntfs PRIVATE "-nostartfiles" "-shared")
80 |
81 |     # Clang is given the PE subsystem by name, GNU by number.
82 |     # NOTE(review): 11 is presumably the numeric value of the same subsystem - confirm.
83 |     if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
84 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,efi_boot_service_driver")
85 |     else()
86 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,11")
87 |     endif()
88 |
89 |     # The entry symbol is spelled with a leading underscore on X86 only.
90 |     if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
91 |         target_link_options(ntfs PRIVATE "-e_efi_main")
92 |     else()
93 |         target_link_options(ntfs PRIVATE "-eefi_main")
94 |     endif()
95 | elseif(MSVC)
96 |     target_link_options(ntfs PRIVATE "/SUBSYSTEM:EFI_BOOT_SERVICE_DRIVER")
97 |     target_compile_options(ntfs PRIVATE "/Oi-")
98 | endif()
99 |
100 | # Warning flags, chosen by C++ compiler family at generate time.
101 | # NOTE(review): the compiler IDs inside the generator expressions were
102 | # reconstructed from a listing that had lost its <...> contents - confirm
103 | # against the upstream file.
104 | target_compile_options(ntfs PRIVATE
105 |     "$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wall;-Wextra;-Wno-address-of-packed-member;-Werror=pointer-arith;-fno-exceptions>"
106 |     "$<$<CXX_COMPILER_ID:MSVC>:/W4;/Oi->"
107 | )
108 |
-------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to 
copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 
42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year> <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ntfs-efi 2 | --------- 3 | 4 | ntfs-efi is an NTFS filesystem driver for EFI. It is intended for use with the free 5 | Windows bootloader [Quibble](https://github.com/maharmstone/quibble), but you 6 | should be able to use it for anything EFI-related. 7 | 8 | Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's 9 | "WOF compressed data", and whose code I've used here. 10 | 11 | Changelog 12 | --------- 13 | 14 | * 20231107 15 | * Fixed memcpy miscompilation bug 16 | 17 | * 20230328 18 | * Initial release 19 | 20 | To do 21 | ----- 22 | 23 | * LZX WOF compression 24 | * LZNT1 compression 25 | * Hide special files in root 26 | * Free space, volume label, etc. 
27 | * Symlinks 28 | * Case-sensitive directories 29 | -------------------------------------------------------------------------------- /mingw-amd64.cmake: --------------------------------------------------------------------------------
 1 | # CMake toolchain file: build with the x86_64-w64-mingw32 GCC cross tools.
 2 | # Select it with -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake.
 3 | set(CMAKE_SYSTEM_NAME Windows)
 4 | set(CMAKE_SYSTEM_PROCESSOR "AMD64")
 5 |
 6 | set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
 7 | set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
 8 | set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
 9 |
10 | # Seed the link flags through the *_INIT variable. Assigning
11 | # CMAKE_EXE_LINKER_FLAGS itself creates a normal variable that hides the cache
12 | # entry, so a value given with -DCMAKE_EXE_LINKER_FLAGS=... would be ignored.
13 | set(CMAKE_EXE_LINKER_FLAGS_INIT "-static")
14 |
15 | # Look up libraries and headers only beneath the cross sysroot; programs are
16 | # never searched for there.
17 | set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
18 |
19 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
20 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
21 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
22 |
-------------------------------------------------------------------------------- /mingw-x86.cmake: --------------------------------------------------------------------------------
 1 | # CMake toolchain file: build with the i686-w64-mingw32 GCC cross tools.
 2 | # Select it with -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake.
 3 | set(CMAKE_SYSTEM_NAME Windows)
 4 | set(CMAKE_SYSTEM_PROCESSOR X86)
 5 |
 6 | set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
 7 | set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
 8 |
 9 | # Seed the link flags through the *_INIT variable. Assigning
10 | # CMAKE_EXE_LINKER_FLAGS itself creates a normal variable that hides the cache
11 | # entry, so a value given with -DCMAKE_EXE_LINKER_FLAGS=... would be ignored.
12 | set(CMAKE_EXE_LINKER_FLAGS_INIT "-static")
13 |
14 | # Look up libraries and headers only beneath the cross sysroot; programs are
15 | # never searched for there.
16 | set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
17 |
18 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
19 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
20 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
21 |
-------------------------------------------------------------------------------- /src/ebiggers/common_defs.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMMON_DEFS_H 2 | #define _COMMON_DEFS_H 3 | 4 | #include 5 | 6 | /* ========================================================================== */ 7 | /* Type definitions */ 8 | /* ========================================================================== */ 9 | 10 | /* 11 | * Type of a machine word. 'unsigned long' would be logical, but that is only 12 | * 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. 
So the best 13 | * we can do without a bunch of #ifdefs appears to be 'size_t'. 14 | */ 15 | typedef size_t machine_word_t; 16 | 17 | #define WORDBYTES sizeof(machine_word_t) 18 | #define WORDBITS (8 * WORDBYTES) 19 | 20 | /* ========================================================================== */ 21 | /* Compiler-specific definitions */ 22 | /* ========================================================================== */ 23 | 24 | #ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */ 25 | # define forceinline inline __attribute__((always_inline)) 26 | # define likely(expr) __builtin_expect(!!(expr), 1) 27 | # define unlikely(expr) __builtin_expect(!!(expr), 0) 28 | # define _aligned_attribute(n) __attribute__((aligned(n))) 29 | # define bsr32(n) (31 - __builtin_clz(n)) 30 | # define bsr64(n) (63 - __builtin_clzll(n)) 31 | # define bsf32(n) __builtin_ctz(n) 32 | # define bsf64(n) __builtin_ctzll(n) 33 | # ifndef min 34 | # define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \ 35 | (_a < _b) ? _a : _b; }) 36 | # endif 37 | # ifndef max 38 | # define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \ 39 | (_a > _b) ? _a : _b; }) 40 | # endif 41 | 42 | # define DEFINE_UNALIGNED_TYPE(type) \ 43 | struct type##_unaligned { \ 44 | type v; \ 45 | } __attribute__((packed)); \ 46 | \ 47 | static inline type \ 48 | load_##type##_unaligned(const void *p) \ 49 | { \ 50 | return ((const struct type##_unaligned *)p)->v; \ 51 | } \ 52 | \ 53 | static inline void \ 54 | store_##type##_unaligned(type val, void *p) \ 55 | { \ 56 | ((struct type##_unaligned *)p)->v = val; \ 57 | } 58 | 59 | #endif /* __GNUC__ */ 60 | 61 | /* Declare that the annotated function should always be inlined. This might be 62 | * desirable in highly tuned code, e.g. 
compression codecs */ 63 | #ifndef forceinline 64 | # define forceinline inline 65 | #endif 66 | 67 | /* Hint that the expression is usually true */ 68 | #ifndef likely 69 | # define likely(expr) (expr) 70 | #endif 71 | 72 | /* Hint that the expression is usually false */ 73 | #ifndef unlikely 74 | # define unlikely(expr) (expr) 75 | #endif 76 | 77 | /* Declare that the annotated variable, or variables of the annotated type, are 78 | * to be aligned on n-byte boundaries */ 79 | #ifndef _aligned_attribute 80 | # define _aligned_attribute(n) 81 | #endif 82 | 83 | /* min() and max() macros */ 84 | #ifndef min 85 | # define min(a, b) ((a) < (b) ? (a) : (b)) 86 | #endif 87 | #ifndef max 88 | # define max(a, b) ((a) > (b) ? (a) : (b)) 89 | #endif 90 | 91 | /* STATIC_ASSERT() - verify the truth of an expression at compilation time */ 92 | #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) 93 | 94 | /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses 95 | * can be performed efficiently on the target platform. */ 96 | #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED) 97 | # define UNALIGNED_ACCESS_IS_FAST 1 98 | #else 99 | # define UNALIGNED_ACCESS_IS_FAST 0 100 | #endif 101 | 102 | /* 103 | * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type', 104 | * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions 105 | * which load and store variables of type 'type' from/to unaligned memory 106 | * addresses. 107 | */ 108 | #ifndef DEFINE_UNALIGNED_TYPE 109 | 110 | #include 111 | /* 112 | * Although memcpy() may seem inefficient, it *usually* gets optimized 113 | * appropriately by modern compilers. It's portable and may be the best we can 114 | * do for a fallback... 
115 | */ 116 | #define DEFINE_UNALIGNED_TYPE(type) \ 117 | \ 118 | static forceinline type \ 119 | load_##type##_unaligned(const void *p) \ 120 | { \ 121 | type v; \ 122 | memcpy(&v, p, sizeof(v)); \ 123 | return v; \ 124 | } \ 125 | \ 126 | static forceinline void \ 127 | store_##type##_unaligned(type v, void *p) \ 128 | { \ 129 | memcpy(p, &v, sizeof(v)); \ 130 | } 131 | 132 | #endif /* !DEFINE_UNALIGNED_TYPE */ 133 | 134 | 135 | /* ========================================================================== */ 136 | /* Unaligned memory accesses */ 137 | /* ========================================================================== */ 138 | 139 | #define load_word_unaligned load_machine_word_t_unaligned 140 | #define store_word_unaligned store_machine_word_t_unaligned 141 | 142 | /* ========================================================================== */ 143 | /* Bit scan functions */ 144 | /* ========================================================================== */ 145 | 146 | /* 147 | * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least 148 | * significant end) of the *most* significant 1 bit in the input value. The 149 | * input value must be nonzero! 150 | */ 151 | 152 | #ifndef bsr32 153 | static forceinline unsigned 154 | bsr32(uint32_t v) 155 | { 156 | unsigned bit = 0; 157 | while ((v >>= 1) != 0) 158 | bit++; 159 | return bit; 160 | } 161 | #endif 162 | 163 | #ifndef bsr64 164 | static forceinline unsigned 165 | bsr64(uint64_t v) 166 | { 167 | unsigned bit = 0; 168 | while ((v >>= 1) != 0) 169 | bit++; 170 | return bit; 171 | } 172 | #endif 173 | 174 | static forceinline unsigned 175 | bsrw(machine_word_t v) 176 | { 177 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 178 | if (WORDBITS == 32) 179 | return bsr32(v); 180 | else 181 | return bsr64(v); 182 | } 183 | 184 | /* 185 | * Bit Scan Forward (BSF) - find the 0-based index (relative to the least 186 | * significant end) of the *least* significant 1 bit in the input value. 
The 187 | * input value must be nonzero! 188 | */ 189 | 190 | #ifndef bsf32 191 | static forceinline unsigned 192 | bsf32(uint32_t v) 193 | { 194 | unsigned bit; 195 | for (bit = 0; !(v & 1); bit++, v >>= 1) 196 | ; 197 | return bit; 198 | } 199 | #endif 200 | 201 | #ifndef bsf64 202 | static forceinline unsigned 203 | bsf64(uint64_t v) 204 | { 205 | unsigned bit; 206 | for (bit = 0; !(v & 1); bit++, v >>= 1) 207 | ; 208 | return bit; 209 | } 210 | #endif 211 | 212 | static forceinline unsigned 213 | bsfw(machine_word_t v) 214 | { 215 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 216 | if (WORDBITS == 32) 217 | return bsf32(v); 218 | else 219 | return bsf64(v); 220 | } 221 | 222 | /* Return the log base 2 of 'n', rounded up to the nearest integer. */ 223 | static forceinline unsigned 224 | ilog2_ceil(size_t n) 225 | { 226 | if (n <= 1) 227 | return 0; 228 | return 1 + bsrw(n - 1); 229 | } 230 | 231 | #endif /* _COMMON_DEFS_H */ 232 | -------------------------------------------------------------------------------- /src/ebiggers/decompress_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * decompress_common.c 3 | * 4 | * Code for decompression shared among multiple compression formats. 5 | * 6 | * The following copying information applies to this specific source code file: 7 | * 8 | * Written in 2012-2016 by Eric Biggers 9 | * 10 | * To the extent possible under law, the author(s) have dedicated all copyright 11 | * and related and neighboring rights to this software to the public domain 12 | * worldwide via the Creative Commons Zero 1.0 Universal Public Domain 13 | * Dedication (the "CC0"). 14 | * 15 | * This software is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the CC0 for more details. 
18 | * 19 | * You should have received a copy of the CC0 along with this software; if not 20 | * see . 21 | */ 22 | 23 | #ifdef HAVE_CONFIG_H 24 | # include "config.h" 25 | #endif 26 | 27 | #include 28 | 29 | #ifdef __SSE2__ 30 | # include 31 | #endif 32 | 33 | #include "decompress_common.h" 34 | 35 | /* 36 | * make_huffman_decode_table() - 37 | * 38 | * Given an alphabet of symbols and the length of each symbol's codeword in a 39 | * canonical prefix code, build a table for quickly decoding symbols that were 40 | * encoded with that code. 41 | * 42 | * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols 43 | * such that no whole codeword is a prefix of any other. A prefix code might be 44 | * a _Huffman code_, which means that it is an optimum prefix code for a given 45 | * list of symbol frequencies and was generated by the Huffman algorithm. 46 | * Although the prefix codes processed here will ordinarily be "Huffman codes", 47 | * strictly speaking the decoder cannot know whether a given code was actually 48 | * generated by the Huffman algorithm or not. 49 | * 50 | * A prefix code is _canonical_ if and only if a longer codeword never 51 | * lexicographically precedes a shorter codeword, and the lexicographic ordering 52 | * of codewords of equal length is the same as the lexicographic ordering of the 53 | * corresponding symbols. The advantage of using a canonical prefix code is 54 | * that the codewords can be reconstructed from only the symbol => codeword 55 | * length mapping. This eliminates the need to transmit the codewords 56 | * explicitly. Instead, they can be enumerated in lexicographic order after 57 | * sorting the symbols primarily by increasing codeword length and secondarily 58 | * by increasing symbol value. 59 | * 60 | * However, the decoder's real goal is to decode symbols with the code, not just 61 | * generate the list of codewords. 
Consequently, this function directly builds 62 | * a table for efficiently decoding symbols using the code. The basic idea is 63 | * that given the next 'max_codeword_len' bits of input, the decoder can look up 64 | * the next decoded symbol by indexing a table containing '2^max_codeword_len' 65 | * entries. A codeword with length 'max_codeword_len' will have exactly one 66 | * entry in this table, whereas a codeword shorter than 'max_codeword_len' will 67 | * have multiple entries in this table. Precisely, a codeword of length 'n' 68 | * will have '2^(max_codeword_len - n)' entries. The index of each such entry, 69 | * considered as a bitstring of length 'max_codeword_len', will contain the 70 | * corresponding codeword as a prefix. 71 | * 72 | * That's the basic idea, but we extend it in two ways: 73 | * 74 | * - Often the maximum codeword length is too long for it to be efficient to 75 | * build the full decode table whenever a new code is used. Instead, we build 76 | * a "root" table using only '2^table_bits' entries, where 'table_bits <= 77 | * max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a 78 | * symbol directly (for codewords not longer than 'table_bits'), or the index 79 | * of a subtable which must be indexed with additional bits of input to fully 80 | * decode the symbol (for codewords longer than 'table_bits'). 81 | * 82 | * - Whenever the decoder decodes a symbol, it needs to know the codeword length 83 | * so that it can remove the appropriate number of input bits. The obvious 84 | * solution would be to simply retain the codeword lengths array and use the 85 | * decoded symbol as an index into it. However, that would require two array 86 | * accesses when decoding each symbol. Our strategy is to instead store the 87 | * codeword length directly in the decode table entry along with the symbol. 
88 | * 89 | * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table 90 | * entries, and see read_huffsym() for full details on how symbols are decoded. 91 | * 92 | * @decode_table: 93 | * The array in which to build the decode table. This must have been 94 | * declared by the DECODE_TABLE() macro. This may alias @lens, since all 95 | * @lens are consumed before the decode table is written to. 96 | * 97 | * @num_syms: 98 | * The number of symbols in the alphabet. 99 | * 100 | * @table_bits: 101 | * The log base 2 of the number of entries in the root table. 102 | * 103 | * @lens: 104 | * An array of length @num_syms, indexed by symbol, that gives the length 105 | * of the codeword, in bits, for each symbol. The length can be 0, which 106 | * means that the symbol does not have a codeword assigned. In addition, 107 | * @lens may alias @decode_table, as noted above. 108 | * 109 | * @max_codeword_len: 110 | * The maximum codeword length permitted for this code. All entries in 111 | * 'lens' must be less than or equal to this value. 112 | * 113 | * @working_space 114 | * A temporary array that was declared with DECODE_TABLE_WORKING_SPACE(). 115 | * 116 | * Returns 0 on success, or -1 if the lengths do not form a valid prefix code. 
117 | */ 118 | int 119 | make_huffman_decode_table(uint16_t decode_table[], unsigned num_syms, 120 | unsigned table_bits, const uint8_t lens[], 121 | unsigned max_codeword_len, uint16_t working_space[]) 122 | { 123 | uint16_t * const len_counts = &working_space[0]; 124 | uint16_t * const offsets = &working_space[1 * (max_codeword_len + 1)]; 125 | uint16_t * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; 126 | int32_t remainder = 1; 127 | uint8_t *entry_ptr = (uint8_t*)decode_table; 128 | unsigned codeword_len = 1; 129 | unsigned sym_idx; 130 | unsigned codeword; 131 | unsigned subtable_pos; 132 | unsigned subtable_bits; 133 | unsigned subtable_prefix; 134 | 135 | /* Count how many codewords have each length, including 0. */ 136 | for (unsigned len = 0; len <= max_codeword_len; len++) 137 | len_counts[len] = 0; 138 | for (unsigned sym = 0; sym < num_syms; sym++) 139 | len_counts[lens[sym]]++; 140 | 141 | /* It is already guaranteed that all lengths are <= max_codeword_len, 142 | * but it cannot be assumed they form a complete prefix code. A 143 | * codeword of length n should require a proportion of the codespace 144 | * equaling (1/2)^n. The code is complete if and only if, by this 145 | * measure, the codespace is exactly filled by the lengths. */ 146 | for (unsigned len = 1; len <= max_codeword_len; len++) { 147 | remainder = (remainder << 1) - len_counts[len]; 148 | /* Do the lengths overflow the codespace? */ 149 | if (unlikely(remainder < 0)) 150 | return -1; 151 | } 152 | 153 | if (remainder != 0) { 154 | /* The lengths do not fill the codespace; that is, they form an 155 | * incomplete code. This is permitted only if the code is empty 156 | * (contains no symbols). */ 157 | 158 | if (unlikely(remainder != 1U << max_codeword_len)) 159 | return -1; 160 | 161 | /* The code is empty. When processing a well-formed stream, the 162 | * decode table need not be initialized in this case. 
However, 163 | * we cannot assume the stream is well-formed, so we must 164 | * initialize the decode table anyway. Setting all entries to 0 165 | * makes the decode table always produce symbol '0' without 166 | * consuming any bits, which is good enough. */ 167 | memset(decode_table, 0, sizeof(decode_table[0]) << table_bits); 168 | return 0; 169 | } 170 | 171 | /* Sort the symbols primarily by increasing codeword length and 172 | * secondarily by increasing symbol value. */ 173 | 174 | /* Initialize 'offsets' so that 'offsets[len]' is the number of 175 | * codewords shorter than 'len' bits, including length 0. */ 176 | offsets[0] = 0; 177 | for (unsigned len = 0; len < max_codeword_len; len++) 178 | offsets[len + 1] = offsets[len] + len_counts[len]; 179 | 180 | /* Use the 'offsets' array to sort the symbols. */ 181 | for (unsigned sym = 0; sym < num_syms; sym++) 182 | sorted_syms[offsets[lens[sym]]++] = sym; 183 | 184 | /* 185 | * Fill the root table entries for codewords no longer than table_bits. 186 | * 187 | * The table will start with entries for the shortest codeword(s), which 188 | * will have the most entries. From there, the number of entries per 189 | * codeword will decrease. As an optimization, we may begin filling 190 | * entries with SSE2 vector accesses (8 entries/store), then change to 191 | * word accesses (2 or 4 entries/store), then change to 16-bit accesses 192 | * (1 entry/store). 193 | */ 194 | sym_idx = offsets[0]; 195 | 196 | #ifdef __SSE2__ 197 | /* Fill entries one 128-bit vector (8 entries) at a time. 
*/ 198 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 199 | (sizeof(__m128i) / sizeof(decode_table[0])); 200 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 201 | { 202 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 203 | for (; sym_idx < end_sym_idx; sym_idx++) { 204 | /* Note: unlike in the "word" version below, the __m128i 205 | * type already has __attribute__((may_alias)), so using 206 | * it to access an array of u16 will not violate strict 207 | * aliasing. */ 208 | __m128i v = _mm_set1_epi16( 209 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 210 | codeword_len)); 211 | unsigned n = stores_per_loop; 212 | do { 213 | *(__m128i *)entry_ptr = v; 214 | entry_ptr += sizeof(v); 215 | } while (--n); 216 | } 217 | } 218 | #endif /* __SSE2__ */ 219 | 220 | #ifdef __GNUC__ 221 | /* Fill entries one word (2 or 4 entries) at a time. */ 222 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 223 | (WORDBYTES / sizeof(decode_table[0])); 224 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 225 | { 226 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 227 | for (; sym_idx < end_sym_idx; sym_idx++) { 228 | 229 | /* Accessing the array of u16 as u32 or u64 would 230 | * violate strict aliasing and would require compiling 231 | * the code with -fno-strict-aliasing to guarantee 232 | * correctness. To work around this problem, use the 233 | * gcc 'may_alias' extension. */ 234 | typedef machine_word_t 235 | __attribute__((may_alias)) aliased_word_t; 236 | aliased_word_t v = repeat_u16( 237 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 238 | codeword_len)); 239 | unsigned n = stores_per_loop; 240 | do { 241 | *(aliased_word_t *)entry_ptr = v; 242 | entry_ptr += sizeof(v); 243 | } while (--n); 244 | } 245 | } 246 | #endif /* __GNUC__ */ 247 | 248 | /* Fill entries one at a time. 
*/ 249 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)); 250 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 251 | { 252 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 253 | for (; sym_idx < end_sym_idx; sym_idx++) { 254 | uint16_t v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 255 | codeword_len); 256 | unsigned n = stores_per_loop; 257 | do { 258 | *(uint16_t *)entry_ptr = v; 259 | entry_ptr += sizeof(v); 260 | } while (--n); 261 | } 262 | } 263 | 264 | /* If all symbols were processed, then no subtables are required. */ 265 | if (sym_idx == num_syms) 266 | return 0; 267 | 268 | /* At least one subtable is required. Process the remaining symbols. */ 269 | codeword = ((uint16_t *)entry_ptr - decode_table) << 1; 270 | subtable_pos = 1U << table_bits; 271 | subtable_bits = table_bits; 272 | subtable_prefix = -1; 273 | do { 274 | while (len_counts[codeword_len] == 0) { 275 | codeword_len++; 276 | codeword <<= 1; 277 | } 278 | 279 | unsigned prefix = codeword >> (codeword_len - table_bits); 280 | 281 | /* Start a new subtable if the first 'table_bits' bits of the 282 | * codeword don't match the prefix for the previous subtable, or 283 | * if this will be the first subtable. */ 284 | if (prefix != subtable_prefix) { 285 | 286 | subtable_prefix = prefix; 287 | 288 | /* 289 | * Calculate the subtable length. If the codeword 290 | * length exceeds 'table_bits' by n, then the subtable 291 | * needs at least 2^n entries. But it may need more; if 292 | * there are fewer than 2^n codewords of length 293 | * 'table_bits + n' remaining, then n will need to be 294 | * incremented to bring in longer codewords until the 295 | * subtable can be filled completely. Note that it 296 | * always will, eventually, be possible to fill the 297 | * subtable, since it was previously verified that the 298 | * code is complete. 
299 | */ 300 | subtable_bits = codeword_len - table_bits; 301 | remainder = (int32_t)1 << subtable_bits; 302 | for (;;) { 303 | remainder -= len_counts[table_bits + 304 | subtable_bits]; 305 | if (remainder <= 0) 306 | break; 307 | subtable_bits++; 308 | remainder <<= 1; 309 | } 310 | 311 | /* Create the entry that points from the root table to 312 | * the subtable. This entry contains the index of the 313 | * start of the subtable and the number of bits with 314 | * which the subtable is indexed (the log base 2 of the 315 | * number of entries it contains). */ 316 | decode_table[subtable_prefix] = 317 | MAKE_DECODE_TABLE_ENTRY(subtable_pos, 318 | subtable_bits); 319 | } 320 | 321 | /* Fill the subtable entries for this symbol. */ 322 | uint16_t entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 323 | codeword_len - table_bits); 324 | unsigned n = 1U << (subtable_bits - (codeword_len - 325 | table_bits)); 326 | do { 327 | decode_table[subtable_pos++] = entry; 328 | } while (--n); 329 | 330 | len_counts[codeword_len]--; 331 | codeword++; 332 | } while (++sym_idx < num_syms); 333 | 334 | return 0; 335 | } 336 | -------------------------------------------------------------------------------- /src/ebiggers/decompress_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * decompress_common.h 3 | * 4 | * Header for decompression code shared by multiple compression formats. 5 | * 6 | * The following copying information applies to this specific source code file: 7 | * 8 | * Written in 2012-2016 by Eric Biggers 9 | * 10 | * To the extent possible under law, the author(s) have dedicated all copyright 11 | * and related and neighboring rights to this software to the public domain 12 | * worldwide via the Creative Commons Zero 1.0 Universal Public Domain 13 | * Dedication (the "CC0"). 
14 | * 15 | * This software is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the CC0 for more details. 18 | * 19 | * You should have received a copy of the CC0 along with this software; if not 20 | * see . 21 | */ 22 | 23 | #ifndef _DECOMPRESS_COMMON_H 24 | #define _DECOMPRESS_COMMON_H 25 | 26 | #include 27 | 28 | #include "common_defs.h" 29 | 30 | /******************************************************************************/ 31 | /* Input bitstream for XPRESS and LZX */ 32 | /*----------------------------------------------------------------------------*/ 33 | 34 | /* Structure that encapsulates a block of in-memory data being interpreted as a 35 | * stream of bits, optionally with interwoven literal bytes. Bits are assumed 36 | * to be stored in little endian 16-bit coding units, with the bits ordered high 37 | * to low. */ 38 | struct input_bitstream { 39 | 40 | /* Bits that have been read from the input buffer. The bits are 41 | * left-justified; the next bit is always bit 31. */ 42 | uint32_t bitbuf; 43 | 44 | /* Number of bits currently held in @bitbuf. */ 45 | uint32_t bitsleft; 46 | 47 | /* Pointer to the next byte to be retrieved from the input buffer. */ 48 | const uint8_t *next; 49 | 50 | /* Pointer past the end of the input buffer. */ 51 | const uint8_t *end; 52 | }; 53 | 54 | /* Initialize a bitstream to read from the specified input buffer. */ 55 | static forceinline void 56 | init_input_bitstream(struct input_bitstream *is, const void *buffer, uint32_t size) 57 | { 58 | is->bitbuf = 0; 59 | is->bitsleft = 0; 60 | is->next = buffer; 61 | is->end = is->next + size; 62 | } 63 | 64 | /* Note: for performance reasons, the following methods don't return error codes 65 | * to the caller if the input buffer is overrun. Instead, they just assume that 66 | * all overrun data is zeroes. 
This has no effect on well-formed compressed 67 | * data. The only disadvantage is that bad compressed data may go undetected, 68 | * but even this is irrelevant if higher level code checksums the uncompressed 69 | * data anyway. */ 70 | 71 | /* Ensure the bit buffer variable for the bitstream contains at least @num_bits 72 | * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits() 73 | * may be called on the bitstream to peek or remove up to @num_bits bits. */ 74 | static forceinline void 75 | bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits) 76 | { 77 | /* This currently works for at most 17 bits. */ 78 | 79 | if (is->bitsleft >= num_bits) 80 | return; 81 | 82 | if (unlikely(is->end - is->next < 2)) 83 | goto overflow; 84 | 85 | is->bitbuf |= (uint32_t)*((uint16_t*)is->next) << (16 - is->bitsleft); 86 | is->next += 2; 87 | is->bitsleft += 16; 88 | 89 | if (unlikely(num_bits == 17 && is->bitsleft == 16)) { 90 | if (unlikely(is->end - is->next < 2)) 91 | goto overflow; 92 | 93 | is->bitbuf |= (uint32_t)*((uint16_t*)(is->next)); 94 | is->next += 2; 95 | is->bitsleft = 32; 96 | } 97 | 98 | return; 99 | 100 | overflow: 101 | is->bitsleft = 32; 102 | } 103 | 104 | /* Return the next @num_bits bits from the bitstream, without removing them. 105 | * There must be at least @num_bits remaining in the buffer variable, from a 106 | * previous call to bitstream_ensure_bits(). */ 107 | static forceinline uint32_t 108 | bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits) 109 | { 110 | return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1); 111 | } 112 | 113 | /* Remove @num_bits from the bitstream. There must be at least @num_bits 114 | * remaining in the buffer variable, from a previous call to 115 | * bitstream_ensure_bits(). 
*/ 116 | static forceinline void 117 | bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits) 118 | { 119 | is->bitbuf <<= num_bits; 120 | is->bitsleft -= num_bits; 121 | } 122 | 123 | /* Remove and return @num_bits bits from the bitstream. There must be at least 124 | * @num_bits remaining in the buffer variable, from a previous call to 125 | * bitstream_ensure_bits(). */ 126 | static forceinline uint32_t 127 | bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits) 128 | { 129 | uint32_t bits = bitstream_peek_bits(is, num_bits); 130 | bitstream_remove_bits(is, num_bits); 131 | return bits; 132 | } 133 | 134 | /* Read and return the next @num_bits bits from the bitstream. */ 135 | static forceinline uint32_t 136 | bitstream_read_bits(struct input_bitstream *is, unsigned num_bits) 137 | { 138 | bitstream_ensure_bits(is, num_bits); 139 | return bitstream_pop_bits(is, num_bits); 140 | } 141 | 142 | /* Read and return the next literal byte embedded in the bitstream. */ 143 | static forceinline uint8_t 144 | bitstream_read_byte(struct input_bitstream *is) 145 | { 146 | if (unlikely(is->end == is->next)) 147 | return 0; 148 | return *is->next++; 149 | } 150 | 151 | /* Read and return the next 16-bit integer embedded in the bitstream. */ 152 | static forceinline uint16_t 153 | bitstream_read_u16(struct input_bitstream *is) 154 | { 155 | uint16_t v; 156 | 157 | if (unlikely(is->end - is->next < 2)) 158 | return 0; 159 | v = *(uint16_t*)is->next; 160 | is->next += 2; 161 | return v; 162 | } 163 | 164 | /* Read and return the next 32-bit integer embedded in the bitstream. */ 165 | static forceinline uint32_t 166 | bitstream_read_u32(struct input_bitstream *is) 167 | { 168 | uint32_t v; 169 | 170 | if (unlikely(is->end - is->next < 4)) 171 | return 0; 172 | v = *(uint32_t*)is->next; 173 | is->next += 4; 174 | return v; 175 | } 176 | 177 | /* Read into @dst_buffer an array of literal bytes embedded in the bitstream. 
178 | * Return 0 if there were enough bytes remaining in the input, otherwise -1. */ 179 | static forceinline int 180 | bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count) 181 | { 182 | if (unlikely(is->end - is->next < count)) 183 | return -1; 184 | memcpy(dst_buffer, is->next, count); 185 | is->next += count; 186 | return 0; 187 | } 188 | 189 | /* Align the input bitstream on a coding-unit boundary. */ 190 | static forceinline void 191 | bitstream_align(struct input_bitstream *is) 192 | { 193 | is->bitsleft = 0; 194 | is->bitbuf = 0; 195 | } 196 | 197 | /******************************************************************************/ 198 | /* Huffman decoding */ 199 | /*----------------------------------------------------------------------------*/ 200 | 201 | /* 202 | * Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12 203 | * bits) and 'length' (low 4 bits). The precise meaning of these fields depends 204 | * on the type of entry: 205 | * 206 | * Root table entries which are *not* subtable pointers: 207 | * symbol: symbol to decode 208 | * length: codeword length in bits 209 | * 210 | * Root table entries which are subtable pointers: 211 | * symbol: index of start of subtable 212 | * length: number of bits with which the subtable is indexed 213 | * 214 | * Subtable entries: 215 | * symbol: symbol to decode 216 | * length: codeword length in bits, minus the number of bits with which the 217 | * root table is indexed 218 | */ 219 | #define DECODE_TABLE_SYMBOL_SHIFT 4 220 | #define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1) 221 | #define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1) 222 | #define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH 223 | #define MAKE_DECODE_TABLE_ENTRY(symbol, length) \ 224 | (((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length)) 225 | 226 | /* 227 | * Read and return the next Huffman-encoded symbol from the given bitstream 228 | * using 
the given decode table. 229 | * 230 | * If the input data is exhausted, then the Huffman symbol will be decoded as if 231 | * the missing bits were all zeroes. 232 | * 233 | * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in 234 | * lzms_decompress.c; keep them in sync! 235 | */ 236 | static forceinline unsigned 237 | read_huffsym(struct input_bitstream *is, const uint16_t decode_table[], 238 | unsigned table_bits, unsigned max_codeword_len) 239 | { 240 | unsigned entry; 241 | unsigned symbol; 242 | unsigned length; 243 | 244 | /* Preload the bitbuffer with 'max_codeword_len' bits so that we're 245 | * guaranteed to be able to fully decode a codeword. */ 246 | bitstream_ensure_bits(is, max_codeword_len); 247 | 248 | /* Index the root table by the next 'table_bits' bits of input. */ 249 | entry = decode_table[bitstream_peek_bits(is, table_bits)]; 250 | 251 | /* Extract the "symbol" and "length" from the entry. */ 252 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; 253 | length = entry & DECODE_TABLE_LENGTH_MASK; 254 | 255 | /* If the root table is indexed by the full 'max_codeword_len' bits, 256 | * then there cannot be any subtables, and this will be known at compile 257 | * time. Otherwise, we must check whether the decoded symbol is really 258 | * a subtable pointer. If so, we must discard the bits with which the 259 | * root table was indexed, then index the subtable by the next 'length' 260 | * bits of input to get the real entry. */ 261 | if (max_codeword_len > table_bits && 262 | entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT))) 263 | { 264 | /* Subtable required */ 265 | bitstream_remove_bits(is, table_bits); 266 | entry = decode_table[symbol + bitstream_peek_bits(is, length)]; 267 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; 268 | length = entry & DECODE_TABLE_LENGTH_MASK; 269 | } 270 | 271 | /* Discard the bits (or the remaining bits, if a subtable was required) 272 | * of the codeword. 
*/ 273 | bitstream_remove_bits(is, length); 274 | 275 | /* Return the decoded symbol. */ 276 | return symbol; 277 | } 278 | 279 | /* 280 | * The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode 281 | * table entries, including all subtable entries, that may be required for 282 | * decoding a given Huffman code. This depends on three parameters: 283 | * 284 | * num_syms: the maximum number of symbols in the code 285 | * table_bits: the number of bits with which the root table will be indexed 286 | * max_codeword_len: the maximum allowed codeword length in the code 287 | * 288 | * Given these parameters, the utility program 'enough' from zlib, when passed 289 | * the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will 290 | * compute the maximum number of entries required. This has already been done 291 | * for the combinations we need and incorporated into the macro below so that 292 | * the mapping can be done at compilation time. If an unknown combination is 293 | * used, then a compilation error will result. To fix this, use 'enough' to 294 | * find the missing value and add it below. If that still doesn't fix the 295 | * compilation error, then most likely a constraint would be violated by the 296 | * requested parameters, so they cannot be used, at least without other changes 297 | * to the decode table --- see DECODE_TABLE_SIZE(). 298 | */ 299 | #define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \ 300 | ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \ 301 | ((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \ 302 | ((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \ 303 | ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \ 304 | ((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \ 305 | ((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 
582 : \ 306 | ((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \ 307 | ((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \ 308 | ((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \ 309 | ((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \ 310 | ((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \ 311 | ((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \ 312 | ((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \ 313 | ((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \ 314 | ((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \ 315 | ((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \ 316 | ((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \ 317 | ((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \ 318 | ((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \ 319 | ((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \ 320 | ((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \ 321 | ((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \ 322 | ((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \ 323 | ((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 
2854 : \
	-1)

extern int
make_huffman_decode_table(uint16_t decode_table[], unsigned num_syms,
			  unsigned table_bits, const uint8_t lens[],
			  unsigned max_codeword_len, uint16_t working_space[]);

/******************************************************************************/
/*                             LZ match copying                               */
/*----------------------------------------------------------------------------*/

/*
 * Copy one machine word from 'src' to 'dst' with a single word-sized load and
 * store.  No alignment check is made; callers only use this when
 * UNALIGNED_ACCESS_IS_FAST is set.
 */
static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
	*(machine_word_t*)dst = *(machine_word_t*)src;
}

/* Return a machine word in which every 16-bit lane holds the value 'b'. */
static forceinline machine_word_t
repeat_u16(uint16_t b)
{
	machine_word_t v = b;

	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
	v |= v << 16;
	/* On 32-bit words the shift count is 0, so this line is a no-op. */
	v |= v << ((WORDBITS == 64) ? 32 : 0);
	return v;
}

/* Return a machine word in which every byte holds the value 'b'. */
static forceinline machine_word_t
repeat_byte(uint8_t b)
{
	return repeat_u16(((uint16_t)b << 8) | b);
}

/*
 * Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
 * offset' to the match destination at 'out_next'.  The source and destination
 * may overlap.
 *
 * This handles validating the length and offset.  It is validated that the
 * beginning of the match source is '>= out_begin' and that end of the match
 * destination is '<= out_end'.  The return value is 0 if the match was valid
 * (and was copied), otherwise -1.
 *
 * 'min_length' is a hint which specifies the minimum possible match length.
 * This should be a compile-time constant.  The bytewise fallback copies
 * 'min_length - 1' bytes (at most 3) unconditionally before entering its
 * loop, so callers must guarantee 'length >= min_length'.
 */
static forceinline int
lz_copy(uint32_t length, uint32_t offset, uint8_t *out_begin, uint8_t *out_next, uint8_t *out_end,
	uint32_t min_length)
{
	const uint8_t *src;
	uint8_t *end;

	/* Validate the offset. */
	if (unlikely(offset > out_next - out_begin))
		return -1;

	/*
	 * Fast path: copy a match which is no longer than a few words, is not
	 * overlapped such that copying a word at a time would produce incorrect
	 * results, and is not too close to the end of the buffer.  Note that
	 * this might copy more than the length of the match, but that's okay in
	 * this scenario.
	 */
	src = out_next - offset;
	if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
	    offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
	{
		copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
		copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
		copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
		return 0;
	}

	/* Validate the length.  This isn't needed in the fast path above, due
	 * to the additional conditions tested, but we do need it here. */
	if (unlikely(length > out_end - out_next))
		return -1;
	end = out_next + length;

	/*
	 * Try to copy one word at a time.  On i386 and x86_64 this is faster
	 * than copying one byte at a time, unless the data is near-random and
	 * all the matches have very short lengths.  Note that since this
	 * requires unaligned memory accesses, it won't necessarily be faster on
	 * every architecture.
	 *
	 * Also note that we might copy more than the length of the match.  For
	 * example, if a word is 8 bytes and the match is of length 5, then
	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
	 * beyond the end of the output buffer, hence the check for (out_end -
	 * end >= WORDBYTES - 1).
	 */
	if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
	{
		if (offset >= WORDBYTES) {
			/* The source and destination words don't overlap. */
			do {
				copy_word_unaligned(src, out_next);
				src += WORDBYTES;
				out_next += WORDBYTES;
			} while (out_next < end);
			return 0;
		} else if (offset == 1) {
			/* Offset 1 matches are equivalent to run-length
			 * encoding of the previous byte.  This case is common
			 * if the data contains many repeated bytes. */
			machine_word_t v = repeat_byte(*(out_next - 1));
			do {
				*(machine_word_t*)out_next = v;
				/* 'src' is not read again on this path. */
				src += WORDBYTES;
				out_next += WORDBYTES;
			} while (out_next < end);
			return 0;
		}
		/*
		 * We don't bother with special cases for other 'offset <
		 * WORDBYTES', which are usually rarer than 'offset == 1'.
		 * Extra checks will just slow things down.  Actually, it's
		 * possible to handle all the 'offset < WORDBYTES' cases using
		 * the same code, but it becomes more complicated and doesn't
		 * seem any faster overall; it definitely slows down the more
		 * common 'offset == 1' case.
		 */
	}

	/* Fall back to a bytewise copy.  The first 'min_length - 1' bytes are
	 * copied without a loop test because the match is known to be at
	 * least 'min_length' bytes long. */
	if (min_length >= 2)
		*out_next++ = *src++;
	if (min_length >= 3)
		*out_next++ = *src++;
	if (min_length >= 4)
		*out_next++ = *src++;
	do {
		*out_next++ = *src++;
	} while (out_next != end);
	return 0;
}

#endif /* _DECOMPRESS_COMMON_H */
--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.c:
--------------------------------------------------------------------------------
/*
 * lzx_common.c - Common code for LZX compression and decompression.
 */

/*
 * Copyright (C) 2012-2016 Eric Biggers
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version.
12 | * 13 | * This program is distributed in the hope that it will be useful, but WITHOUT 14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 | * details. 17 | * 18 | * You should have received a copy of the GNU General Public License along with 19 | * this program. If not, see . 20 | */ 21 | 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif 25 | 26 | #include 27 | 28 | #ifdef __SSE2__ 29 | # include 30 | #endif 31 | 32 | #ifdef __AVX2__ 33 | # include 34 | #endif 35 | 36 | #include "common_defs.h" 37 | #include "lzx_common.h" 38 | 39 | /* Mapping: offset slot => first match offset that uses that offset slot. 40 | * The offset slots for repeat offsets map to "fake" offsets < 1. */ 41 | const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = { 42 | -2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */ 43 | 4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */ 44 | 30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */ 45 | 190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */ 46 | 1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */ 47 | 6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */ 48 | 32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */ 49 | 196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */ 50 | 786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */ 51 | 1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */ 52 | 2097150 /* extra */ 53 | }; 54 | 55 | /* Mapping: offset slot => how many extra bits must be read and added to the 56 | * corresponding offset slot base to decode the match offset. 
*/ 57 | const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = { 58 | 0 , 0 , 0 , 0 , 1 , 59 | 1 , 2 , 2 , 3 , 3 , 60 | 4 , 4 , 5 , 5 , 6 , 61 | 6 , 7 , 7 , 8 , 8 , 62 | 9 , 9 , 10, 10, 11, 63 | 11, 12, 12, 13, 13, 64 | 14, 14, 15, 15, 16, 65 | 16, 17, 17, 17, 17, 66 | 17, 17, 17, 17, 17, 67 | 17, 17, 17, 17, 17, 68 | }; 69 | 70 | /* Round the specified buffer size up to the next valid LZX window size, and 71 | * return its order (log2). Or, if the buffer size is 0 or greater than the 72 | * largest valid LZX window size, return 0. */ 73 | unsigned 74 | lzx_get_window_order(size_t max_bufsize) 75 | { 76 | if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE) 77 | return 0; 78 | 79 | return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER); 80 | } 81 | 82 | /* Given a valid LZX window order, return the number of symbols that will exist 83 | * in the main Huffman code. */ 84 | unsigned 85 | lzx_get_num_main_syms(unsigned window_order) 86 | { 87 | /* Note: one would expect that the maximum match offset would be 88 | * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two 89 | * bytes were to match the last two bytes. However, the format 90 | * disallows this case. This reduces the number of needed offset slots 91 | * by 1. 
*/ 92 | uint32_t window_size = (uint32_t)1 << window_order; 93 | uint32_t max_offset = window_size - LZX_MIN_MATCH_LEN - 1; 94 | unsigned num_offset_slots = 30; 95 | while (max_offset >= lzx_offset_slot_base[num_offset_slots]) 96 | num_offset_slots++; 97 | 98 | return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS); 99 | } 100 | 101 | static void 102 | do_translate_target(void *target, int32_t input_pos) 103 | { 104 | int32_t abs_offset, rel_offset; 105 | 106 | rel_offset = *(int32_t*)target; 107 | if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) { 108 | if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) { 109 | /* "good translation" */ 110 | abs_offset = rel_offset + input_pos; 111 | } else { 112 | /* "compensating translation" */ 113 | abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE; 114 | } 115 | *(uint32_t*)target = abs_offset; 116 | } 117 | } 118 | 119 | static void 120 | undo_translate_target(void *target, int32_t input_pos) 121 | { 122 | int32_t abs_offset, rel_offset; 123 | 124 | abs_offset = *(int32_t*)target; 125 | if (abs_offset >= 0) { 126 | if (abs_offset < LZX_WIM_MAGIC_FILESIZE) { 127 | /* "good translation" */ 128 | rel_offset = abs_offset - input_pos; 129 | *(uint32_t*)target = rel_offset; 130 | } 131 | } else { 132 | if (abs_offset >= -input_pos) { 133 | /* "compensating translation" */ 134 | rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE; 135 | *(uint32_t*)target = rel_offset; 136 | } 137 | } 138 | } 139 | 140 | /* 141 | * Do or undo the 'E8' preprocessing used in LZX. Before compression, the 142 | * uncompressed data is preprocessed by changing the targets of x86 CALL 143 | * instructions from relative offsets to absolute offsets. After decompression, 144 | * the translation is undone by changing the targets of x86 CALL instructions 145 | * from absolute offsets to relative offsets. 
*
 * Note that despite its intent, E8 preprocessing can be done on any data even
 * if it is not actually x86 machine code.  In fact, E8 preprocessing appears to
 * always be used in LZX-compressed resources in WIM files; there is no bit to
 * indicate whether it is used or not, unlike in the LZX compressed format as
 * used in cabinet files, where a bit is reserved for that purpose.
 *
 * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
 * which really means the 5-byte call instruction cannot start in the last 10
 * bytes of the uncompressed data.  This is one of the errors in the LZX
 * documentation.
 *
 * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
 * WIM resource, which apparently is another difference from the LZX compression
 * used in cabinet files.
 *
 * E8 processing is supposed to take the file size as a parameter, as it is used
 * in calculating the translated jump targets.  But in WIM files, this file size
 * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
 *
 * 'process_target' is called as process_target(p + 1, p - data) for each E8
 * byte 'p' that is eligible for translation; it rewrites the 4 bytes that
 * follow the opcode in place.  Buffers of 10 bytes or fewer are left
 * untouched.
 */
static void
lzx_e8_filter(uint8_t *data, uint32_t size, void (*process_target)(void *, int32_t))
{

#if !defined(__SSE2__) && !defined(__AVX2__)
	/*
	 * A worthwhile optimization is to push the end-of-buffer check into the
	 * relatively rare E8 case.  This is possible if we replace the last six
	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
	 * before reaching end-of-buffer.  In addition, this scheme guarantees
	 * that no translation can begin following an E8 byte in the last 10
	 * bytes because a 4-byte offset containing E8 as its high byte is a
	 * large negative number that is not valid for translation.  That is
	 * exactly what we need.
	 */
	uint8_t *tail;
	uint8_t saved_bytes[6];
	uint8_t *p;

	if (size <= 10)
		return;

	/* Temporarily overwrite the last 6 bytes with E8 sentinels; the
	 * original bytes are restored before returning. */
	tail = &data[size - 6];
	memcpy(saved_bytes, tail, 6);
	memset(tail, 0xE8, 6);
	p = data;
	for (;;) {
		while (*p != 0xE8)
			p++;
		if (p >= tail)
			break;
		(*process_target)(p + 1, p - data);
		/* Skip the opcode and its 4-byte target. */
		p += 5;
	}
	memcpy(tail, saved_bytes, 6);
#else
	/* SSE2 or AVX-2 optimized version for x86_64 */

	uint8_t *p = data;
	/* Bit i of 'valid_mask' is set when the byte at p + i may start a
	 * translation, i.e. it is not inside the target of an earlier E8. */
	uint64_t valid_mask = ~0;

	if (size <= 10)
		return;
#ifdef __AVX2__
#  define ALIGNMENT_REQUIRED 32
#else
#  define ALIGNMENT_REQUIRED 16
#endif

	/* Process one byte at a time until the pointer is properly aligned. */
	while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
		if (p >= data + size - 10)
			return;
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			/* Invalidate the opcode byte and its 4 target bytes. */
			valid_mask &= ~0x1F;
		}
		p++;
		valid_mask >>= 1;
		valid_mask |= (uint64_t)1 << 63;
	}

	if (data + size - p >= 64) {

		/* Vectorized processing */

		/* Note: we use a "trap" E8 byte to eliminate the need to check
		 * for end-of-buffer in the inner loop.  This byte is carefully
		 * positioned so that it will never be changed by a previous
		 * translation before it is detected. */

		uint8_t *trap = p + ((data + size - p) & ~31) - 32 + 4;
		uint8_t saved_byte = *trap;
		*trap = 0xE8;

		for (;;) {
			uint32_t e8_mask;
			uint8_t *orig_p = p;
#ifdef __AVX2__
			const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
			for (;;) {
				__m256i bytes = *(const __m256i *)p;
				__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
				e8_mask = _mm256_movemask_epi8(cmpresult);
				if (e8_mask)
					break;
				p += 32;
			}
#else
			const __m128i e8_bytes = _mm_set1_epi8(0xE8);
			for (;;) {
				/* Read the next 32 bytes of data and test them
				 * for E8 bytes. */
				__m128i bytes1 = *(const __m128i *)p;
				__m128i bytes2 = *(const __m128i *)(p + 16);
				__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
				__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
				uint32_t mask1 = _mm_movemask_epi8(cmpresult1);
				uint32_t mask2 = _mm_movemask_epi8(cmpresult2);
				/* The masks have a bit set for each E8 byte.
				 * We stay in this fast inner loop as long as
				 * there are no E8 bytes. */
				if (mask1 | mask2) {
					e8_mask = mask1 | (mask2 << 16);
					break;
				}
				p += 32;
			}
#endif

			/* Did we pass over data with no E8 bytes? */
			if (p != orig_p)
				valid_mask = ~0;

			/* Are we nearing end-of-buffer? */
			if (p == trap - 4)
				break;

			/* Process the E8 bytes.  However, the AND with
			 * 'valid_mask' ensures we never process an E8 byte that
			 * was itself part of a translation target. */
			while ((e8_mask &= valid_mask)) {
				unsigned bit = bsf32(e8_mask);
				(*process_target)(p + bit + 1, p + bit - data);
				valid_mask &= ~((uint64_t)0x1F << bit);
			}

			/* Advance to the next 32-byte group: drop the 32 bits
			 * just consumed and mark the new upper 32 positions
			 * as valid. */
			valid_mask >>= 32;
			valid_mask |= 0xFFFFFFFF00000000;
			p += 32;
		}

		*trap = saved_byte;
	}

	/* Approaching the end of the buffer; process one byte at a time. */
	while (p < data + size - 10) {
		if (*p == 0xE8 && (valid_mask & 1)) {
			(*process_target)(p + 1, p - data);
			valid_mask &= ~0x1F;
		}
		p++;
		valid_mask >>= 1;
		valid_mask |= (uint64_t)1 << 63;
	}
#endif /* __SSE2__ || __AVX2__ */
}

/* Apply the E8 translation (relative => absolute CALL targets) in place to
 * 'size' bytes at 'data'. */
void
lzx_preprocess(uint8_t *data, uint32_t size)
{
	lzx_e8_filter(data, size, do_translate_target);
}

/* Undo the E8 translation (absolute => relative CALL targets) in place on
 * 'size' bytes at 'data'. */
void
lzx_postprocess(uint8_t *data, uint32_t size)
{
	lzx_e8_filter(data, size, undo_translate_target);
}
--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.h:
--------------------------------------------------------------------------------
/*
 * lzx_common.h
 *
 * Declarations shared between LZX compression and decompression.
*/

#ifndef _LZX_COMMON_H
#define _LZX_COMMON_H

#include "lzx_constants.h"
#include "common_defs.h"

/* Offset slot => first match offset using that slot (one extra trailing
 * entry). */
extern const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];

/* Offset slot => number of extra offset bits to read for that slot. */
extern const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];

/* Return the window order (log2) for a buffer size, or 0 if the size is 0 or
 * exceeds LZX_MAX_WINDOW_SIZE. */
extern unsigned
lzx_get_window_order(size_t max_bufsize);

/* Return the number of main-code symbols for a valid window order. */
extern unsigned
lzx_get_num_main_syms(unsigned window_order);

/* Apply E8 (x86 CALL target) translation in place before compression. */
extern void
lzx_preprocess(uint8_t *data, uint32_t size);

/* Undo E8 (x86 CALL target) translation in place after decompression. */
extern void
lzx_postprocess(uint8_t *data, uint32_t size);

#endif /* _LZX_COMMON_H */
--------------------------------------------------------------------------------
/src/ebiggers/lzx_constants.h:
--------------------------------------------------------------------------------
/*
 * lzx_constants.h
 *
 * Constants for the LZX compression format.
 */

#ifndef _LZX_CONSTANTS_H
#define _LZX_CONSTANTS_H

/* Number of literal byte values. */
#define LZX_NUM_CHARS 256

/* The smallest and largest allowed match lengths. */
#define LZX_MIN_MATCH_LEN 2
#define LZX_MAX_MATCH_LEN 257

/* Number of distinct match lengths that can be represented (256). */
#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)

/* Number of match lengths for which no length symbol is required. */
#define LZX_NUM_PRIMARY_LENS 7
#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)

/* Valid values of the 3-bit block type field. */
#define LZX_BLOCKTYPE_VERBATIM 1
#define LZX_BLOCKTYPE_ALIGNED 2
#define LZX_BLOCKTYPE_UNCOMPRESSED 3

/* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
 * sizes of the sliding window.
*/
#define LZX_MIN_WINDOW_ORDER 15
#define LZX_MAX_WINDOW_ORDER 21
#define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */
#define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */

/* Maximum number of offset slots.  (The actual number of offset slots depends
 * on the window size.) */
#define LZX_MAX_OFFSET_SLOTS 50

/* Maximum number of symbols in the main code.  (The actual number of symbols in
 * the main code depends on the window size.)  With the values in this file
 * this is 256 + 50 * 8 = 656. */
#define LZX_MAINCODE_MAX_NUM_SYMBOLS \
	(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))

/* Number of symbols in the length code (256 - 7 = 249). */
#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)

/* Number of symbols in the pre-code. */
#define LZX_PRECODE_NUM_SYMBOLS 20

/* Number of bits in which each pre-code codeword length is represented. */
#define LZX_PRECODE_ELEMENT_SIZE 4

/* Number of low-order bits of each match offset that are entropy-encoded in
 * aligned offset blocks. */
#define LZX_NUM_ALIGNED_OFFSET_BITS 3

/* Number of symbols in the aligned offset code. */
#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)

/* Mask for the match offset bits that are entropy-encoded in aligned offset
 * blocks. */
#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)

/* Number of bits in which each aligned offset codeword length is represented. */
#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3

/* The first offset slot which requires an aligned offset symbol in aligned
 * offset blocks. */
#define LZX_MIN_ALIGNED_OFFSET_SLOT 8

/* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
#define LZX_MIN_ALIGNED_OFFSET 14

/* The maximum number of extra offset bits in verbatim blocks.  (One would need
 * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
 * bits in *aligned* blocks.) */
#define LZX_MAX_NUM_EXTRA_BITS 17

/* Maximum lengths (in bits) for length-limited Huffman code construction.
 * The pre-code and aligned-code limits are the largest values that fit in
 * their fixed-width length fields (15 and 7). */
#define LZX_MAX_MAIN_CODEWORD_LEN 16
#define LZX_MAX_LEN_CODEWORD_LEN 16
#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)

/* For LZX-compressed blocks in WIM resources, this value is always used as the
 * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
 * though the blocks themselves are not this size, and the size of the actual
 * file resource in the WIM file is very likely to be something entirely
 * different as well. */
#define LZX_WIM_MAGIC_FILESIZE 12000000

/* Assumed LZX block size when the encoded block size begins with a 0 bit.
 * This is probably WIM-specific. */
#define LZX_DEFAULT_BLOCK_SIZE 32768

/* Number of offsets in the recent (or "repeat") offsets queue. */
#define LZX_NUM_RECENT_OFFSETS 3

/* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
#define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1)

#endif /* _LZX_CONSTANTS_H */
--------------------------------------------------------------------------------
/src/ebiggers/lzx_decompress.c:
--------------------------------------------------------------------------------
/*
 * lzx_decompress.c
 *
 * A decompressor for the LZX compression format, as used in WIM files.
5 | */ 6 | 7 | /* 8 | * Copyright (C) 2012-2016 Eric Biggers 9 | * 10 | * This program is free software: you can redistribute it and/or modify it under 11 | * the terms of the GNU General Public License as published by the Free Software 12 | * Foundation, either version 2 of the License, or (at your option) any later 13 | * version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 18 | * details. 19 | * 20 | * You should have received a copy of the GNU General Public License along with 21 | * this program. If not, see . 22 | */ 23 | 24 | /* 25 | * LZX is an LZ77 and Huffman-code based compression format that has many 26 | * similarities to DEFLATE (the format used by zlib/gzip). The compression 27 | * ratio is as good or better than DEFLATE. See lzx_compress.c for a format 28 | * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a 29 | * historical overview. Here I make some pragmatic notes. 30 | * 31 | * The old specification for LZX is the document "Microsoft LZX Data Compression 32 | * Format" (1997). It defines the LZX format as used in cabinet files. Allowed 33 | * window sizes are 2^n where 15 <= n <= 21. However, this document contains 34 | * several errors, so don't read too much into it... 35 | * 36 | * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA 37 | * Compression and Decompression" (2014). It defines the LZX format as used by 38 | * Microsoft's binary patcher. It corrects several errors in the 1997 document 39 | * and extends the format in several ways --- namely, optional reference data, 40 | * up to 2^25 byte windows, and longer match lengths. 41 | * 42 | * WIM files use a more restricted form of LZX. 
No LZX DELTA extensions are 43 | * present, the window is not "sliding", E8 preprocessing is done 44 | * unconditionally with a fixed file size, and the maximum window size is always 45 | * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource). 46 | * This code is primarily intended to implement this form of LZX. But although 47 | * not compatible with WIMGAPI, this code also supports maximum window sizes up 48 | * to 2^21 bytes. 49 | * 50 | * TODO: Add support for window sizes up to 2^25 bytes. 51 | */ 52 | 53 | #ifdef HAVE_CONFIG_H 54 | # include "config.h" 55 | #endif 56 | 57 | #include 58 | 59 | #include "decompress_common.h" 60 | #include "lzx_common.h" 61 | #include "system_compression.h" 62 | 63 | /* These values are chosen for fast decompression. */ 64 | #define LZX_MAINCODE_TABLEBITS 11 65 | #define LZX_LENCODE_TABLEBITS 9 66 | #define LZX_PRECODE_TABLEBITS 6 67 | #define LZX_ALIGNEDCODE_TABLEBITS 7 68 | 69 | #define LZX_READ_LENS_MAX_OVERRUN 50 70 | 71 | struct lzx_decompressor { 72 | uint16_t maincode_decode_table[DECODE_TABLE_ENOUGH(LZX_MAINCODE_MAX_NUM_SYMBOLS, LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); 73 | uint8_t maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 74 | 75 | uint16_t lencode_decode_table[DECODE_TABLE_ENOUGH(LZX_LENCODE_NUM_SYMBOLS, LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); 76 | uint8_t lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 77 | 78 | union { 79 | uint16_t alignedcode_decode_table[DECODE_TABLE_ENOUGH(LZX_ALIGNEDCODE_NUM_SYMBOLS, LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT); 80 | uint8_t alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; 81 | }; 82 | 83 | union { 84 | uint16_t precode_decode_table[DECODE_TABLE_ENOUGH(LZX_PRECODE_NUM_SYMBOLS, LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN)] 
_aligned_attribute(DECODE_TABLE_ALIGNMENT); 85 | uint8_t precode_lens[LZX_PRECODE_NUM_SYMBOLS]; 86 | uint8_t extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; 87 | }; 88 | 89 | union { 90 | uint16_t maincode_working_space[2 * (LZX_MAX_MAIN_CODEWORD_LEN + 1) + LZX_MAINCODE_MAX_NUM_SYMBOLS]; 91 | uint16_t lencode_working_space[2 * (LZX_MAX_LEN_CODEWORD_LEN + 1) + LZX_LENCODE_NUM_SYMBOLS]; 92 | uint16_t alignedcode_working_space[2 * (LZX_MAX_ALIGNED_CODEWORD_LEN + 1) + LZX_ALIGNEDCODE_NUM_SYMBOLS]; 93 | uint16_t precode_working_space[2 * (LZX_MAX_PRE_CODEWORD_LEN + 1) + LZX_PRECODE_NUM_SYMBOLS]; 94 | }; 95 | 96 | unsigned window_order; 97 | unsigned num_main_syms; 98 | 99 | /* Like lzx_extra_offset_bits[], but does not include the entropy-coded 100 | * bits of aligned offset blocks */ 101 | uint8_t extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS]; 102 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); 103 | 104 | /* Read a Huffman-encoded symbol using the precode. */ 105 | static forceinline unsigned 106 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) 107 | { 108 | return read_huffsym(is, d->precode_decode_table, 109 | LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); 110 | } 111 | 112 | /* Read a Huffman-encoded symbol using the main code. */ 113 | static forceinline unsigned 114 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) 115 | { 116 | return read_huffsym(is, d->maincode_decode_table, 117 | LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); 118 | } 119 | 120 | /* Read a Huffman-encoded symbol using the length code. */ 121 | static forceinline unsigned 122 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) 123 | { 124 | return read_huffsym(is, d->lencode_decode_table, 125 | LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); 126 | } 127 | 128 | /* Read a Huffman-encoded symbol using the aligned offset code. 
*/
static forceinline unsigned
read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
{
	return read_huffsym(is, d->alignedcode_decode_table,
			    LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
}

/*
 * Read a precode from the compressed input bitstream, then use it to decode
 * @num_lens codeword length values and write them to @lens.
 *
 * The lengths are delta-coded against the values already stored in @lens, so
 * the array must hold the previous block's lengths (or zeroes) on entry.  Up
 * to LZX_READ_LENS_MAX_OVERRUN bytes past @lens + @num_lens may be written.
 *
 * Returns 0 on success, or -1 if the precode is invalid or a "run of
 * identical lengths" is followed by a precode symbol greater than 17.
 */
static int
lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
		       uint8_t *lens, unsigned num_lens)
{
	uint8_t *len_ptr = lens;
	uint8_t *lens_end = lens + num_lens;

	/* Read the lengths of the precode codewords.  These are stored
	 * explicitly. */
	for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
		d->precode_lens[i] =
			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
	}

	/* Build the decoding table for the precode. */
	if (make_huffman_decode_table(d->precode_decode_table,
				      LZX_PRECODE_NUM_SYMBOLS,
				      LZX_PRECODE_TABLEBITS,
				      d->precode_lens,
				      LZX_MAX_PRE_CODEWORD_LEN,
				      d->precode_working_space))
		return -1;

	/* Decode the codeword lengths. */
	do {
		unsigned presym;
		uint8_t len;

		/* Read the next precode symbol. */
		presym = read_presym(d, is);
		if (presym < 17) {
			/* Difference from old length, taken modulo 17 so
			 * that the result stays in the range 0..16. */
			len = *len_ptr - presym;
			if ((int8_t)len < 0)
				len += 17;
			*len_ptr++ = len;
		} else {
			/* Special RLE values */

			unsigned run_len;

			if (presym == 17) {
				/* Run of 0's */
				run_len = 4 + bitstream_read_bits(is, 4);
				len = 0;
			} else if (presym == 18) {
				/* Longer run of 0's */
				run_len = 20 + bitstream_read_bits(is, 5);
				len = 0;
			} else {
				/* Run of identical lengths */
				run_len = 4 + bitstream_read_bits(is, 1);
				presym = read_presym(d, is);
				if (unlikely(presym > 17))
					return -1;
				len = *len_ptr - presym;
				if ((int8_t)len < 0)
					len += 17;
			}

			do {
				*len_ptr++ = len;
			} while (--run_len);
			/*
			 * The worst case overrun is when presym == 18,
			 * run_len == 20 + 31, and only 1 length was remaining.
			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
			 *
			 * Overrun while reading the first half of maincode_lens
			 * can corrupt the previous values in the second half.
			 * This doesn't really matter because the resulting
			 * lengths will still be in range, and data that
			 * generates overruns is invalid anyway.
			 */
		}
	} while (len_ptr < lens_end);

	return 0;
}

/*
 * Read the header of an LZX block.  For all block types, the block type and
 * size is saved in *block_type_ret and *block_size_ret, respectively.  For
 * compressed blocks, the codeword lengths are also saved.  For uncompressed
 * blocks, the recent offsets queue is also updated.
*/
static int
lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
		      uint32_t recent_offsets[], int *block_type_ret,
		      uint32_t *block_size_ret)
{
	int block_type;
	uint32_t block_size;

	/* 3 bits of block type plus the 1-bit size flag. */
	bitstream_ensure_bits(is, 4);

	/* Read the block type. */
	block_type = bitstream_pop_bits(is, 3);

	/* Read the block size.  A set flag bit selects the default size;
	 * otherwise the size is given explicitly in 16 bits, or 24 bits when
	 * the window order is at least 16. */
	if (bitstream_pop_bits(is, 1)) {
		block_size = LZX_DEFAULT_BLOCK_SIZE;
	} else {
		block_size = bitstream_read_bits(is, 16);
		if (d->window_order >= 16) {
			block_size <<= 8;
			block_size |= bitstream_read_bits(is, 8);
		}
	}

	switch (block_type) {

	case LZX_BLOCKTYPE_ALIGNED:

		/* Read the aligned offset codeword lengths. */

		for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
			d->alignedcode_lens[i] =
				bitstream_read_bits(is,
						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
		}

		/* Fall through, since the rest of the header for aligned offset
		 * blocks is the same as that for verbatim blocks. */

	case LZX_BLOCKTYPE_VERBATIM:

		/* Read the main codeword lengths, which are divided into two
		 * parts: literal symbols and match headers. */

		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
					   LZX_NUM_CHARS))
			return -1;

		if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
					   d->num_main_syms - LZX_NUM_CHARS))
			return -1;


		/* Read the length codeword lengths. */

		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
					   LZX_LENCODE_NUM_SYMBOLS))
			return -1;

		break;

	case LZX_BLOCKTYPE_UNCOMPRESSED:
		/*
		 * The header of an uncompressed block contains new values for
		 * the recent offsets queue, starting on the next 16-bit
		 * boundary in the bitstream.  Careful: if the stream is
		 * *already* aligned, the correct thing to do is to throw away
		 * the next 16 bits (this is probably a mistake in the format).
		 */
		bitstream_ensure_bits(is, 1);
		bitstream_align(is);
		recent_offsets[0] = bitstream_read_u32(is);
		recent_offsets[1] = bitstream_read_u32(is);
		recent_offsets[2] = bitstream_read_u32(is);

		/* Offsets of 0 are invalid. */
		if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
		    recent_offsets[2] == 0)
			return -1;
		break;

	default:
		/* Unrecognized block type. */
		return -1;
	}

	*block_type_ret = block_type;
	*block_size_ret = block_size;
	return 0;
}

/*
 * Decompress a block of LZX-compressed data.
 *
 * Decodes exactly 'block_size' bytes to 'out_next'.  Matches may refer back
 * as far as 'out_begin' but may not extend past the end of this block.
 * 'recent_offsets' is updated as matches are decoded.  Returns 0 on success,
 * or -1 if a Huffman code or a match is invalid.
 */
static int
lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
		     int block_type, uint32_t block_size,
		     uint8_t * const out_begin, uint8_t *out_next, uint32_t recent_offsets[])
{
	uint8_t * const block_end = out_next + block_size;
	unsigned min_aligned_offset_slot;

	/*
	 * Build the Huffman decode tables.  We always need to build the main
	 * and length decode tables.  For aligned blocks we additionally need to
	 * build the aligned offset decode table.
	 */

	if (make_huffman_decode_table(d->maincode_decode_table,
				      d->num_main_syms,
				      LZX_MAINCODE_TABLEBITS,
				      d->maincode_lens,
				      LZX_MAX_MAIN_CODEWORD_LEN,
				      d->maincode_working_space))
		return -1;

	if (make_huffman_decode_table(d->lencode_decode_table,
				      LZX_LENCODE_NUM_SYMBOLS,
				      LZX_LENCODE_TABLEBITS,
				      d->lencode_lens,
				      LZX_MAX_LEN_CODEWORD_LEN,
				      d->lencode_working_space))
		return -1;

	if (block_type == LZX_BLOCKTYPE_ALIGNED) {
		if (make_huffman_decode_table(d->alignedcode_decode_table,
					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
					      LZX_ALIGNEDCODE_TABLEBITS,
					      d->alignedcode_lens,
					      LZX_MAX_ALIGNED_CODEWORD_LEN,
					      d->alignedcode_working_space))
			return -1;
		min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
		memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
		       sizeof(lzx_extra_offset_bits));
	} else {
		/* No slot reaches this threshold, so no aligned symbols are
		 * ever read in this block. */
		min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
		memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
		       sizeof(lzx_extra_offset_bits));
	}

	/* Decode the literals and matches. */

	do {
		unsigned mainsym;
		unsigned length;
		uint32_t offset;
		unsigned offset_slot;

		mainsym = read_mainsym(d, is);
		if (mainsym < LZX_NUM_CHARS) {
			/* Literal */
			*out_next++ = mainsym;
			continue;
		}

		/* Match */

		/* Decode the length header and offset slot. */
		STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
		length = mainsym % LZX_NUM_LEN_HEADERS;
		offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;

		/* If needed, read a length symbol to decode the full length. */
		if (length == LZX_NUM_PRIMARY_LENS)
			length += read_lensym(d, is);
		length += LZX_MIN_MATCH_LEN;

		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
			/* Repeat offset */

			/* Note: This isn't a real LRU queue, since using the R2
			 * offset doesn't bump the R1 offset down to R2. */
			offset = recent_offsets[offset_slot];
			recent_offsets[offset_slot] = recent_offsets[0];
		} else {
			/* Explicit offset */
			offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
			if (offset_slot >= min_aligned_offset_slot) {
				offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
					 read_alignedsym(d, is);
			}
			offset += lzx_offset_slot_base[offset_slot];

			/* Update the match offset LRU queue. */
			STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
			recent_offsets[2] = recent_offsets[1];
			recent_offsets[1] = recent_offsets[0];
		}
		recent_offsets[0] = offset;

		/* Validate the match and copy it to the current position. */
		if (unlikely(lz_copy(length, offset, out_begin,
				     out_next, block_end, LZX_MIN_MATCH_LEN)))
			return -1;
		out_next += length;
	} while (out_next != block_end);

	return 0;
}

/*
 * Decompress 'compressed_size' bytes of LZX data into exactly
 * 'uncompressed_size' bytes at 'uncompressed_data'.
 *
 * Blocks are decoded until the output buffer is full, after which the E8
 * translation is undone if an E8 byte may be present.  Returns 0 on success,
 * or -1 if the data is invalid.
 */
int
lzx_decompress(struct lzx_decompressor *d,
	       const void *compressed_data, size_t compressed_size,
	       void *uncompressed_data, size_t uncompressed_size)
{
	uint8_t * const out_begin = uncompressed_data;
	uint8_t *out_next = out_begin;
	uint8_t * const out_end = out_begin + uncompressed_size;
	struct input_bitstream is;
	STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
	uint32_t recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
	unsigned may_have_e8_byte = 0;

	init_input_bitstream(&is, compressed_data, compressed_size);

	/* Codeword lengths begin as all 0's for delta encoding purposes. */
	memset(d->maincode_lens, 0, d->num_main_syms);
	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);

	/* Decompress blocks until we have all the uncompressed data. */

	while (out_next != out_end) {
		int block_type;
		uint32_t block_size;

		if (lzx_read_block_header(d, &is, recent_offsets,
					  &block_type, &block_size))
			return -1;

		/* A block must be non-empty and fit in the remaining output. */
		if (block_size < 1 || block_size > out_end - out_next)
			return -1;

		if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {

			/* Compressed block */
			if (lzx_decompress_block(d, &is, block_type, block_size,
						 out_begin, out_next,
						 recent_offsets))
				return -1;

			/* If the first E8 byte was in this block, then it must
			 * have been encoded as a literal using mainsym E8. */
			may_have_e8_byte |= d->maincode_lens[0xE8];
		} else {

			/* Uncompressed block */
			if (bitstream_read_bytes(&is, out_next, block_size))
				return -1;

			/* Re-align the bitstream if needed. */
			if (block_size & 1)
				bitstream_read_byte(&is);

			/* There may have been an E8 byte in the block. */
			may_have_e8_byte = 1;
		}
		out_next += block_size;
	}

	/* Postprocess the data unless it cannot possibly contain E8 bytes. */
	if (may_have_e8_byte)
		lzx_postprocess(uncompressed_data, uncompressed_size);

	return 0;
}

bool
lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d)
{
	unsigned window_order;

	window_order = lzx_get_window_order(max_block_size);
	if (window_order == 0)
		return false;

	d->window_order = window_order;
	d->num_main_syms = lzx_get_num_main_syms(window_order);

	/* Initialize 'd->extra_offset_bits_minus_aligned'.
*/ 504 | STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) == 505 | sizeof(lzx_extra_offset_bits)); 506 | STATIC_ASSERT(sizeof(d->extra_offset_bits) == 507 | sizeof(lzx_extra_offset_bits)); 508 | memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits, 509 | sizeof(lzx_extra_offset_bits)); 510 | for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; 511 | offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++) 512 | { 513 | d->extra_offset_bits_minus_aligned[offset_slot] -= 514 | LZX_NUM_ALIGNED_OFFSET_BITS; 515 | } 516 | 517 | return true; 518 | } 519 | -------------------------------------------------------------------------------- /src/ebiggers/system_compression.h: -------------------------------------------------------------------------------- 1 | /* 2 | * system_compression.h - declarations for accessing System Compressed files 3 | * 4 | * Copyright (C) 2015 Eric Biggers 5 | * 6 | * This program is free software: you can redistribute it and/or modify it under 7 | * the terms of the GNU General Public License as published by the Free Software 8 | * Foundation, either version 2 of the License, or (at your option) any later 9 | * version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 14 | * details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program. If not, see . 
18 | */ 19 | 20 | #pragma once 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | /* System compressed file access */ 31 | 32 | struct ntfs_system_decompression_ctx; 33 | 34 | extern void 35 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx); 36 | 37 | /* XPRESS decompression */ 38 | 39 | #define XPRESS_NUM_CHARS 256 40 | #define XPRESS_NUM_SYMBOLS 512 41 | #define XPRESS_MAX_CODEWORD_LEN 15 42 | 43 | #define XPRESS_MIN_MATCH_LEN 3 44 | 45 | #define DECODE_TABLE_ALIGNMENT 16 46 | 47 | struct xpress_decompressor { 48 | union { 49 | uint16_t decode_table[2566] __attribute__((aligned(DECODE_TABLE_ALIGNMENT))); 50 | uint8_t lens[XPRESS_NUM_SYMBOLS]; 51 | }; 52 | uint16_t working_space[2 * (XPRESS_MAX_CODEWORD_LEN + 1) + XPRESS_NUM_SYMBOLS]; 53 | } __attribute__((aligned(DECODE_TABLE_ALIGNMENT))); 54 | 55 | extern struct xpress_decompressor *xpress_allocate_decompressor(void); 56 | 57 | extern int xpress_decompress(struct xpress_decompressor *decompressor, 58 | const void *compressed_data, size_t compressed_size, 59 | void *uncompressed_data, size_t uncompressed_size); 60 | 61 | extern void xpress_free_decompressor(struct xpress_decompressor *decompressor); 62 | 63 | /* LZX decompression */ 64 | 65 | struct lzx_decompressor; 66 | 67 | extern bool 68 | lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d); 69 | 70 | extern int lzx_decompress(struct lzx_decompressor *decompressor, 71 | const void *compressed_data, size_t compressed_size, 72 | void *uncompressed_data, size_t uncompressed_size); 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | -------------------------------------------------------------------------------- /src/ebiggers/xpress_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * xpress_decompress.c 3 | * 4 | * A decompressor for the XPRESS compression format (Huffman variant). 
*/

/*
 *
 * Copyright (C) 2012-2016 Eric Biggers
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */


/*
 * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
 * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
 * it is a little faster to compress and decompress.
 *
 * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
 * Xpress Compression Algorithm". In the MSDN library, it can currently be
 * found under "Open Specifications => Protocols => Windows Protocols => Windows
 * Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
 * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
 * (there apparently are some other versions of XPRESS as well).
 *
 * If you are already familiar with the LZ77 algorithm and Huffman coding, the
 * XPRESS format is fairly simple. The compressed data begins with 256 bytes
 * that contain 512 4-bit integers that are the lengths of the symbols in the
 * Huffman code used for match/literal headers.
In contrast with more 42 | * complicated formats such as DEFLATE and LZX, this is the only Huffman code 43 | * that is used for the entirety of the XPRESS compressed data, and the codeword 44 | * lengths are not encoded with a pretree. 45 | * 46 | * The rest of the compressed data is Huffman-encoded symbols. Values 0 through 47 | * 255 represent the corresponding literal bytes. Values 256 through 511 48 | * represent matches and may require extra bits or bytes to be read to get the 49 | * match offset and match length. 50 | * 51 | * The trickiest part is probably the way in which literal bytes for match 52 | * lengths are interleaved in the bitstream. 53 | * 54 | * Also, a caveat--- according to Microsoft's documentation for XPRESS, 55 | * 56 | * "Some implementation of the decompression algorithm expect an extra 57 | * symbol to mark the end of the data. Specifically, some implementations 58 | * fail during decompression if the Huffman symbol 256 is not found after 59 | * the actual data." 60 | * 61 | * This is the case with Microsoft's implementation in WIMGAPI, for example. So 62 | * although our implementation doesn't currently check for this extra symbol, 63 | * compressors would be wise to add it. 64 | */ 65 | 66 | #ifdef HAVE_CONFIG_H 67 | # include "config.h" 68 | #endif 69 | 70 | #include "decompress_common.h" 71 | #include "system_compression.h" 72 | 73 | /* This value is chosen for fast decompression. */ 74 | #define XPRESS_TABLEBITS 11 75 | 76 | int 77 | xpress_decompress(struct xpress_decompressor * d, 78 | const void *compressed_data, size_t compressed_size, 79 | void *uncompressed_data, size_t uncompressed_size) 80 | { 81 | const uint8_t * const in_begin = compressed_data; 82 | uint8_t * const out_begin = uncompressed_data; 83 | uint8_t *out_next = out_begin; 84 | uint8_t * const out_end = out_begin + uncompressed_size; 85 | struct input_bitstream is; 86 | 87 | /* Read the Huffman codeword lengths. 
*/ 88 | if (compressed_size < XPRESS_NUM_SYMBOLS / 2) 89 | return -1; 90 | for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { 91 | d->lens[2 * i + 0] = in_begin[i] & 0xf; 92 | d->lens[2 * i + 1] = in_begin[i] >> 4; 93 | } 94 | 95 | /* Build a decoding table for the Huffman code. */ 96 | if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, 97 | XPRESS_TABLEBITS, d->lens, 98 | XPRESS_MAX_CODEWORD_LEN, 99 | d->working_space)) 100 | return -1; 101 | 102 | /* Decode the matches and literals. */ 103 | 104 | init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, 105 | compressed_size - XPRESS_NUM_SYMBOLS / 2); 106 | 107 | while (out_next != out_end) { 108 | unsigned sym; 109 | unsigned log2_offset; 110 | uint32_t length; 111 | uint32_t offset; 112 | 113 | sym = read_huffsym(&is, d->decode_table, 114 | XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); 115 | if (sym < XPRESS_NUM_CHARS) { 116 | /* Literal */ 117 | *out_next++ = sym; 118 | } else { 119 | /* Match */ 120 | length = sym & 0xf; 121 | log2_offset = (sym >> 4) & 0xf; 122 | 123 | bitstream_ensure_bits(&is, 16); 124 | 125 | offset = ((uint32_t)1 << log2_offset) | 126 | bitstream_pop_bits(&is, log2_offset); 127 | 128 | if (length == 0xf) { 129 | length += bitstream_read_byte(&is); 130 | if (length == 0xf + 0xff) 131 | length = bitstream_read_u16(&is); 132 | } 133 | length += XPRESS_MIN_MATCH_LEN; 134 | 135 | if (unlikely(lz_copy(length, offset, 136 | out_begin, out_next, out_end, 137 | XPRESS_MIN_MATCH_LEN))) 138 | return -1; 139 | 140 | out_next += length; 141 | } 142 | } 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /src/misc.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2023 2 | * 3 | * This file is part of ntfs-efi. 
4 | * 5 | * ntfs-efi is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * ntfs-efi is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with ntfs-efi. If not, see . */ 17 | 18 | #include "misc.h" 19 | #include 20 | #include 21 | 22 | extern "C" 23 | void* memset(void* s, int c, size_t n) { 24 | void* orig_s = s; 25 | 26 | // FIXME - faster if we make sure we're aligned (also in memcpy)? 27 | 28 | #if __INTPTR_WIDTH__ == 64 29 | uint64_t v; 30 | 31 | v = 0; 32 | 33 | for (unsigned int i = 0; i < sizeof(uint64_t); i++) { 34 | v <<= 8; 35 | v |= c & 0xff; 36 | } 37 | 38 | while (n >= sizeof(uint64_t)) { 39 | *(uint64_t*)s = v; 40 | 41 | s = (uint8_t*)s + sizeof(uint64_t); 42 | n -= sizeof(uint64_t); 43 | } 44 | #else 45 | uint32_t v; 46 | 47 | v = 0; 48 | 49 | for (unsigned int i = 0; i < sizeof(uint32_t); i++) { 50 | v <<= 8; 51 | v |= c & 0xff; 52 | } 53 | 54 | while (n >= sizeof(uint32_t)) { 55 | *(uint32_t*)s = v; 56 | 57 | s = (uint8_t*)s + sizeof(uint32_t); 58 | n -= sizeof(uint32_t); 59 | } 60 | #endif 61 | 62 | while (n > 0) { 63 | *(uint8_t*)s = c; 64 | 65 | s = (uint8_t*)s + 1; 66 | n--; 67 | } 68 | 69 | return orig_s; 70 | } 71 | 72 | extern "C" 73 | int memcmp(const void* s1, const void* s2, size_t n) { 74 | #if __INTPTR_WIDTH__ == 64 75 | while (n > sizeof(uint64_t)) { 76 | uint64_t c1 = *(uint64_t*)s1; 77 | uint64_t c2 = *(uint64_t*)s2; 78 | 79 | if (c1 != c2) 80 | return c1 > c2 ? 
1 : -1; 81 | 82 | s1 = (uint64_t*)s1 + 1; 83 | s2 = (uint64_t*)s2 + 1; 84 | n -= sizeof(uint64_t); 85 | } 86 | #endif 87 | 88 | while (n > sizeof(uint32_t)) { 89 | uint32_t c1 = *(uint32_t*)s1; 90 | uint32_t c2 = *(uint32_t*)s2; 91 | 92 | if (c1 != c2) 93 | return c1 > c2 ? 1 : -1; 94 | 95 | s1 = (uint32_t*)s1 + 1; 96 | s2 = (uint32_t*)s2 + 1; 97 | n -= sizeof(uint32_t); 98 | } 99 | 100 | while (n > 0) { 101 | uint8_t c1 = *(uint8_t*)s1; 102 | uint8_t c2 = *(uint8_t*)s2; 103 | 104 | if (c1 != c2) 105 | return c1 > c2 ? 1 : -1; 106 | 107 | s1 = (uint8_t*)s1 + 1; 108 | s2 = (uint8_t*)s2 + 1; 109 | n--; 110 | } 111 | 112 | return 0; 113 | } 114 | 115 | extern "C" 116 | void* memcpy(void* dest, const void* src, size_t n) { 117 | void* orig_dest = dest; 118 | 119 | #if __INTPTR_WIDTH__ == 64 120 | while (n >= sizeof(uint64_t)) { 121 | *(uint64_t*)dest = *(uint64_t*)src; 122 | 123 | dest = (uint8_t*)dest + sizeof(uint64_t); 124 | src = (uint8_t*)src + sizeof(uint64_t); 125 | 126 | n -= sizeof(uint64_t); 127 | } 128 | #endif 129 | 130 | while (n >= sizeof(uint32_t)) { 131 | *(uint32_t*)dest = *(uint32_t*)src; 132 | 133 | dest = (uint8_t*)dest + sizeof(uint32_t); 134 | src = (uint8_t*)src + sizeof(uint32_t); 135 | 136 | n -= sizeof(uint32_t); 137 | } 138 | 139 | while (n >= sizeof(uint16_t)) { 140 | *(uint16_t*)dest = *(uint16_t*)src; 141 | 142 | dest = (uint8_t*)dest + sizeof(uint16_t); 143 | src = (uint8_t*)src + sizeof(uint16_t); 144 | 145 | n -= sizeof(uint16_t); 146 | } 147 | 148 | while (n >= sizeof(uint8_t)) { 149 | *(uint8_t*)dest = *(uint8_t*)src; 150 | 151 | dest = (uint8_t*)dest + sizeof(uint8_t); 152 | src = (uint8_t*)src + sizeof(uint8_t); 153 | 154 | n -= sizeof(uint8_t); 155 | } 156 | 157 | return orig_dest; 158 | } 159 | 160 | const char* error_string(EFI_STATUS Status) { 161 | switch (Status) { 162 | case EFI_SUCCESS: 163 | return "EFI_SUCCESS"; 164 | 165 | case EFI_LOAD_ERROR: 166 | return "EFI_LOAD_ERROR"; 167 | 168 | case EFI_INVALID_PARAMETER: 169 | 
return "EFI_INVALID_PARAMETER"; 170 | 171 | case EFI_UNSUPPORTED: 172 | return "EFI_UNSUPPORTED"; 173 | 174 | case EFI_BAD_BUFFER_SIZE: 175 | return "EFI_BAD_BUFFER_SIZE"; 176 | 177 | case EFI_BUFFER_TOO_SMALL: 178 | return "EFI_BUFFER_TOO_SMALL"; 179 | 180 | case EFI_NOT_READY: 181 | return "EFI_NOT_READY"; 182 | 183 | case EFI_DEVICE_ERROR: 184 | return "EFI_DEVICE_ERROR"; 185 | 186 | case EFI_WRITE_PROTECTED: 187 | return "EFI_WRITE_PROTECTED"; 188 | 189 | case EFI_OUT_OF_RESOURCES: 190 | return "EFI_OUT_OF_RESOURCES"; 191 | 192 | case EFI_VOLUME_CORRUPTED: 193 | return "EFI_VOLUME_CORRUPTED"; 194 | 195 | case EFI_VOLUME_FULL: 196 | return "EFI_VOLUME_FULL"; 197 | 198 | case EFI_NO_MEDIA: 199 | return "EFI_NO_MEDIA"; 200 | 201 | case EFI_MEDIA_CHANGED: 202 | return "EFI_MEDIA_CHANGED"; 203 | 204 | case EFI_NOT_FOUND: 205 | return "EFI_NOT_FOUND"; 206 | 207 | case EFI_ACCESS_DENIED: 208 | return "EFI_ACCESS_DENIED"; 209 | 210 | case EFI_NO_RESPONSE: 211 | return "EFI_NO_RESPONSE"; 212 | 213 | case EFI_NO_MAPPING: 214 | return "EFI_NO_MAPPING"; 215 | 216 | case EFI_TIMEOUT: 217 | return "EFI_TIMEOUT"; 218 | 219 | case EFI_NOT_STARTED: 220 | return "EFI_NOT_STARTED"; 221 | 222 | case EFI_ALREADY_STARTED: 223 | return "EFI_ALREADY_STARTED"; 224 | 225 | case EFI_ABORTED: 226 | return "EFI_ABORTED"; 227 | 228 | case EFI_ICMP_ERROR: 229 | return "EFI_ICMP_ERROR"; 230 | 231 | case EFI_TFTP_ERROR: 232 | return "EFI_TFTP_ERROR"; 233 | 234 | case EFI_PROTOCOL_ERROR: 235 | return "EFI_PROTOCOL_ERROR"; 236 | 237 | case EFI_INCOMPATIBLE_VERSION: 238 | return "EFI_INCOMPATIBLE_VERSION"; 239 | 240 | case EFI_SECURITY_VIOLATION: 241 | return "EFI_SECURITY_VIOLATION"; 242 | 243 | case EFI_CRC_ERROR: 244 | return "EFI_CRC_ERROR"; 245 | 246 | case EFI_END_OF_MEDIA: 247 | return "EFI_END_OF_MEDIA"; 248 | 249 | case EFI_END_OF_FILE: 250 | return "EFI_END_OF_FILE"; 251 | 252 | case EFI_INVALID_LANGUAGE: 253 | return "EFI_INVALID_LANGUAGE"; 254 | 255 | case EFI_COMPROMISED_DATA: 256 | 
return "EFI_COMPROMISED_DATA"; 257 | 258 | default: 259 | return "(unknown error)"; 260 | } 261 | } 262 | 263 | char* stpcpy(char* dest, const char* src) { 264 | while (*src != 0) { 265 | *dest = *src; 266 | dest++; 267 | src++; 268 | } 269 | 270 | *dest = 0; 271 | 272 | return dest; 273 | } 274 | -------------------------------------------------------------------------------- /src/misc.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2023 2 | * 3 | * This file is part of ntfs-efi. 4 | * 5 | * ntfs-efi is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * ntfs-efi is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with ntfs-efi. If not, see . */ 17 | 18 | #pragma once 19 | 20 | #include 21 | 22 | const char* error_string(EFI_STATUS Status); 23 | char* stpcpy(char* dest, const char* src); 24 | -------------------------------------------------------------------------------- /src/ntfs.cpp: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2023 2 | * 3 | * This file is part of ntfs-efi. 4 | * 5 | * ntfs-efi is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 
9 | * 10 | * ntfs-efi is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with ntfs-efi. If not, see . */ 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "ntfs.h" 26 | #include "misc.h" 27 | #include "quibbleproto.h" 28 | #include "ebiggers/system_compression.h" 29 | 30 | #define UNUSED(x) (void)(x) 31 | #define sector_align(n, a) ((n)&((a)-1)?(((n)+(a))&~((a)-1)):(n)) 32 | 33 | using namespace std; 34 | 35 | struct mapping { 36 | LIST_ENTRY list_entry; 37 | uint64_t lcn; 38 | uint64_t vcn; 39 | uint64_t length; 40 | }; 41 | 42 | struct volume { 43 | ~volume(); 44 | 45 | EFI_SIMPLE_FILE_SYSTEM_PROTOCOL proto; 46 | EFI_QUIBBLE_PROTOCOL quibble_proto; 47 | NTFS_BOOT_SECTOR* boot_sector; 48 | EFI_HANDLE controller; 49 | EFI_BLOCK_IO_PROTOCOL* block; 50 | EFI_DISK_IO_PROTOCOL* disk_io; 51 | uint64_t file_record_size; 52 | LIST_ENTRY mft_mappings; 53 | char16_t upcase[0x10000]; 54 | }; 55 | 56 | struct inode { 57 | inode(volume& vol) : vol(vol) { } 58 | ~inode(); 59 | 60 | EFI_FILE_PROTOCOL proto; 61 | uint64_t ino; 62 | volume& vol; 63 | bool inode_loaded; 64 | STANDARD_INFORMATION standard_info; 65 | uint64_t size; 66 | uint64_t phys_size; 67 | uint64_t vdl; 68 | uint64_t position; 69 | LIST_ENTRY index_mappings; 70 | index_root* index_root; 71 | LIST_ENTRY levels; 72 | bool is_dir; 73 | size_t name_len; 74 | char16_t* name; 75 | bool data_loaded; 76 | LIST_ENTRY data_mappings; 77 | uint8_t* data; 78 | }; 79 | 80 | struct btree_level { 81 | LIST_ENTRY list_entry; 82 | const index_entry* ent; 83 | uint8_t data[]; 84 | }; 85 | 86 | static EFI_SYSTEM_TABLE* systable; 87 | static EFI_BOOT_SERVICES* bs; 88 | static 
EFI_DRIVER_BINDING_PROTOCOL drvbind; 89 | static EFI_QUIBBLE_INFO_PROTOCOL* info_proto = nullptr; 90 | 91 | static void populate_file_handle(EFI_FILE_PROTOCOL* h); 92 | static EFI_STATUS load_inode(inode& ino); 93 | static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset, 94 | uint8_t* buf, uint64_t size); 95 | static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, 96 | unsigned int sector_size); 97 | static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att, 98 | LIST_ENTRY* mappings); 99 | static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record, 100 | invocable auto func); 101 | 102 | void do_print(const char* s) { 103 | if (info_proto) 104 | info_proto->Print(s); 105 | } 106 | 107 | void do_print_error(const char* func, EFI_STATUS Status) { 108 | char s[255], *p; 109 | 110 | p = stpcpy(s, func); 111 | p = stpcpy(p, " returned "); 112 | p = stpcpy(p, error_string(Status)); 113 | p = stpcpy(p, "\n"); 114 | 115 | do_print(s); 116 | } 117 | 118 | static EFI_STATUS drv_supported(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle, 119 | EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) { 120 | EFI_STATUS Status; 121 | EFI_DISK_IO_PROTOCOL* disk_io; 122 | EFI_GUID guid_disk = EFI_DISK_IO_PROTOCOL_GUID; 123 | EFI_GUID guid_block = EFI_BLOCK_IO_PROTOCOL_GUID; 124 | 125 | UNUSED(RemainingDevicePath); 126 | 127 | Status = bs->OpenProtocol(ControllerHandle, &guid_disk, (void**)&disk_io, This->DriverBindingHandle, 128 | ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER); 129 | 130 | if (EFI_ERROR(Status)) 131 | return Status; 132 | 133 | bs->CloseProtocol(ControllerHandle, &guid_disk, This->DriverBindingHandle, ControllerHandle); 134 | 135 | return bs->OpenProtocol(ControllerHandle, &guid_block, NULL, This->DriverBindingHandle, 136 | ControllerHandle, EFI_OPEN_PROTOCOL_TEST_PROTOCOL); 137 | } 138 | 139 | static int 
cmp_filenames(const char16_t* upcase, u16string_view fn1, u16string_view fn2) { 140 | // FIXME - what about directories with case-sensitivity flag set? 141 | 142 | while (!fn1.empty() || !fn2.empty()) { 143 | if (fn1.empty()) 144 | return -1; 145 | 146 | if (fn2.empty()) 147 | return 1; 148 | 149 | char16_t c1 = upcase[fn1[0]]; 150 | char16_t c2 = upcase[fn2[0]]; 151 | 152 | if (c1 < c2) 153 | return -1; 154 | else if (c1 > c2) 155 | return 1; 156 | 157 | fn1 = u16string_view(fn1.data() + 1, fn1.size() - 1); 158 | fn2 = u16string_view(fn2.data() + 1, fn2.size() - 1); 159 | } 160 | 161 | return 0; 162 | } 163 | 164 | static EFI_STATUS find_file_in_dir(const volume& vol, uint64_t dir, u16string_view name, uint64_t* inode) { 165 | EFI_STATUS Status, Status2; 166 | FILE_RECORD_SEGMENT_HEADER* file; 167 | index_root* ir = nullptr; 168 | LIST_ENTRY index_mappings; 169 | const index_entry* ent; 170 | uint8_t* scratch = nullptr; 171 | 172 | InitializeListHead(&index_mappings); 173 | 174 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file); 175 | if (EFI_ERROR(Status)) { 176 | do_print_error("AllocatePool", Status); 177 | return Status; 178 | } 179 | 180 | Status = read_from_mappings(vol, &vol.mft_mappings, dir * vol.file_record_size, 181 | (uint8_t*)file, vol.file_record_size); 182 | if (EFI_ERROR(Status)) { 183 | bs->FreePool(file); 184 | do_print_error("read_from_mappings", Status); 185 | return Status; 186 | } 187 | 188 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 189 | do_print("Signature was not FILE\n"); 190 | bs->FreePool(file); 191 | return EFI_INVALID_PARAMETER; 192 | } 193 | 194 | Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size, 195 | vol.boot_sector->BytesPerSector); 196 | 197 | if (EFI_ERROR(Status)) { 198 | do_print_error("process_fixups", Status); 199 | bs->FreePool(file); 200 | return Status; 201 | } 202 | 203 | Status2 = loop_through_atts(vol, dir, file, [&](const 
ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool { 204 | switch (att.TypeCode) { 205 | case ntfs_attribute::INDEX_ALLOCATION: 206 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) { 207 | Status = read_mappings(vol, att, &index_mappings); 208 | if (EFI_ERROR(Status)) { 209 | do_print_error("read_mappings", Status); 210 | return false; 211 | } 212 | } 213 | break; 214 | 215 | case ntfs_attribute::INDEX_ROOT: 216 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ir) { 217 | Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ir); 218 | if (EFI_ERROR(Status)) { 219 | do_print_error("AllocatePool", Status); 220 | return false; 221 | } 222 | 223 | memcpy(ir, res_data.data(), res_data.size()); 224 | } 225 | break; 226 | 227 | default: 228 | break; 229 | } 230 | 231 | return true; 232 | }); 233 | 234 | if (EFI_ERROR(Status2)) { 235 | do_print_error("loop_through_atts", Status2); 236 | Status = Status2; 237 | } 238 | 239 | if (EFI_ERROR(Status)) 240 | goto end; 241 | 242 | if (!ir) { 243 | Status = EFI_NOT_FOUND; 244 | goto end; 245 | } 246 | 247 | ent = reinterpret_cast((uint8_t*)&ir->node_header + ir->node_header.first_entry); 248 | 249 | while (true) { 250 | string_view data((const char*)ent + sizeof(index_entry), ent->stream_length); 251 | 252 | if (data.size() >= offsetof(FILE_NAME, FileName)) { 253 | const auto& fn = *(FILE_NAME*)data.data(); 254 | u16string_view ent_name(fn.FileName, fn.FileNameLength); 255 | 256 | auto cmp = cmp_filenames(vol.upcase, name, ent_name); 257 | 258 | if (cmp == 0) { // found 259 | *inode = ent->file_reference.SegmentNumber; 260 | Status = EFI_SUCCESS; 261 | goto end; 262 | } else if (cmp == 1) { // skip to next 263 | ent = reinterpret_cast((uint8_t*)ent + ent->entry_length); 264 | continue; 265 | } 266 | 267 | if (cmp == -1 && !(ent->flags & INDEX_ENTRY_SUBNODE)) { 268 | Status = 
EFI_NOT_FOUND; 269 | goto end; 270 | } 271 | } 272 | 273 | if (ent->flags & INDEX_ENTRY_SUBNODE) { // if subnode, descend 274 | uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber; 275 | 276 | if (ir->bytes_per_index_record < vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster) 277 | vcn *= vol.boot_sector->BytesPerSector; 278 | else 279 | vcn *= (uint64_t)vol.boot_sector->BytesPerSector * (uint64_t)vol.boot_sector->SectorsPerCluster; 280 | 281 | if (!scratch) { 282 | Status = bs->AllocatePool(EfiBootServicesData, ir->bytes_per_index_record, 283 | (void**)&scratch); 284 | if (EFI_ERROR(Status)) { 285 | do_print_error("AllocatePool", Status); 286 | goto end; 287 | } 288 | } 289 | 290 | Status = read_from_mappings(vol, &index_mappings, vcn, scratch, ir->bytes_per_index_record); 291 | if (EFI_ERROR(Status)) { 292 | do_print_error("read_from_mappings", Status); 293 | goto end; 294 | } 295 | 296 | auto rec = reinterpret_cast(scratch); 297 | 298 | if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) { 299 | do_print("Signature was not INDX\n"); 300 | Status = EFI_INVALID_PARAMETER; 301 | goto end; 302 | } 303 | 304 | Status = process_fixups(&rec->MultiSectorHeader, ir->bytes_per_index_record, 305 | vol.boot_sector->BytesPerSector); 306 | if (EFI_ERROR(Status)) { 307 | do_print_error("process_fixups", Status); 308 | goto end; 309 | } 310 | 311 | ent = reinterpret_cast((uint8_t*)&rec->header + rec->header.first_entry); 312 | 313 | continue; 314 | } 315 | 316 | if (ent->flags & INDEX_ENTRY_LAST) { 317 | Status = EFI_NOT_FOUND; 318 | goto end; 319 | } 320 | 321 | ent = reinterpret_cast((uint8_t*)ent + ent->entry_length); 322 | } 323 | 324 | end: 325 | if (ir) 326 | bs->FreePool(ir); 327 | 328 | if (scratch) 329 | bs->FreePool(scratch); 330 | 331 | bs->FreePool(file); 332 | 333 | return Status; 334 | } 335 | 336 | static size_t count_path_parts(u16string_view v) { 337 | size_t num_parts = 0; 
338 | 339 | while (!v.empty()) { 340 | num_parts++; 341 | 342 | if (auto bs = v.find(u'\\'); bs != u16string_view::npos) 343 | v = u16string_view(v.data() + bs + 1, v.size() - bs - 1); 344 | else 345 | break; 346 | } 347 | 348 | return num_parts; 349 | } 350 | 351 | static void extract_parts(u16string_view v, u16string_view*& p) { 352 | while (!v.empty()) { 353 | if (auto bs = v.find(u'\\'); bs != u16string_view::npos) { 354 | *p = u16string_view(v.data(), bs); 355 | p++; 356 | v = u16string_view(v.data() + bs + 1, v.size() - bs - 1); 357 | } else { 358 | *p = v; 359 | p++; 360 | break; 361 | } 362 | } 363 | } 364 | 365 | static EFI_STATUS normalize_path(u16string_view fn, u16string_view parent, char16_t*& name, size_t& name_len) { 366 | EFI_STATUS Status; 367 | bool from_root = false; 368 | size_t num_parts = 0; 369 | u16string_view* parts; 370 | bool first; 371 | 372 | if (fn.front() == '\\') { 373 | from_root = true; 374 | fn = u16string_view(fn.data() + 1, fn.size() - 1); 375 | } 376 | 377 | if (parent.empty()) 378 | from_root = true; 379 | 380 | if (!from_root) 381 | num_parts = count_path_parts(parent); 382 | 383 | num_parts += count_path_parts(fn); 384 | 385 | if (num_parts == 0) { 386 | name = nullptr; 387 | name_len = 0; 388 | return EFI_SUCCESS; 389 | } 390 | 391 | Status = bs->AllocatePool(EfiBootServicesData, num_parts * sizeof(u16string_view), (void**)&parts); 392 | if (EFI_ERROR(Status)) { 393 | do_print_error("AllocatePool", Status); 394 | return Status; 395 | } 396 | 397 | { 398 | u16string_view* p = parts; 399 | 400 | if (!from_root) 401 | extract_parts(parent, p); 402 | 403 | extract_parts(fn, p); 404 | } 405 | 406 | for (size_t i = 0; i < num_parts; i++) { 407 | if (parts[i] == u".") 408 | parts[i] = u""; 409 | else if (parts[i] == u"..") { 410 | parts[i] = u""; 411 | 412 | if (i == 0) { 413 | bs->FreePool(parts); 414 | return EFI_INVALID_PARAMETER; 415 | } 416 | 417 | auto j = i - 1; 418 | while (true) { 419 | if (!parts[j].empty()) { 420 | 
parts[j] = u""; 421 | break; 422 | } 423 | 424 | if (j == 0) { 425 | bs->FreePool(parts); 426 | return EFI_INVALID_PARAMETER; 427 | } 428 | 429 | j--; 430 | } 431 | } 432 | } 433 | 434 | name_len = 0; 435 | first = true; 436 | for (size_t i = 0; i < num_parts; i++) { 437 | if (parts[i].empty()) 438 | continue; 439 | 440 | if (!first) 441 | name_len++; 442 | 443 | name_len += parts[i].size(); 444 | first = false; 445 | } 446 | 447 | if (name_len == 0) { 448 | bs->FreePool(parts); 449 | name = nullptr; 450 | return EFI_SUCCESS; 451 | } 452 | 453 | Status = bs->AllocatePool(EfiBootServicesData, name_len * sizeof(char16_t), (void**)&name); 454 | if (EFI_ERROR(Status)) { 455 | do_print_error("AllocatePool", Status); 456 | bs->FreePool(parts); 457 | return Status; 458 | } 459 | 460 | { 461 | char16_t* n = name; 462 | 463 | first = true; 464 | for (size_t i = 0; i < num_parts; i++) { 465 | if (parts[i].empty()) 466 | continue; 467 | 468 | if (!first) { 469 | *n = u'\\'; 470 | n++; 471 | } 472 | 473 | memcpy(n, parts[i].data(), parts[i].size() * sizeof(char16_t)); 474 | n += parts[i].size(); 475 | first = false; 476 | } 477 | } 478 | 479 | bs->FreePool(parts); 480 | 481 | return EFI_SUCCESS; 482 | } 483 | 484 | static EFI_STATUS EFIAPI file_open(struct _EFI_FILE_HANDLE* File, struct _EFI_FILE_HANDLE** NewHandle, CHAR16* FileName, 485 | UINT64 OpenMode, UINT64 Attributes) { 486 | EFI_STATUS Status; 487 | inode* file = _CR(File, inode, proto); 488 | uint64_t inode_num; 489 | inode* ino; 490 | char16_t* name; 491 | size_t name_len; 492 | 493 | UNUSED(Attributes); 494 | 495 | if (OpenMode & EFI_FILE_MODE_CREATE) 496 | return EFI_UNSUPPORTED; 497 | 498 | if (FileName[0] == L'\\' && FileName[1] == 0) { 499 | inode_num = NTFS_ROOT_DIR_INODE; 500 | name = nullptr; 501 | name_len = 0; 502 | } else if (FileName[0] == L'.' 
&& FileName[1] == 0) { 503 | inode_num = file->ino; 504 | 505 | if (file->name) { 506 | Status = bs->AllocatePool(EfiBootServicesData, file->name_len * sizeof(char16_t), (void**)&name); 507 | if (EFI_ERROR(Status)) { 508 | do_print_error("AllocatePool", Status); 509 | return Status; 510 | } 511 | 512 | memcpy(name, file->name, file->name_len * sizeof(char16_t)); 513 | name_len = file->name_len; 514 | } else { 515 | name = nullptr; 516 | name_len = 0; 517 | } 518 | } else { 519 | u16string_view fn((char16_t*)FileName); 520 | 521 | if (fn.empty()) 522 | return EFI_NOT_FOUND; 523 | 524 | if (file->ino == NTFS_ROOT_DIR_INODE && fn == u"..") 525 | return EFI_INVALID_PARAMETER; 526 | 527 | Status = normalize_path(fn, u16string_view(file->name, file->name_len), name, name_len); 528 | if (EFI_ERROR(Status)) { 529 | do_print_error("normalize_path", Status); 530 | return Status; 531 | } 532 | 533 | fn = u16string_view(name, name_len); 534 | inode_num = NTFS_ROOT_DIR_INODE; 535 | 536 | if (!fn.empty()) { 537 | while (true) { 538 | u16string_view part; 539 | 540 | auto backslash = fn.find(u'\\'); 541 | 542 | if (backslash != u16string_view::npos) 543 | part = u16string_view(fn.data(), backslash); 544 | else 545 | part = fn; 546 | 547 | Status = find_file_in_dir(file->vol, inode_num, part, &inode_num); 548 | 549 | if (Status == EFI_NOT_FOUND) { 550 | if (name) 551 | bs->FreePool(name); 552 | 553 | return Status; 554 | } 555 | 556 | if (EFI_ERROR(Status)) { 557 | if (name) 558 | bs->FreePool(name); 559 | 560 | do_print_error("find_file_in_dir", Status); 561 | return Status; 562 | } 563 | 564 | if (backslash == u16string_view::npos) 565 | break; 566 | 567 | fn = u16string_view(fn.data() + backslash + 1, fn.size() - backslash - 1); 568 | } 569 | } 570 | } 571 | 572 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino); 573 | if (EFI_ERROR(Status)) { 574 | if (name) 575 | bs->FreePool(name); 576 | 577 | do_print_error("AllocatePool", Status); 578 | return 
Status; 579 | } 580 | 581 | memset(ino, 0, sizeof(inode)); 582 | 583 | new (ino) inode(file->vol); 584 | 585 | populate_file_handle(&ino->proto); 586 | 587 | ino->ino = inode_num; 588 | ino->name = name; 589 | ino->name_len = name_len; 590 | 591 | *NewHandle = &ino->proto; 592 | 593 | return EFI_SUCCESS; 594 | } 595 | 596 | inode::~inode() { 597 | if (name) 598 | bs->FreePool(name); 599 | 600 | if (data) 601 | bs->FreePool(data); 602 | 603 | if (!inode_loaded) 604 | return; 605 | 606 | if (index_root) 607 | bs->FreePool(index_root); 608 | 609 | while (!IsListEmpty(&index_mappings)) { 610 | mapping* m = _CR(index_mappings.Flink, mapping, list_entry); 611 | RemoveEntryList(&m->list_entry); 612 | bs->FreePool(m); 613 | } 614 | 615 | while (!IsListEmpty(&levels)) { 616 | auto l = _CR(levels.Flink, btree_level, list_entry); 617 | RemoveEntryList(&l->list_entry); 618 | bs->FreePool(l); 619 | } 620 | 621 | while (!IsListEmpty(&data_mappings)) { 622 | mapping* m = _CR(data_mappings.Flink, mapping, list_entry); 623 | RemoveEntryList(&m->list_entry); 624 | bs->FreePool(m); 625 | } 626 | } 627 | 628 | static EFI_STATUS EFIAPI file_close(struct _EFI_FILE_HANDLE* File) { 629 | inode* ino = _CR(File, inode, proto); 630 | 631 | ino->inode::~inode(); 632 | bs->FreePool(ino); 633 | 634 | return EFI_SUCCESS; 635 | } 636 | 637 | static EFI_STATUS EFIAPI file_delete(struct _EFI_FILE_HANDLE* File) { 638 | UNUSED(File); 639 | 640 | return EFI_UNSUPPORTED; 641 | } 642 | 643 | static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset, uint8_t* buf, 644 | uint64_t size) { 645 | EFI_STATUS Status; 646 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster; 647 | uint64_t vcn = offset / cluster_size; 648 | uint64_t last_vcn = sector_align(offset + size, cluster_size) / cluster_size; 649 | LIST_ENTRY* le; 650 | 651 | le = mappings->Flink; 652 | while (le != mappings) { 653 | mapping* m = _CR(le, mapping, 
list_entry); 654 | 655 | if (m->vcn < last_vcn && m->vcn + m->length > vcn) { 656 | uint64_t to_read, mapping_offset; 657 | 658 | mapping_offset = offset - (m->vcn * cluster_size); 659 | to_read = ((m->vcn + m->length) * cluster_size) - offset; 660 | 661 | if (to_read > size) 662 | to_read = size; 663 | 664 | if (m->lcn == 0) // sparse 665 | memset(buf, 0, to_read); 666 | else { 667 | Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId, 668 | ((m->lcn * cluster_size) + mapping_offset) / vol.block->Media->BlockSize, 669 | to_read, buf); 670 | if (EFI_ERROR(Status)) { 671 | do_print_error("ReadBlocks", Status); 672 | return Status; 673 | } 674 | } 675 | 676 | if (to_read == size) 677 | break; 678 | 679 | offset += to_read; 680 | buf += to_read; 681 | size -= to_read; 682 | vcn = offset / cluster_size; 683 | } 684 | 685 | le = le->Flink; 686 | } 687 | 688 | return EFI_SUCCESS; 689 | } 690 | 691 | static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, unsigned int sector_size) { 692 | uint64_t sectors; 693 | uint16_t* seq; 694 | uint8_t* ptr; 695 | 696 | sectors = length / sector_size; 697 | 698 | if (header->UpdateSequenceArraySize < sectors + 1) 699 | return EFI_INVALID_PARAMETER; 700 | 701 | seq = (uint16_t*)((uint8_t*)header + header->UpdateSequenceArrayOffset); 702 | 703 | ptr = (uint8_t*)header + sector_size - sizeof(uint16_t); 704 | 705 | for (unsigned int i = 0; i < sectors; i++) { 706 | if (*(uint16_t*)ptr != seq[0]) 707 | return EFI_INVALID_PARAMETER; 708 | 709 | *(uint16_t*)ptr = seq[i + 1]; 710 | 711 | ptr += sector_size; 712 | } 713 | 714 | return EFI_SUCCESS; 715 | } 716 | 717 | static EFI_STATUS next_index_item(inode& ino, const invocable auto& func) { 718 | EFI_STATUS Status; 719 | const index_root& ir = *ino.index_root; 720 | 721 | if (IsListEmpty(&ino.levels)) 722 | return EFI_NOT_FOUND; 723 | 724 | auto l = _CR(ino.levels.Blink, btree_level, list_entry); 725 | 726 | do { 727 | if (l->ent->flags & 
INDEX_ENTRY_SUBNODE) { 728 | btree_level* l2; 729 | uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)l->ent + l->ent->entry_length - sizeof(uint64_t)))->SegmentNumber; 730 | 731 | if (ir.bytes_per_index_record < ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster) 732 | vcn *= ino.vol.boot_sector->BytesPerSector; 733 | else 734 | vcn *= (uint64_t)ino.vol.boot_sector->BytesPerSector * (uint64_t)ino.vol.boot_sector->SectorsPerCluster; 735 | 736 | Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data) + ir.bytes_per_index_record, 737 | (void**)&l2); 738 | if (EFI_ERROR(Status)) { 739 | do_print_error("AllocatePool", Status); 740 | return Status; 741 | } 742 | 743 | Status = read_from_mappings(ino.vol, &ino.index_mappings, vcn, l2->data, ir.bytes_per_index_record); 744 | if (EFI_ERROR(Status)) { 745 | bs->FreePool(l2); 746 | do_print_error("read_from_mappings", Status); 747 | return Status; 748 | } 749 | 750 | auto rec = reinterpret_cast(l2->data); 751 | 752 | if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) { 753 | do_print("Signature was not INDX\n"); 754 | bs->FreePool(l2); 755 | return EFI_INVALID_PARAMETER; 756 | } 757 | 758 | Status = process_fixups(&rec->MultiSectorHeader, ir.bytes_per_index_record, 759 | ino.vol.boot_sector->BytesPerSector); 760 | if (EFI_ERROR(Status)) { 761 | bs->FreePool(l2); 762 | do_print_error("process_fixups", Status); 763 | return EFI_INVALID_PARAMETER; 764 | } 765 | 766 | InsertTailList(&ino.levels, &l2->list_entry); 767 | l = l2; 768 | l->ent = reinterpret_cast((uint8_t*)&rec->header + rec->header.first_entry); 769 | 770 | continue; 771 | } 772 | 773 | while (l->ent->flags & INDEX_ENTRY_LAST) { 774 | RemoveEntryList(&l->list_entry); 775 | bs->FreePool(l); 776 | 777 | if (IsListEmpty(&ino.levels)) 778 | break; 779 | 780 | l = _CR(ino.levels.Blink, btree_level, list_entry); 781 | } 782 | 783 | if (IsListEmpty(&ino.levels)) 784 | break; 785 | 786 | if (!(l->ent->flags & 
INDEX_ENTRY_LAST)) { 787 | if (func(string_view((const char*)l->ent + sizeof(index_entry), l->ent->stream_length))) 788 | l->ent = reinterpret_cast((uint8_t*)l->ent + l->ent->entry_length); 789 | 790 | return EFI_SUCCESS; 791 | } 792 | } while (!IsListEmpty(&ino.levels)); 793 | 794 | return EFI_SUCCESS; 795 | } 796 | 797 | static void win_time_to_efi(int64_t win, EFI_TIME* efi) { 798 | int64_t secs, time, days; 799 | 800 | secs = win / 10000000; 801 | time = secs % 86400; 802 | days = secs / 86400; 803 | 804 | unsigned int jd = 2305814 + days; // Julian date 805 | 806 | unsigned int f = jd + 1401 + (((((4 * jd) + 274277) / 146097) * 3) / 4) - 38; 807 | unsigned int e = (4 * f) + 3; 808 | unsigned int g = (e % 1461) / 4; 809 | unsigned int h = (5 * g) + 2; 810 | 811 | efi->Month = (((h / 153) + 2) % 12) + 1; 812 | efi->Year = (e / 1461) - 4716 + ((14 - efi->Month) / 12); 813 | efi->Day = ((h % 153) / 5) + 1; 814 | efi->Hour = time / 3600; 815 | efi->Minute = (time % 3600) / 60; 816 | efi->Second = time % 60; 817 | efi->Pad1 = 0; 818 | efi->Nanosecond = (win % 10000000) * 100; 819 | efi->TimeZone = 0; 820 | efi->Daylight = 0; 821 | efi->Pad2 = 0; 822 | } 823 | 824 | static uint64_t win_attributes_to_efi(uint32_t attr, bool is_dir) { 825 | uint64_t ret = 0; 826 | 827 | if (is_dir) 828 | ret |= EFI_FILE_DIRECTORY; 829 | 830 | if (attr & FILE_ATTRIBUTE_READONLY) 831 | ret |= EFI_FILE_READ_ONLY; 832 | 833 | if (attr & FILE_ATTRIBUTE_HIDDEN) 834 | ret |= EFI_FILE_HIDDEN; 835 | 836 | if (attr & FILE_ATTRIBUTE_SYSTEM) 837 | ret |= EFI_FILE_SYSTEM; 838 | 839 | if (attr & EFI_FILE_ARCHIVE) 840 | ret |= EFI_FILE_ARCHIVE; 841 | 842 | return ret; 843 | } 844 | 845 | static EFI_STATUS read_dir(inode& ino, UINTN* BufferSize, VOID* Buffer) { 846 | EFI_STATUS Status; 847 | bool overflow = false, again; 848 | 849 | if (!ino.inode_loaded) { 850 | Status = load_inode(ino); 851 | if (EFI_ERROR(Status)) { 852 | do_print_error("load_inode", Status); 853 | return Status; 854 | } 855 | } 
856 | 857 | if (ino.position == 0 && IsListEmpty(&ino.levels)) { 858 | btree_level* l; 859 | 860 | Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data), (void**)&l); 861 | if (EFI_ERROR(Status)) { 862 | do_print_error("AllocatePool", Status); 863 | return Status; 864 | } 865 | 866 | l->ent = reinterpret_cast((uint8_t*)&ino.index_root->node_header + ino.index_root->node_header.first_entry); 867 | InsertTailList(&ino.levels, &l->list_entry); 868 | } 869 | 870 | // FIXME - ignore special files in root 871 | 872 | do { 873 | again = false; 874 | 875 | Status = next_index_item(ino, [&](string_view data) -> bool { 876 | size_t size; 877 | 878 | const auto& fn = *reinterpret_cast(data.data()); 879 | 880 | if (fn.Namespace == file_name_type::DOS) { // ignore DOS filenames 881 | again = true; 882 | return true; 883 | } 884 | 885 | size = offsetof(EFI_FILE_INFO, FileName[0]) + ((fn.FileNameLength + 1) * sizeof(char16_t)); 886 | 887 | if (*BufferSize < size) { 888 | *BufferSize = size; 889 | overflow = true; 890 | return false; 891 | } 892 | 893 | auto& info = *(EFI_FILE_INFO*)Buffer; 894 | 895 | info.Size = size; 896 | info.FileSize = fn.EndOfFile; 897 | info.PhysicalSize = fn.AllocationSize; 898 | win_time_to_efi(fn.CreationTime, &info.CreateTime); 899 | win_time_to_efi(fn.LastAccessTime, &info.LastAccessTime); 900 | win_time_to_efi(fn.LastWriteTime, &info.ModificationTime); 901 | info.Attribute = win_attributes_to_efi(fn.FileAttributes, fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT); 902 | 903 | memcpy(info.FileName, fn.FileName, fn.FileNameLength * sizeof(char16_t)); 904 | info.FileName[fn.FileNameLength] = 0; 905 | 906 | *BufferSize = size; 907 | 908 | ino.position++; 909 | 910 | return true; 911 | }); 912 | } while (again); 913 | 914 | if (overflow) 915 | return EFI_BUFFER_TOO_SMALL; 916 | 917 | if (Status == EFI_NOT_FOUND) { // last one 918 | *BufferSize = 0; 919 | return EFI_SUCCESS; 920 | } 921 | 922 | if (EFI_ERROR(Status)) { 923 | 
do_print_error("next_index_item", Status); 924 | return Status; 925 | } 926 | 927 | return EFI_SUCCESS; 928 | } 929 | 930 | static EFI_STATUS read_nonresident_attribute(volume& vol, const ATTRIBUTE_RECORD_HEADER& att, span data) { 931 | EFI_STATUS Status; 932 | LIST_ENTRY mappings; 933 | 934 | InitializeListHead(&mappings); 935 | 936 | Status = read_mappings(vol, att, &mappings); 937 | if (EFI_ERROR(Status)) { 938 | do_print_error("read_mappings", Status); 939 | return Status; 940 | } 941 | 942 | Status = read_from_mappings(vol, &mappings, 0, data.data(), data.size()); 943 | 944 | while (!IsListEmpty(&mappings)) { 945 | mapping* m = _CR(mappings.Flink, mapping, list_entry); 946 | RemoveEntryList(&m->list_entry); 947 | bs->FreePool(m); 948 | } 949 | 950 | if (EFI_ERROR(Status)) 951 | do_print_error("read_from_mappings", Status); 952 | 953 | return Status; 954 | } 955 | 956 | static EFI_STATUS do_xpress_decompress(inode& ino, span compdata, uint32_t chunk_size) { 957 | EFI_STATUS Status; 958 | xpress_decompressor ctx; 959 | uint64_t size = ino.size; 960 | uint64_t num_chunks = (size + chunk_size - 1) / chunk_size; 961 | auto offsets = (uint32_t*)compdata.data(); 962 | 963 | if (ino.data) { 964 | bs->FreePool(ino.data); 965 | ino.data = nullptr; 966 | } 967 | 968 | Status = bs->AllocatePool(EfiBootServicesData, ino.size, (void**)&ino.data); 969 | if (EFI_ERROR(Status)) { 970 | do_print_error("AllocatePool", Status); 971 | return Status; 972 | } 973 | 974 | auto ret = span(ino.data, ino.size); 975 | 976 | auto data = span(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)), 977 | (uint32_t)(compdata.size() - ((num_chunks - 1) * sizeof(uint32_t)))); 978 | 979 | for (uint64_t i = 0; i < num_chunks; i++) { 980 | uint64_t off = i == 0 ? 0 : offsets[i - 1]; 981 | uint32_t complen; 982 | 983 | if (i == 0) 984 | complen = num_chunks > 1 ? 
offsets[0] : (uint32_t)data.size(); 985 | else if (i == num_chunks - 1) 986 | complen = (uint32_t)data.size() - offsets[i - 1]; 987 | else 988 | complen = offsets[i] - offsets[i - 1]; 989 | 990 | if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) { 991 | // stored uncompressed 992 | memcpy(ret.data() + (i * chunk_size), data.data() + off, complen); 993 | } else { 994 | auto err = xpress_decompress(&ctx, data.data() + off, complen, ret.data() + (i * chunk_size), 995 | (size_t)(i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)); 996 | 997 | if (err != 0) { 998 | do_print("xpress_decompress failed\n"); 999 | bs->FreePool(ino.data); 1000 | ino.data = nullptr; 1001 | return EFI_INVALID_PARAMETER; 1002 | } 1003 | } 1004 | } 1005 | 1006 | return EFI_SUCCESS; 1007 | } 1008 | 1009 | static EFI_STATUS handle_wof(inode& ino, span rp, span wof) { 1010 | if (rp.size() < offsetof(reparse_point_header, DataBuffer)) { 1011 | do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n"); 1012 | return EFI_INVALID_PARAMETER; 1013 | } 1014 | 1015 | const auto& rph = *(reparse_point_header*)rp.data(); 1016 | 1017 | if (rp.size() < offsetof(reparse_point_header, DataBuffer) + rph.ReparseDataLength) { 1018 | do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n"); 1019 | return EFI_INVALID_PARAMETER; 1020 | } 1021 | 1022 | if (rph.ReparseDataLength < sizeof(wof_external_info)) { 1023 | do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n"); 1024 | return EFI_INVALID_PARAMETER; 1025 | } 1026 | 1027 | const auto& wofei = *(wof_external_info*)rph.DataBuffer; 1028 | 1029 | if (wofei.Version != WOF_CURRENT_VERSION) { 1030 | do_print("Unsupported WOF version\n"); 1031 | return EFI_INVALID_PARAMETER; 1032 | } 1033 | 1034 | if (wofei.Provider == WOF_PROVIDER_WIM) { 1035 | do_print("Unsupported WOF provider WOF_PROVIDER_WIM\n"); 1036 | return EFI_INVALID_PARAMETER; 1037 | } else if (wofei.Provider != 
WOF_PROVIDER_FILE) { 1038 | do_print("Unsupported WOF provider\n"); 1039 | return EFI_INVALID_PARAMETER; 1040 | } 1041 | 1042 | if (rph.ReparseDataLength < sizeof(wof_external_info) + sizeof(file_provider_external_info_v0)) { 1043 | do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n"); 1044 | return EFI_INVALID_PARAMETER; 1045 | } 1046 | 1047 | const auto& fpei = *(file_provider_external_info_v0*)((uint8_t*)&wofei + sizeof(wofei)); 1048 | 1049 | if (fpei.Version != FILE_PROVIDER_CURRENT_VERSION) { 1050 | do_print("Unsupported FILE_PROVIDER_EXTERNAL_INFO version\n"); 1051 | return EFI_INVALID_PARAMETER; 1052 | } 1053 | 1054 | switch (fpei.Algorithm) { 1055 | case FILE_PROVIDER_COMPRESSION_XPRESS4K: 1056 | return do_xpress_decompress(ino, wof, 4096); 1057 | 1058 | case FILE_PROVIDER_COMPRESSION_LZX: 1059 | do_print("FIXME - FILE_PROVIDER_COMPRESSION_LZX\n"); 1060 | return EFI_INVALID_PARAMETER; 1061 | 1062 | case FILE_PROVIDER_COMPRESSION_XPRESS8K: 1063 | return do_xpress_decompress(ino, wof, 8192); 1064 | 1065 | case FILE_PROVIDER_COMPRESSION_XPRESS16K: 1066 | return do_xpress_decompress(ino, wof, 16384); 1067 | 1068 | default: 1069 | do_print("Unrecognized WIM compression algorithm\n"); 1070 | return EFI_INVALID_PARAMETER; 1071 | } 1072 | } 1073 | 1074 | static EFI_STATUS read_file(inode& ino, UINTN* BufferSize, VOID* Buffer) { 1075 | EFI_STATUS Status, Status2; 1076 | uint64_t start, end; 1077 | 1078 | if (ino.position >= ino.size || *BufferSize == 0) { 1079 | *BufferSize = 0; 1080 | return EFI_SUCCESS; 1081 | } 1082 | 1083 | if (ino.position >= ino.vdl) { 1084 | UINTN to_read = *BufferSize; 1085 | 1086 | if (to_read > ino.size - ino.position) 1087 | to_read = ino.size - ino.position; 1088 | 1089 | memset(Buffer, 0, to_read); 1090 | 1091 | *BufferSize = to_read; 1092 | 1093 | return EFI_SUCCESS; 1094 | } 1095 | 1096 | if (!ino.data_loaded) { 1097 | FILE_RECORD_SEGMENT_HEADER* file; 1098 | uint8_t* wof_data = nullptr; 1099 | size_t wof_len = 0; 
1100 | uint8_t* rp_data = nullptr; 1101 | size_t rp_len = 0; 1102 | 1103 | Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file); 1104 | if (EFI_ERROR(Status)) { 1105 | do_print_error("AllocatePool", Status); 1106 | return Status; 1107 | } 1108 | 1109 | Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size, 1110 | (uint8_t*)file, ino.vol.file_record_size); 1111 | if (EFI_ERROR(Status)) { 1112 | do_print_error("read_from_mappings", Status); 1113 | bs->FreePool(file); 1114 | return Status; 1115 | } 1116 | 1117 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 1118 | do_print("Signature was not FILE\n"); 1119 | bs->FreePool(file); 1120 | return EFI_INVALID_PARAMETER; 1121 | } 1122 | 1123 | Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size, 1124 | ino.vol.boot_sector->BytesPerSector); 1125 | 1126 | if (EFI_ERROR(Status)) { 1127 | do_print_error("process_fixups", Status); 1128 | bs->FreePool(file); 1129 | return Status; 1130 | } 1131 | 1132 | Status = EFI_SUCCESS; 1133 | 1134 | Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view data, u16string_view att_name) -> bool { 1135 | switch (att.TypeCode) { 1136 | case ntfs_attribute::DATA: 1137 | if (att_name.empty()) { 1138 | switch (att.FormCode) { 1139 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: 1140 | Status = read_mappings(ino.vol, att, &ino.data_mappings); 1141 | if (EFI_ERROR(Status)) 1142 | do_print_error("read_mappings", Status); 1143 | break; 1144 | 1145 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM: 1146 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&ino.data); 1147 | if (EFI_ERROR(Status)) { 1148 | do_print_error("AllocatePool", Status); 1149 | break; 1150 | } 1151 | 1152 | memcpy(ino.data, data.data(), data.size()); 1153 | break; 1154 | } 1155 | } else if (att_name == u"WofCompressedData") { 1156 | switch (att.FormCode) { 1157 
| case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: { 1158 | uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster; 1159 | 1160 | wof_len = att.Form.Nonresident.FileSize; 1161 | 1162 | if (wof_len == 0) 1163 | break; 1164 | 1165 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(wof_len, cluster_size), (void**)&wof_data); 1166 | if (EFI_ERROR(Status)) { 1167 | do_print_error("AllocatePool", Status); 1168 | break; 1169 | } 1170 | 1171 | Status = read_nonresident_attribute(ino.vol, att, span(wof_data, sector_align(wof_len, cluster_size))); 1172 | if (EFI_ERROR(Status)) { 1173 | do_print_error("read_nonresident_attribute", Status); 1174 | bs->FreePool(wof_data); 1175 | wof_data = nullptr; 1176 | break; 1177 | } 1178 | 1179 | break; 1180 | } 1181 | 1182 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM: 1183 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&wof_data); 1184 | if (EFI_ERROR(Status)) { 1185 | do_print_error("AllocatePool", Status); 1186 | break; 1187 | } 1188 | 1189 | memcpy(wof_data, data.data(), data.size()); 1190 | wof_len = data.size(); 1191 | 1192 | break; 1193 | } 1194 | } 1195 | 1196 | if (EFI_ERROR(Status)) 1197 | return false; 1198 | break; 1199 | 1200 | case ntfs_attribute::REPARSE_POINT: 1201 | if (att_name.empty()) { 1202 | switch (att.FormCode) { 1203 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: { 1204 | uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster; 1205 | 1206 | rp_len = att.Form.Nonresident.FileSize; 1207 | 1208 | if (rp_len == 0) 1209 | break; 1210 | 1211 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(rp_len, cluster_size), (void**)&rp_data); 1212 | if (EFI_ERROR(Status)) { 1213 | do_print_error("AllocatePool", Status); 1214 | break; 1215 | } 1216 | 1217 | Status = read_nonresident_attribute(ino.vol, att, span(rp_data, sector_align(rp_len, cluster_size))); 1218 | if (EFI_ERROR(Status)) { 1219 | 
do_print_error("read_nonresident_attribute", Status); 1220 | bs->FreePool(rp_data); 1221 | rp_data = nullptr; 1222 | break; 1223 | } 1224 | 1225 | break; 1226 | } 1227 | 1228 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM: 1229 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&rp_data); 1230 | if (EFI_ERROR(Status)) { 1231 | do_print_error("AllocatePool", Status); 1232 | break; 1233 | } 1234 | 1235 | memcpy(rp_data, data.data(), data.size()); 1236 | rp_len = data.size(); 1237 | 1238 | break; 1239 | } 1240 | } 1241 | break; 1242 | 1243 | default: 1244 | break; 1245 | } 1246 | 1247 | return true; 1248 | }); 1249 | 1250 | bs->FreePool(file); 1251 | 1252 | if (rp_data) { 1253 | if (rp_len > sizeof(uint32_t) && *(uint32_t*)rp_data == IO_REPARSE_TAG_WOF) { 1254 | Status = handle_wof(ino, span(rp_data, rp_len), span(wof_data, wof_len)); 1255 | if (EFI_ERROR(Status)) { 1256 | do_print_error("handle_wof", Status); 1257 | bs->FreePool(rp_data); 1258 | 1259 | if (wof_data) 1260 | bs->FreePool(wof_data); 1261 | 1262 | return Status; 1263 | } 1264 | } 1265 | 1266 | bs->FreePool(rp_data); 1267 | } 1268 | 1269 | if (wof_data) 1270 | bs->FreePool(wof_data); 1271 | 1272 | if (EFI_ERROR(Status2)) { 1273 | do_print_error("loop_through_atts", Status2); 1274 | return Status2; 1275 | } 1276 | 1277 | if (EFI_ERROR(Status)) 1278 | return Status; 1279 | 1280 | ino.data_loaded = true; 1281 | } 1282 | 1283 | start = ino.position; 1284 | end = ino.position + *BufferSize; 1285 | 1286 | if (end > ino.size) 1287 | end = ino.size; 1288 | 1289 | if (ino.data) 1290 | memcpy(Buffer, ino.data + start, end - start); 1291 | else { 1292 | uint64_t start_aligned, valid_end, end_aligned; 1293 | uint8_t* tmp = nullptr; 1294 | 1295 | valid_end = end; 1296 | 1297 | if (valid_end > ino.vdl) 1298 | valid_end = ino.vdl; 1299 | 1300 | start_aligned = start & ~(ino.vol.boot_sector->BytesPerSector - 1); 1301 | end_aligned = sector_align(valid_end, ino.vol.boot_sector->BytesPerSector); 1302 | 1303 | 
if (start_aligned != start || end_aligned != valid_end) { 1304 | Status = bs->AllocatePool(EfiBootServicesData, end_aligned - start_aligned, (void**)&tmp); 1305 | if (EFI_ERROR(Status)) { 1306 | do_print_error("AllocatePool", Status); 1307 | return Status; 1308 | } 1309 | } 1310 | 1311 | // FIXME - LZNT1 compressed data 1312 | 1313 | Status = read_from_mappings(ino.vol, &ino.data_mappings, start_aligned, 1314 | tmp ? tmp : (uint8_t*)Buffer, end_aligned - start_aligned); 1315 | if (EFI_ERROR(Status)) { 1316 | do_print_error("read_from_mappings", Status); 1317 | 1318 | if (tmp) 1319 | bs->FreePool(tmp); 1320 | 1321 | return Status; 1322 | } 1323 | 1324 | if (tmp) { 1325 | memcpy(Buffer, tmp + start - start_aligned, valid_end - start); 1326 | bs->FreePool(tmp); 1327 | } 1328 | 1329 | if (valid_end < end) 1330 | memset((uint8_t*)Buffer + valid_end - start, 0, end - valid_end); 1331 | } 1332 | 1333 | ino.position = end; 1334 | *BufferSize = end - start; 1335 | 1336 | return EFI_SUCCESS; 1337 | } 1338 | 1339 | static EFI_STATUS EFIAPI file_read(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) { 1340 | EFI_STATUS Status; 1341 | inode* ino = _CR(File, inode, proto); 1342 | 1343 | if (!ino->inode_loaded) { 1344 | Status = load_inode(*ino); 1345 | if (EFI_ERROR(Status)) { 1346 | do_print_error("load_inode", Status); 1347 | return Status; 1348 | } 1349 | } 1350 | 1351 | if (ino->is_dir) 1352 | return read_dir(*ino, BufferSize, Buffer); 1353 | else 1354 | return read_file(*ino, BufferSize, Buffer); 1355 | } 1356 | 1357 | static EFI_STATUS EFIAPI file_write(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) { 1358 | UNUSED(File); 1359 | UNUSED(BufferSize); 1360 | UNUSED(Buffer); 1361 | 1362 | return EFI_UNSUPPORTED; 1363 | } 1364 | 1365 | static EFI_STATUS EFIAPI file_set_position(struct _EFI_FILE_HANDLE* File, UINT64 Position) { 1366 | EFI_STATUS Status; 1367 | inode* ino = _CR(File, inode, proto); 1368 | 1369 | if (!ino->inode_loaded) { 1370 | 
Status = load_inode(*ino); 1371 | if (EFI_ERROR(Status)) { 1372 | do_print_error("load_inode", Status); 1373 | return Status; 1374 | } 1375 | } 1376 | 1377 | if (ino->is_dir) { 1378 | if (Position != 0) 1379 | return EFI_UNSUPPORTED; 1380 | 1381 | ino->position = 0; 1382 | 1383 | while (!IsListEmpty(&ino->levels)) { 1384 | auto l = _CR(ino->levels.Flink, btree_level, list_entry); 1385 | RemoveEntryList(&l->list_entry); 1386 | bs->FreePool(l); 1387 | } 1388 | } else { 1389 | if (Position == 0xffffffffffffffff) 1390 | ino->position = ino->size; 1391 | else 1392 | ino->position = Position; 1393 | } 1394 | 1395 | return EFI_SUCCESS; 1396 | } 1397 | 1398 | static EFI_STATUS EFIAPI file_get_position(struct _EFI_FILE_HANDLE* File, UINT64* Position) { 1399 | inode* ino = _CR(File, inode, proto); 1400 | 1401 | if (ino->is_dir) 1402 | return EFI_UNSUPPORTED; 1403 | 1404 | *Position = ino->position; 1405 | 1406 | return EFI_SUCCESS; 1407 | } 1408 | 1409 | static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record, 1410 | invocable auto func) { 1411 | EFI_STATUS Status; 1412 | auto att = reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset); 1413 | size_t offset = file_record->FirstAttributeOffset; 1414 | uint8_t* attlist = nullptr; 1415 | size_t attlist_size; 1416 | 1417 | while (true) { 1418 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0) 1419 | break; 1420 | 1421 | if (att->TypeCode == ntfs_attribute::ATTRIBUTE_LIST) { 1422 | switch (att->FormCode) { 1423 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: { 1424 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster; 1425 | LIST_ENTRY mappings; 1426 | 1427 | if (att->Form.Nonresident.FileSize == 0) 1428 | break; 1429 | 1430 | attlist_size = att->Form.Nonresident.FileSize; 1431 | 1432 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(attlist_size, cluster_size), 
(void**)&attlist); 1433 | if (EFI_ERROR(Status)) { 1434 | do_print_error("AllocatePool", Status); 1435 | return Status; 1436 | } 1437 | 1438 | InitializeListHead(&mappings); 1439 | 1440 | Status = read_mappings(vol, *att, &mappings); 1441 | if (EFI_ERROR(Status)) { 1442 | bs->FreePool(attlist); 1443 | do_print_error("read_mappings", Status); 1444 | return Status; 1445 | } 1446 | 1447 | Status = read_from_mappings(vol, &mappings, 0, attlist, sector_align(attlist_size, cluster_size)); 1448 | if (EFI_ERROR(Status)) { 1449 | while (!IsListEmpty(&mappings)) { 1450 | mapping* m = _CR(mappings.Flink, mapping, list_entry); 1451 | RemoveEntryList(&m->list_entry); 1452 | bs->FreePool(m); 1453 | } 1454 | 1455 | bs->FreePool(attlist); 1456 | do_print_error("read_from_mappings", Status); 1457 | return Status; 1458 | } 1459 | 1460 | while (!IsListEmpty(&mappings)) { 1461 | mapping* m = _CR(mappings.Flink, mapping, list_entry); 1462 | RemoveEntryList(&m->list_entry); 1463 | bs->FreePool(m); 1464 | } 1465 | 1466 | break; 1467 | } 1468 | 1469 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM: 1470 | if (att->Form.Resident.ValueLength == 0) 1471 | break; 1472 | 1473 | attlist_size = att->Form.Resident.ValueLength; 1474 | 1475 | Status = bs->AllocatePool(EfiBootServicesData, attlist_size, (void**)&attlist); 1476 | if (EFI_ERROR(Status)) { 1477 | do_print_error("AllocatePool", Status); 1478 | return Status; 1479 | } 1480 | 1481 | memcpy(attlist, (uint8_t*)att + att->Form.Resident.ValueOffset, attlist_size); 1482 | break; 1483 | } 1484 | 1485 | break; 1486 | } 1487 | 1488 | offset += att->RecordLength; 1489 | att = reinterpret_cast((uint8_t*)att + att->RecordLength); 1490 | } 1491 | 1492 | if (attlist) { 1493 | { 1494 | auto ent = (const attribute_list_entry*)attlist; 1495 | size_t left = attlist_size; 1496 | 1497 | while (true) { 1498 | uint64_t file_reference = ent->file_reference.SegmentNumber; 1499 | 1500 | if (file_reference == inode) { // contained elsewhere in this inode 1501 | att = 
reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset); 1502 | offset = file_record->FirstAttributeOffset; 1503 | 1504 | while (true) { 1505 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0) 1506 | break; 1507 | 1508 | if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) { 1509 | if (att->NameLength == 0 || !memcmp((uint8_t*)file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) { 1510 | string_view data; 1511 | u16string_view name; 1512 | 1513 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) 1514 | data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength); 1515 | 1516 | if (att->NameLength != 0) 1517 | name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength); 1518 | 1519 | if (!func(*att, data, name)) { 1520 | bs->FreePool(attlist); 1521 | return EFI_SUCCESS; 1522 | } 1523 | 1524 | break; 1525 | } 1526 | } 1527 | 1528 | offset += att->RecordLength; 1529 | att = reinterpret_cast((uint8_t*)att + att->RecordLength); 1530 | } 1531 | } 1532 | 1533 | if (left <= ent->record_length) 1534 | break; 1535 | 1536 | left -= ent->record_length; 1537 | ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length); 1538 | } 1539 | } 1540 | 1541 | while (true) { 1542 | auto ent = (attribute_list_entry*)attlist; 1543 | size_t left = attlist_size; 1544 | optional ref; 1545 | FILE_RECORD_SEGMENT_HEADER* file2 = nullptr; 1546 | 1547 | while (true) { 1548 | uint64_t file_reference = ent->file_reference.SegmentNumber; 1549 | 1550 | // skip entries already handled 1551 | if (file_reference == inode) { 1552 | if (left <= ent->record_length) 1553 | break; 1554 | 1555 | left -= ent->record_length; 1556 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length); 1557 | continue; 1558 | } 1559 | 1560 | 
if (ref.has_value() && *ref != file_reference) { 1561 | if (left <= ent->record_length) 1562 | break; 1563 | 1564 | left -= ent->record_length; 1565 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length); 1566 | continue; 1567 | } 1568 | 1569 | if (!ref.has_value()) { 1570 | ref = file_reference; 1571 | 1572 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file2); 1573 | if (EFI_ERROR(Status)) { 1574 | do_print_error("AllocatePool", Status); 1575 | bs->FreePool(attlist); 1576 | return Status; 1577 | } 1578 | 1579 | Status = read_from_mappings(vol, &vol.mft_mappings, file_reference * vol.file_record_size, 1580 | (uint8_t*)file2, vol.file_record_size); 1581 | if (EFI_ERROR(Status)) { 1582 | do_print_error("read_from_mappings", Status); 1583 | bs->FreePool(file2); 1584 | bs->FreePool(attlist); 1585 | return Status; 1586 | } 1587 | 1588 | if (file2->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 1589 | do_print("Signature was not FILE\n"); 1590 | bs->FreePool(file2); 1591 | bs->FreePool(attlist); 1592 | return EFI_INVALID_PARAMETER; 1593 | } 1594 | 1595 | Status = process_fixups(&file2->MultiSectorHeader, vol.file_record_size, 1596 | vol.boot_sector->BytesPerSector); 1597 | 1598 | if (EFI_ERROR(Status)) { 1599 | do_print_error("process_fixups", Status); 1600 | bs->FreePool(file2); 1601 | bs->FreePool(attlist); 1602 | return Status; 1603 | } 1604 | } 1605 | 1606 | att = reinterpret_cast((uint8_t*)file2 + file2->FirstAttributeOffset); 1607 | offset = file2->FirstAttributeOffset; 1608 | 1609 | while (true) { 1610 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0) 1611 | break; 1612 | 1613 | if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) { 1614 | if (att->NameLength == 0 || !memcmp((uint8_t*)file2 + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) { 1615 | string_view data; 1616 | 
u16string_view name; 1617 | 1618 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) 1619 | data = string_view((const char*)file2 + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength); 1620 | 1621 | if (att->NameLength != 0) 1622 | name = u16string_view((char16_t*)((uint8_t*)file2 + offset + att->NameOffset), att->NameLength); 1623 | 1624 | if (!func(*att, data, name)) { 1625 | bs->FreePool(file2); 1626 | bs->FreePool(attlist); 1627 | return EFI_SUCCESS; 1628 | } 1629 | 1630 | break; 1631 | } 1632 | } 1633 | 1634 | offset += att->RecordLength; 1635 | att = reinterpret_cast((uint8_t*)att + att->RecordLength); 1636 | } 1637 | 1638 | // don't process this again 1639 | ent->file_reference.SegmentNumber = inode; 1640 | 1641 | if (left <= ent->record_length) 1642 | break; 1643 | 1644 | left -= ent->record_length; 1645 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length); 1646 | } 1647 | 1648 | if (file2) 1649 | bs->FreePool(file2); 1650 | 1651 | if (!ref.has_value()) { 1652 | bs->FreePool(attlist); 1653 | return EFI_SUCCESS; 1654 | } 1655 | } 1656 | 1657 | bs->FreePool(attlist); 1658 | 1659 | return EFI_SUCCESS; 1660 | } 1661 | 1662 | att = reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset); 1663 | offset = file_record->FirstAttributeOffset; 1664 | 1665 | while (true) { 1666 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0) 1667 | break; 1668 | 1669 | string_view data; 1670 | u16string_view name; 1671 | 1672 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) 1673 | data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength); 1674 | 1675 | if (att->NameLength != 0) 1676 | name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength); 1677 | 1678 | if (!func(*att, data, name)) 1679 | return EFI_SUCCESS; 1680 | 1681 | offset += att->RecordLength; 1682 | att = 
reinterpret_cast((uint8_t*)att + att->RecordLength); 1683 | } 1684 | 1685 | return EFI_SUCCESS; 1686 | } 1687 | 1688 | static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att, LIST_ENTRY* mappings) { 1689 | EFI_STATUS Status; 1690 | uint64_t next_vcn, current_lcn = 0, current_vcn; 1691 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster; 1692 | uint8_t* stream; 1693 | uint64_t max_cluster; 1694 | 1695 | if (att.FormCode != NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) { 1696 | do_print("Cannot read mappings for attribute that is not non-resident\n"); 1697 | return EFI_INVALID_PARAMETER; 1698 | } 1699 | 1700 | if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) { 1701 | do_print("Cannot read encrypted data\n"); 1702 | return EFI_INVALID_PARAMETER; 1703 | } 1704 | 1705 | if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK) { 1706 | do_print("Compression not yet supported\n"); 1707 | return EFI_INVALID_PARAMETER; 1708 | } 1709 | 1710 | next_vcn = att.Form.Nonresident.LowestVcn; 1711 | stream = (uint8_t*)&att + att.Form.Nonresident.MappingPairsOffset; 1712 | 1713 | max_cluster = att.Form.Nonresident.ValidDataLength / cluster_size; 1714 | 1715 | if (att.Form.Nonresident.ValidDataLength & (cluster_size - 1)) 1716 | max_cluster++; 1717 | 1718 | if (max_cluster == 0) 1719 | return EFI_SUCCESS; 1720 | 1721 | while (true) { 1722 | uint64_t v, l; 1723 | int64_t v_val, l_val; 1724 | mapping* m; 1725 | 1726 | current_vcn = next_vcn; 1727 | 1728 | if (*stream == 0) 1729 | break; 1730 | 1731 | v = *stream & 0xf; 1732 | l = *stream >> 4; 1733 | 1734 | stream++; 1735 | 1736 | if (v > 8) 1737 | return EFI_INVALID_PARAMETER; 1738 | 1739 | if (l > 8) 1740 | return EFI_INVALID_PARAMETER; 1741 | 1742 | // FIXME - do we need to make sure that int64_t pointers don't go past end of buffer? 
1743 | 1744 | v_val = *(int64_t*)stream; 1745 | v_val &= (1ull << (v * 8)) - 1; 1746 | 1747 | if ((uint64_t)v_val & (1ull << ((v * 8) - 1))) // sign-extend if negative 1748 | v_val |= 0xffffffffffffffff & ~((1ull << (v * 8)) - 1); 1749 | 1750 | stream += v; 1751 | 1752 | next_vcn += v_val; 1753 | 1754 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(mapping), (void**)&m); 1755 | if (EFI_ERROR(Status)) { 1756 | do_print_error("AllocatePool", Status); 1757 | return Status; 1758 | } 1759 | 1760 | if (l != 0) { 1761 | l_val = *(int64_t*)stream; 1762 | l_val &= (1ull << (l * 8)) - 1; 1763 | 1764 | if ((uint64_t)l_val & (1ull << ((l * 8) - 1))) // sign-extend if negative 1765 | l_val |= 0xffffffffffffffff & ~((1ull << (l * 8)) - 1); 1766 | 1767 | stream += l; 1768 | 1769 | current_lcn += l_val; 1770 | 1771 | if (next_vcn > max_cluster) 1772 | next_vcn = max_cluster; 1773 | 1774 | m->lcn = current_lcn; 1775 | } else 1776 | m->lcn = 0; 1777 | 1778 | m->vcn = current_vcn; 1779 | m->length = next_vcn - current_vcn; 1780 | 1781 | InsertTailList(mappings, &m->list_entry); 1782 | 1783 | if (next_vcn == max_cluster) 1784 | break; 1785 | } 1786 | 1787 | return EFI_SUCCESS; 1788 | } 1789 | 1790 | static EFI_STATUS load_inode(inode& ino) { 1791 | EFI_STATUS Status, Status2; 1792 | FILE_RECORD_SEGMENT_HEADER* file; 1793 | 1794 | InitializeListHead(&ino.index_mappings); 1795 | InitializeListHead(&ino.levels); 1796 | InitializeListHead(&ino.data_mappings); 1797 | 1798 | Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file); 1799 | if (EFI_ERROR(Status)) { 1800 | do_print_error("AllocatePool", Status); 1801 | return Status; 1802 | } 1803 | 1804 | Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size, 1805 | (uint8_t*)file, ino.vol.file_record_size); 1806 | if (EFI_ERROR(Status)) { 1807 | bs->FreePool(file); 1808 | do_print_error("read_from_mappings", Status); 1809 | return Status; 1810 | } 1811 | 1812 | 
if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 1813 | do_print("Signature was not FILE\n"); 1814 | bs->FreePool(file); 1815 | return EFI_INVALID_PARAMETER; 1816 | } 1817 | 1818 | Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size, 1819 | ino.vol.boot_sector->BytesPerSector); 1820 | 1821 | if (EFI_ERROR(Status)) { 1822 | bs->FreePool(file); 1823 | do_print_error("process_fixups", Status); 1824 | return Status; 1825 | } 1826 | 1827 | memset(&ino.standard_info, 0, sizeof(STANDARD_INFORMATION)); 1828 | 1829 | Status = EFI_SUCCESS; 1830 | 1831 | Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool { 1832 | switch (att.TypeCode) { 1833 | case ntfs_attribute::STANDARD_INFORMATION: 1834 | if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) { 1835 | size_t to_copy = res_data.size(); 1836 | 1837 | if (to_copy > sizeof(STANDARD_INFORMATION)) 1838 | to_copy = sizeof(STANDARD_INFORMATION); 1839 | 1840 | memcpy(&ino.standard_info, res_data.data(), to_copy); 1841 | } 1842 | break; 1843 | 1844 | case ntfs_attribute::FILE_NAME: 1845 | if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) { 1846 | const auto& fn = *(FILE_NAME*)res_data.data(); 1847 | 1848 | if (res_data.size() >= offsetof(FILE_NAME, EaSize)) 1849 | ino.is_dir = fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT; 1850 | } 1851 | 1852 | break; 1853 | 1854 | case ntfs_attribute::INDEX_ALLOCATION: 1855 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) { 1856 | ino.size = att.Form.Nonresident.FileSize; 1857 | ino.phys_size = att.Form.Nonresident.AllocatedLength; 1858 | 1859 | Status = read_mappings(ino.vol, att, &ino.index_mappings); 1860 | if (EFI_ERROR(Status)) { 1861 | do_print_error("read_mappings", Status); 1862 | return false; 1863 | } 1864 | } 1865 | break; 1866 | 1867 | case ntfs_attribute::INDEX_ROOT: 1868 | if (att_name == u"$I30" && 
att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ino.index_root) { 1869 | Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ino.index_root); 1870 | if (EFI_ERROR(Status)) { 1871 | do_print_error("AllocatePool", Status); 1872 | return false; 1873 | } 1874 | 1875 | memcpy(ino.index_root, res_data.data(), res_data.size()); 1876 | } 1877 | break; 1878 | 1879 | case ntfs_attribute::DATA: 1880 | if (att_name.empty()) { 1881 | switch (att.FormCode) { 1882 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: 1883 | ino.size = att.Form.Nonresident.FileSize; 1884 | ino.phys_size = att.Form.Nonresident.AllocatedLength; 1885 | ino.vdl = att.Form.Nonresident.ValidDataLength; 1886 | break; 1887 | 1888 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM: 1889 | ino.size = ino.phys_size = ino.vdl = att.Form.Resident.ValueLength; 1890 | break; 1891 | } 1892 | } 1893 | break; 1894 | 1895 | default: 1896 | break; 1897 | } 1898 | 1899 | return true; 1900 | }); 1901 | 1902 | if (EFI_ERROR(Status2)) { 1903 | do_print_error("loop_through_atts", Status2); 1904 | Status = Status2; 1905 | } 1906 | 1907 | if (EFI_ERROR(Status)) { 1908 | if (ino.index_root) { 1909 | bs->FreePool(ino.index_root); 1910 | ino.index_root = nullptr; 1911 | } 1912 | 1913 | bs->FreePool(file); 1914 | return Status; 1915 | } 1916 | 1917 | ino.inode_loaded = true; 1918 | 1919 | bs->FreePool(file); 1920 | 1921 | return EFI_SUCCESS; 1922 | } 1923 | 1924 | static EFI_STATUS get_inode_file_info(inode& ino, UINTN* BufferSize, VOID* Buffer) { 1925 | EFI_STATUS Status; 1926 | unsigned int size = offsetof(EFI_FILE_INFO, FileName[0]) + sizeof(CHAR16); 1927 | EFI_FILE_INFO* info = (EFI_FILE_INFO*)Buffer; 1928 | u16string_view name; 1929 | 1930 | if (ino.name) { 1931 | name = u16string_view(ino.name, ino.name_len); 1932 | 1933 | if (auto bs = name.rfind(u'\\'); bs != u16string_view::npos) 1934 | name = u16string_view(name.data() + bs + 1, name.size() - bs - 1); 1935 | 1936 | size += name.size() * 
sizeof(char16_t); 1937 | } 1938 | 1939 | if (*BufferSize < size) { 1940 | *BufferSize = size; 1941 | return EFI_BUFFER_TOO_SMALL; 1942 | } 1943 | 1944 | if (!ino.inode_loaded) { 1945 | Status = load_inode(ino); 1946 | if (EFI_ERROR(Status)) { 1947 | do_print_error("load_inode", Status); 1948 | return Status; 1949 | } 1950 | } 1951 | 1952 | info->Size = size; 1953 | info->FileSize = ino.size; 1954 | info->PhysicalSize = ino.phys_size; 1955 | win_time_to_efi(ino.standard_info.CreationTime, &info->CreateTime); 1956 | win_time_to_efi(ino.standard_info.LastAccessTime, &info->LastAccessTime); 1957 | win_time_to_efi(ino.standard_info.LastWriteTime, &info->ModificationTime); 1958 | info->Attribute = win_attributes_to_efi(ino.standard_info.FileAttributes, ino.is_dir); 1959 | 1960 | if (!name.empty()) { 1961 | memcpy(info->FileName, name.data(), name.size() * sizeof(char16_t)); 1962 | info->FileName[name.size()] = 0; 1963 | } else 1964 | info->FileName[0] = 0; 1965 | 1966 | return EFI_SUCCESS; 1967 | } 1968 | 1969 | static EFI_STATUS EFIAPI file_get_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN* BufferSize, VOID* Buffer) { 1970 | inode* ino = _CR(File, inode, proto); 1971 | EFI_GUID guid = EFI_FILE_INFO_ID; 1972 | 1973 | // FIXME - EFI_FILE_SYSTEM_INFO 1974 | 1975 | if (memcmp(InformationType, &guid, sizeof(EFI_GUID))) 1976 | return EFI_UNSUPPORTED; 1977 | 1978 | return get_inode_file_info(*ino, BufferSize, Buffer); 1979 | } 1980 | 1981 | static EFI_STATUS EFIAPI file_set_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN BufferSize, VOID* Buffer) { 1982 | UNUSED(File); 1983 | UNUSED(InformationType); 1984 | UNUSED(BufferSize); 1985 | UNUSED(Buffer); 1986 | 1987 | return EFI_UNSUPPORTED; 1988 | } 1989 | 1990 | static EFI_STATUS file_flush(struct _EFI_FILE_HANDLE* File) { 1991 | UNUSED(File); 1992 | 1993 | // nop 1994 | 1995 | return EFI_SUCCESS; 1996 | } 1997 | 1998 | static void populate_file_handle(EFI_FILE_PROTOCOL* h) { 1999 | 
h->Revision = EFI_FILE_PROTOCOL_REVISION; 2000 | h->Open = file_open; 2001 | h->Close = file_close; 2002 | h->Delete = file_delete; 2003 | h->Read = file_read; 2004 | h->Write = file_write; 2005 | h->GetPosition = file_get_position; 2006 | h->SetPosition = file_set_position; 2007 | h->GetInfo = file_get_info; 2008 | h->SetInfo = file_set_info; 2009 | h->Flush = file_flush; 2010 | } 2011 | 2012 | static EFI_STATUS EFIAPI open_volume(EFI_SIMPLE_FILE_SYSTEM_PROTOCOL* This, EFI_FILE_PROTOCOL** Root) { 2013 | EFI_STATUS Status; 2014 | volume* vol = _CR(This, volume, proto); 2015 | inode* ino; 2016 | 2017 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino); 2018 | if (EFI_ERROR(Status)) { 2019 | do_print_error("AllocatePool", Status); 2020 | return Status; 2021 | } 2022 | 2023 | memset(ino, 0, sizeof(inode)); 2024 | 2025 | new (ino) inode(*vol); 2026 | 2027 | populate_file_handle(&ino->proto); 2028 | 2029 | ino->ino = NTFS_ROOT_DIR_INODE; 2030 | 2031 | *Root = &ino->proto; 2032 | 2033 | return EFI_SUCCESS; 2034 | } 2035 | 2036 | static EFI_STATUS read_mft(volume& vol) { 2037 | EFI_STATUS Status, Status2; 2038 | FILE_RECORD_SEGMENT_HEADER* mft; 2039 | 2040 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&mft); 2041 | if (EFI_ERROR(Status)) { 2042 | do_print_error("AllocatePool", Status); 2043 | return Status; 2044 | } 2045 | 2046 | Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId, 2047 | (vol.boot_sector->MFT * vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster) / vol.block->Media->BlockSize, 2048 | vol.file_record_size, mft); 2049 | if (EFI_ERROR(Status)) { 2050 | bs->FreePool(mft); 2051 | do_print_error("ReadBlocks", Status); 2052 | return Status; 2053 | } 2054 | 2055 | if (mft->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 2056 | do_print("Signature was not FILE\n"); 2057 | bs->FreePool(mft); 2058 | return EFI_INVALID_PARAMETER; 2059 | } 2060 | 2061 | Status = 
process_fixups(&mft->MultiSectorHeader, vol.file_record_size, 2062 | vol.boot_sector->BytesPerSector); 2063 | if (EFI_ERROR(Status)) { 2064 | bs->FreePool(mft); 2065 | do_print_error("process_fixups", Status); 2066 | return Status; 2067 | } 2068 | 2069 | // read DATA mappings 2070 | 2071 | Status = EFI_INVALID_PARAMETER; 2072 | 2073 | Status2 = loop_through_atts(vol, NTFS_MFT_INODE, mft, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool { 2074 | if (att.TypeCode == ntfs_attribute::DATA && att_name.empty()) { 2075 | Status = read_mappings(vol, att, &vol.mft_mappings); 2076 | if (EFI_ERROR(Status)) 2077 | do_print_error("read_mappings", Status); 2078 | 2079 | return false; 2080 | } 2081 | 2082 | return true; 2083 | }); 2084 | 2085 | bs->FreePool(mft); 2086 | 2087 | if (EFI_ERROR(Status2)) { 2088 | do_print_error("loop_through_atts", Status2); 2089 | return Status2; 2090 | } 2091 | 2092 | return Status; 2093 | } 2094 | 2095 | volume::~volume() { 2096 | while (!IsListEmpty(&mft_mappings)) { 2097 | mapping* m = _CR(mft_mappings.Flink, mapping, list_entry); 2098 | RemoveEntryList(&m->list_entry); 2099 | bs->FreePool(m); 2100 | } 2101 | 2102 | bs->FreePool(boot_sector); 2103 | } 2104 | 2105 | static EFI_STATUS read_upcase(volume& vol) { 2106 | EFI_STATUS Status, Status2; 2107 | FILE_RECORD_SEGMENT_HEADER* file; 2108 | LIST_ENTRY mappings; 2109 | uint64_t size; 2110 | 2111 | InitializeListHead(&mappings); 2112 | 2113 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file); 2114 | if (EFI_ERROR(Status)) { 2115 | do_print_error("AllocatePool", Status); 2116 | return Status; 2117 | } 2118 | 2119 | Status = read_from_mappings(vol, &vol.mft_mappings, NTFS_UPCASE_INODE * vol.file_record_size, 2120 | (uint8_t*)file, vol.file_record_size); 2121 | if (EFI_ERROR(Status)) { 2122 | bs->FreePool(file); 2123 | do_print_error("read_from_mappings", Status); 2124 | return Status; 2125 | } 2126 | 2127 | if 
(file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) { 2128 | do_print("Signature was not FILE\n"); 2129 | bs->FreePool(file); 2130 | return EFI_INVALID_PARAMETER; 2131 | } 2132 | 2133 | Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size, 2134 | vol.boot_sector->BytesPerSector); 2135 | 2136 | if (EFI_ERROR(Status)) { 2137 | bs->FreePool(file); 2138 | do_print_error("process_fixups", Status); 2139 | return Status; 2140 | } 2141 | 2142 | Status2 = loop_through_atts(vol, NTFS_UPCASE_INODE, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool { 2143 | switch (att.TypeCode) { 2144 | case ntfs_attribute::DATA: 2145 | // assuming that $UpCase DATA can never be resident 2146 | if (att_name.empty() && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) { 2147 | size = att.Form.Nonresident.AllocatedLength; 2148 | Status = read_mappings(vol, att, &mappings); 2149 | 2150 | if (EFI_ERROR(Status)) 2151 | do_print_error("read_mappings", Status); 2152 | 2153 | return false; 2154 | } 2155 | break; 2156 | 2157 | default: 2158 | break; 2159 | } 2160 | 2161 | return true; 2162 | }); 2163 | 2164 | if (EFI_ERROR(Status2)) { 2165 | do_print_error("loop_through_atts", Status2); 2166 | return Status2; 2167 | } 2168 | 2169 | if (EFI_ERROR(Status)) 2170 | return Status; 2171 | 2172 | Status = read_from_mappings(vol, &mappings, 0, (uint8_t*)vol.upcase, min(size, (uint64_t)sizeof(vol.upcase))); 2173 | 2174 | if (EFI_ERROR(Status)) 2175 | do_print_error("read_from_mappings", Status); 2176 | 2177 | while (!IsListEmpty(&mappings)) { 2178 | mapping* m = _CR(mappings.Flink, mapping, list_entry); 2179 | RemoveEntryList(&m->list_entry); 2180 | bs->FreePool(m); 2181 | } 2182 | 2183 | return Status; 2184 | } 2185 | 2186 | static EFI_STATUS EFIAPI get_arc_name(EFI_QUIBBLE_PROTOCOL* This, char* ArcName, UINTN* ArcNameLen) { 2187 | UNUSED(This); 2188 | UNUSED(ArcName); 2189 | UNUSED(ArcNameLen); 2190 | 2191 | return EFI_UNSUPPORTED; 2192 
| } 2193 | 2194 | static EFI_STATUS get_driver_name(EFI_QUIBBLE_PROTOCOL* This, CHAR16* DriverName, UINTN* DriverNameLen) { 2195 | static const char16_t name[] = u"ntfs"; 2196 | 2197 | UNUSED(This); 2198 | 2199 | if (*DriverNameLen < sizeof(name)) { 2200 | *DriverNameLen = sizeof(name); 2201 | return EFI_BUFFER_TOO_SMALL; 2202 | } 2203 | 2204 | *DriverNameLen = sizeof(name); 2205 | 2206 | memcpy(DriverName, name, sizeof(name)); 2207 | 2208 | return EFI_SUCCESS; 2209 | } 2210 | 2211 | static EFI_STATUS EFIAPI drv_start(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle, 2212 | EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) { 2213 | EFI_STATUS Status; 2214 | EFI_GUID disk_guid = EFI_DISK_IO_PROTOCOL_GUID; 2215 | EFI_GUID block_guid = EFI_BLOCK_IO_PROTOCOL_GUID; 2216 | EFI_GUID fs_guid = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID; 2217 | EFI_GUID quibble_guid = EFI_QUIBBLE_PROTOCOL_GUID; 2218 | EFI_BLOCK_IO_PROTOCOL* block; 2219 | uint32_t sblen; 2220 | NTFS_BOOT_SECTOR* sb; 2221 | EFI_DISK_IO_PROTOCOL* disk_io; 2222 | volume* vol; 2223 | 2224 | UNUSED(RemainingDevicePath); 2225 | 2226 | Status = bs->OpenProtocol(ControllerHandle, &block_guid, (void**)&block, This->DriverBindingHandle, 2227 | ControllerHandle, EFI_OPEN_PROTOCOL_GET_PROTOCOL); 2228 | if (EFI_ERROR(Status)) 2229 | return Status; 2230 | 2231 | if (block->Media->BlockSize == 0) { 2232 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2233 | return EFI_UNSUPPORTED; 2234 | } 2235 | 2236 | Status = bs->OpenProtocol(ControllerHandle, &disk_guid, (void**)&disk_io, This->DriverBindingHandle, 2237 | ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER); 2238 | if (EFI_ERROR(Status)) { 2239 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2240 | return Status; 2241 | } 2242 | 2243 | // FIXME - FAT driver also claims DISK_IO 2 protocol - do we need to? 
2244 | 2245 | sblen = sector_align(sizeof(NTFS_BOOT_SECTOR), block->Media->BlockSize); 2246 | 2247 | Status = bs->AllocatePool(EfiBootServicesData, sblen, (void**)&sb); 2248 | if (EFI_ERROR(Status)) { 2249 | do_print_error("AllocatePool", Status); 2250 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2251 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2252 | return Status; 2253 | } 2254 | 2255 | // read superblock 2256 | 2257 | Status = block->ReadBlocks(block, block->Media->MediaId, 0, sblen, sb); 2258 | if (EFI_ERROR(Status)) { 2259 | bs->FreePool(sb); 2260 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2261 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2262 | return Status; 2263 | } 2264 | 2265 | if (memcmp(sb->FsName, NTFS_FS_NAME, sizeof(NTFS_FS_NAME) - 1)) { // not NTFS 2266 | bs->FreePool(sb); 2267 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2268 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2269 | return EFI_UNSUPPORTED; 2270 | } 2271 | 2272 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(volume), (void**)&vol); 2273 | if (EFI_ERROR(Status)) { 2274 | do_print_error("AllocatePool", Status); 2275 | bs->FreePool(sb); 2276 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2277 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2278 | return Status; 2279 | } 2280 | 2281 | memset(vol, 0, sizeof(volume)); 2282 | 2283 | vol->proto.Revision = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_REVISION; 2284 | vol->proto.OpenVolume = open_volume; 2285 | vol->boot_sector = sb; 2286 | vol->controller = ControllerHandle; 2287 | vol->block = block; 2288 | vol->disk_io = disk_io; 2289 | 2290 | if 
(sb->ClustersPerMFTRecord < 0) 2291 | vol->file_record_size = 1ull << -sb->ClustersPerMFTRecord; 2292 | else 2293 | vol->file_record_size = (uint64_t)sb->BytesPerSector * (uint64_t)sb->SectorsPerCluster * (uint64_t)sb->ClustersPerMFTRecord; 2294 | 2295 | InitializeListHead(&vol->mft_mappings); 2296 | 2297 | Status = read_mft(*vol); 2298 | if (EFI_ERROR(Status)) { 2299 | do_print_error("read_mft", Status); 2300 | vol->volume::~volume(); 2301 | bs->FreePool(vol); 2302 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2303 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2304 | return Status; 2305 | } 2306 | 2307 | Status = read_upcase(*vol); 2308 | if (EFI_ERROR(Status)) { 2309 | do_print_error("read_upcase", Status); 2310 | vol->volume::~volume(); 2311 | bs->FreePool(vol); 2312 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2313 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2314 | return Status; 2315 | } 2316 | 2317 | vol->quibble_proto.GetArcName = get_arc_name; 2318 | vol->quibble_proto.GetWindowsDriverName = get_driver_name; 2319 | 2320 | Status = bs->InstallMultipleProtocolInterfaces(&ControllerHandle, &fs_guid, &vol->proto, 2321 | &quibble_guid, &vol->quibble_proto, nullptr); 2322 | if (EFI_ERROR(Status)) { 2323 | do_print_error("InstallMultipleProtocolInterfaces", Status); 2324 | vol->volume::~volume(); 2325 | bs->FreePool(vol); 2326 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle); 2327 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle); 2328 | return Status; 2329 | } 2330 | 2331 | return EFI_SUCCESS; 2332 | } 2333 | 2334 | static EFI_STATUS EFIAPI drv_stop(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle, 2335 | UINTN NumberOfChildren, EFI_HANDLE* 
ChildHandleBuffer) { 2336 | UNUSED(This); 2337 | UNUSED(ControllerHandle); 2338 | UNUSED(NumberOfChildren); 2339 | UNUSED(ChildHandleBuffer); 2340 | 2341 | // FIXME - make this work(?) 2342 | 2343 | return EFI_INVALID_PARAMETER; 2344 | } 2345 | 2346 | static void get_info_protocol(EFI_HANDLE image_handle) { 2347 | EFI_GUID guid = EFI_QUIBBLE_INFO_PROTOCOL_GUID; 2348 | EFI_HANDLE* handles = NULL; 2349 | UINTN count; 2350 | EFI_STATUS Status; 2351 | 2352 | Status = bs->LocateHandleBuffer(ByProtocol, &guid, NULL, &count, &handles); 2353 | if (EFI_ERROR(Status)) 2354 | return; 2355 | 2356 | if (count == 0) { 2357 | bs->FreePool(handles); 2358 | return; 2359 | } 2360 | 2361 | for (unsigned int i = 0; i < count; i++) { 2362 | Status = bs->OpenProtocol(handles[i], &guid, (void**)&info_proto, image_handle, NULL, 2363 | EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL); 2364 | if (EFI_ERROR(Status)) 2365 | continue; 2366 | 2367 | break; 2368 | } 2369 | 2370 | bs->FreePool(handles); 2371 | } 2372 | 2373 | extern "C" 2374 | EFI_STATUS EFIAPI efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE* SystemTable) { 2375 | EFI_STATUS Status; 2376 | EFI_GUID guid = EFI_DRIVER_BINDING_PROTOCOL_GUID; 2377 | 2378 | systable = SystemTable; 2379 | bs = SystemTable->BootServices; 2380 | 2381 | get_info_protocol(ImageHandle); 2382 | 2383 | drvbind.Supported = drv_supported; 2384 | drvbind.Start = drv_start; 2385 | drvbind.Stop = drv_stop; 2386 | drvbind.Version = 0x10; 2387 | drvbind.ImageHandle = ImageHandle; 2388 | drvbind.DriverBindingHandle = ImageHandle; 2389 | 2390 | Status = bs->InstallProtocolInterface(&drvbind.DriverBindingHandle, &guid, 2391 | EFI_NATIVE_INTERFACE, &drvbind); 2392 | if (EFI_ERROR(Status)) { 2393 | do_print_error("InstallProtocolInterface", Status); 2394 | return Status; 2395 | } 2396 | 2397 | return EFI_SUCCESS; 2398 | } 2399 | -------------------------------------------------------------------------------- /src/ntfs.h: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2023 2 | * 3 | * This file is part of ntfs-efi. 4 | * 5 | * ntfs-efi is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * ntfs-efi is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with ntfs-efi. If not, see <https://www.gnu.org/licenses/>. */ 17 | 18 | #pragma once 19 | 20 | enum class ntfs_attribute : uint32_t { 21 | STANDARD_INFORMATION = 0x10, 22 | ATTRIBUTE_LIST = 0x20, 23 | FILE_NAME = 0x30, 24 | VOLUME_VERSION = 0x40, 25 | SECURITY_DESCRIPTOR = 0x50, 26 | VOLUME_NAME = 0x60, 27 | VOLUME_INFORMATION = 0x70, 28 | DATA = 0x80, 29 | INDEX_ROOT = 0x90, 30 | INDEX_ALLOCATION = 0xA0, 31 | BITMAP = 0xB0, 32 | REPARSE_POINT = 0xC0, 33 | EA_INFORMATION = 0xD0, 34 | EA = 0xE0, 35 | PROPERTY_SET = 0xF0, 36 | LOGGED_UTILITY_STREAM = 0x100, 37 | }; 38 | 39 | enum class NTFS_ATTRIBUTE_FORM : uint8_t { 40 | RESIDENT_FORM = 0, 41 | NONRESIDENT_FORM = 1 42 | }; 43 | 44 | #pragma pack(push,1) 45 | 46 | struct NTFS_BOOT_SECTOR { 47 | uint8_t Jmp[3]; 48 | uint8_t FsName[8]; 49 | uint16_t BytesPerSector; 50 | uint8_t SectorsPerCluster; 51 | uint16_t ReservedSectors; 52 | uint8_t Unused1[5]; 53 | uint8_t Media; 54 | uint8_t Unused2[2]; 55 | uint16_t SectorsPerTrack; 56 | uint16_t Heads; 57 | uint32_t HiddenSectors; 58 | uint32_t Unused3; 59 | uint32_t Unknown; 60 | uint64_t TotalSectors; 61 | uint64_t MFT; 62 | uint64_t MFTMirr; 63 | int8_t ClustersPerMFTRecord; 64 | uint8_t Padding1[3]; 65 | int8_t
ClustersPerIndexRecord; 66 | uint8_t Padding2[3]; 67 | uint64_t SerialNumber; 68 | uint32_t Checksum; 69 | }; 70 | 71 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header 72 | struct MULTI_SECTOR_HEADER { 73 | uint32_t Signature; 74 | uint16_t UpdateSequenceArrayOffset; 75 | uint16_t UpdateSequenceArraySize; 76 | }; 77 | // SegmentNumber (48 bits) and SequenceNumber (16 bits) are declared as bit-fields of uint64_t 78 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference 79 | struct MFT_SEGMENT_REFERENCE { 80 | uint64_t SegmentNumber : 48; 81 | uint64_t SequenceNumber : 16; 82 | }; 83 | 84 | // based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and 85 | // http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html 86 | struct FILE_RECORD_SEGMENT_HEADER { 87 | MULTI_SECTOR_HEADER MultiSectorHeader; 88 | uint64_t LogFileSequenceNumber; 89 | uint16_t SequenceNumber; 90 | uint16_t HardLinkCount; 91 | uint16_t FirstAttributeOffset; 92 | uint16_t Flags; 93 | uint32_t EntryUsedSize; 94 | uint32_t EntryAllocatedSize; 95 | MFT_SEGMENT_REFERENCE BaseFileRecordSegment; 96 | uint16_t NextAttributeID; 97 | }; 98 | // Attribute record header; the Form union holds either the Resident or the Nonresident fields (presumably selected by FormCode -- TODO confirm) 99 | struct ATTRIBUTE_RECORD_HEADER { 100 | enum ntfs_attribute TypeCode; 101 | uint16_t RecordLength; 102 | uint16_t Unknown; 103 | enum NTFS_ATTRIBUTE_FORM FormCode; 104 | uint8_t NameLength; 105 | uint16_t NameOffset; 106 | uint16_t Flags; 107 | uint16_t Instance; 108 | union { 109 | struct { 110 | uint32_t ValueLength; 111 | uint16_t ValueOffset; 112 | uint8_t Reserved[2]; 113 | } Resident; 114 | struct { 115 | uint64_t LowestVcn; 116 | uint64_t HighestVcn; 117 | uint16_t MappingPairsOffset; 118 | uint16_t CompressionUnit; 119 | uint32_t Padding; 120 | uint64_t AllocatedLength; 121 | uint64_t FileSize; 122 | uint64_t ValidDataLength; 123 | uint64_t TotalAllocated; 124 | } Nonresident; 125 | } Form; 126 | }; 127 | 128 | // https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html 129 | 130 | struct STANDARD_INFORMATION { 131 | int64_t
CreationTime; 132 | int64_t LastAccessTime; 133 | int64_t LastWriteTime; 134 | int64_t ChangeTime; 135 | uint32_t FileAttributes; 136 | uint32_t MaximumVersions; 137 | uint32_t VersionNumber; 138 | uint32_t ClassId; 139 | uint32_t OwnerId; 140 | uint32_t SecurityId; 141 | uint64_t QuotaCharged; 142 | uint64_t USN; 143 | }; 144 | 145 | // https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html 146 | 147 | struct index_node_header { 148 | uint32_t first_entry; 149 | uint32_t total_size; 150 | uint32_t allocated_size; 151 | uint32_t flags; 152 | }; 153 | 154 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html 155 | // Bit values, presumably for index_entry::flags -- TODO confirm 156 | #define INDEX_ENTRY_SUBNODE 1 157 | #define INDEX_ENTRY_LAST 2 158 | 159 | struct index_entry { 160 | MFT_SEGMENT_REFERENCE file_reference; 161 | uint16_t entry_length; 162 | uint16_t stream_length; 163 | uint32_t flags; 164 | }; 165 | 166 | // https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html 167 | 168 | struct index_root { 169 | enum ntfs_attribute attribute_type; 170 | uint32_t collation_rule; 171 | uint32_t bytes_per_index_record; 172 | uint8_t clusters_per_index_record; 173 | uint8_t padding[3]; 174 | index_node_header node_header; 175 | index_entry entries[1]; 176 | }; 177 | 178 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html 179 | 180 | struct index_record { 181 | MULTI_SECTOR_HEADER MultiSectorHeader; 182 | uint64_t sequence_number; 183 | uint64_t vcn; 184 | index_node_header header; 185 | uint16_t update_sequence; 186 | }; 187 | // 0x58444e49 stored little-endian is the byte sequence 'I','N','D','X' 188 | #define INDEX_RECORD_MAGIC 0x58444e49 // "INDX" 189 | 190 | // https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html 191 | 192 | enum class file_name_type : uint8_t { 193 | POSIX = 0, 194 | WINDOWS = 1, 195 | DOS = 2, 196 | WINDOWS_AND_DOS = 3 197 | }; 198 | // FileName is declared with a single element; presumably the name continues past the struct for FileNameLength characters -- TODO confirm 199 | struct FILE_NAME { 200 | MFT_SEGMENT_REFERENCE Parent; 201 | int64_t CreationTime; 202 | int64_t LastAccessTime; 203 | int64_t LastWriteTime; 204 | int64_t ChangeTime; 205 | uint64_t
AllocationSize; 206 | uint64_t EndOfFile; 207 | uint32_t FileAttributes; 208 | uint32_t EaSize; 209 | uint8_t FileNameLength; 210 | file_name_type Namespace; 211 | char16_t FileName[1]; 212 | }; 213 | 214 | // https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html 215 | 216 | struct attribute_list_entry { 217 | enum ntfs_attribute type; 218 | uint16_t record_length; 219 | uint8_t name_length; 220 | uint8_t name_offset; 221 | uint64_t starting_vcn; 222 | MFT_SEGMENT_REFERENCE file_reference; 223 | uint16_t instance; 224 | }; 225 | 226 | struct reparse_point_header { // edited form of REPARSE_DATA_BUFFER 227 | uint32_t ReparseTag; 228 | uint16_t ReparseDataLength; 229 | uint16_t Reserved; 230 | uint8_t DataBuffer[1]; 231 | }; 232 | // Presumably the values used in wof_external_info::Version and ::Provider -- TODO confirm 233 | static const uint32_t WOF_CURRENT_VERSION = 1; 234 | 235 | static const uint32_t WOF_PROVIDER_WIM = 1; 236 | static const uint32_t WOF_PROVIDER_FILE = 2; 237 | 238 | struct wof_external_info { // WOF_EXTERNAL_INFO in winioctl.h 239 | uint32_t Version; 240 | uint32_t Provider; 241 | }; 242 | // Presumably the values used in file_provider_external_info_v0::Version and ::Algorithm -- TODO confirm 243 | static const uint32_t FILE_PROVIDER_CURRENT_VERSION = 1; 244 | 245 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS4K = 0; 246 | static const uint32_t FILE_PROVIDER_COMPRESSION_LZX = 1; 247 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS8K = 2; 248 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS16K = 3; 249 | 250 | struct file_provider_external_info_v0 { // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h 251 | uint32_t Version; 252 | uint32_t Algorithm; 253 | }; 254 | 255 | #pragma pack(pop) 256 | 257 | #define NTFS_FS_NAME "NTFS " 258 | // Well-known inode numbers (presumably MFT segment numbers -- TODO confirm) 259 | #define NTFS_MFT_INODE 0 260 | #define NTFS_ROOT_DIR_INODE 5 261 | #define NTFS_UPCASE_INODE 10 262 | 263 | #define NTFS_FILE_SIGNATURE 0x454c4946 // "FILE" 264 | 265 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header 266 | #define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff 267 | #define ATTRIBUTE_FLAG_ENCRYPTED 0x4000 268 | // File attribute bits, presumably for the FileAttributes fields above -- TODO confirm 269 | #define
FILE_ATTRIBUTE_READONLY 0x00000001 270 | #define FILE_ATTRIBUTE_HIDDEN 0x00000002 271 | #define FILE_ATTRIBUTE_SYSTEM 0x00000004 272 | #define FILE_ATTRIBUTE_DIRECTORY 0x00000010 273 | #define FILE_ATTRIBUTE_ARCHIVE 0x00000020 274 | #define FILE_ATTRIBUTE_DIRECTORY_MFT 0x10000000 275 | /* Reparse tag value; presumably compared with reparse_point_header::ReparseTag -- TODO confirm */ 276 | static const uint32_t IO_REPARSE_TAG_WOF = 0x80000017; 277 | -------------------------------------------------------------------------------- /src/quibbleproto.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Mark Harmstone 2023 2 | * 3 | * This file is part of ntfs-efi. 4 | * 5 | * ntfs-efi is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public Licence as published by 7 | * the Free Software Foundation, either version 2 of the Licence, or 8 | * (at your option) any later version. 9 | * 10 | * ntfs-efi is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public Licence for more details. 14 | * 15 | * You should have received a copy of the GNU General Public Licence 16 | * along with ntfs-efi. If not, see <https://www.gnu.org/licenses/>. 
*/ 17 | 18 | #pragma once 19 | /* GUID initializer; by its name, identifies EFI_QUIBBLE_PROTOCOL */ 20 | #define EFI_QUIBBLE_PROTOCOL_GUID { 0x98BCC8FF, 0xD212, 0x4B09, {0x84, 0x0C, 0x43, 0x19, 0xAD, 0x2E, 0xD3, 0x6A } } 21 | 22 | typedef struct _EFI_QUIBBLE_PROTOCOL EFI_QUIBBLE_PROTOCOL; 23 | /* Function-pointer type: takes the protocol instance, an output char buffer and an in/out length; returns EFI_STATUS */ 24 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_ARC_NAME) ( 25 | IN EFI_QUIBBLE_PROTOCOL* This, 26 | OUT char* ArcName, 27 | IN OUT UINTN* ArcNameLen 28 | ); 29 | /* Function-pointer type: same shape as above, but the output buffer is CHAR16 */ 30 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME) ( 31 | IN EFI_QUIBBLE_PROTOCOL* This, 32 | OUT CHAR16* DriverName, 33 | IN OUT UINTN* DriverNameLen 34 | ); 35 | 36 | typedef struct _EFI_QUIBBLE_PROTOCOL { 37 | EFI_QUIBBLE_GET_ARC_NAME GetArcName; 38 | EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME GetWindowsDriverName; 39 | } EFI_QUIBBLE_PROTOCOL; 40 | 41 | #define EFI_OPEN_SUBVOL_GUID { 0x5861E4D5, 0xC7F1, 0x4932, {0xA0, 0x81, 0xF2, 0x2A, 0xAE, 0x8A, 0x82, 0x98 } } 42 | 43 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL EFI_OPEN_SUBVOL_PROTOCOL; 44 | /* Function-pointer type: takes the protocol instance and a 64-bit Subvol value, yields an EFI_FILE_HANDLE through File; returns EFI_STATUS */ 45 | typedef EFI_STATUS (EFIAPI* EFI_OPEN_SUBVOL_FUNC) ( 46 | IN EFI_OPEN_SUBVOL_PROTOCOL* This, 47 | IN UINT64 Subvol, 48 | OUT EFI_FILE_HANDLE* File 49 | ); 50 | 51 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL { 52 | EFI_OPEN_SUBVOL_FUNC OpenSubvol; 53 | } EFI_OPEN_SUBVOL_PROTOCOL; 54 | 55 | #define EFI_QUIBBLE_INFO_PROTOCOL_GUID { 0x89498E00, 0xAE8F, 0x4B23, {0x86, 0x11, 0x71, 0x2A, 0xE1, 0x2F, 0xC8, 0xD9 } } 56 | /* Function-pointer type: takes a const char* and returns nothing */ 57 | typedef void (EFIAPI* EFI_QUIBBLE_INFO_PRINT) ( 58 | IN const char* s 59 | ); 60 | 61 | typedef struct _EFI_QUIBBLE_INFO_PROTOCOL { 62 | EFI_QUIBBLE_INFO_PRINT Print; 63 | } EFI_QUIBBLE_INFO_PROTOCOL; 64 | --------------------------------------------------------------------------------