├── .github
    └── workflows
    │   ├── build-amd64.yml
    │   └── build-x86.yml
├── CMakeLists.txt
├── LICENCE
├── README.md
├── mingw-amd64.cmake
├── mingw-x86.cmake
└── src
    ├── ebiggers
        ├── common_defs.h
        ├── decompress_common.c
        ├── decompress_common.h
        ├── lzx_common.c
        ├── lzx_common.h
        ├── lzx_constants.h
        ├── lzx_decompress.c
        ├── system_compression.h
        └── xpress_decompress.c
    ├── misc.cpp
    ├── misc.h
    ├── ntfs.cpp
    ├── ntfs.h
    └── quibbleproto.h


/.github/workflows/build-amd64.yml:
--------------------------------------------------------------------------------
 1 | name: build amd64
 2 | on: [push]
 3 | jobs:
 4 |   amd64:
 5 |     runs-on: ubuntu-rolling
 6 |     steps:
 7 |       # Install the native and MinGW-w64 (x86-64) toolchains plus the gnu-efi package.
 8 |       - run: apt-get update
 9 |       - run: apt-get install -y g++ git cmake nodejs g++-mingw-w64-x86-64 gnu-efi
10 |       # Export the first 8 characters of the commit hash; they name the checkout directory.
11 |       - run: echo "SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-8)" >> $GITHUB_ENV
12 |       # Clone the repository and pin it to the commit that triggered this run.
13 |       - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA}
14 |       - run: git -C ${SHORT_SHA} checkout ${{ github.sha }}
15 |       - run: mkdir -p install/debug
16 |       # Debug configuration -> install/debug/ntfs.efi
17 |       - run: |
18 |           cmake -DCMAKE_BUILD_TYPE=Debug \
19 |             -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \
20 |             -S ${SHORT_SHA} -B debug-work && \
21 |           cmake --build debug-work --parallel $(nproc) && \
22 |           cp debug-work/ntfs.efi install/debug/
23 |       # RelWithDebInfo configuration -> install/ntfs.efi
24 |       - run: |
25 |           cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
26 |             -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \
27 |             -S ${SHORT_SHA} -B release-work && \
28 |           cmake --build release-work --parallel $(nproc) && \
29 |           cp release-work/ntfs.efi install/
30 |       # Publish the install tree as an artifact named after the full commit hash.
31 |       - uses: actions/upload-artifact@v3
32 |         with:
33 |           name: ${{ github.sha }}
34 |           overwrite: true
35 |           path: |
36 |             install
37 | 


--------------------------------------------------------------------------------
/.github/workflows/build-x86.yml:
--------------------------------------------------------------------------------
 1 | name: build x86
 2 | on: [push]
 3 | jobs:
 4 |   x86:
 5 |     runs-on: ubuntu-rolling
 6 |     steps:
 7 |       # Install the native and MinGW-w64 (i686) toolchains plus the gnu-efi package.
 8 |       - run: apt-get update
 9 |       - run: apt-get install -y g++ git cmake nodejs g++-mingw-w64-i686 gnu-efi
10 |       # Export the first 8 characters of the commit hash; they name the checkout directory.
11 |       - run: echo "SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-8)" >> $GITHUB_ENV
12 |       # Clone the repository and pin it to the commit that triggered this run.
13 |       - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA}
14 |       - run: git -C ${SHORT_SHA} checkout ${{ github.sha }}
15 |       - run: mkdir -p install/debug
16 |       # Debug configuration -> install/debug/ntfs.efi
17 |       - run: |
18 |           cmake -DCMAKE_BUILD_TYPE=Debug \
19 |             -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \
20 |             -S ${SHORT_SHA} -B debug-work && \
21 |           cmake --build debug-work --parallel $(nproc) && \
22 |           cp debug-work/ntfs.efi install/debug/
23 |       # RelWithDebInfo configuration -> install/ntfs.efi
24 |       - run: |
25 |           cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
26 |             -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \
27 |             -S ${SHORT_SHA} -B release-work && \
28 |           cmake --build release-work --parallel $(nproc) && \
29 |           cp release-work/ntfs.efi install/
30 |       # Publish the install tree as an artifact named after the full commit hash.
31 |       - uses: actions/upload-artifact@v3
32 |         with:
33 |           name: ${{ github.sha }}
34 |           overwrite: true
35 |           path: |
36 |             install
37 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.14)
 2 | 
 3 | project(ntfs-uefi)
 4 | 
 5 | set(CMAKE_CXX_STANDARD 20)
 6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
 7 | 
 8 | # The image uses its own entry point (efi_main), so compiler features that
 9 | # insert calls to runtime support routines are disabled for every compiler.
10 | if(MSVC)
11 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /ENTRY:efi_main")
12 |     add_compile_options("/GS-")
13 | 
14 |     # Remove the run-time check switches (/RTC1, /RTCs, /RTCu, /RTCsu) from the
15 |     # C *and* C++ flag sets: ntfs.cpp and misc.cpp are compiled as C++.
16 |     foreach(flags_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG)
17 |         string(REGEX REPLACE "/RTC(su|[1su])" "" ${flags_var} "${${flags_var}}")
18 |     endforeach()
19 | 
20 |     # work around bug in Visual Studio
21 |     if("${MSVC_CXX_ARCHITECTURE_ID}" STREQUAL "X86")
22 |         set(CMAKE_SYSTEM_PROCESSOR "X86")
23 |     endif()
24 | else()
25 |     add_compile_options(-fno-stack-check -fno-stack-protector -mno-stack-arg-probe)
26 | endif()
27 | 
28 | # EFI headers: the common directory plus the one matching the target CPU.
29 | include_directories(/usr/include/efi)
30 | if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
31 |     include_directories(/usr/include/efi/x86_64)
32 | elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
33 |     include_directories(/usr/include/efi/ia32)
34 | endif()
35 | 
36 | set(SRC_FILES src/ntfs.cpp
37 |     src/misc.cpp
38 |     src/ebiggers/decompress_common.c
39 |     src/ebiggers/lzx_common.c
40 |     src/ebiggers/lzx_decompress.c
41 |     src/ebiggers/xpress_decompress.c
42 | )
43 | 
44 | add_executable(ntfs ${SRC_FILES})
45 | 
46 | set_target_properties(ntfs PROPERTIES SUFFIX ".efi")
47 | 
48 | if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
49 |     target_compile_options(ntfs PRIVATE "-ffreestanding")
50 |     target_compile_options(ntfs PRIVATE "-fno-stack-protector")
51 |     target_compile_options(ntfs PRIVATE "-fno-stack-check")
52 |     target_compile_options(ntfs PRIVATE "-mno-stack-arg-probe")
53 | 
54 |     target_link_options(ntfs PRIVATE "-nostartfiles")
55 |     target_link_options(ntfs PRIVATE "-shared")
56 | 
57 |     # PE subsystem 11 is "EFI boot service driver"; Clang is given the name, GCC the number.
58 |     if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
59 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,efi_boot_service_driver")
60 |     else()
61 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,11")
62 |     endif()
63 | 
64 |     # The entry symbol is passed with a leading underscore on 32-bit x86.
65 |     if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
66 |         target_link_options(ntfs PRIVATE "-e_efi_main")
67 |     else()
68 |         target_link_options(ntfs PRIVATE "-eefi_main")
69 |     endif()
70 | elseif(MSVC)
71 |     target_link_options(ntfs PRIVATE "/SUBSYSTEM:EFI_BOOT_SERVICE_DRIVER")
72 |     target_compile_options(ntfs PRIVATE "/Oi-")
73 | endif()
74 | 
75 | # Warning levels per compiler family.
76 | target_compile_options(ntfs PRIVATE
77 |      $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
78 |           -Wall -Wextra -Wno-address-of-packed-member -Werror=pointer-arith -fno-exceptions>
79 |      $<$<CXX_COMPILER_ID:MSVC>:
80 |           /W4 /Oi->)
81 | 


--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     <one line to give the program's name and a brief idea of what it does.>
294 |     Copyright (C) <year>  <name of author>
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   <signature of Ty Coon>, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ntfs-efi
 2 | ---------
 3 | 
 4 | ntfs-efi is an NTFS filesystem driver for EFI. It is intended for use with the free
 5 | Windows bootloader [Quibble](https://github.com/maharmstone/quibble), but you
 6 | should be able to use it for anything EFI-related.
 7 | 
 8 | Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's
 9 | "WOF compressed data", and whose code I've used here.
10 | 
11 | Changelog
12 | ---------
13 | 
14 | * 20231107
15 |   * Fixed memcpy miscompilation bug
16 | 
17 | * 20230328
18 |   * Initial release
19 | 
20 | To do
21 | -----
22 | 
23 | * LZX WOF compression
24 | * LZNT1 compression
25 | * Hide special files in root
26 | * Free space, volume label, etc.
27 | * Symlinks
28 | * Case-sensitive directories
29 | 


--------------------------------------------------------------------------------
/mingw-amd64.cmake:
--------------------------------------------------------------------------------
 1 | # Toolchain file: cross-compile Windows/AMD64 binaries with MinGW-w64.
 2 | set(CMAKE_SYSTEM_NAME Windows)
 3 | set(CMAKE_SYSTEM_PROCESSOR "AMD64")
 4 | 
 5 | # Cross tools, all carrying the x86_64-w64-mingw32- prefix.
 6 | set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
 7 | set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
 8 | set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
 9 | 
10 | set(CMAKE_EXE_LINKER_FLAGS "-static")
11 | 
12 | # find_*() looks for libraries and headers only under the target root,
13 | # and never looks there for programs.
14 | set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
15 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
17 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
18 | 
19 | 


--------------------------------------------------------------------------------
/mingw-x86.cmake:
--------------------------------------------------------------------------------
 1 | # Toolchain file: cross-compile Windows/X86 (32-bit) binaries with MinGW-w64.
 2 | set(CMAKE_SYSTEM_NAME Windows)
 3 | set(CMAKE_SYSTEM_PROCESSOR "X86")
 4 | 
 5 | # Cross compilers, both carrying the i686-w64-mingw32- prefix.
 6 | set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
 7 | set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
 8 | 
 9 | set(CMAKE_EXE_LINKER_FLAGS "-static")
10 | 
11 | # find_*() looks for libraries and headers only under the target root,
12 | # and never looks there for programs.
13 | set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
14 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
15 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
17 | 
18 | 


--------------------------------------------------------------------------------
/src/ebiggers/common_defs.h:
--------------------------------------------------------------------------------
  1 | #ifndef _COMMON_DEFS_H
  2 | #define _COMMON_DEFS_H
  3 | 
  4 | #include <stddef.h>	/* size_t */
  5 | #include <stdint.h>
  6 | /* ========================================================================== */
  7 | /*                              Type definitions                              */
  8 | /* ========================================================================== */
  9 | 
 10 | /*
 11 |  * Type of a machine word.  'unsigned long' would be logical, but that is only
 12 |  * 32 bits on x86_64 Windows.  The same applies to 'uint_fast32_t'.  So the best
 13 |  * we can do without a bunch of #ifdefs appears to be 'size_t' (<stddef.h>).
 14 |  */
 15 | typedef size_t machine_word_t;
 16 | 
 17 | #define WORDBYTES	sizeof(machine_word_t)	/* bytes per machine word */
 18 | #define WORDBITS	(8 * WORDBYTES)	/* bits per machine word */
 19 | 
 20 | /* ========================================================================== */
 21 | /*                         Compiler-specific definitions                      */
 22 | /* ========================================================================== */
 23 | 
 24 | #ifdef __GNUC__  /* GCC, or GCC-compatible compiler such as clang */
 25 | #  define forceinline		inline __attribute__((always_inline))
 26 | #  define likely(expr)		__builtin_expect(!!(expr), 1)
 27 | #  define unlikely(expr)	__builtin_expect(!!(expr), 0)
 28 | #  define _aligned_attribute(n)	__attribute__((aligned(n)))
 29 | #  define bsr32(n)		(31 - __builtin_clz(n))	/* index of highest set bit; n must be nonzero */
 30 | #  define bsr64(n)		(63 - __builtin_clzll(n))	/* 64-bit variant; n must be nonzero */
 31 | #  define bsf32(n)		__builtin_ctz(n)	/* index of lowest set bit; n must be nonzero */
 32 | #  define bsf64(n)		__builtin_ctzll(n)	/* 64-bit variant; n must be nonzero */
 33 | #  ifndef min  /* statement-expression form: each argument is evaluated exactly once */
 34 | #    define min(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
 35 | 			(_a < _b) ? _a : _b; })
 36 | #  endif
 37 | #  ifndef max  /* statement-expression form: each argument is evaluated exactly once */
 38 | #    define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
 39 | 			(_a > _b) ? _a : _b; })
 40 | #  endif
 41 | /* GNU variant of DEFINE_UNALIGNED_TYPE: loads/stores go through a packed one-member struct */
 42 | #  define DEFINE_UNALIGNED_TYPE(type)				\
 43 | struct type##_unaligned {					\
 44 | 	type v;							\
 45 | } __attribute__((packed));					\
 46 | 								\
 47 | static inline type						\
 48 | load_##type##_unaligned(const void *p)				\
 49 | {								\
 50 | 	return ((const struct type##_unaligned *)p)->v;		\
 51 | }								\
 52 | 								\
 53 | static inline void						\
 54 | store_##type##_unaligned(type val, void *p)			\
 55 | {								\
 56 | 	((struct type##_unaligned *)p)->v = val;		\
 57 | }
 58 | 
 59 | #endif /* __GNUC__ */
 60 | 
 61 | /* Request that the annotated function always be inlined, which can matter in
 62 |  * highly tuned code such as codecs.  The fallback is plain 'inline'. */
 63 | #ifndef forceinline
 64 | #  define forceinline		inline
 65 | #endif
 66 | 
 67 | /* Hint that the expression is usually true; the fallback just evaluates it */
 68 | #ifndef likely
 69 | #  define likely(expr)		(expr)
 70 | #endif
 71 | 
 72 | /* Hint that the expression is usually false; the fallback just evaluates it */
 73 | #ifndef unlikely
 74 | #  define unlikely(expr)	(expr)
 75 | #endif
 76 | 
 77 | /* Declare that the annotated variable, or variables of the annotated type, are
 78 |  * to be aligned on n-byte boundaries.  The fallback expands to nothing. */
 79 | #ifndef _aligned_attribute
 80 | #  define _aligned_attribute(n)
 81 | #endif
 82 | 
 83 | /* Fallback min() and max() macros.  Caution: an argument may be evaluated twice. */
 84 | #ifndef min
 85 | #  define min(a, b)	((a) < (b) ? (a) : (b))
 86 | #endif
 87 | #ifndef max
 88 | #  define max(a, b)	((a) > (b) ? (a) : (b))
 89 | #endif
 90 | 
 91 | /* STATIC_ASSERT() - compile-time check: a false 'expr' gives the array type size -1 */
 92 | #define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))
 93 | 
 94 | /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
 95 |  * can be performed efficiently on the target platform, and to 0 otherwise.  */
 96 | #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
 97 | #  define UNALIGNED_ACCESS_IS_FAST 1
 98 | #else
 99 | #  define UNALIGNED_ACCESS_IS_FAST 0
100 | #endif
101 | 
102 | /*
103 |  * DEFINE_UNALIGNED_TYPE(type) - given an integer type 'type', define the
104 |  * functions load_<type>_unaligned(p) and store_<type>_unaligned(v, p), which
105 |  * read and write a value of that type at an address 'p' that need not be
106 |  * aligned.  This fallback is used only when no version was defined above.
107 |  */
108 | #ifndef DEFINE_UNALIGNED_TYPE
109 | 
110 | #include <string.h>	/* memcpy() */
111 | /*
112 |  * Although memcpy() may seem inefficient, it *usually* gets optimized
113 |  * appropriately by modern compilers.  It's portable and may be the best we can
114 |  * do for a fallback...
115 |  */
116 | #define DEFINE_UNALIGNED_TYPE(type)				\
117 | 								\
118 | static forceinline type						\
119 | load_##type##_unaligned(const void *p)				\
120 | {								\
121 | 	type v;							\
122 | 	memcpy(&v, p, sizeof(v));				\
123 | 	return v;						\
124 | }								\
125 | 								\
126 | static forceinline void						\
127 | store_##type##_unaligned(type v, void *p)			\
128 | {								\
129 | 	memcpy(p, &v, sizeof(v));				\
130 | }
131 | 
132 | #endif /* !DEFINE_UNALIGNED_TYPE */
133 | 
134 | 
135 | /* ========================================================================== */
136 | /*                          Unaligned memory accesses                         */
137 | /* ========================================================================== */
138 | 
139 | #define load_word_unaligned	load_machine_word_t_unaligned	/* NOTE(review): name matches DEFINE_UNALIGNED_TYPE(machine_word_t); instantiation not visible here -- confirm */
140 | #define store_word_unaligned	store_machine_word_t_unaligned	/* store counterpart; same assumption applies */
141 | 
142 | /* ========================================================================== */
143 | /*                             Bit scan functions                             */
144 | /* ========================================================================== */
145 | 
146 | /*
147 |  * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
148 |  * significant end) of the *most* significant 1 bit in the input value.  The
149 |  * input value must be nonzero!
150 |  */
151 | 
152 | #ifndef bsr32	/* generic fallback, compiled only if bsr32 is not already a macro */
153 | static forceinline unsigned
154 | bsr32(uint32_t v)
155 | {
156 | 	unsigned msb_idx;	/* right shifts performed until v has dropped to 1 or 0 */
157 | 	for (msb_idx = 0; v > 1; v >>= 1)
158 | 		msb_idx++;
159 | 	return msb_idx;
160 | }
161 | #endif
162 | 
163 | #ifndef bsr64	/* generic fallback, compiled only if bsr64 is not already a macro */
164 | static forceinline unsigned
165 | bsr64(uint64_t v)
166 | {
167 | 	unsigned msb_idx;	/* right shifts performed until v has dropped to 1 or 0 */
168 | 	for (msb_idx = 0; v > 1; v >>= 1)
169 | 		msb_idx++;
170 | 	return msb_idx;
171 | }
172 | #endif
173 | 
174 | static forceinline unsigned
175 | bsrw(machine_word_t v)
176 | {
177 | 	/* Only 32-bit and 64-bit machine words are supported. */
178 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
179 | 
180 | 	/* Dispatch to the bit scan reverse variant matching the word size. */
181 | 	return (WORDBITS == 32) ? bsr32(v) : bsr64(v);
182 | }
183 | 
184 | /*
185 |  * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
186 |  * significant end) of the *least* significant 1 bit in the input value.  The
187 |  * input value must be nonzero!
188 |  */
189 | 
190 | #ifndef bsf32	/* generic fallback, compiled only if bsf32 is not already a macro */
191 | static forceinline unsigned
192 | bsf32(uint32_t v)
193 | {
194 | 	unsigned lsb_idx = 0;	/* count of zero bits skipped below the lowest set bit */
195 | 	for (; (v & 1) == 0; v >>= 1)
196 | 		lsb_idx++;
197 | 	return lsb_idx;
198 | }
199 | #endif
200 | 
201 | #ifndef bsf64	/* generic fallback, compiled only if bsf64 is not already a macro */
202 | static forceinline unsigned
203 | bsf64(uint64_t v)
204 | {
205 | 	unsigned lsb_idx = 0;	/* count of zero bits skipped below the lowest set bit */
206 | 	for (; (v & 1) == 0; v >>= 1)
207 | 		lsb_idx++;
208 | 	return lsb_idx;
209 | }
210 | #endif
211 | 
212 | static forceinline unsigned
213 | bsfw(machine_word_t v)
214 | {
215 | 	/* Only 32-bit and 64-bit machine words are supported. */
216 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
217 | 
218 | 	/* Dispatch to the bit scan forward variant matching the word size. */
219 | 	return (WORDBITS == 32) ? bsf32(v) : bsf64(v);
220 | }
221 | 
222 | /* Compute the base-2 logarithm of 'n' rounded up; 0 and 1 both give 0. */
223 | static forceinline unsigned
224 | ilog2_ceil(size_t n)
225 | {
226 | 	/* For n >= 2 the answer is one more than the index of the highest
227 | 	 * set bit of n - 1. */
228 | 	return (n <= 1) ? 0 : 1 + bsrw(n - 1);
229 | }
230 | 
231 | #endif /* _COMMON_DEFS_H */
232 | 


--------------------------------------------------------------------------------
/src/ebiggers/decompress_common.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * decompress_common.c
  3 |  *
  4 |  * Code for decompression shared among multiple compression formats.
  5 |  *
  6 |  * The following copying information applies to this specific source code file:
  7 |  *
  8 |  * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
  9 |  *
 10 |  * To the extent possible under law, the author(s) have dedicated all copyright
 11 |  * and related and neighboring rights to this software to the public domain
 12 |  * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
 13 |  * Dedication (the "CC0").
 14 |  *
 15 |  * This software is distributed in the hope that it will be useful, but WITHOUT
 16 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 17 |  * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
 18 |  *
 19 |  * You should have received a copy of the CC0 along with this software; if not
 20 |  * see <http://creativecommons.org/publicdomain/zero/1.0/>.
 21 |  */
 22 | 
 23 | #ifdef HAVE_CONFIG_H
 24 | #  include "config.h"
 25 | #endif
 26 | 
 27 | #include <string.h>
 28 | 
 29 | #ifdef __SSE2__
 30 | #  include <emmintrin.h>
 31 | #endif
 32 | 
 33 | #include "decompress_common.h"
 34 | 
 35 | /*
 36 |  * make_huffman_decode_table() -
 37 |  *
 38 |  * Given an alphabet of symbols and the length of each symbol's codeword in a
 39 |  * canonical prefix code, build a table for quickly decoding symbols that were
 40 |  * encoded with that code.
 41 |  *
 42 |  * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
 43 |  * such that no whole codeword is a prefix of any other.  A prefix code might be
 44 |  * a _Huffman code_, which means that it is an optimum prefix code for a given
 45 |  * list of symbol frequencies and was generated by the Huffman algorithm.
 46 |  * Although the prefix codes processed here will ordinarily be "Huffman codes",
 47 |  * strictly speaking the decoder cannot know whether a given code was actually
 48 |  * generated by the Huffman algorithm or not.
 49 |  *
 50 |  * A prefix code is _canonical_ if and only if a longer codeword never
 51 |  * lexicographically precedes a shorter codeword, and the lexicographic ordering
 52 |  * of codewords of equal length is the same as the lexicographic ordering of the
 53 |  * corresponding symbols.  The advantage of using a canonical prefix code is
 54 |  * that the codewords can be reconstructed from only the symbol => codeword
 55 |  * length mapping.  This eliminates the need to transmit the codewords
 56 |  * explicitly.  Instead, they can be enumerated in lexicographic order after
 57 |  * sorting the symbols primarily by increasing codeword length and secondarily
 58 |  * by increasing symbol value.
 59 |  *
 60 |  * However, the decoder's real goal is to decode symbols with the code, not just
 61 |  * generate the list of codewords.  Consequently, this function directly builds
 62 |  * a table for efficiently decoding symbols using the code.  The basic idea is
 63 |  * that given the next 'max_codeword_len' bits of input, the decoder can look up
 64 |  * the next decoded symbol by indexing a table containing '2^max_codeword_len'
 65 |  * entries.  A codeword with length 'max_codeword_len' will have exactly one
 66 |  * entry in this table, whereas a codeword shorter than 'max_codeword_len' will
 67 |  * have multiple entries in this table.  Precisely, a codeword of length 'n'
 68 |  * will have '2^(max_codeword_len - n)' entries.  The index of each such entry,
 69 |  * considered as a bitstring of length 'max_codeword_len', will contain the
 70 |  * corresponding codeword as a prefix.
 71 |  *
 72 |  * That's the basic idea, but we extend it in two ways:
 73 |  *
 74 |  * - Often the maximum codeword length is too long for it to be efficient to
 75 |  *   build the full decode table whenever a new code is used.  Instead, we build
 76 |  *   a "root" table using only '2^table_bits' entries, where 'table_bits <=
 77 |  *   max_codeword_len'.  Then, a lookup of 'table_bits' bits produces either a
 78 |  *   symbol directly (for codewords not longer than 'table_bits'), or the index
 79 |  *   of a subtable which must be indexed with additional bits of input to fully
 80 |  *   decode the symbol (for codewords longer than 'table_bits').
 81 |  *
 82 |  * - Whenever the decoder decodes a symbol, it needs to know the codeword length
 83 |  *   so that it can remove the appropriate number of input bits.  The obvious
 84 |  *   solution would be to simply retain the codeword lengths array and use the
 85 |  *   decoded symbol as an index into it.  However, that would require two array
 86 |  *   accesses when decoding each symbol.  Our strategy is to instead store the
 87 |  *   codeword length directly in the decode table entry along with the symbol.
 88 |  *
 89 |  * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
 90 |  * entries, and see read_huffsym() for full details on how symbols are decoded.
 91 |  *
 92 |  * @decode_table:
 93 |  *	The array in which to build the decode table.  This must have been
 94 |  *	declared by the DECODE_TABLE() macro.  This may alias @lens, since all
 95 |  *	@lens are consumed before the decode table is written to.
 96 |  *
 97 |  * @num_syms:
 98 |  *	The number of symbols in the alphabet.
 99 |  *
100 |  * @table_bits:
101 |  *	The log base 2 of the number of entries in the root table.
102 |  *
103 |  * @lens:
104 |  *	An array of length @num_syms, indexed by symbol, that gives the length
105 |  *	of the codeword, in bits, for each symbol.  The length can be 0, which
106 |  *	means that the symbol does not have a codeword assigned.  In addition,
107 |  *	@lens may alias @decode_table, as noted above.
108 |  *
109 |  * @max_codeword_len:
110 |  *	The maximum codeword length permitted for this code.  All entries in
111 |  *	'lens' must be less than or equal to this value.
112 |  *
113 |  * @working_space:
114 |  *	A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
115 |  *
116 |  * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
117 |  */
118 | int
119 | make_huffman_decode_table(uint16_t decode_table[], unsigned num_syms,
120 | 			  unsigned table_bits, const uint8_t lens[],
121 | 			  unsigned max_codeword_len, uint16_t working_space[])
122 | {
123 | 	uint16_t * const len_counts = &working_space[0];	/* len_counts[n]: symbols not yet placed whose codeword length is n */
124 | 	uint16_t * const offsets = &working_space[1 * (max_codeword_len + 1)];	/* offsets[n]: next slot in sorted_syms for length n */
125 | 	uint16_t * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];	/* symbols ordered by (length, symbol value) */
126 | 	int32_t remainder = 1;	/* codespace left over; reused later when sizing subtables */
127 | 	uint8_t *entry_ptr = (uint8_t*)decode_table;	/* byte cursor for the next root table store */
128 | 	unsigned codeword_len = 1;	/* codeword length currently being processed */
129 | 	unsigned sym_idx;	/* current position in sorted_syms */
130 | 	unsigned codeword;	/* next codeword value, used once subtables are needed */
131 | 	unsigned subtable_pos;	/* decode_table index of the next subtable entry to write */
132 | 	unsigned subtable_bits;	/* log base 2 of the current subtable's entry count */
133 | 	unsigned subtable_prefix;	/* root table index that points at the current subtable */
134 | 
135 | 	/* Count how many codewords have each length, including 0.  */
136 | 	for (unsigned len = 0; len <= max_codeword_len; len++)
137 | 		len_counts[len] = 0;
138 | 	for (unsigned sym = 0; sym < num_syms; sym++)
139 | 		len_counts[lens[sym]]++;
140 | 
141 | 	/* It is already guaranteed that all lengths are <= max_codeword_len,
142 | 	 * but it cannot be assumed they form a complete prefix code.  A
143 | 	 * codeword of length n should require a proportion of the codespace
144 | 	 * equaling (1/2)^n.  The code is complete if and only if, by this
145 | 	 * measure, the codespace is exactly filled by the lengths.  */
146 | 	for (unsigned len = 1; len <= max_codeword_len; len++) {
147 | 		remainder = (remainder << 1) - len_counts[len];
148 | 		/* Do the lengths overflow the codespace? */
149 | 		if (unlikely(remainder < 0))
150 | 			return -1;
151 | 	}
152 | 
153 | 	if (remainder != 0) {
154 | 		/* The lengths do not fill the codespace; that is, they form an
155 | 		 * incomplete code.  This is permitted only if the code is empty
156 | 		 * (contains no symbols). */
157 | 
158 | 		if (unlikely(remainder != 1U << max_codeword_len))
159 | 			return -1;
160 | 
161 | 		/* The code is empty.  When processing a well-formed stream, the
162 | 		 * decode table need not be initialized in this case.  However,
163 | 		 * we cannot assume the stream is well-formed, so we must
164 | 		 * initialize the decode table anyway.  Setting all entries to 0
165 | 		 * makes the decode table always produce symbol '0' without
166 | 		 * consuming any bits, which is good enough. */
167 | 		memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
168 | 		return 0;
169 | 	}
170 | 
171 | 	/* Sort the symbols primarily by increasing codeword length and
172 | 	 * secondarily by increasing symbol value. */
173 | 
174 | 	/* Initialize 'offsets' so that 'offsets[len]' is the number of
175 | 	 * codewords shorter than 'len' bits, including length 0. */
176 | 	offsets[0] = 0;
177 | 	for (unsigned len = 0; len < max_codeword_len; len++)
178 | 		offsets[len + 1] = offsets[len] + len_counts[len];
179 | 
180 | 	/* Use the 'offsets' array to sort the symbols. */
181 | 	for (unsigned sym = 0; sym < num_syms; sym++)
182 | 		sorted_syms[offsets[lens[sym]]++] = sym;
183 | 
184 | 	/*
185 | 	 * Fill the root table entries for codewords no longer than table_bits.
186 | 	 *
187 | 	 * The table will start with entries for the shortest codeword(s), which
188 | 	 * will have the most entries.  From there, the number of entries per
189 | 	 * codeword will decrease.  As an optimization, we may begin filling
190 | 	 * entries with SSE2 vector accesses (8 entries/store), then change to
191 | 	 * word accesses (2 or 4 entries/store), then change to 16-bit accesses
192 | 	 * (1 entry/store).
193 | 	 */
194 | 	sym_idx = offsets[0];	/* after sorting, offsets[0] is the count of zero-length symbols, which are skipped */
195 | 
196 | #ifdef __SSE2__
197 | 	/* Fill entries one 128-bit vector (8 entries) at a time. */
198 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
199 | 				    (sizeof(__m128i) / sizeof(decode_table[0]));
200 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
201 | 	{
202 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
203 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
204 | 			/* Note: unlike in the "word" version below, the __m128i
205 | 			 * type already has __attribute__((may_alias)), so using
206 | 			 * it to access an array of u16 will not violate strict
207 | 			 * aliasing.  */
208 | 			__m128i v = _mm_set1_epi16(
209 | 				MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
210 | 							codeword_len));
211 | 			unsigned n = stores_per_loop;
212 | 			do {
213 | 				*(__m128i *)entry_ptr = v;
214 | 				entry_ptr += sizeof(v);
215 | 			} while (--n);
216 | 		}
217 | 	}
218 | #endif /* __SSE2__ */
219 | 
220 | #ifdef __GNUC__
221 | 	/* Fill entries one word (2 or 4 entries) at a time. */
222 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
223 | 					(WORDBYTES / sizeof(decode_table[0]));
224 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
225 | 	{
226 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
227 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
228 | 
229 | 			/* Accessing the array of u16 as u32 or u64 would
230 | 			 * violate strict aliasing and would require compiling
231 | 			 * the code with -fno-strict-aliasing to guarantee
232 | 			 * correctness.  To work around this problem, use the
233 | 			 * gcc 'may_alias' extension.  */
234 | 			typedef machine_word_t
235 | 				__attribute__((may_alias)) aliased_word_t;
236 | 			aliased_word_t v = repeat_u16(
237 | 				MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
238 | 							codeword_len));
239 | 			unsigned n = stores_per_loop;
240 | 			do {
241 | 				*(aliased_word_t *)entry_ptr = v;
242 | 				entry_ptr += sizeof(v);
243 | 			} while (--n);
244 | 		}
245 | 	}
246 | #endif /* __GNUC__ */
247 | 
248 | 	/* Fill entries one at a time. */
249 | 	for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
250 | 	     stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
251 | 	{
252 | 		unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
253 | 		for (; sym_idx < end_sym_idx; sym_idx++) {
254 | 			uint16_t v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
255 | 							codeword_len);
256 | 			unsigned n = stores_per_loop;
257 | 			do {
258 | 				*(uint16_t *)entry_ptr = v;
259 | 				entry_ptr += sizeof(v);
260 | 			} while (--n);
261 | 		}
262 | 	}
263 | 
264 | 	/* If all symbols were processed, then no subtables are required. */
265 | 	if (sym_idx == num_syms)
266 | 		return 0;
267 | 
268 | 	/* At least one subtable is required.  Process the remaining symbols. */
269 | 	codeword = ((uint16_t *)entry_ptr - decode_table) << 1;	/* root entries filled so far, widened by one bit */
270 | 	subtable_pos = 1U << table_bits;	/* subtables are placed directly after the root table */
271 | 	subtable_bits = table_bits;
272 | 	subtable_prefix = -1;	/* wraps to the largest unsigned value, so the first prefix starts a subtable */
273 | 	do {
274 | 		while (len_counts[codeword_len] == 0) {
275 | 			codeword_len++;
276 | 			codeword <<= 1;
277 | 		}
278 | 
279 | 		unsigned prefix = codeword >> (codeword_len - table_bits);
280 | 
281 | 		/* Start a new subtable if the first 'table_bits' bits of the
282 | 		 * codeword don't match the prefix for the previous subtable, or
283 | 		 * if this will be the first subtable. */
284 | 		if (prefix != subtable_prefix) {
285 | 
286 | 			subtable_prefix = prefix;
287 | 
288 | 			/*
289 | 			 * Calculate the subtable length.  If the codeword
290 | 			 * length exceeds 'table_bits' by n, then the subtable
291 | 			 * needs at least 2^n entries.  But it may need more; if
292 | 			 * there are fewer than 2^n codewords of length
293 | 			 * 'table_bits + n' remaining, then n will need to be
294 | 			 * incremented to bring in longer codewords until the
295 | 			 * subtable can be filled completely.  Note that it
296 | 			 * always will, eventually, be possible to fill the
297 | 			 * subtable, since it was previously verified that the
298 | 			 * code is complete.
299 | 			 */
300 | 			subtable_bits = codeword_len - table_bits;
301 | 			remainder = (int32_t)1 << subtable_bits;
302 | 			for (;;) {
303 | 				remainder -= len_counts[table_bits +
304 | 							subtable_bits];
305 | 				if (remainder <= 0)
306 | 					break;
307 | 				subtable_bits++;
308 | 				remainder <<= 1;
309 | 			}
310 | 
311 | 			/* Create the entry that points from the root table to
312 | 			 * the subtable.  This entry contains the index of the
313 | 			 * start of the subtable and the number of bits with
314 | 			 * which the subtable is indexed (the log base 2 of the
315 | 			 * number of entries it contains).  */
316 | 			decode_table[subtable_prefix] =
317 | 				MAKE_DECODE_TABLE_ENTRY(subtable_pos,
318 | 							subtable_bits);
319 | 		}
320 | 
321 | 		/* Fill the subtable entries for this symbol. */
322 | 		uint16_t entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
323 | 						    codeword_len - table_bits);
324 | 		unsigned n = 1U << (subtable_bits - (codeword_len -
325 | 						     table_bits));
326 | 		do {
327 | 			decode_table[subtable_pos++] = entry;
328 | 		} while (--n);
329 | 
330 | 		len_counts[codeword_len]--;
331 | 		codeword++;
332 | 	} while (++sym_idx < num_syms);
333 | 
334 | 	return 0;
335 | }
336 | 


--------------------------------------------------------------------------------
/src/ebiggers/decompress_common.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * decompress_common.h
  3 |  *
  4 |  * Header for decompression code shared by multiple compression formats.
  5 |  *
  6 |  * The following copying information applies to this specific source code file:
  7 |  *
  8 |  * Written in 2012-2016 by Eric Biggers <ebiggers3@gmail.com>
  9 |  *
 10 |  * To the extent possible under law, the author(s) have dedicated all copyright
 11 |  * and related and neighboring rights to this software to the public domain
 12 |  * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
 13 |  * Dedication (the "CC0").
 14 |  *
 15 |  * This software is distributed in the hope that it will be useful, but WITHOUT
 16 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 17 |  * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
 18 |  *
 19 |  * You should have received a copy of the CC0 along with this software; if not
 20 |  * see <http://creativecommons.org/publicdomain/zero/1.0/>.
 21 |  */
 22 | 
 23 | #ifndef _DECOMPRESS_COMMON_H
 24 | #define _DECOMPRESS_COMMON_H
 25 | 
 26 | #include <string.h>
 27 | 
 28 | #include "common_defs.h"
 29 | 
 30 | /******************************************************************************/
 31 | /*                   Input bitstream for XPRESS and LZX                       */
 32 | /*----------------------------------------------------------------------------*/
 33 | 
 34 | /* Structure that encapsulates a block of in-memory data being interpreted as a
 35 |  * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
 36 |  * to be stored in little endian 16-bit coding units, with the bits ordered high
 37 |  * to low.  */
 38 | struct input_bitstream {
 39 | 
 40 | 	/* Bits that have been read from the input buffer.  The bits are
 41 | 	 * left-justified; the next bit is always bit 31.  */
 42 | 	uint32_t bitbuf;
 43 | 
 44 | 	/* Number of bits held in @bitbuf (forced to 32 once input runs out).  */
 45 | 	uint32_t bitsleft;
 46 | 
 47 | 	/* Pointer to the next byte to be retrieved from the input buffer.  */
 48 | 	const uint8_t *next;
 49 | 
 50 | 	/* Pointer to one past the last byte of the input buffer.  */
 51 | 	const uint8_t *end;
 52 | };
 53 | 
 54 | /* Point @is at the @size bytes starting at @buffer, with no bits buffered yet.  */
 55 | static forceinline void
 56 | init_input_bitstream(struct input_bitstream *is, const void *buffer, uint32_t size)
 57 | {
 58 | 	is->next = buffer;
 59 | 	is->end = (const uint8_t *)buffer + size;
 60 | 	is->bitsleft = 0;
 61 | 	is->bitbuf = 0;
 62 | }
 63 | 
 64 | /* Note: for performance reasons, the following methods don't return error codes
 65 |  * to the caller if the input buffer is overrun.  Instead, they just assume that
 66 |  * all overrun data is zeroes.  This has no effect on well-formed compressed
 67 |  * data.  The only disadvantage is that bad compressed data may go undetected,
 68 |  * but even this is irrelevant if higher level code checksums the uncompressed
 69 |  * data anyway.  */
 70 | 
 71 | /* Make sure the bit buffer holds at least @num_bits bits (at most 17 are
 72 |  * supported), after which up to @num_bits bits may be peeked or removed.  If
 73 |  * the input is exhausted, the missing bits are treated as zeroes.  */
 74 | static forceinline void
 75 | bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits)
 76 | {
 77 | 	uint16_t unit;	/* one coding unit in host byte order; memcpy() makes unaligned input safe */
 78 | 
 79 | 	if (is->bitsleft >= num_bits)
 80 | 		return;
 81 | 
 82 | 	if (unlikely(is->end - is->next < 2))
 83 | 		goto overflow;
 84 | 
 85 | 	memcpy(&unit, is->next, sizeof(unit));
 86 | 	is->bitbuf |= (uint32_t)unit << (16 - is->bitsleft);
 87 | 	is->next += 2;
 88 | 	is->bitsleft += 16;
 89 | 
 90 | 	if (unlikely(num_bits == 17 && is->bitsleft == 16)) {
 91 | 		if (unlikely(is->end - is->next < 2))
 92 | 			goto overflow;
 93 | 		memcpy(&unit, is->next, sizeof(unit));
 94 | 		is->bitbuf |= (uint32_t)unit;
 95 | 		is->next += 2;
 96 | 		is->bitsleft = 32;
 97 | 	}
 98 | 	return;
 99 | 
100 | overflow:
101 | 	is->bitsleft = 32;	/* claim a full buffer; bits never loaded are still 0 */
102 | }
103 | 
104 | /* Peek at the next @num_bits bits, which bitstream_ensure_bits() must have made
105 |  * available.  Shifting in two steps keeps @num_bits == 0 from shifting by 32.  */
106 | static forceinline uint32_t
107 | bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits)
108 | {
109 | 	const uint32_t halved = is->bitbuf >> 1;
110 | 	return halved >> (8 * sizeof(is->bitbuf) - 1 - num_bits);
111 | }
112 | 
113 | /* Discard the next @num_bits bits.  A prior bitstream_ensure_bits() call must
114 |  * have guaranteed that at least that many bits are buffered; the consumed bits
115 |  * are shifted out of the top of the bit buffer.  */
116 | static forceinline void
117 | bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits)
118 | {
119 | 	is->bitsleft = is->bitsleft - num_bits;
120 | 	is->bitbuf = is->bitbuf << num_bits;
121 | }
122 | 
123 | /* Return the next @num_bits bits and consume them.  The bits must already be
124 |  * buffered, i.e. bitstream_ensure_bits() must have been called beforehand.  */
125 | static forceinline uint32_t
126 | bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits)
127 | {
128 | 	const uint32_t popped = bitstream_peek_bits(is, num_bits);
129 | 
130 | 	bitstream_remove_bits(is, num_bits);
131 | 	return popped;
132 | }
133 | 
134 | /* Read @num_bits bits: refill the bit buffer if necessary, then pop the bits.  */
135 | static forceinline uint32_t
136 | bitstream_read_bits(struct input_bitstream *is, unsigned num_bits)
137 | {
138 | 	bitstream_ensure_bits(is, num_bits);
139 | 	return bitstream_pop_bits(is, num_bits);
140 | }
141 | 
142 | /* Read one literal byte embedded in the bitstream; yields 0 at end of input.  */
143 | static forceinline uint8_t
144 | bitstream_read_byte(struct input_bitstream *is)
145 | {
146 | 	if (likely(is->next != is->end))
147 | 		return *is->next++;
148 | 	return 0;
149 | }
150 | 
151 | /* Read the next embedded 16-bit integer (host byte order); 0 if fewer than 2 bytes remain.  */
152 | static forceinline uint16_t
153 | bitstream_read_u16(struct input_bitstream *is)
154 | {
155 | 	uint16_t v;
156 | 
157 | 	if (unlikely(is->end - is->next < 2))
158 | 		return 0;
159 | 	memcpy(&v, is->next, sizeof(v));	/* no misaligned or const-dropping pointer cast */
160 | 	is->next += 2;
161 | 	return v;
162 | }
163 | 
164 | /* Read the next embedded 32-bit integer (host byte order); 0 if fewer than 4 bytes remain.  */
165 | static forceinline uint32_t
166 | bitstream_read_u32(struct input_bitstream *is)
167 | {
168 | 	uint32_t v;
169 | 
170 | 	if (unlikely(is->end - is->next < 4))
171 | 		return 0;
172 | 	memcpy(&v, is->next, sizeof(v));	/* no misaligned or const-dropping pointer cast */
173 | 	is->next += 4;
174 | 	return v;
175 | }
176 | 
177 | /* Copy @count literal bytes to @dst_buffer: 0 on success, -1 if too few bytes remain. */
178 | static forceinline int
179 | bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
180 | {
181 | 	const size_t avail = (size_t)(is->end - is->next);
182 | 	if (unlikely(avail < count))
183 | 		return -1;
184 | 	memcpy(dst_buffer, is->next, count);
185 | 	is->next += count;
186 | 	return 0;
187 | }
188 | 
189 | /* Align on a coding-unit boundary by discarding all currently buffered bits.  */
190 | static forceinline void
191 | bitstream_align(struct input_bitstream *is)
192 | {
193 | 	is->bitbuf = 0;
194 | 	is->bitsleft = 0;
195 | }
196 | 
197 | /******************************************************************************/
198 | /*                             Huffman decoding                               */
199 | /*----------------------------------------------------------------------------*/
200 | 
201 | /*
202 |  * Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12
203 |  * bits) and 'length' (low 4 bits).  The precise meaning of these fields depends
204 |  * on the type of entry:
205 |  *
206 |  * Root table entries which are *not* subtable pointers:
207 |  *	symbol: symbol to decode
208 |  *	length: codeword length in bits
209 |  *
210 |  * Root table entries which are subtable pointers:
211 |  *	symbol: index of start of subtable
212 |  *	length: number of bits with which the subtable is indexed
213 |  *
214 |  * Subtable entries:
215 |  *	symbol: symbol to decode
216 |  *	length: codeword length in bits, minus the number of bits with which the
217 |  *		root table is indexed
218 |  */
219 | #define DECODE_TABLE_SYMBOL_SHIFT  4	/* bit position of 'symbol' = width of 'length' */
220 | #define DECODE_TABLE_MAX_SYMBOL	   ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)	/* 4095: largest 12-bit value */
221 | #define DECODE_TABLE_MAX_LENGTH    ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)	/* 15: largest 4-bit value */
222 | #define DECODE_TABLE_LENGTH_MASK   DECODE_TABLE_MAX_LENGTH	/* mask that extracts the 'length' field */
223 | #define MAKE_DECODE_TABLE_ENTRY(symbol, length) \
224 | 	(((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))	/* pack 'symbol' above 'length' */
225 | 
226 | /*
227 |  * Decode one Huffman symbol from @is using @decode_table, whose root table is
228 |  * indexed by @table_bits bits and whose codewords are at most
229 |  * @max_codeword_len bits long.  The codeword's bits are consumed.  Once the
230 |  * input runs out, decoding proceeds as though the missing bits were zeroes.
231 |  *
232 |  * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
233 |  * lzms_decompress.c; keep them in sync!  NOTE(review): no lzms_decompress.c
234 |  * appears in this source tree -- confirm whether the remark still applies.
235 |  */
236 | static forceinline unsigned
237 | read_huffsym(struct input_bitstream *is, const uint16_t decode_table[],
238 | 	     unsigned table_bits, unsigned max_codeword_len)
239 | {
240 | 	/* The decode table entry currently under consideration. */
241 | 	unsigned ent;
242 | 
243 | 	/* Buffer 'max_codeword_len' bits up front, so that a complete
244 | 	 * codeword is guaranteed to be available for decoding. */
245 | 	bitstream_ensure_bits(is, max_codeword_len);
246 | 
247 | 	/* Root lookup, keyed by the leading 'table_bits' bits of input. */
248 | 	ent = decode_table[bitstream_peek_bits(is, table_bits)];
249 | 
250 | 	/*
251 | 	 * A root table indexed by all 'max_codeword_len' bits cannot have
252 | 	 * subtables.  Otherwise, an entry whose value is at least
253 | 	 * 1 << (table_bits + DECODE_TABLE_SYMBOL_SHIFT) is treated as a
254 | 	 * subtable pointer rather than as a directly decoded symbol.
255 | 	 */
256 | 	if (table_bits < max_codeword_len &&
257 | 	    ent >= (1U << (DECODE_TABLE_SYMBOL_SHIFT + table_bits)))
258 | 	{
259 | 		/* Fields of the pointer entry: where the subtable starts and
260 | 		 * how many input bits index it. */
261 | 		const unsigned subtable_start = ent >> DECODE_TABLE_SYMBOL_SHIFT;
262 | 		const unsigned subtable_bits = ent & DECODE_TABLE_LENGTH_MASK;
263 | 
264 | 		/* Drop the root index bits, then fetch the real entry. */
265 | 		bitstream_remove_bits(is, table_bits);
266 | 		ent = decode_table[subtable_start +
267 | 				   bitstream_peek_bits(is, subtable_bits)];
268 | 	}
269 | 
270 | 	/* Consume the codeword, or what remains of it when the root index
271 | 	 * bits were already dropped for a subtable lookup. */
272 | 	bitstream_remove_bits(is, ent & DECODE_TABLE_LENGTH_MASK);
273 | 
274 | 	/* The decoded symbol is stored in the bits above the length field. */
275 | 	return ent >> DECODE_TABLE_SYMBOL_SHIFT;
276 | 
277 | }
278 | 
279 | /*
280 |  * The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode
281 |  * table entries, including all subtable entries, that may be required for
282 |  * decoding a given Huffman code.  This depends on three parameters:
283 |  *
284 |  *	num_syms: the maximum number of symbols in the code
285 |  *	table_bits: the number of bits with which the root table will be indexed
286 |  *	max_codeword_len: the maximum allowed codeword length in the code
287 |  *
288 |  * Given these parameters, the utility program 'enough' from zlib, when passed
289 |  * the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will
290 |  * compute the maximum number of entries required.  This has already been done
291 |  * for the combinations we need and incorporated into the macro below so that
292 |  * the mapping can be done at compilation time.  For an unknown combination the
293 |  * macro evaluates to -1; used as an array size, that is a compilation error.
294 |  * To fix this, use 'enough' to find the missing value and add it below.  If
295 |  * that still doesn't fix the compilation error, then most likely a constraint
296 |  * would be violated by the requested parameters, so they cannot be used, at
297 |  * least without other changes to the decode table --- see DECODE_TABLE_SIZE().
298 |  */
299 | #define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
300 | 	((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \
301 | 	((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \
302 | 	((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \
303 | 	((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \
304 | 	((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \
305 | 	((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \
306 | 	((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 390 : \
307 | 	((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \
308 | 	((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \
309 | 	((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \
310 | 	((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \
311 | 	((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \
312 | 	((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \
313 | 	((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \
314 | 	((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \
315 | 	((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \
316 | 	((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \
317 | 	((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \
318 | 	((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \
319 | 	((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \
320 | 	((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \
321 | 	((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \
322 | 	((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \
323 | 	((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \
324 | 	-1)
325 | 
326 | extern int
327 | make_huffman_decode_table(uint16_t decode_table[], unsigned num_syms,
328 | 			  unsigned table_bits, const uint8_t lens[],
329 | 			  unsigned max_codeword_len, uint16_t working_space[]);
330 | 
331 | /******************************************************************************/
332 | /*                             LZ match copying                               */
333 | /*----------------------------------------------------------------------------*/
334 | 
335 | static forceinline void
336 | copy_word_unaligned(const void *src, void *dst)
337 | {
338 | 	*(machine_word_t *)dst = *(const machine_word_t *)src;	/* one word-sized load, one word-sized store */
339 | }
340 | 
341 | static forceinline machine_word_t
342 | repeat_u16(uint16_t b)
343 | {
344 | 	machine_word_t word = b;	/* one copy, in the low 16 bits */
345 | 
346 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
347 | 	word = (word << 16) | word;	/* the low 32 bits now hold two copies */
348 | 	word = (word << ((WORDBITS == 64) ? 32 : 0)) | word;	/* 64-bit words: mirror into the high half */
349 | 	return word;
350 | }
351 | 
352 | static forceinline machine_word_t
353 | repeat_byte(uint8_t b)
354 | {
355 | 	return repeat_u16((uint16_t)(b * 0x0101u));	/* b * 0x0101 == (b << 8) | b */
356 | }
357 | 
358 | /*
359 |  * Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
360 |  * offset' to the match destination at 'out_next'.  The source and destination
361 |  * may overlap.
362 |  *
363 |  * This handles validating the length and offset.  It is validated that the
364 |  * beginning of the match source is '>= out_begin' and that the end of the match
365 |  * destination is '<= out_end'.  The return value is 0 if the match was valid
366 |  * (and was copied), otherwise -1.
367 |  *
368 |  * 'min_length' is a hint which specifies the minimum possible match length;
369 |  * it should be a compile-time constant, and 'length' must not be below it.
370 |  */
371 | static forceinline int
372 | lz_copy(uint32_t length, uint32_t offset, uint8_t *out_begin, uint8_t *out_next, uint8_t *out_end,
373 | 	uint32_t min_length)
374 | {
375 | 	const uint8_t *src;
376 | 	uint8_t *end;
377 | 
378 | 	/* Validate the offset. */
379 | 	if (unlikely(offset > out_next - out_begin))
380 | 		return -1;
381 | 
382 | 	/*
383 | 	 * Fast path: copy a match which is no longer than a few words, is not
384 | 	 * overlapped such that copying a word at a time would produce incorrect
385 | 	 * results, and is not too close to the end of the buffer.  Note that
386 | 	 * this might copy more than the length of the match, but that's okay in
387 | 	 * this scenario.
388 | 	 */
389 | 	src = out_next - offset;
390 | 	if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
391 | 	    offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
392 | 	{
393 | 		copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
394 | 		copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
395 | 		copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
396 | 		return 0;
397 | 	}
398 | 
399 | 	/* Validate the length.  This isn't needed in the fast path above, due
400 | 	 * to the additional conditions tested, but we do need it here. */
401 | 	if (unlikely(length > out_end - out_next))
402 | 		return -1;
403 | 	end = out_next + length;
404 | 
405 | 	/*
406 | 	 * Try to copy one word at a time.  On i386 and x86_64 this is faster
407 | 	 * than copying one byte at a time, unless the data is near-random and
408 | 	 * all the matches have very short lengths.  Note that since this
409 | 	 * requires unaligned memory accesses, it won't necessarily be faster on
410 | 	 * every architecture.
411 | 	 *
412 | 	 * Also note that we might copy more than the length of the match.  For
413 | 	 * example, if a word is 8 bytes and the match is of length 5, then
414 | 	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
415 | 	 * beyond the end of the output buffer, hence the check for (out_end -
416 | 	 * end >= WORDBYTES - 1).
417 | 	 */
418 | 	if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
419 | 	{
420 | 		if (offset >= WORDBYTES) {
421 | 			/* The source and destination words don't overlap. */
422 | 			do {
423 | 				copy_word_unaligned(src, out_next);
424 | 				src += WORDBYTES;
425 | 				out_next += WORDBYTES;
426 | 			} while (out_next < end);
427 | 			return 0;
428 | 		} else if (offset == 1) {
429 | 			/* Offset 1 matches are equivalent to run-length
430 | 			 * encoding of the previous byte.  This case is common
431 | 			 * if the data contains many repeated bytes. */
432 | 			machine_word_t v = repeat_byte(*(out_next - 1));
433 | 			do {
434 | 				*(machine_word_t*)out_next = v;
435 | 				src += WORDBYTES;	/* 'src' is not read in this branch */
436 | 				out_next += WORDBYTES;
437 | 			} while (out_next < end);
438 | 			return 0;
439 | 		}
440 | 		/*
441 | 		 * We don't bother with special cases for other 'offset <
442 | 		 * WORDBYTES', which are usually rarer than 'offset == 1'.
443 | 		 * Extra checks will just slow things down.  Actually, it's
444 | 		 * possible to handle all the 'offset < WORDBYTES' cases using
445 | 		 * the same code, but it becomes more complicated and doesn't
446 | 		 * seem any faster overall; it definitely slows down the more
447 | 		 * common 'offset == 1' case.
448 | 		 */
449 | 	}
450 | 
451 | 	/* Fall back to a bytewise copy; relies on 1 <= min_length <= length.  */
452 | 	if (min_length >= 2)
453 | 		*out_next++ = *src++;
454 | 	if (min_length >= 3)
455 | 		*out_next++ = *src++;
456 | 	if (min_length >= 4)
457 | 		*out_next++ = *src++;
458 | 	do {
459 | 		*out_next++ = *src++;
460 | 	} while (out_next != end);
461 | 	return 0;
462 | }
463 | 
464 | #endif /* _DECOMPRESS_COMMON_H */
465 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_common.c - Common code for LZX compression and decompression.
  3 |  */
  4 | 
  5 | /*
  6 |  * Copyright (C) 2012-2016 Eric Biggers
  7 |  *
  8 |  * This program is free software: you can redistribute it and/or modify it under
  9 |  * the terms of the GNU General Public License as published by the Free Software
 10 |  * Foundation, either version 2 of the License, or (at your option) any later
 11 |  * version.
 12 |  *
 13 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 14 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 15 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 16 |  * details.
 17 |  *
 18 |  * You should have received a copy of the GNU General Public License along with
 19 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 20 |  */
 21 | 
 22 | #ifdef HAVE_CONFIG_H
 23 | #  include "config.h"
 24 | #endif
 25 | 
 26 | #include <string.h>
 27 | 
 28 | #ifdef __SSE2__
 29 | #  include <emmintrin.h>
 30 | #endif
 31 | 
 32 | #ifdef __AVX2__
 33 | #  include <immintrin.h>
 34 | #endif
 35 | 
 36 | #include "common_defs.h"
 37 | #include "lzx_common.h"
 38 | 
 39 | /* Mapping: offset slot => first match offset that uses that offset slot.
 40 |  * The offset slots for repeat offsets map to "fake" offsets < 1.  */
 41 | const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
 42 |         -2     , -1     , 0      , 1      , 2      ,    /* 0  --- 4  */
 43 |         4      , 6      , 10     , 14     , 22     ,    /* 5  --- 9  */
 44 |         30     , 46     , 62     , 94     , 126    ,    /* 10 --- 14 */
 45 |         190    , 254    , 382    , 510    , 766    ,    /* 15 --- 19 */
 46 |         1022   , 1534   , 2046   , 3070   , 4094   ,    /* 20 --- 24 */
 47 |         6142   , 8190   , 12286  , 16382  , 24574  ,    /* 25 --- 29 */
 48 |         32766  , 49150  , 65534  , 98302  , 131070 ,    /* 30 --- 34 */
 49 |         196606 , 262142 , 393214 , 524286 , 655358 ,    /* 35 --- 39 */
 50 |         786430 , 917502 , 1048574, 1179646, 1310718,    /* 40 --- 44 */
 51 |         1441790, 1572862, 1703934, 1835006, 1966078,    /* 45 --- 49 */
 52 |         2097150                                         /* extra     */
 53 | };
 54 | 
 55 | /* Mapping: offset slot => how many extra bits must be read and added to the
 56 |  * corresponding offset slot base to decode the match offset.  */
 57 | const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
 58 | 	0 , 0 , 0 , 0 , 1 ,	/* 0  --- 4  */
 59 | 	1 , 2 , 2 , 3 , 3 ,	/* 5  --- 9  */
 60 | 	4 , 4 , 5 , 5 , 6 ,	/* 10 --- 14 */
 61 | 	6 , 7 , 7 , 8 , 8 ,	/* 15 --- 19 */
 62 | 	9 , 9 , 10, 10, 11,	/* 20 --- 24 */
 63 | 	11, 12, 12, 13, 13,	/* 25 --- 29 */
 64 | 	14, 14, 15, 15, 16,	/* 30 --- 34 */
 65 | 	16, 17, 17, 17, 17,	/* 35 --- 39 */
 66 | 	17, 17, 17, 17, 17,	/* 40 --- 44 */
 67 | 	17, 17, 17, 17, 17,	/* 45 --- 49 */
 68 | };
 69 | 
 70 | /* Return the order (log2) of the smallest valid LZX window size that is at
 71 |  * least 'max_bufsize' bytes.  Return 0 if 'max_bufsize' is 0 or is larger than
 72 |  * the largest valid LZX window size.  */
 73 | unsigned
 74 | lzx_get_window_order(size_t max_bufsize)
 75 | {
 76 | 	if (max_bufsize != 0 && max_bufsize <= LZX_MAX_WINDOW_SIZE)
 77 | 		return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
 78 | 
 79 | 	return 0;
 80 | }
 81 | 
 82 | /* Return the number of symbols in the main Huffman code for the given LZX
 83 |  * window order, which must be valid.  */
 84 | unsigned
 85 | lzx_get_num_main_syms(unsigned window_order)
 86 | {
 87 | 	/* The largest match offset would seem to be
 88 | 	 * 'window_size - LZX_MIN_MATCH_LEN' (first two bytes matching the last
 89 | 	 * two), but the format disallows that case, so the real maximum is one
 90 | 	 * less.  This reduces the number of needed offset slots by 1.  */
 91 | 	const uint32_t largest_offset =
 92 | 		((uint32_t)1 << window_order) - LZX_MIN_MATCH_LEN - 1;
 93 | 	unsigned slot_count;
 94 | 
 95 | 	for (slot_count = 30; largest_offset >= lzx_offset_slot_base[slot_count]; )
 96 | 		slot_count++;
 97 | 
 98 | 	return LZX_NUM_CHARS + (slot_count * LZX_NUM_LEN_HEADERS);
 99 | }
100 | 
101 | static void
102 | do_translate_target(void *target, int32_t input_pos)
103 | {
104 | 	/* Rewrite the relative CALL target stored at 'target' as an absolute
105 | 	 * one; 'input_pos' is the position of the preceding E8 byte.  'target'
106 | 	 * may be unaligned, so it is accessed with memcpy(), not a cast.  */
107 | 	int32_t abs_offset, rel_offset;
108 | 
109 | 	memcpy(&rel_offset, target, sizeof(rel_offset));
110 | 	if (rel_offset < -input_pos || rel_offset >= LZX_WIM_MAGIC_FILESIZE)
111 | 		return;	/* out of the translatable range: leave the bytes untouched */
112 | 	if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos)
113 | 		abs_offset = rel_offset + input_pos;	/* "good translation" */
114 | 	else
115 | 		abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;	/* "compensating translation" */
116 | 	memcpy(target, &abs_offset, sizeof(abs_offset));
117 | }
118 | 
119 | static void
120 | undo_translate_target(void *target, int32_t input_pos)
121 | {
122 | 	/* Rewrite the absolute CALL target stored at 'target' as a relative
123 | 	 * one; 'input_pos' is the position of the preceding E8 byte.  'target'
124 | 	 * may be unaligned, so it is accessed with memcpy(), not a cast.  */
125 | 	int32_t abs_offset, rel_offset;
126 | 
127 | 	memcpy(&abs_offset, target, sizeof(abs_offset));
128 | 	if (abs_offset >= 0) {
129 | 		if (abs_offset >= LZX_WIM_MAGIC_FILESIZE)
130 | 			return;	/* out of range: leave the bytes untouched */
131 | 		rel_offset = abs_offset - input_pos;	/* "good translation" */
132 | 	} else {
133 | 		if (abs_offset < -input_pos)
134 | 			return;	/* out of range: leave the bytes untouched */
135 | 		rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;	/* "compensating translation" */
136 | 	}
137 | 	memcpy(target, &rel_offset, sizeof(rel_offset));
138 | }
139 | 
140 | /*
141 |  * Do or undo the 'E8' preprocessing used in LZX.  Before compression, the
142 |  * uncompressed data is preprocessed by changing the targets of x86 CALL
143 |  * instructions from relative offsets to absolute offsets.  After decompression,
144 |  * the translation is undone by changing the targets of x86 CALL instructions
145 |  * from absolute offsets to relative offsets.
146 |  *
147 |  * Note that despite its intent, E8 preprocessing can be done on any data even
148 |  * if it is not actually x86 machine code.  In fact, E8 preprocessing appears to
149 |  * always be used in LZX-compressed resources in WIM files; there is no bit to
150 |  * indicate whether it is used or not, unlike in the LZX compressed format as
151 |  * used in cabinet files, where a bit is reserved for that purpose.
152 |  *
153 |  * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
154 |  * which really means the 5-byte call instruction cannot start in the last 10
155 |  * bytes of the uncompressed data.  This is one of the errors in the LZX
156 |  * documentation.
157 |  *
158 |  * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
159 |  * WIM resource, which apparently is another difference from the LZX compression
160 |  * used in cabinet files.
161 |  *
162 |  * E8 processing is supposed to take the file size as a parameter, as it is used
163 |  * in calculating the translated jump targets.  But in WIM files, this file size
164 |  * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
165 |  */
166 | static void
167 | lzx_e8_filter(uint8_t *data, uint32_t size, void (*process_target)(void *, int32_t))
168 | {
169 | 
170 | #if !defined(__SSE2__) && !defined(__AVX2__)
171 | 	/*
172 | 	 * A worthwhile optimization is to push the end-of-buffer check into the
173 | 	 * relatively rare E8 case.  This is possible if we replace the last six
174 | 	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
175 | 	 * before reaching end-of-buffer.  In addition, this scheme guarantees
176 | 	 * that no translation can begin following an E8 byte in the last 10
177 | 	 * bytes because a 4-byte offset containing E8 as its high byte is a
178 | 	 * large negative number that is not valid for translation.  That is
179 | 	 * exactly what we need.
180 | 	 */
181 | 	uint8_t *tail;
182 | 	uint8_t saved_bytes[6];
183 | 	uint8_t *p;
184 | 
185 | 	if (size <= 10)
186 | 		return;
187 | 
188 | 	tail = &data[size - 6];
189 | 	memcpy(saved_bytes, tail, 6);
190 | 	memset(tail, 0xE8, 6);
191 | 	p = data;
192 | 	for (;;) {
193 | 		while (*p != 0xE8)
194 | 			p++;
195 | 		if (p >= tail)
196 | 			break;
197 | 		(*process_target)(p + 1, p - data);	/* the target follows the E8 byte */
198 | 		p += 5;	/* skip the E8 byte and its 4-byte target */
199 | 	}
200 | 	memcpy(tail, saved_bytes, 6);	/* restore the bytes overwritten by the E8 sentinels */
201 | #else
202 | 	/* SSE2 or AVX-2 optimized version for x86_64  */
203 | 
204 | 	uint8_t *p = data;
205 | 	uint64_t valid_mask = ~0;	/* bit i set => an E8 byte at p[i] may be translated */
206 | 
207 | 	if (size <= 10)
208 | 		return;
209 | #ifdef __AVX2__
210 | #  define ALIGNMENT_REQUIRED 32
211 | #else
212 | #  define ALIGNMENT_REQUIRED 16
213 | #endif
214 | 
215 | 	/* Process one byte at a time until the pointer is properly aligned.  */
216 | 	while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
217 | 		if (p >= data + size - 10)
218 | 			return;
219 | 		if (*p == 0xE8 && (valid_mask & 1)) {
220 | 			(*process_target)(p + 1, p - data);
221 | 			valid_mask &= ~0x1F;	/* the E8 byte and its 4 target bytes are now off-limits */
222 | 		}
223 | 		p++;
224 | 		valid_mask >>= 1;
225 | 		valid_mask |= (uint64_t)1 << 63;
226 | 	}
227 | 
228 | 	if (data + size - p >= 64) {
229 | 
230 | 		/* Vectorized processing  */
231 | 
232 | 		/* Note: we use a "trap" E8 byte to eliminate the need to check
233 | 		 * for end-of-buffer in the inner loop.  This byte is carefully
234 | 		 * positioned so that it will never be changed by a previous
235 | 		 * translation before it is detected.  */
236 | 
237 | 		uint8_t *trap = p + ((data + size - p) & ~31) - 32 + 4;
238 | 		uint8_t saved_byte = *trap;
239 | 		*trap = 0xE8;
240 | 
241 | 		for (;;) {
242 | 			uint32_t e8_mask;
243 | 			uint8_t *orig_p = p;
244 | 		#ifdef __AVX2__
245 | 			const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
246 | 			for (;;) {
247 | 				__m256i bytes = *(const __m256i *)p;
248 | 				__m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
249 | 				e8_mask = _mm256_movemask_epi8(cmpresult);
250 | 				if (e8_mask)
251 | 					break;
252 | 				p += 32;
253 | 			}
254 | 		#else
255 | 			const __m128i e8_bytes = _mm_set1_epi8(0xE8);
256 | 			for (;;) {
257 | 				/* Read the next 32 bytes of data and test them
258 | 				 * for E8 bytes.  */
259 | 				__m128i bytes1 = *(const __m128i *)p;
260 | 				__m128i bytes2 = *(const __m128i *)(p + 16);
261 | 				__m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
262 | 				__m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
263 | 				uint32_t mask1 = _mm_movemask_epi8(cmpresult1);
264 | 				uint32_t mask2 = _mm_movemask_epi8(cmpresult2);
265 | 				/* The masks have a bit set for each E8 byte.
266 | 				 * We stay in this fast inner loop as long as
267 | 				 * there are no E8 bytes.  */
268 | 				if (mask1 | mask2) {
269 | 					e8_mask = mask1 | (mask2 << 16);
270 | 					break;
271 | 				}
272 | 				p += 32;
273 | 			}
274 | 		#endif
275 | 
276 | 			/* Did we pass over data with no E8 bytes?  */
277 | 			if (p != orig_p)
278 | 				valid_mask = ~0;
279 | 
280 | 			/* Are we nearing end-of-buffer?  */
281 | 			if (p == trap - 4)
282 | 				break;
283 | 
284 | 			/* Process the E8 bytes.  However, the AND with
285 | 			 * 'valid_mask' ensures we never process an E8 byte that
286 | 			 * was itself part of a translation target.  */
287 | 			while ((e8_mask &= valid_mask)) {
288 | 				unsigned bit = bsf32(e8_mask);
289 | 				(*process_target)(p + bit + 1, p + bit - data);
290 | 				valid_mask &= ~((uint64_t)0x1F << bit);
291 | 			}
292 | 
293 | 			valid_mask >>= 32;	/* advance the mask past the 32 bytes just handled */
294 | 			valid_mask |= 0xFFFFFFFF00000000;
295 | 			p += 32;
296 | 		}
297 | 
298 | 		*trap = saved_byte;	/* restore the byte replaced by the trap */
299 | 	}
300 | 
301 | 	/* Approaching the end of the buffer; process one byte at a time.  */
302 | 	while (p < data + size - 10) {
303 | 		if (*p == 0xE8 && (valid_mask & 1)) {
304 | 			(*process_target)(p + 1, p - data);
305 | 			valid_mask &= ~0x1F;
306 | 		}
307 | 		p++;
308 | 		valid_mask >>= 1;
309 | 		valid_mask |= (uint64_t)1 << 63;
310 | 	}
311 | #endif /* __SSE2__ || __AVX2__ */
312 | }
313 | 
314 | void
315 | lzx_preprocess(uint8_t *data, uint32_t size)
316 | {
317 | 	lzx_e8_filter(data, size, do_translate_target);	/* relative -> absolute CALL targets */
318 | }
319 | 
320 | void
321 | lzx_postprocess(uint8_t *data, uint32_t size)
322 | {
323 | 	lzx_e8_filter(data, size, undo_translate_target);	/* absolute -> relative CALL targets */
324 | }
325 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * lzx_common.h
 3 |  *
 4 |  * Declarations shared between LZX compression and decompression.
 5 |  */
 6 | 
 7 | #ifndef _LZX_COMMON_H
 8 | #define _LZX_COMMON_H
 9 | 
10 | #include "lzx_constants.h"
11 | #include "common_defs.h"
12 | 
13 | extern const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
14 | 
15 | extern const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
16 | 
17 | extern unsigned
18 | lzx_get_window_order(size_t max_bufsize);
19 | 
20 | extern unsigned
21 | lzx_get_num_main_syms(unsigned window_order);
22 | 
23 | extern void
24 | lzx_preprocess(uint8_t *data, uint32_t size);
25 | 
26 | extern void
27 | lzx_postprocess(uint8_t *data, uint32_t size);
28 | 
29 | #endif /* _LZX_COMMON_H */
30 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_constants.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_constants.h
  3 |  *
  4 |  * Constants for the LZX compression format.
  5 |  */
  6 | 
  7 | #ifndef _LZX_CONSTANTS_H
  8 | #define _LZX_CONSTANTS_H
  9 | 
 10 | /* Number of literal byte values.  */
 11 | #define LZX_NUM_CHARS	256
 12 | 
 13 | /* The smallest and largest allowed match lengths.  */
 14 | #define LZX_MIN_MATCH_LEN	2
 15 | #define LZX_MAX_MATCH_LEN	257
 16 | 
 17 | /* Number of distinct match lengths that can be represented.  */
 18 | #define LZX_NUM_LENS		(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
 19 | 
 20 | /* Number of match lengths for which no length symbol is required.  */
 21 | #define LZX_NUM_PRIMARY_LENS	7
 22 | #define LZX_NUM_LEN_HEADERS	(LZX_NUM_PRIMARY_LENS + 1)
 23 | 
 24 | /* Valid values of the 3-bit block type field.  */
 25 | #define LZX_BLOCKTYPE_VERBATIM       1
 26 | #define LZX_BLOCKTYPE_ALIGNED        2
 27 | #define LZX_BLOCKTYPE_UNCOMPRESSED   3
 28 | 
 29 | /* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
 30 |  * sizes of the sliding window.  */
 31 | #define LZX_MIN_WINDOW_ORDER	15
 32 | #define LZX_MAX_WINDOW_ORDER	21
 33 | #define LZX_MIN_WINDOW_SIZE	(1UL << LZX_MIN_WINDOW_ORDER)  /* 32768   */
 34 | #define LZX_MAX_WINDOW_SIZE	(1UL << LZX_MAX_WINDOW_ORDER)  /* 2097152 */
 35 | 
 36 | /* Maximum number of offset slots.  (The actual number of offset slots depends
 37 |  * on the window size.)  */
 38 | #define LZX_MAX_OFFSET_SLOTS	50
 39 | 
 40 | /* Maximum number of symbols in the main code.  (The actual number of symbols in
 41 |  * the main code depends on the window size.)  */
 42 | #define LZX_MAINCODE_MAX_NUM_SYMBOLS	\
 43 | 	(LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
 44 | 
 45 | /* Number of symbols in the length code.  */
 46 | #define LZX_LENCODE_NUM_SYMBOLS		(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
 47 | 
 48 | /* Number of symbols in the pre-code.  */
 49 | #define LZX_PRECODE_NUM_SYMBOLS		20
 50 | 
 51 | /* Number of bits in which each pre-code codeword length is represented.  */
 52 | #define LZX_PRECODE_ELEMENT_SIZE	4
 53 | 
 54 | /* Number of low-order bits of each match offset that are entropy-encoded in
 55 |  * aligned offset blocks.  */
 56 | #define LZX_NUM_ALIGNED_OFFSET_BITS	3
 57 | 
 58 | /* Number of symbols in the aligned offset code.  */
 59 | #define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)
 60 | 
 61 | /* Mask for the match offset bits that are entropy-encoded in aligned offset
 62 |  * blocks.  */
 63 | #define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
 64 | 
 65 | /* Number of bits in which each aligned offset codeword length is represented.  */
 66 | #define LZX_ALIGNEDCODE_ELEMENT_SIZE	3
 67 | 
 68 | /* The first offset slot which requires an aligned offset symbol in aligned
 69 |  * offset blocks.  */
 70 | #define LZX_MIN_ALIGNED_OFFSET_SLOT	8
 71 | 
 72 | /* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT.  */
 73 | #define LZX_MIN_ALIGNED_OFFSET		14
 74 | 
 75 | /* The maximum number of extra offset bits in verbatim blocks.  (One would need
 76 |  * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
 77 |  * bits in *aligned* blocks.)  */
 78 | #define LZX_MAX_NUM_EXTRA_BITS		17
 79 | 
 80 | /* Maximum lengths (in bits) for length-limited Huffman code construction.  */
 81 | #define LZX_MAX_MAIN_CODEWORD_LEN	16
 82 | #define LZX_MAX_LEN_CODEWORD_LEN	16
 83 | #define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
 84 | #define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
 85 | 
 86 | /* For LZX-compressed blocks in WIM resources, this value is always used as the
 87 |  * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
 88 |  * though the blocks themselves are not this size, and the size of the actual
 89 |  * file resource in the WIM file is very likely to be something entirely
 90 |  * different as well.  */
 91 | #define LZX_WIM_MAGIC_FILESIZE	12000000
 92 | 
 93 | /* Assumed LZX block size when the encoded block size begins with a 0 bit.
 94 |  * This is probably WIM-specific.  */
 95 | #define LZX_DEFAULT_BLOCK_SIZE	32768
 96 | 
 97 | /* Number of offsets in the recent (or "repeat") offsets queue.  */
 98 | #define LZX_NUM_RECENT_OFFSETS	3
 99 | 
100 | /* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT).  */
101 | #define LZX_OFFSET_ADJUSTMENT	(LZX_NUM_RECENT_OFFSETS - 1)
102 | 
103 | #endif /* _LZX_CONSTANTS_H */
104 | 


--------------------------------------------------------------------------------
/src/ebiggers/lzx_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * lzx_decompress.c
  3 |  *
  4 |  * A decompressor for the LZX compression format, as used in WIM files.
  5 |  */
  6 | 
  7 | /*
  8 |  * Copyright (C) 2012-2016 Eric Biggers
  9 |  *
 10 |  * This program is free software: you can redistribute it and/or modify it under
 11 |  * the terms of the GNU General Public License as published by the Free Software
 12 |  * Foundation, either version 2 of the License, or (at your option) any later
 13 |  * version.
 14 |  *
 15 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 16 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 17 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 18 |  * details.
 19 |  *
 20 |  * You should have received a copy of the GNU General Public License along with
 21 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 22 |  */
 23 | 
 24 | /*
 25 |  * LZX is an LZ77 and Huffman-code based compression format that has many
 26 |  * similarities to DEFLATE (the format used by zlib/gzip).  The compression
 27 |  * ratio is as good as or better than DEFLATE.  See lzx_compress.c for a format
 28 |  * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
 29 |  * historical overview.  Here I make some pragmatic notes.
 30 |  *
 31 |  * The old specification for LZX is the document "Microsoft LZX Data Compression
 32 |  * Format" (1997).  It defines the LZX format as used in cabinet files.  Allowed
 33 |  * window sizes are 2^n where 15 <= n <= 21.  However, this document contains
 34 |  * several errors, so don't read too much into it...
 35 |  *
 36 |  * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
 37 |  * Compression and Decompression" (2014).  It defines the LZX format as used by
 38 |  * Microsoft's binary patcher.  It corrects several errors in the 1997 document
 39 |  * and extends the format in several ways --- namely, optional reference data,
 40 |  * up to 2^25 byte windows, and longer match lengths.
 41 |  *
 42 |  * WIM files use a more restricted form of LZX.  No LZX DELTA extensions are
 43 |  * present, the window is not "sliding", E8 preprocessing is done
 44 |  * unconditionally with a fixed file size, and the maximum window size is always
 45 |  * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
 46 |  * This code is primarily intended to implement this form of LZX.  But although
 47 |  * not compatible with WIMGAPI, this code also supports maximum window sizes up
 48 |  * to 2^21 bytes.
 49 |  *
 50 |  * TODO: Add support for window sizes up to 2^25 bytes.
 51 |  */
 52 | 
 53 | #ifdef HAVE_CONFIG_H
 54 | #  include "config.h"
 55 | #endif
 56 | 
 57 | #include <string.h>
 58 | 
 59 | #include "decompress_common.h"
 60 | #include "lzx_common.h"
 61 | #include "system_compression.h"
 62 | 
 63 | /* These values are chosen for fast decompression.  */
 64 | #define LZX_MAINCODE_TABLEBITS		11
 65 | #define LZX_LENCODE_TABLEBITS		9
 66 | #define LZX_PRECODE_TABLEBITS		6
 67 | #define LZX_ALIGNEDCODE_TABLEBITS	7
 68 | 
 69 | #define LZX_READ_LENS_MAX_OVERRUN 50
 70 | 
 71 | struct lzx_decompressor {
 72 | 	uint16_t maincode_decode_table[DECODE_TABLE_ENOUGH(LZX_MAINCODE_MAX_NUM_SYMBOLS, LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 73 | 	uint8_t maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 74 | 
 75 | 	uint16_t lencode_decode_table[DECODE_TABLE_ENOUGH(LZX_LENCODE_NUM_SYMBOLS, LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 76 | 	uint8_t lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
 77 | 
 78 | 	union {	/* the decode table and the codeword lengths overlay the same storage */
 79 | 		uint16_t alignedcode_decode_table[DECODE_TABLE_ENOUGH(LZX_ALIGNEDCODE_NUM_SYMBOLS, LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 80 | 		uint8_t alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
 81 | 	};
 82 | 
 83 | 	union {	/* all three members overlay the same storage */
 84 | 		uint16_t precode_decode_table[DECODE_TABLE_ENOUGH(LZX_PRECODE_NUM_SYMBOLS, LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
 85 | 		uint8_t precode_lens[LZX_PRECODE_NUM_SYMBOLS];
 86 | 		uint8_t extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
 87 | 	};
 88 | 
 89 | 	union {	/* the four working-space arrays overlay the same storage */
 90 | 		uint16_t maincode_working_space[2 * (LZX_MAX_MAIN_CODEWORD_LEN + 1) + LZX_MAINCODE_MAX_NUM_SYMBOLS];
 91 | 		uint16_t lencode_working_space[2 * (LZX_MAX_LEN_CODEWORD_LEN + 1) + LZX_LENCODE_NUM_SYMBOLS];
 92 | 		uint16_t alignedcode_working_space[2 * (LZX_MAX_ALIGNED_CODEWORD_LEN + 1) + LZX_ALIGNEDCODE_NUM_SYMBOLS];
 93 | 		uint16_t precode_working_space[2 * (LZX_MAX_PRE_CODEWORD_LEN + 1) + LZX_PRECODE_NUM_SYMBOLS];
 94 | 	};
 95 | 
 96 | 	unsigned window_order;
 97 | 	unsigned num_main_syms;
 98 | 
 99 | 	/* Like lzx_extra_offset_bits[], but does not include the entropy-coded
100 | 	 * bits of aligned offset blocks */
101 | 	uint8_t extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
102 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT);
103 | 
104 | /* Decode and return the next precode symbol from the input bitstream. */
105 | static forceinline unsigned
106 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
107 | {
108 | 	const uint16_t *table = d->precode_decode_table;
109 | 	return read_huffsym(is, table, LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
110 | }
111 | 
112 | /* Decode and return the next main code symbol from the input bitstream. */
113 | static forceinline unsigned
114 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
115 | {
116 | 	const uint16_t *table = d->maincode_decode_table;
117 | 	return read_huffsym(is, table, LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
118 | }
119 | 
120 | /* Read a Huffman-encoded symbol using the length code. */
121 | static forceinline unsigned
122 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
123 | {
124 | 	return read_huffsym(is, d->lencode_decode_table,
125 | 			    LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
126 | }
127 | 
128 | /* Read a Huffman-encoded symbol using the aligned offset code. */
129 | static forceinline unsigned
130 | read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
131 | {
132 | 	return read_huffsym(is, d->alignedcode_decode_table,
133 | 			    LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
134 | }
135 | 
136 | /*
137 |  * Read a precode from the compressed input bitstream, then use it to decode
138 |  * @num_lens codeword length values and write them to @lens.
139 |  */
140 | static int
141 | lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
142 | 		       uint8_t *lens, unsigned num_lens)
143 | {
144 | 	uint8_t *len_ptr = lens;
145 | 	uint8_t *lens_end = lens + num_lens;
146 | 
147 | 	/* Read the lengths of the precode codewords.  These are stored
148 | 	 * explicitly. */
149 | 	for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
150 | 		d->precode_lens[i] =
151 | 			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
152 | 	}
153 | 
154 | 	/* Build the decoding table for the precode. */
155 | 	if (make_huffman_decode_table(d->precode_decode_table,
156 | 				      LZX_PRECODE_NUM_SYMBOLS,
157 | 				      LZX_PRECODE_TABLEBITS,
158 | 				      d->precode_lens,
159 | 				      LZX_MAX_PRE_CODEWORD_LEN,
160 | 				      d->precode_working_space))
161 | 		return -1;
162 | 
163 | 	/* Decode the codeword lengths.  */
164 | 	do {
165 | 		unsigned presym;
166 | 		uint8_t len;
167 | 
168 | 		/* Read the next precode symbol.  */
169 | 		presym = read_presym(d, is);
170 | 		if (presym < 17) {
171 | 			/* Difference from old length  */
172 | 			len = *len_ptr - presym;
173 | 			if ((int8_t)len < 0)
174 | 				len += 17;
175 | 			*len_ptr++ = len;
176 | 		} else {
177 | 			/* Special RLE values  */
178 | 
179 | 			unsigned run_len;
180 | 
181 | 			if (presym == 17) {
182 | 				/* Run of 0's  */
183 | 				run_len = 4 + bitstream_read_bits(is, 4);
184 | 				len = 0;
185 | 			} else if (presym == 18) {
186 | 				/* Longer run of 0's  */
187 | 				run_len = 20 + bitstream_read_bits(is, 5);
188 | 				len = 0;
189 | 			} else {
190 | 				/* Run of identical lengths  */
191 | 				run_len = 4 + bitstream_read_bits(is, 1);
192 | 				presym = read_presym(d, is);
193 | 				if (unlikely(presym > 17))
194 | 					return -1;
195 | 				len = *len_ptr - presym;
196 | 				if ((int8_t)len < 0)
197 | 					len += 17;
198 | 			}
199 | 
200 | 			do {
201 | 				*len_ptr++ = len;
202 | 			} while (--run_len);
203 | 			/*
204 | 			 * The worst case overrun is when presym == 18,
205 | 			 * run_len == 20 + 31, and only 1 length was remaining.
206 | 			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
207 | 			 *
208 | 			 * Overrun while reading the first half of maincode_lens
209 | 			 * can corrupt the previous values in the second half.
210 | 			 * This doesn't really matter because the resulting
211 | 			 * lengths will still be in range, and data that
212 | 			 * generates overruns is invalid anyway.
213 | 			 */
214 | 		}
215 | 	} while (len_ptr < lens_end);
216 | 
217 | 	return 0;
218 | }
219 | 
220 | /*
221 |  * Read the header of an LZX block.  For all block types, the block type and
222 |  * size is saved in *block_type_ret and *block_size_ret, respectively.  For
223 |  * compressed blocks, the codeword lengths are also saved.  For uncompressed
224 |  * blocks, the recent offsets queue is also updated.
225 |  */
226 | static int
227 | lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
228 | 		      uint32_t recent_offsets[], int *block_type_ret,
229 | 		      uint32_t *block_size_ret)
230 | {
231 | 	int block_type;
232 | 	uint32_t block_size;
233 | 
234 | 	bitstream_ensure_bits(is, 4);
235 | 
236 | 	/* Read the block type. */
237 | 	block_type = bitstream_pop_bits(is, 3);
238 | 
239 | 	/* Read the block size. */
240 | 	if (bitstream_pop_bits(is, 1)) {
241 | 		block_size = LZX_DEFAULT_BLOCK_SIZE;
242 | 	} else {
243 | 		block_size = bitstream_read_bits(is, 16);
244 | 		if (d->window_order >= 16) {
245 | 			block_size <<= 8;
246 | 			block_size |= bitstream_read_bits(is, 8);
247 | 		}
248 | 	}
249 | 
250 | 	switch (block_type) {
251 | 
252 | 	case LZX_BLOCKTYPE_ALIGNED:
253 | 
254 | 		/* Read the aligned offset codeword lengths. */
255 | 
256 | 		for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
257 | 			d->alignedcode_lens[i] =
258 | 				bitstream_read_bits(is,
259 | 						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
260 | 		}
261 | 
262 | 		/* Fall though, since the rest of the header for aligned offset
263 | 		 * blocks is the same as that for verbatim blocks.  */
264 | 
265 | 	case LZX_BLOCKTYPE_VERBATIM:
266 | 
267 | 		/* Read the main codeword lengths, which are divided into two
268 | 		 * parts: literal symbols and match headers. */
269 | 
270 | 		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
271 | 					   LZX_NUM_CHARS))
272 | 			return -1;
273 | 
274 | 		if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
275 | 					   d->num_main_syms - LZX_NUM_CHARS))
276 | 			return -1;
277 | 
278 | 
279 | 		/* Read the length codeword lengths. */
280 | 
281 | 		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
282 | 					   LZX_LENCODE_NUM_SYMBOLS))
283 | 			return -1;
284 | 
285 | 		break;
286 | 
287 | 	case LZX_BLOCKTYPE_UNCOMPRESSED:
288 | 		/*
289 | 		 * The header of an uncompressed block contains new values for
290 | 		 * the recent offsets queue, starting on the next 16-bit
291 | 		 * boundary in the bitstream.  Careful: if the stream is
292 | 		 * *already* aligned, the correct thing to do is to throw away
293 | 		 * the next 16 bits (this is probably a mistake in the format).
294 | 		 */
295 | 		bitstream_ensure_bits(is, 1);
296 | 		bitstream_align(is);
297 | 		recent_offsets[0] = bitstream_read_u32(is);
298 | 		recent_offsets[1] = bitstream_read_u32(is);
299 | 		recent_offsets[2] = bitstream_read_u32(is);
300 | 
301 | 		/* Offsets of 0 are invalid.  */
302 | 		if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
303 | 		    recent_offsets[2] == 0)
304 | 			return -1;
305 | 		break;
306 | 
307 | 	default:
308 | 		/* Unrecognized block type.  */
309 | 		return -1;
310 | 	}
311 | 
312 | 	*block_type_ret = block_type;
313 | 	*block_size_ret = block_size;
314 | 	return 0;
315 | }
316 | 
317 | /* Decompress a block of LZX-compressed data. */
318 | static int
319 | lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
320 | 		     int block_type, uint32_t block_size,
321 | 		     uint8_t * const out_begin, uint8_t *out_next, uint32_t recent_offsets[])
322 | {
323 | 	uint8_t * const block_end = out_next + block_size;
324 | 	unsigned min_aligned_offset_slot;
325 | 
326 | 	/*
327 | 	 * Build the Huffman decode tables.  We always need to build the main
328 | 	 * and length decode tables.  For aligned blocks we additionally need to
329 | 	 * build the aligned offset decode table.
330 | 	 */
331 | 
332 | 	if (make_huffman_decode_table(d->maincode_decode_table,
333 | 				      d->num_main_syms,
334 | 				      LZX_MAINCODE_TABLEBITS,
335 | 				      d->maincode_lens,
336 | 				      LZX_MAX_MAIN_CODEWORD_LEN,
337 | 				      d->maincode_working_space))
338 | 		return -1;
339 | 
340 | 	if (make_huffman_decode_table(d->lencode_decode_table,
341 | 				      LZX_LENCODE_NUM_SYMBOLS,
342 | 				      LZX_LENCODE_TABLEBITS,
343 | 				      d->lencode_lens,
344 | 				      LZX_MAX_LEN_CODEWORD_LEN,
345 | 				      d->lencode_working_space))
346 | 		return -1;
347 | 
348 | 	if (block_type == LZX_BLOCKTYPE_ALIGNED) {
349 | 		if (make_huffman_decode_table(d->alignedcode_decode_table,
350 | 					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
351 | 					      LZX_ALIGNEDCODE_TABLEBITS,
352 | 					      d->alignedcode_lens,
353 | 					      LZX_MAX_ALIGNED_CODEWORD_LEN,
354 | 					      d->alignedcode_working_space))
355 | 			return -1;
356 | 		min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
357 | 		memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
358 | 		       sizeof(lzx_extra_offset_bits));
359 | 	} else {
360 | 		min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS;
361 | 		memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
362 | 		       sizeof(lzx_extra_offset_bits));
363 | 	}
364 | 
365 | 	/* Decode the literals and matches. */
366 | 
367 | 	do {
368 | 		unsigned mainsym;
369 | 		unsigned length;
370 | 		uint32_t offset;
371 | 		unsigned offset_slot;
372 | 
373 | 		mainsym = read_mainsym(d, is);
374 | 		if (mainsym < LZX_NUM_CHARS) {
375 | 			/* Literal */
376 | 			*out_next++ = mainsym;
377 | 			continue;
378 | 		}
379 | 
380 | 		/* Match */
381 | 
382 | 		/* Decode the length header and offset slot.  */
383 | 		STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
384 | 		length = mainsym % LZX_NUM_LEN_HEADERS;
385 | 		offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
386 | 
387 | 		/* If needed, read a length symbol to decode the full length. */
388 | 		if (length == LZX_NUM_PRIMARY_LENS)
389 | 			length += read_lensym(d, is);
390 | 		length += LZX_MIN_MATCH_LEN;
391 | 
392 | 		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
393 | 			/* Repeat offset  */
394 | 
395 | 			/* Note: This isn't a real LRU queue, since using the R2
396 | 			 * offset doesn't bump the R1 offset down to R2. */
397 | 			offset = recent_offsets[offset_slot];
398 | 			recent_offsets[offset_slot] = recent_offsets[0];
399 | 		} else {
400 | 			/* Explicit offset  */
401 | 			offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
402 | 			if (offset_slot >= min_aligned_offset_slot) {
403 | 				offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
404 | 					 read_alignedsym(d, is);
405 | 			}
406 | 			offset += lzx_offset_slot_base[offset_slot];
407 | 
408 | 			/* Update the match offset LRU queue.  */
409 | 			STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
410 | 			recent_offsets[2] = recent_offsets[1];
411 | 			recent_offsets[1] = recent_offsets[0];
412 | 		}
413 | 		recent_offsets[0] = offset;
414 | 
415 | 		/* Validate the match and copy it to the current position.  */
416 | 		if (unlikely(lz_copy(length, offset, out_begin,
417 | 				     out_next, block_end, LZX_MIN_MATCH_LEN)))
418 | 			return -1;
419 | 		out_next += length;
420 | 	} while (out_next != block_end);
421 | 
422 | 	return 0;
423 | }
424 | 
425 | int
426 | lzx_decompress(struct lzx_decompressor *d,
427 | 	       const void *compressed_data, size_t compressed_size,
428 | 	       void *uncompressed_data, size_t uncompressed_size)
429 | {
430 | 	uint8_t * const out_begin = uncompressed_data;
431 | 	uint8_t *out_next = out_begin;
432 | 	uint8_t * const out_end = out_begin + uncompressed_size;
433 | 	struct input_bitstream is;
434 | 	STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
435 | 	uint32_t recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
436 | 	unsigned may_have_e8_byte = 0;
437 | 
438 | 	init_input_bitstream(&is, compressed_data, compressed_size);
439 | 
440 | 	/* Codeword lengths begin as all 0's for delta encoding purposes. */
441 | 	memset(d->maincode_lens, 0, d->num_main_syms);
442 | 	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
443 | 
444 | 	/* Decompress blocks until we have all the uncompressed data. */
445 | 
446 | 	while (out_next != out_end) {
447 | 		int block_type;
448 | 		uint32_t block_size;
449 | 
450 | 		if (lzx_read_block_header(d, &is, recent_offsets,
451 | 					  &block_type, &block_size))
452 | 			return -1;
453 | 
454 | 		if (block_size < 1 || block_size > out_end - out_next)
455 | 			return -1;
456 | 
457 | 		if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
458 | 
459 | 			/* Compressed block */
460 | 			if (lzx_decompress_block(d, &is, block_type, block_size,
461 | 						 out_begin, out_next,
462 | 						 recent_offsets))
463 | 				return -1;
464 | 
465 | 			/* If the first E8 byte was in this block, then it must
466 | 			 * have been encoded as a literal using mainsym E8. */
467 | 			may_have_e8_byte |= d->maincode_lens[0xE8];
468 | 		} else {
469 | 
470 | 			/* Uncompressed block */
471 | 			if (bitstream_read_bytes(&is, out_next, block_size))
472 | 				return -1;
473 | 
474 | 			/* Re-align the bitstream if needed. */
475 | 			if (block_size & 1)
476 | 				bitstream_read_byte(&is);
477 | 
478 | 			/* There may have been an E8 byte in the block. */
479 | 			may_have_e8_byte = 1;
480 | 		}
481 | 		out_next += block_size;
482 | 	}
483 | 
484 | 	/* Postprocess the data unless it cannot possibly contain E8 bytes. */
485 | 	if (may_have_e8_byte)
486 | 		lzx_postprocess(uncompressed_data, uncompressed_size);
487 | 
488 | 	return 0;
489 | }
490 | 
491 | bool
492 | lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d)
493 | {
494 | 	unsigned window_order;
495 | 
496 | 	window_order = lzx_get_window_order(max_block_size);
497 | 	if (window_order == 0)
498 | 		return false;
499 | 
500 | 	d->window_order = window_order;
501 | 	d->num_main_syms = lzx_get_num_main_syms(window_order);
502 | 
503 | 	/* Initialize 'd->extra_offset_bits_minus_aligned'. */
504 | 	STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
505 | 		      sizeof(lzx_extra_offset_bits));
506 | 	STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
507 | 		      sizeof(lzx_extra_offset_bits));
508 | 	memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
509 | 	       sizeof(lzx_extra_offset_bits));
510 | 	for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
511 | 	     offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++)
512 | 	{
513 | 		d->extra_offset_bits_minus_aligned[offset_slot] -=
514 | 				LZX_NUM_ALIGNED_OFFSET_BITS;
515 | 	}
516 | 
517 | 	return true;
518 | }
519 | 


--------------------------------------------------------------------------------
/src/ebiggers/system_compression.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * system_compression.h - declarations for accessing System Compressed files
 3 |  *
 4 |  * Copyright (C) 2015 Eric Biggers
 5 |  *
 6 |  * This program is free software: you can redistribute it and/or modify it under
 7 |  * the terms of the GNU General Public License as published by the Free Software
 8 |  * Foundation, either version 2 of the License, or (at your option) any later
 9 |  * version.
10 |  *
11 |  * This program is distributed in the hope that it will be useful, but WITHOUT
12 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
14 |  * details.
15 |  *
16 |  * You should have received a copy of the GNU General Public License along with
17 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
18 |  */
19 | 
20 | #pragma once
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
26 | #include <stddef.h>
27 | #include <stdbool.h>
28 | #include <sys/types.h>
29 | 
30 | /* System compressed file access  */
31 | 
32 | struct ntfs_system_decompression_ctx;
33 | 
34 | extern void
35 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
36 | 
37 | /* XPRESS decompression  */
38 | 
39 | #define XPRESS_NUM_CHARS	256
40 | #define XPRESS_NUM_SYMBOLS	512
41 | #define XPRESS_MAX_CODEWORD_LEN	15
42 | 
43 | #define XPRESS_MIN_MATCH_LEN	3
44 | 
45 | #define DECODE_TABLE_ALIGNMENT 16
46 | 
/*
 * Scratch state used by xpress_decompress().
 *
 * 'lens' and 'decode_table' share storage through the anonymous union:
 * xpress_decompress() fills 'lens' with the XPRESS_NUM_SYMBOLS codeword
 * lengths and then passes both 'decode_table' and 'lens' to
 * make_huffman_decode_table(), with 'working_space' as its scratch buffer.
 *
 * NOTE(review): 2566 is presumably the decode table size required for
 * XPRESS_NUM_SYMBOLS symbols, the table-bits value defined in
 * xpress_decompress.c and XPRESS_MAX_CODEWORD_LEN -- confirm against
 * DECODE_TABLE_ENOUGH() before changing any of those constants.
 *
 * NOTE(review): uint8_t/uint16_t are used here, but this header includes only
 * <stddef.h>, <stdbool.h> and <sys/types.h>; it appears to rely on the
 * including file having pulled in <stdint.h> first.
 */
struct xpress_decompressor {
	union {
		uint16_t decode_table[2566] __attribute__((aligned(DECODE_TABLE_ALIGNMENT)));
		uint8_t lens[XPRESS_NUM_SYMBOLS];
	};
	uint16_t working_space[2 * (XPRESS_MAX_CODEWORD_LEN + 1) + XPRESS_NUM_SYMBOLS];
} __attribute__((aligned(DECODE_TABLE_ALIGNMENT)));
54 | 
/* NOTE(review): no definition of xpress_allocate_decompressor() is visible
 * next to xpress_decompress() -- confirm it is still provided somewhere. */
extern struct xpress_decompressor *xpress_allocate_decompressor(void);

/*
 * Decompress XPRESS (Huffman variant) data.  @decompressor supplies the
 * scratch tables.  Exactly @uncompressed_size bytes are written to
 * @uncompressed_data.  Returns 0 on success and -1 on failure (input shorter
 * than the codeword length table, an unusable Huffman code, or an invalid
 * match).
 */
extern int xpress_decompress(struct xpress_decompressor *decompressor,
		      const void *compressed_data, size_t compressed_size,
		      void *uncompressed_data, size_t uncompressed_size);

/* NOTE(review): as with xpress_allocate_decompressor(), no definition is
 * visible next to xpress_decompress() -- confirm. */
extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);
62 | 
/* LZX decompression  */

/* Opaque here; the layout is private to the LZX decompressor source. */
struct lzx_decompressor;

/*
 * Initialize caller-provided decompressor state @d for data whose maximum
 * block size is @max_block_size.  Returns false if no window order can be
 * derived from @max_block_size, true otherwise.
 */
extern bool
lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d);

/*
 * Decompress LZX data using state previously set up by
 * lzx_init_decompressor().  Exactly @uncompressed_size bytes are written to
 * @uncompressed_data.  Returns 0 on success and -1 on invalid input.
 */
extern int lzx_decompress(struct lzx_decompressor *decompressor,
			  const void *compressed_data, size_t compressed_size,
			  void *uncompressed_data, size_t uncompressed_size);
73 | 
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | 


--------------------------------------------------------------------------------
/src/ebiggers/xpress_decompress.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * xpress_decompress.c
  3 |  *
  4 |  * A decompressor for the XPRESS compression format (Huffman variant).
  5 |  */
  6 | 
  7 | /*
  8 |  *
  9 |  * Copyright (C) 2012-2016 Eric Biggers
 10 |  *
 11 |  * This program is free software: you can redistribute it and/or modify it under
 12 |  * the terms of the GNU General Public License as published by the Free Software
 13 |  * Foundation, either version 2 of the License, or (at your option) any later
 14 |  * version.
 15 |  *
 16 |  * This program is distributed in the hope that it will be useful, but WITHOUT
 17 |  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 18 |  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 19 |  * details.
 20 |  *
 21 |  * You should have received a copy of the GNU General Public License along with
 22 |  * this program.  If not, see <http://www.gnu.org/licenses/>.
 23 |  */
 24 | 
 25 | 
 26 | /*
 27 |  * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
 28 |  * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
 29 |  * it is a little faster to compress and decompress.
 30 |  *
 31 |  * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
 32 |  * Xpress Compression Algorithm".  In the MSDN library, it can currently be
 33 |  * found under Open Specifications => Protocols => Windows Protocols => Windows
 34 |  * Server Protocols => [MS-XCA] Xpress Compression Algorithm".  The format in
 35 |  * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
 36 |  * (there apparently are some other versions of XPRESS as well).
 37 |  *
 38 |  * If you are already familiar with the LZ77 algorithm and Huffman coding, the
 39 |  * XPRESS format is fairly simple.  The compressed data begins with 256 bytes
 40 |  * that contain 512 4-bit integers that are the lengths of the symbols in the
 41 |  * Huffman code used for match/literal headers.  In contrast with more
 42 |  * complicated formats such as DEFLATE and LZX, this is the only Huffman code
 43 |  * that is used for the entirety of the XPRESS compressed data, and the codeword
 44 |  * lengths are not encoded with a pretree.
 45 |  *
 46 |  * The rest of the compressed data is Huffman-encoded symbols.  Values 0 through
 47 |  * 255 represent the corresponding literal bytes.  Values 256 through 511
 48 |  * represent matches and may require extra bits or bytes to be read to get the
 49 |  * match offset and match length.
 50 |  *
 51 |  * The trickiest part is probably the way in which literal bytes for match
 52 |  * lengths are interleaved in the bitstream.
 53 |  *
 54 |  * Also, a caveat--- according to Microsoft's documentation for XPRESS,
 55 |  *
 56 |  *	"Some implementation of the decompression algorithm expect an extra
 57 |  *	symbol to mark the end of the data.  Specifically, some implementations
 58 |  *	fail during decompression if the Huffman symbol 256 is not found after
 59 |  *	the actual data."
 60 |  *
 61 |  * This is the case with Microsoft's implementation in WIMGAPI, for example.  So
 62 |  * although our implementation doesn't currently check for this extra symbol,
 63 |  * compressors would be wise to add it.
 64 |  */
 65 | 
 66 | #ifdef HAVE_CONFIG_H
 67 | #  include "config.h"
 68 | #endif
 69 | 
 70 | #include "decompress_common.h"
 71 | #include "system_compression.h"
 72 | 
 73 | /* This value is chosen for fast decompression.  */
 74 | #define XPRESS_TABLEBITS 11
 75 | 
 76 | int
 77 | xpress_decompress(struct xpress_decompressor * d,
 78 |                  const void *compressed_data, size_t compressed_size,
 79 |                  void *uncompressed_data, size_t uncompressed_size)
 80 | {
 81 | 	const uint8_t * const in_begin = compressed_data;
 82 | 	uint8_t * const out_begin = uncompressed_data;
 83 | 	uint8_t *out_next = out_begin;
 84 | 	uint8_t * const out_end = out_begin + uncompressed_size;
 85 | 	struct input_bitstream is;
 86 | 
 87 | 	/* Read the Huffman codeword lengths.  */
 88 | 	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
 89 | 		return -1;
 90 | 	for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
 91 | 		d->lens[2 * i + 0] = in_begin[i] & 0xf;
 92 | 		d->lens[2 * i + 1] = in_begin[i] >> 4;
 93 | 	}
 94 | 
 95 | 	/* Build a decoding table for the Huffman code.  */
 96 | 	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
 97 | 				      XPRESS_TABLEBITS, d->lens,
 98 | 				      XPRESS_MAX_CODEWORD_LEN,
 99 | 				      d->working_space))
100 | 		return -1;
101 | 
102 | 	/* Decode the matches and literals.  */
103 | 
104 | 	init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
105 | 			     compressed_size - XPRESS_NUM_SYMBOLS / 2);
106 | 
107 | 	while (out_next != out_end) {
108 | 		unsigned sym;
109 | 		unsigned log2_offset;
110 | 		uint32_t length;
111 | 		uint32_t offset;
112 | 
113 | 		sym = read_huffsym(&is, d->decode_table,
114 | 				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
115 | 		if (sym < XPRESS_NUM_CHARS) {
116 | 			/* Literal  */
117 | 			*out_next++ = sym;
118 | 		} else {
119 | 			/* Match  */
120 | 			length = sym & 0xf;
121 | 			log2_offset = (sym >> 4) & 0xf;
122 | 
123 | 			bitstream_ensure_bits(&is, 16);
124 | 
125 | 			offset = ((uint32_t)1 << log2_offset) |
126 | 				 bitstream_pop_bits(&is, log2_offset);
127 | 
128 | 			if (length == 0xf) {
129 | 				length += bitstream_read_byte(&is);
130 | 				if (length == 0xf + 0xff)
131 | 					length = bitstream_read_u16(&is);
132 | 			}
133 | 			length += XPRESS_MIN_MATCH_LEN;
134 | 
135 | 			if (unlikely(lz_copy(length, offset,
136 | 					     out_begin, out_next, out_end,
137 | 					     XPRESS_MIN_MATCH_LEN)))
138 | 				return -1;
139 | 
140 | 			out_next += length;
141 | 		}
142 | 	}
143 | 	return 0;
144 | }
145 | 


--------------------------------------------------------------------------------
/src/misc.cpp:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2023
  2 |  *
  3 |  * This file is part of ntfs-efi.
  4 |  *
  5 |  * ntfs-efi is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * ntfs-efi is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with ntfs-efi.  If not, see <http://www.gnu.org/licenses/>. */
 17 | 
 18 | #include "misc.h"
 19 | #include <stddef.h>
 20 | #include <stdint.h>
 21 | 
 22 | extern "C"
 23 | void* memset(void* s, int c, size_t n) {
 24 |     void* orig_s = s;
 25 | 
 26 |     // FIXME - faster if we make sure we're aligned (also in memcpy)?
 27 | 
 28 | #if __INTPTR_WIDTH__ == 64
 29 |     uint64_t v;
 30 | 
 31 |     v = 0;
 32 | 
 33 |     for (unsigned int i = 0; i < sizeof(uint64_t); i++) {
 34 |         v <<= 8;
 35 |         v |= c & 0xff;
 36 |     }
 37 | 
 38 |     while (n >= sizeof(uint64_t)) {
 39 |         *(uint64_t*)s = v;
 40 | 
 41 |         s = (uint8_t*)s + sizeof(uint64_t);
 42 |         n -= sizeof(uint64_t);
 43 |     }
 44 | #else
 45 |     uint32_t v;
 46 | 
 47 |     v = 0;
 48 | 
 49 |     for (unsigned int i = 0; i < sizeof(uint32_t); i++) {
 50 |         v <<= 8;
 51 |         v |= c & 0xff;
 52 |     }
 53 | 
 54 |     while (n >= sizeof(uint32_t)) {
 55 |         *(uint32_t*)s = v;
 56 | 
 57 |         s = (uint8_t*)s + sizeof(uint32_t);
 58 |         n -= sizeof(uint32_t);
 59 |     }
 60 | #endif
 61 | 
 62 |     while (n > 0) {
 63 |         *(uint8_t*)s = c;
 64 | 
 65 |         s = (uint8_t*)s + 1;
 66 |         n--;
 67 |     }
 68 | 
 69 |     return orig_s;
 70 | }
 71 | 
 72 | extern "C"
 73 | int memcmp(const void* s1, const void* s2, size_t n) {
 74 | #if __INTPTR_WIDTH__ == 64
 75 |     while (n > sizeof(uint64_t)) {
 76 |         uint64_t c1 = *(uint64_t*)s1;
 77 |         uint64_t c2 = *(uint64_t*)s2;
 78 | 
 79 |         if (c1 != c2)
 80 |             return c1 > c2 ? 1 : -1;
 81 | 
 82 |         s1 = (uint64_t*)s1 + 1;
 83 |         s2 = (uint64_t*)s2 + 1;
 84 |         n -= sizeof(uint64_t);
 85 |     }
 86 | #endif
 87 | 
 88 |     while (n > sizeof(uint32_t)) {
 89 |         uint32_t c1 = *(uint32_t*)s1;
 90 |         uint32_t c2 = *(uint32_t*)s2;
 91 | 
 92 |         if (c1 != c2)
 93 |             return c1 > c2 ? 1 : -1;
 94 | 
 95 |         s1 = (uint32_t*)s1 + 1;
 96 |         s2 = (uint32_t*)s2 + 1;
 97 |         n -= sizeof(uint32_t);
 98 |     }
 99 | 
100 |     while (n > 0) {
101 |         uint8_t c1 = *(uint8_t*)s1;
102 |         uint8_t c2 = *(uint8_t*)s2;
103 | 
104 |         if (c1 != c2)
105 |             return c1 > c2 ? 1 : -1;
106 | 
107 |         s1 = (uint8_t*)s1 + 1;
108 |         s2 = (uint8_t*)s2 + 1;
109 |         n--;
110 |     }
111 | 
112 |     return 0;
113 | }
114 | 
// Copies n bytes from src to dest and returns dest.
// The copy proceeds in progressively smaller units: 8-byte words (only when
// __INTPTR_WIDTH__ is 64), then 4-byte words, 2-byte words and single bytes.
extern "C"
void* memcpy(void* dest, const void* src, size_t n) {
    uint8_t* out = (uint8_t*)dest;
    const uint8_t* in = (const uint8_t*)src;

#if __INTPTR_WIDTH__ == 64
    for (; n >= sizeof(uint64_t); n -= sizeof(uint64_t)) {
        *(uint64_t*)out = *(const uint64_t*)in;
        out += sizeof(uint64_t);
        in += sizeof(uint64_t);
    }
#endif

    for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t)) {
        *(uint32_t*)out = *(const uint32_t*)in;
        out += sizeof(uint32_t);
        in += sizeof(uint32_t);
    }

    for (; n >= sizeof(uint16_t); n -= sizeof(uint16_t)) {
        *(uint16_t*)out = *(const uint16_t*)in;
        out += sizeof(uint16_t);
        in += sizeof(uint16_t);
    }

    // At most one byte is left at this point.
    for (; n > 0; n--) {
        *out++ = *in++;
    }

    return dest;
}
159 | 
160 | const char* error_string(EFI_STATUS Status) {
161 |     switch (Status) {
162 |         case EFI_SUCCESS:
163 |             return "EFI_SUCCESS";
164 | 
165 |         case EFI_LOAD_ERROR:
166 |             return "EFI_LOAD_ERROR";
167 | 
168 |         case EFI_INVALID_PARAMETER:
169 |             return "EFI_INVALID_PARAMETER";
170 | 
171 |         case EFI_UNSUPPORTED:
172 |             return "EFI_UNSUPPORTED";
173 | 
174 |         case EFI_BAD_BUFFER_SIZE:
175 |             return "EFI_BAD_BUFFER_SIZE";
176 | 
177 |         case EFI_BUFFER_TOO_SMALL:
178 |             return "EFI_BUFFER_TOO_SMALL";
179 | 
180 |         case EFI_NOT_READY:
181 |             return "EFI_NOT_READY";
182 | 
183 |         case EFI_DEVICE_ERROR:
184 |             return "EFI_DEVICE_ERROR";
185 | 
186 |         case EFI_WRITE_PROTECTED:
187 |             return "EFI_WRITE_PROTECTED";
188 | 
189 |         case EFI_OUT_OF_RESOURCES:
190 |             return "EFI_OUT_OF_RESOURCES";
191 | 
192 |         case EFI_VOLUME_CORRUPTED:
193 |             return "EFI_VOLUME_CORRUPTED";
194 | 
195 |         case EFI_VOLUME_FULL:
196 |             return "EFI_VOLUME_FULL";
197 | 
198 |         case EFI_NO_MEDIA:
199 |             return "EFI_NO_MEDIA";
200 | 
201 |         case EFI_MEDIA_CHANGED:
202 |             return "EFI_MEDIA_CHANGED";
203 | 
204 |         case EFI_NOT_FOUND:
205 |             return "EFI_NOT_FOUND";
206 | 
207 |         case EFI_ACCESS_DENIED:
208 |             return "EFI_ACCESS_DENIED";
209 | 
210 |         case EFI_NO_RESPONSE:
211 |             return "EFI_NO_RESPONSE";
212 | 
213 |         case EFI_NO_MAPPING:
214 |             return "EFI_NO_MAPPING";
215 | 
216 |         case EFI_TIMEOUT:
217 |             return "EFI_TIMEOUT";
218 | 
219 |         case EFI_NOT_STARTED:
220 |             return "EFI_NOT_STARTED";
221 | 
222 |         case EFI_ALREADY_STARTED:
223 |             return "EFI_ALREADY_STARTED";
224 | 
225 |         case EFI_ABORTED:
226 |             return "EFI_ABORTED";
227 | 
228 |         case EFI_ICMP_ERROR:
229 |             return "EFI_ICMP_ERROR";
230 | 
231 |         case EFI_TFTP_ERROR:
232 |             return "EFI_TFTP_ERROR";
233 | 
234 |         case EFI_PROTOCOL_ERROR:
235 |             return "EFI_PROTOCOL_ERROR";
236 | 
237 |         case EFI_INCOMPATIBLE_VERSION:
238 |             return "EFI_INCOMPATIBLE_VERSION";
239 | 
240 |         case EFI_SECURITY_VIOLATION:
241 |             return "EFI_SECURITY_VIOLATION";
242 | 
243 |         case EFI_CRC_ERROR:
244 |             return "EFI_CRC_ERROR";
245 | 
246 |         case EFI_END_OF_MEDIA:
247 |             return "EFI_END_OF_MEDIA";
248 | 
249 |         case EFI_END_OF_FILE:
250 |             return "EFI_END_OF_FILE";
251 | 
252 |         case EFI_INVALID_LANGUAGE:
253 |             return "EFI_INVALID_LANGUAGE";
254 | 
255 |         case EFI_COMPROMISED_DATA:
256 |             return "EFI_COMPROMISED_DATA";
257 | 
258 |         default:
259 |             return "(unknown error)";
260 |     }
261 | }
262 | 
// Copies the NUL-terminated string src (terminator included) to dest and
// returns a pointer to the terminator written into dest.
char* stpcpy(char* dest, const char* src) {
    for (; *src != 0; src++, dest++)
        *dest = *src;

    *dest = 0;

    return dest;
}
274 | 


--------------------------------------------------------------------------------
/src/misc.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) Mark Harmstone 2023
 2 |  *
 3 |  * This file is part of ntfs-efi.
 4 |  *
 5 |  * ntfs-efi is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public Licence as published by
 7 |  * the Free Software Foundation, either version 2 of the Licence, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * ntfs-efi is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public Licence for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public Licence
16 |  * along with ntfs-efi.  If not, see <http://www.gnu.org/licenses/>. */
17 | 
18 | #pragma once
19 | 
20 | #include <efi.h>
21 | 
// Returns the name of an EFI status code as a string literal (for example
// "EFI_CRC_ERROR"), or "(unknown error)" for a code that is not recognised.
const char* error_string(EFI_STATUS Status);
// Copies the NUL-terminated string src to dest and returns a pointer to the
// terminating NUL written into dest.
char* stpcpy(char* dest, const char* src);
24 | 


--------------------------------------------------------------------------------
/src/ntfs.cpp:
--------------------------------------------------------------------------------
   1 | /* Copyright (c) Mark Harmstone 2023
   2 |  *
   3 |  * This file is part of ntfs-efi.
   4 |  *
   5 |  * ntfs-efi is free software: you can redistribute it and/or modify
   6 |  * it under the terms of the GNU General Public Licence as published by
   7 |  * the Free Software Foundation, either version 2 of the Licence, or
   8 |  * (at your option) any later version.
   9 |  *
  10 |  * ntfs-efi is distributed in the hope that it will be useful,
  11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 |  * GNU General Public Licence for more details.
  14 |  *
  15 |  * You should have received a copy of the GNU General Public Licence
  16 |  * along with ntfs-efi.  If not, see <http://www.gnu.org/licenses/>. */
  17 | 
  18 | #include <efi.h>
  19 | #include <efilink.h>
  20 | #include <string.h>
  21 | #include <string_view>
  22 | #include <optional>
  23 | #include <span>
  24 | #include <uchar.h>
  25 | #include "ntfs.h"
  26 | #include "misc.h"
  27 | #include "quibbleproto.h"
  28 | #include "ebiggers/system_compression.h"
  29 | 
  30 | #define UNUSED(x) (void)(x)
  31 | #define sector_align(n, a) ((n)&((a)-1)?(((n)+(a))&~((a)-1)):(n))
  32 | 
  33 | using namespace std;
  34 | 
// One contiguous run of clusters belonging to an attribute, kept on a
// LIST_ENTRY-linked list (for example volume::mft_mappings or
// inode::data_mappings) and consumed by read_from_mappings.
struct mapping {
    LIST_ENTRY list_entry; // links this run into its owning list
    uint64_t lcn; // first cluster of the run on the volume; 0 is treated as a sparse run
    uint64_t vcn; // first cluster of the run within the attribute
    uint64_t length; // length of the run, in clusters
};
  41 | 
// Per-volume state for one NTFS filesystem this driver is serving.
struct volume {
    ~volume();

    EFI_SIMPLE_FILE_SYSTEM_PROTOCOL proto; // NOTE(review): presumably the interface installed on the controller - confirm
    EFI_QUIBBLE_PROTOCOL quibble_proto;
    NTFS_BOOT_SECTOR* boot_sector; // supplies BytesPerSector and SectorsPerCluster for all size calculations
    EFI_HANDLE controller;
    EFI_BLOCK_IO_PROTOCOL* block; // used by read_from_mappings for ReadBlocks
    EFI_DISK_IO_PROTOCOL* disk_io;
    uint64_t file_record_size; // size in bytes of one MFT file record
    LIST_ENTRY mft_mappings; // list of mapping runs used to read file records out of the MFT
    char16_t upcase[0x10000]; // upper-case table indexed by UTF-16 code unit, used by cmp_filenames
};
  55 | 
// State behind one open EFI_FILE_PROTOCOL handle. Handles are recovered from
// the protocol pointer with _CR(File, inode, proto).
struct inode {
    // Only binds the volume reference; file_open zero-fills the storage before
    // constructing the object in place, so all other members start as zero.
    inode(volume& vol) : vol(vol) { }
    ~inode();

    EFI_FILE_PROTOCOL proto; // the interface handed back to the caller as the file handle
    uint64_t ino; // MFT record number of the file
    volume& vol;
    bool inode_loaded; // set once load_inode has run; the destructor only frees the lists below when this is set
    STANDARD_INFORMATION standard_info;
    uint64_t size;
    uint64_t phys_size;
    uint64_t vdl; // NOTE(review): presumably the valid data length - confirm
    uint64_t position; // incremented by read_dir for every directory entry it returns
    LIST_ENTRY index_mappings; // list of mapping runs used to read index records
    index_root* index_root; // pool allocation, freed by the destructor
    LIST_ENTRY levels; // stack of btree_level entries tracking the directory enumeration position
    bool is_dir;
    size_t name_len; // length of name in char16_t units
    char16_t* name; // normalized path, not NUL-terminated; nullptr for the root; freed by the destructor
    bool data_loaded;
    LIST_ENTRY data_mappings; // list of mapping runs, freed by the destructor
    uint8_t* data; // pool allocation, freed by the destructor when non-null
};
  79 | 
// One level of the directory B-tree walk kept on inode::levels. The tail of the
// list is the level currently being enumerated.
struct btree_level {
    LIST_ENTRY list_entry; // links this level into inode::levels
    const index_entry* ent; // current entry within this level
    uint8_t data[]; // index record bytes for levels read by next_index_item; the first level allocated by read_dir has none and points into inode::index_root
};
  85 | 
// Driver-wide state.
// NOTE(review): systable, bs and drvbind are presumably initialised at driver
// entry, which is not visible in this part of the file - confirm.
static EFI_SYSTEM_TABLE* systable;
static EFI_BOOT_SERVICES* bs; // boot services table used for pool allocation and protocol calls
static EFI_DRIVER_BINDING_PROTOCOL drvbind;
static EFI_QUIBBLE_INFO_PROTOCOL* info_proto = nullptr; // when non-null, do_print forwards messages to its Print()
  90 | 
  91 | static void populate_file_handle(EFI_FILE_PROTOCOL* h);
  92 | static EFI_STATUS load_inode(inode& ino);
  93 | static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset,
  94 |                                      uint8_t* buf, uint64_t size);
  95 | static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length,
  96 |                                  unsigned int sector_size);
  97 | static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att,
  98 |                                 LIST_ENTRY* mappings);
  99 | static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record,
 100 |                                     invocable<const ATTRIBUTE_RECORD_HEADER&, string_view, u16string_view> auto func);
 101 | 
 102 | void do_print(const char* s) {
 103 |     if (info_proto)
 104 |         info_proto->Print(s);
 105 | }
 106 | 
 107 | void do_print_error(const char* func, EFI_STATUS Status) {
 108 |     char s[255], *p;
 109 | 
 110 |     p = stpcpy(s, func);
 111 |     p = stpcpy(p, " returned ");
 112 |     p = stpcpy(p, error_string(Status));
 113 |     p = stpcpy(p, "\n");
 114 | 
 115 |     do_print(s);
 116 | }
 117 | 
 118 | static EFI_STATUS drv_supported(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
 119 |                                 EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) {
 120 |     EFI_STATUS Status;
 121 |     EFI_DISK_IO_PROTOCOL* disk_io;
 122 |     EFI_GUID guid_disk = EFI_DISK_IO_PROTOCOL_GUID;
 123 |     EFI_GUID guid_block = EFI_BLOCK_IO_PROTOCOL_GUID;
 124 | 
 125 |     UNUSED(RemainingDevicePath);
 126 | 
 127 |     Status = bs->OpenProtocol(ControllerHandle, &guid_disk, (void**)&disk_io, This->DriverBindingHandle,
 128 |                               ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER);
 129 | 
 130 |     if (EFI_ERROR(Status))
 131 |         return Status;
 132 | 
 133 |     bs->CloseProtocol(ControllerHandle, &guid_disk, This->DriverBindingHandle, ControllerHandle);
 134 | 
 135 |     return bs->OpenProtocol(ControllerHandle, &guid_block, NULL, This->DriverBindingHandle,
 136 |                             ControllerHandle, EFI_OPEN_PROTOCOL_TEST_PROTOCOL);
 137 | }
 138 | 
 139 | static int cmp_filenames(const char16_t* upcase, u16string_view fn1, u16string_view fn2) {
 140 |     // FIXME - what about directories with case-sensitivity flag set?
 141 | 
 142 |     while (!fn1.empty() || !fn2.empty()) {
 143 |         if (fn1.empty())
 144 |             return -1;
 145 | 
 146 |         if (fn2.empty())
 147 |             return 1;
 148 | 
 149 |         char16_t c1 = upcase[fn1[0]];
 150 |         char16_t c2 = upcase[fn2[0]];
 151 | 
 152 |         if (c1 < c2)
 153 |             return -1;
 154 |         else if (c1 > c2)
 155 |             return 1;
 156 | 
 157 |         fn1 = u16string_view(fn1.data() + 1, fn1.size() - 1);
 158 |         fn2 = u16string_view(fn2.data() + 1, fn2.size() - 1);
 159 |     }
 160 | 
 161 |     return 0;
 162 | }
 163 | 
 164 | static EFI_STATUS find_file_in_dir(const volume& vol, uint64_t dir, u16string_view name, uint64_t* inode) {
 165 |     EFI_STATUS Status, Status2;
 166 |     FILE_RECORD_SEGMENT_HEADER* file;
 167 |     index_root* ir = nullptr;
 168 |     LIST_ENTRY index_mappings;
 169 |     const index_entry* ent;
 170 |     uint8_t* scratch = nullptr;
 171 | 
 172 |     InitializeListHead(&index_mappings);
 173 | 
 174 |     Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file);
 175 |     if (EFI_ERROR(Status)) {
 176 |         do_print_error("AllocatePool", Status);
 177 |         return Status;
 178 |     }
 179 | 
 180 |     Status = read_from_mappings(vol, &vol.mft_mappings, dir * vol.file_record_size,
 181 |                                 (uint8_t*)file, vol.file_record_size);
 182 |     if (EFI_ERROR(Status)) {
 183 |         bs->FreePool(file);
 184 |         do_print_error("read_from_mappings", Status);
 185 |         return Status;
 186 |     }
 187 | 
 188 |     if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
 189 |         do_print("Signature was not FILE\n");
 190 |         bs->FreePool(file);
 191 |         return EFI_INVALID_PARAMETER;
 192 |     }
 193 | 
 194 |     Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size,
 195 |                             vol.boot_sector->BytesPerSector);
 196 | 
 197 |     if (EFI_ERROR(Status)) {
 198 |         do_print_error("process_fixups", Status);
 199 |         bs->FreePool(file);
 200 |         return Status;
 201 |     }
 202 | 
 203 |     Status2 = loop_through_atts(vol, dir, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
 204 |         switch (att.TypeCode) {
 205 |             case ntfs_attribute::INDEX_ALLOCATION:
 206 |                 if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
 207 |                     Status = read_mappings(vol, att, &index_mappings);
 208 |                     if (EFI_ERROR(Status)) {
 209 |                         do_print_error("read_mappings", Status);
 210 |                         return false;
 211 |                     }
 212 |                 }
 213 |             break;
 214 | 
 215 |             case ntfs_attribute::INDEX_ROOT:
 216 |                 if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ir) {
 217 |                     Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ir);
 218 |                     if (EFI_ERROR(Status)) {
 219 |                         do_print_error("AllocatePool", Status);
 220 |                         return false;
 221 |                     }
 222 | 
 223 |                     memcpy(ir, res_data.data(), res_data.size());
 224 |                 }
 225 |             break;
 226 | 
 227 |             default:
 228 |                 break;
 229 |         }
 230 | 
 231 |         return true;
 232 |     });
 233 | 
 234 |     if (EFI_ERROR(Status2)) {
 235 |         do_print_error("loop_through_atts", Status2);
 236 |         Status = Status2;
 237 |     }
 238 | 
 239 |     if (EFI_ERROR(Status))
 240 |         goto end;
 241 | 
 242 |     if (!ir) {
 243 |         Status = EFI_NOT_FOUND;
 244 |         goto end;
 245 |     }
 246 | 
 247 |     ent = reinterpret_cast<const index_entry*>((uint8_t*)&ir->node_header + ir->node_header.first_entry);
 248 | 
 249 |     while (true) {
 250 |         string_view data((const char*)ent + sizeof(index_entry), ent->stream_length);
 251 | 
 252 |         if (data.size() >= offsetof(FILE_NAME, FileName)) {
 253 |             const auto& fn = *(FILE_NAME*)data.data();
 254 |             u16string_view ent_name(fn.FileName, fn.FileNameLength);
 255 | 
 256 |             auto cmp = cmp_filenames(vol.upcase, name, ent_name);
 257 | 
 258 |             if (cmp == 0) { // found
 259 |                 *inode = ent->file_reference.SegmentNumber;
 260 |                 Status = EFI_SUCCESS;
 261 |                 goto end;
 262 |             } else if (cmp == 1) { // skip to next
 263 |                 ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
 264 |                 continue;
 265 |             }
 266 | 
 267 |             if (cmp == -1 && !(ent->flags & INDEX_ENTRY_SUBNODE)) {
 268 |                 Status = EFI_NOT_FOUND;
 269 |                 goto end;
 270 |             }
 271 |         }
 272 | 
 273 |         if (ent->flags & INDEX_ENTRY_SUBNODE) { // if subnode, descend
 274 |             uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
 275 | 
 276 |             if (ir->bytes_per_index_record < vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster)
 277 |                 vcn *= vol.boot_sector->BytesPerSector;
 278 |             else
 279 |                 vcn *= (uint64_t)vol.boot_sector->BytesPerSector * (uint64_t)vol.boot_sector->SectorsPerCluster;
 280 | 
 281 |             if (!scratch) {
 282 |                 Status = bs->AllocatePool(EfiBootServicesData, ir->bytes_per_index_record,
 283 |                                          (void**)&scratch);
 284 |                 if (EFI_ERROR(Status)) {
 285 |                     do_print_error("AllocatePool", Status);
 286 |                     goto end;
 287 |                 }
 288 |             }
 289 | 
 290 |             Status = read_from_mappings(vol, &index_mappings, vcn, scratch, ir->bytes_per_index_record);
 291 |             if (EFI_ERROR(Status)) {
 292 |                 do_print_error("read_from_mappings", Status);
 293 |                 goto end;
 294 |             }
 295 | 
 296 |             auto rec = reinterpret_cast<index_record*>(scratch);
 297 | 
 298 |             if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) {
 299 |                 do_print("Signature was not INDX\n");
 300 |                 Status = EFI_INVALID_PARAMETER;
 301 |                 goto end;
 302 |             }
 303 | 
 304 |             Status = process_fixups(&rec->MultiSectorHeader, ir->bytes_per_index_record,
 305 |                                     vol.boot_sector->BytesPerSector);
 306 |             if (EFI_ERROR(Status)) {
 307 |                 do_print_error("process_fixups", Status);
 308 |                 goto end;
 309 |             }
 310 | 
 311 |             ent = reinterpret_cast<const index_entry*>((uint8_t*)&rec->header + rec->header.first_entry);
 312 | 
 313 |             continue;
 314 |         }
 315 | 
 316 |         if (ent->flags & INDEX_ENTRY_LAST) {
 317 |             Status = EFI_NOT_FOUND;
 318 |             goto end;
 319 |         }
 320 | 
 321 |         ent = reinterpret_cast<const index_entry*>((uint8_t*)ent + ent->entry_length);
 322 |     }
 323 | 
 324 | end:
 325 |     if (ir)
 326 |         bs->FreePool(ir);
 327 | 
 328 |     if (scratch)
 329 |         bs->FreePool(scratch);
 330 | 
 331 |     bs->FreePool(file);
 332 | 
 333 |     return Status;
 334 | }
 335 | 
 336 | static size_t count_path_parts(u16string_view v) {
 337 |     size_t num_parts = 0;
 338 | 
 339 |     while (!v.empty()) {
 340 |         num_parts++;
 341 | 
 342 |         if (auto bs = v.find(u'\\'); bs != u16string_view::npos)
 343 |             v = u16string_view(v.data() + bs + 1, v.size() - bs - 1);
 344 |         else
 345 |             break;
 346 |     }
 347 | 
 348 |     return num_parts;
 349 | }
 350 | 
 351 | static void extract_parts(u16string_view v, u16string_view*& p) {
 352 |     while (!v.empty()) {
 353 |         if (auto bs = v.find(u'\\'); bs != u16string_view::npos) {
 354 |             *p = u16string_view(v.data(), bs);
 355 |             p++;
 356 |             v = u16string_view(v.data() + bs + 1, v.size() - bs - 1);
 357 |         } else {
 358 |             *p = v;
 359 |             p++;
 360 |             break;
 361 |         }
 362 |     }
 363 | }
 364 | 
 365 | static EFI_STATUS normalize_path(u16string_view fn, u16string_view parent, char16_t*& name, size_t& name_len) {
 366 |     EFI_STATUS Status;
 367 |     bool from_root = false;
 368 |     size_t num_parts = 0;
 369 |     u16string_view* parts;
 370 |     bool first;
 371 | 
 372 |     if (fn.front() == '\\') {
 373 |         from_root = true;
 374 |         fn = u16string_view(fn.data() + 1, fn.size() - 1);
 375 |     }
 376 | 
 377 |     if (parent.empty())
 378 |         from_root = true;
 379 | 
 380 |     if (!from_root)
 381 |         num_parts = count_path_parts(parent);
 382 | 
 383 |     num_parts += count_path_parts(fn);
 384 | 
 385 |     if (num_parts == 0) {
 386 |         name = nullptr;
 387 |         name_len = 0;
 388 |         return EFI_SUCCESS;
 389 |     }
 390 | 
 391 |     Status = bs->AllocatePool(EfiBootServicesData, num_parts * sizeof(u16string_view), (void**)&parts);
 392 |     if (EFI_ERROR(Status)) {
 393 |         do_print_error("AllocatePool", Status);
 394 |         return Status;
 395 |     }
 396 | 
 397 |     {
 398 |         u16string_view* p = parts;
 399 | 
 400 |         if (!from_root)
 401 |             extract_parts(parent, p);
 402 | 
 403 |         extract_parts(fn, p);
 404 |     }
 405 | 
 406 |     for (size_t i = 0; i < num_parts; i++) {
 407 |         if (parts[i] == u".")
 408 |             parts[i] = u"";
 409 |         else if (parts[i] == u"..") {
 410 |             parts[i] = u"";
 411 | 
 412 |             if (i == 0) {
 413 |                 bs->FreePool(parts);
 414 |                 return EFI_INVALID_PARAMETER;
 415 |             }
 416 | 
 417 |             auto j = i - 1;
 418 |             while (true) {
 419 |                 if (!parts[j].empty()) {
 420 |                     parts[j] = u"";
 421 |                     break;
 422 |                 }
 423 | 
 424 |                 if (j == 0) {
 425 |                     bs->FreePool(parts);
 426 |                     return EFI_INVALID_PARAMETER;
 427 |                 }
 428 | 
 429 |                 j--;
 430 |             }
 431 |         }
 432 |     }
 433 | 
 434 |     name_len = 0;
 435 |     first = true;
 436 |     for (size_t i = 0; i < num_parts; i++) {
 437 |         if (parts[i].empty())
 438 |             continue;
 439 | 
 440 |         if (!first)
 441 |             name_len++;
 442 | 
 443 |         name_len += parts[i].size();
 444 |         first = false;
 445 |     }
 446 | 
 447 |     if (name_len == 0) {
 448 |         bs->FreePool(parts);
 449 |         name = nullptr;
 450 |         return EFI_SUCCESS;
 451 |     }
 452 | 
 453 |     Status = bs->AllocatePool(EfiBootServicesData, name_len * sizeof(char16_t), (void**)&name);
 454 |     if (EFI_ERROR(Status)) {
 455 |         do_print_error("AllocatePool", Status);
 456 |         bs->FreePool(parts);
 457 |         return Status;
 458 |     }
 459 | 
 460 |     {
 461 |         char16_t* n = name;
 462 | 
 463 |         first = true;
 464 |         for (size_t i = 0; i < num_parts; i++) {
 465 |             if (parts[i].empty())
 466 |                 continue;
 467 | 
 468 |             if (!first) {
 469 |                 *n = u'\\';
 470 |                 n++;
 471 |             }
 472 | 
 473 |             memcpy(n, parts[i].data(), parts[i].size() * sizeof(char16_t));
 474 |             n += parts[i].size();
 475 |             first = false;
 476 |         }
 477 |     }
 478 | 
 479 |     bs->FreePool(parts);
 480 | 
 481 |     return EFI_SUCCESS;
 482 | }
 483 | 
 484 | static EFI_STATUS EFIAPI file_open(struct _EFI_FILE_HANDLE* File, struct _EFI_FILE_HANDLE** NewHandle, CHAR16* FileName,
 485 |                                    UINT64 OpenMode, UINT64 Attributes) {
 486 |     EFI_STATUS Status;
 487 |     inode* file = _CR(File, inode, proto);
 488 |     uint64_t inode_num;
 489 |     inode* ino;
 490 |     char16_t* name;
 491 |     size_t name_len;
 492 | 
 493 |     UNUSED(Attributes);
 494 | 
 495 |     if (OpenMode & EFI_FILE_MODE_CREATE)
 496 |         return EFI_UNSUPPORTED;
 497 | 
 498 |     if (FileName[0] == L'\\' && FileName[1] == 0) {
 499 |         inode_num = NTFS_ROOT_DIR_INODE;
 500 |         name = nullptr;
 501 |         name_len = 0;
 502 |     } else if (FileName[0] == L'.' && FileName[1] == 0) {
 503 |         inode_num = file->ino;
 504 | 
 505 |         if (file->name) {
 506 |             Status = bs->AllocatePool(EfiBootServicesData, file->name_len * sizeof(char16_t), (void**)&name);
 507 |             if (EFI_ERROR(Status)) {
 508 |                 do_print_error("AllocatePool", Status);
 509 |                 return Status;
 510 |             }
 511 | 
 512 |             memcpy(name, file->name, file->name_len * sizeof(char16_t));
 513 |             name_len = file->name_len;
 514 |         } else {
 515 |             name = nullptr;
 516 |             name_len = 0;
 517 |         }
 518 |     } else {
 519 |         u16string_view fn((char16_t*)FileName);
 520 | 
 521 |         if (fn.empty())
 522 |             return EFI_NOT_FOUND;
 523 | 
 524 |         if (file->ino == NTFS_ROOT_DIR_INODE && fn == u"..")
 525 |             return EFI_INVALID_PARAMETER;
 526 | 
 527 |         Status = normalize_path(fn, u16string_view(file->name, file->name_len), name, name_len);
 528 |         if (EFI_ERROR(Status)) {
 529 |             do_print_error("normalize_path", Status);
 530 |             return Status;
 531 |         }
 532 | 
 533 |         fn = u16string_view(name, name_len);
 534 |         inode_num = NTFS_ROOT_DIR_INODE;
 535 | 
 536 |         if (!fn.empty()) {
 537 |             while (true) {
 538 |                 u16string_view part;
 539 | 
 540 |                 auto backslash = fn.find(u'\\');
 541 | 
 542 |                 if (backslash != u16string_view::npos)
 543 |                     part = u16string_view(fn.data(), backslash);
 544 |                 else
 545 |                     part = fn;
 546 | 
 547 |                 Status = find_file_in_dir(file->vol, inode_num, part, &inode_num);
 548 | 
 549 |                 if (Status == EFI_NOT_FOUND) {
 550 |                     if (name)
 551 |                         bs->FreePool(name);
 552 | 
 553 |                     return Status;
 554 |                 }
 555 | 
 556 |                 if (EFI_ERROR(Status)) {
 557 |                     if (name)
 558 |                         bs->FreePool(name);
 559 | 
 560 |                     do_print_error("find_file_in_dir", Status);
 561 |                     return Status;
 562 |                 }
 563 | 
 564 |                 if (backslash == u16string_view::npos)
 565 |                     break;
 566 | 
 567 |                 fn = u16string_view(fn.data() + backslash + 1, fn.size() - backslash - 1);
 568 |             }
 569 |         }
 570 |     }
 571 | 
 572 |     Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino);
 573 |     if (EFI_ERROR(Status)) {
 574 |         if (name)
 575 |             bs->FreePool(name);
 576 | 
 577 |         do_print_error("AllocatePool", Status);
 578 |         return Status;
 579 |     }
 580 | 
 581 |     memset(ino, 0, sizeof(inode));
 582 | 
 583 |     new (ino) inode(file->vol);
 584 | 
 585 |     populate_file_handle(&ino->proto);
 586 | 
 587 |     ino->ino = inode_num;
 588 |     ino->name = name;
 589 |     ino->name_len = name_len;
 590 | 
 591 |     *NewHandle = &ino->proto;
 592 | 
 593 |     return EFI_SUCCESS;
 594 | }
 595 | 
 596 | inode::~inode() {
 597 |     if (name)
 598 |         bs->FreePool(name);
 599 | 
 600 |     if (data)
 601 |         bs->FreePool(data);
 602 | 
 603 |     if (!inode_loaded)
 604 |         return;
 605 | 
 606 |     if (index_root)
 607 |         bs->FreePool(index_root);
 608 | 
 609 |     while (!IsListEmpty(&index_mappings)) {
 610 |         mapping* m = _CR(index_mappings.Flink, mapping, list_entry);
 611 |         RemoveEntryList(&m->list_entry);
 612 |         bs->FreePool(m);
 613 |     }
 614 | 
 615 |     while (!IsListEmpty(&levels)) {
 616 |         auto l = _CR(levels.Flink, btree_level, list_entry);
 617 |         RemoveEntryList(&l->list_entry);
 618 |         bs->FreePool(l);
 619 |     }
 620 | 
 621 |     while (!IsListEmpty(&data_mappings)) {
 622 |         mapping* m = _CR(data_mappings.Flink, mapping, list_entry);
 623 |         RemoveEntryList(&m->list_entry);
 624 |         bs->FreePool(m);
 625 |     }
 626 | }
 627 | 
 628 | static EFI_STATUS EFIAPI file_close(struct _EFI_FILE_HANDLE* File) {
 629 |     inode* ino = _CR(File, inode, proto);
 630 | 
 631 |     ino->inode::~inode();
 632 |     bs->FreePool(ino);
 633 | 
 634 |     return EFI_SUCCESS;
 635 | }
 636 | 
 637 | static EFI_STATUS EFIAPI file_delete(struct _EFI_FILE_HANDLE* File) {
 638 |     UNUSED(File);
 639 | 
 640 |     return EFI_UNSUPPORTED;
 641 | }
 642 | 
 643 | static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset, uint8_t* buf,
 644 |                                      uint64_t size) {
 645 |     EFI_STATUS Status;
 646 |     uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
 647 |     uint64_t vcn = offset / cluster_size;
 648 |     uint64_t last_vcn = sector_align(offset + size, cluster_size) / cluster_size;
 649 |     LIST_ENTRY* le;
 650 | 
 651 |     le = mappings->Flink;
 652 |     while (le != mappings) {
 653 |         mapping* m = _CR(le, mapping, list_entry);
 654 | 
 655 |         if (m->vcn < last_vcn && m->vcn + m->length > vcn) {
 656 |             uint64_t to_read, mapping_offset;
 657 | 
 658 |             mapping_offset = offset - (m->vcn * cluster_size);
 659 |             to_read = ((m->vcn + m->length) * cluster_size) - offset;
 660 | 
 661 |             if (to_read > size)
 662 |                 to_read = size;
 663 | 
 664 |             if (m->lcn == 0) // sparse
 665 |                 memset(buf, 0, to_read);
 666 |             else {
 667 |                 Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId,
 668 |                                                 ((m->lcn * cluster_size) + mapping_offset) / vol.block->Media->BlockSize,
 669 |                                                 to_read, buf);
 670 |                 if (EFI_ERROR(Status)) {
 671 |                     do_print_error("ReadBlocks", Status);
 672 |                     return Status;
 673 |                 }
 674 |             }
 675 | 
 676 |             if (to_read == size)
 677 |                 break;
 678 | 
 679 |             offset += to_read;
 680 |             buf += to_read;
 681 |             size -= to_read;
 682 |             vcn = offset / cluster_size;
 683 |         }
 684 | 
 685 |         le = le->Flink;
 686 |     }
 687 | 
 688 |     return EFI_SUCCESS;
 689 | }
 690 | 
 691 | static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, unsigned int sector_size) {
 692 |     uint64_t sectors;
 693 |     uint16_t* seq;
 694 |     uint8_t* ptr;
 695 | 
 696 |     sectors = length / sector_size;
 697 | 
 698 |     if (header->UpdateSequenceArraySize < sectors + 1)
 699 |         return EFI_INVALID_PARAMETER;
 700 | 
 701 |     seq = (uint16_t*)((uint8_t*)header + header->UpdateSequenceArrayOffset);
 702 | 
 703 |     ptr = (uint8_t*)header + sector_size - sizeof(uint16_t);
 704 | 
 705 |     for (unsigned int i = 0; i < sectors; i++) {
 706 |         if (*(uint16_t*)ptr != seq[0])
 707 |             return EFI_INVALID_PARAMETER;
 708 | 
 709 |         *(uint16_t*)ptr = seq[i + 1];
 710 | 
 711 |         ptr += sector_size;
 712 |     }
 713 | 
 714 |     return EFI_SUCCESS;
 715 | }
 716 | 
// Advances the directory enumeration of ino by one entry and hands that
// entry's key bytes to func.
//
// The enumeration position is the stack of btree_level objects on ino.levels;
// the tail of the list is the level currently being walked. func receives the
// bytes that follow the index_entry header (stream_length of them) and returns
// true to consume the entry, or false to leave the position on it.
//
// Returns EFI_NOT_FOUND when the level stack is already empty on entry,
// EFI_INVALID_PARAMETER for a bad index record, an allocation/read error, or
// EFI_SUCCESS - which is also what is returned when the walk runs off the end
// of the tree during this call without func being invoked.
//
// NOTE(review): if func returns false for an entry that carries
// INDEX_ENTRY_SUBNODE (reached after popping back up from its child), l->ent
// stays on that entry and the next call appears to descend into the same child
// again - confirm.
static EFI_STATUS next_index_item(inode& ino, const invocable<string_view> auto& func) {
    EFI_STATUS Status;
    const index_root& ir = *ino.index_root;

    if (IsListEmpty(&ino.levels))
        return EFI_NOT_FOUND;

    auto l = _CR(ino.levels.Blink, btree_level, list_entry);

    do {
        // An entry with a child is visited child-first: read the child index
        // record into a new level and push it onto the stack.
        if (l->ent->flags & INDEX_ENTRY_SUBNODE) {
            btree_level* l2;
            // The child's VCN is stored in the last 8 bytes of the entry.
            uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)l->ent + l->ent->entry_length - sizeof(uint64_t)))->SegmentNumber;

            // Convert the VCN into a byte offset: it counts sectors when index
            // records are smaller than a cluster, clusters otherwise.
            if (ir.bytes_per_index_record < ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster)
                vcn *= ino.vol.boot_sector->BytesPerSector;
            else
                vcn *= (uint64_t)ino.vol.boot_sector->BytesPerSector * (uint64_t)ino.vol.boot_sector->SectorsPerCluster;

            Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data) + ir.bytes_per_index_record,
                                      (void**)&l2);
            if (EFI_ERROR(Status)) {
                do_print_error("AllocatePool", Status);
                return Status;
            }

            Status = read_from_mappings(ino.vol, &ino.index_mappings, vcn, l2->data, ir.bytes_per_index_record);
            if (EFI_ERROR(Status)) {
                bs->FreePool(l2);
                do_print_error("read_from_mappings", Status);
                return Status;
            }

            auto rec = reinterpret_cast<index_record*>(l2->data);

            if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) {
                do_print("Signature was not INDX\n");
                bs->FreePool(l2);
                return EFI_INVALID_PARAMETER;
            }

            Status = process_fixups(&rec->MultiSectorHeader, ir.bytes_per_index_record,
                                    ino.vol.boot_sector->BytesPerSector);
            if (EFI_ERROR(Status)) {
                bs->FreePool(l2);
                do_print_error("process_fixups", Status);
                // the specific fixup status is logged above but not propagated
                return EFI_INVALID_PARAMETER;
            }

            InsertTailList(&ino.levels, &l2->list_entry);
            l = l2;
            l->ent = reinterpret_cast<const index_entry*>((uint8_t*)&rec->header + rec->header.first_entry);

            continue;
        }

        // Pop every level whose current entry is its end marker, resuming at
        // the parent entry the walk descended from.
        while (l->ent->flags & INDEX_ENTRY_LAST) {
            RemoveEntryList(&l->list_entry);
            bs->FreePool(l);

            if (IsListEmpty(&ino.levels))
                break;

            l = _CR(ino.levels.Blink, btree_level, list_entry);
        }

        // Popped the last level: the enumeration is finished.
        if (IsListEmpty(&ino.levels))
            break;

        if (!(l->ent->flags & INDEX_ENTRY_LAST)) {
            // Only step past the entry if the callback consumed it.
            if (func(string_view((const char*)l->ent + sizeof(index_entry), l->ent->stream_length)))
                l->ent = reinterpret_cast<const index_entry*>((uint8_t*)l->ent + l->ent->entry_length);

            return EFI_SUCCESS;
        }
    } while (!IsListEmpty(&ino.levels));

    return EFI_SUCCESS;
}
 796 | 
 797 | static void win_time_to_efi(int64_t win, EFI_TIME* efi) {
 798 |     int64_t secs, time, days;
 799 | 
 800 |     secs = win / 10000000;
 801 |     time = secs % 86400;
 802 |     days = secs / 86400;
 803 | 
 804 |     unsigned int jd = 2305814 + days; // Julian date
 805 | 
 806 |     unsigned int f = jd + 1401 + (((((4 * jd) + 274277) / 146097) * 3) / 4) - 38;
 807 |     unsigned int e = (4 * f) + 3;
 808 |     unsigned int g = (e % 1461) / 4;
 809 |     unsigned int h = (5 * g) + 2;
 810 | 
 811 |     efi->Month = (((h / 153) + 2) % 12) + 1;
 812 |     efi->Year = (e / 1461) - 4716 + ((14 - efi->Month) / 12);
 813 |     efi->Day = ((h % 153) / 5) + 1;
 814 |     efi->Hour = time / 3600;
 815 |     efi->Minute = (time % 3600) / 60;
 816 |     efi->Second = time % 60;
 817 |     efi->Pad1 = 0;
 818 |     efi->Nanosecond = (win % 10000000) * 100;
 819 |     efi->TimeZone = 0;
 820 |     efi->Daylight = 0;
 821 |     efi->Pad2 = 0;
 822 | }
 823 | 
 824 | static uint64_t win_attributes_to_efi(uint32_t attr, bool is_dir) {
 825 |     uint64_t ret = 0;
 826 | 
 827 |     if (is_dir)
 828 |         ret |= EFI_FILE_DIRECTORY;
 829 | 
 830 |     if (attr & FILE_ATTRIBUTE_READONLY)
 831 |         ret |= EFI_FILE_READ_ONLY;
 832 | 
 833 |     if (attr & FILE_ATTRIBUTE_HIDDEN)
 834 |         ret |= EFI_FILE_HIDDEN;
 835 | 
 836 |     if (attr & FILE_ATTRIBUTE_SYSTEM)
 837 |         ret |= EFI_FILE_SYSTEM;
 838 | 
 839 |     if (attr & EFI_FILE_ARCHIVE)
 840 |         ret |= EFI_FILE_ARCHIVE;
 841 | 
 842 |     return ret;
 843 | }
 844 | 
 845 | static EFI_STATUS read_dir(inode& ino, UINTN* BufferSize, VOID* Buffer) {
 846 |     EFI_STATUS Status;
 847 |     bool overflow = false, again;
 848 | 
 849 |     if (!ino.inode_loaded) {
 850 |         Status = load_inode(ino);
 851 |         if (EFI_ERROR(Status)) {
 852 |             do_print_error("load_inode", Status);
 853 |             return Status;
 854 |         }
 855 |     }
 856 | 
 857 |     if (ino.position == 0 && IsListEmpty(&ino.levels)) {
 858 |         btree_level* l;
 859 | 
 860 |         Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data), (void**)&l);
 861 |         if (EFI_ERROR(Status)) {
 862 |             do_print_error("AllocatePool", Status);
 863 |             return Status;
 864 |         }
 865 | 
 866 |         l->ent = reinterpret_cast<const index_entry*>((uint8_t*)&ino.index_root->node_header + ino.index_root->node_header.first_entry);
 867 |         InsertTailList(&ino.levels, &l->list_entry);
 868 |     }
 869 | 
 870 |     // FIXME - ignore special files in root
 871 | 
 872 |     do {
 873 |         again = false;
 874 | 
 875 |         Status = next_index_item(ino, [&](string_view data) -> bool {
 876 |             size_t size;
 877 | 
 878 |             const auto& fn = *reinterpret_cast<const FILE_NAME*>(data.data());
 879 | 
 880 |             if (fn.Namespace == file_name_type::DOS) { // ignore DOS filenames
 881 |                 again = true;
 882 |                 return true;
 883 |             }
 884 | 
 885 |             size = offsetof(EFI_FILE_INFO, FileName[0]) + ((fn.FileNameLength + 1) * sizeof(char16_t));
 886 | 
 887 |             if (*BufferSize < size) {
 888 |                 *BufferSize = size;
 889 |                 overflow = true;
 890 |                 return false;
 891 |             }
 892 | 
 893 |             auto& info = *(EFI_FILE_INFO*)Buffer;
 894 | 
 895 |             info.Size = size;
 896 |             info.FileSize = fn.EndOfFile;
 897 |             info.PhysicalSize = fn.AllocationSize;
 898 |             win_time_to_efi(fn.CreationTime, &info.CreateTime);
 899 |             win_time_to_efi(fn.LastAccessTime, &info.LastAccessTime);
 900 |             win_time_to_efi(fn.LastWriteTime, &info.ModificationTime);
 901 |             info.Attribute = win_attributes_to_efi(fn.FileAttributes, fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT);
 902 | 
 903 |             memcpy(info.FileName, fn.FileName, fn.FileNameLength * sizeof(char16_t));
 904 |             info.FileName[fn.FileNameLength] = 0;
 905 | 
 906 |             *BufferSize = size;
 907 | 
 908 |             ino.position++;
 909 | 
 910 |             return true;
 911 |         });
 912 |     } while (again);
 913 | 
 914 |     if (overflow)
 915 |         return EFI_BUFFER_TOO_SMALL;
 916 | 
 917 |     if (Status == EFI_NOT_FOUND) { // last one
 918 |         *BufferSize = 0;
 919 |         return EFI_SUCCESS;
 920 |     }
 921 | 
 922 |     if (EFI_ERROR(Status)) {
 923 |         do_print_error("next_index_item", Status);
 924 |         return Status;
 925 |     }
 926 | 
 927 |     return EFI_SUCCESS;
 928 | }
 929 | 
 930 | static EFI_STATUS read_nonresident_attribute(volume& vol, const ATTRIBUTE_RECORD_HEADER& att, span<uint8_t> data) {
 931 |     EFI_STATUS Status;
 932 |     LIST_ENTRY mappings;
 933 | 
 934 |     InitializeListHead(&mappings);
 935 | 
 936 |     Status = read_mappings(vol, att, &mappings);
 937 |     if (EFI_ERROR(Status)) {
 938 |         do_print_error("read_mappings", Status);
 939 |         return Status;
 940 |     }
 941 | 
 942 |     Status = read_from_mappings(vol, &mappings, 0, data.data(), data.size());
 943 | 
 944 |     while (!IsListEmpty(&mappings)) {
 945 |         mapping* m = _CR(mappings.Flink, mapping, list_entry);
 946 |         RemoveEntryList(&m->list_entry);
 947 |         bs->FreePool(m);
 948 |     }
 949 | 
 950 |     if (EFI_ERROR(Status))
 951 |         do_print_error("read_from_mappings", Status);
 952 | 
 953 |     return Status;
 954 | }
 955 | 
 956 | static EFI_STATUS do_xpress_decompress(inode& ino, span<const uint8_t> compdata, uint32_t chunk_size) {
 957 |     EFI_STATUS Status;
 958 |     xpress_decompressor ctx;
 959 |     uint64_t size = ino.size;
 960 |     uint64_t num_chunks = (size + chunk_size - 1) / chunk_size;
 961 |     auto offsets = (uint32_t*)compdata.data();
 962 | 
 963 |     if (ino.data) {
 964 |         bs->FreePool(ino.data);
 965 |         ino.data = nullptr;
 966 |     }
 967 | 
 968 |     Status = bs->AllocatePool(EfiBootServicesData, ino.size, (void**)&ino.data);
 969 |     if (EFI_ERROR(Status)) {
 970 |         do_print_error("AllocatePool", Status);
 971 |         return Status;
 972 |     }
 973 | 
 974 |     auto ret = span(ino.data, ino.size);
 975 | 
 976 |     auto data = span(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
 977 |                      (uint32_t)(compdata.size() - ((num_chunks - 1) * sizeof(uint32_t))));
 978 | 
 979 |     for (uint64_t i = 0; i < num_chunks; i++) {
 980 |         uint64_t off = i == 0 ? 0 : offsets[i - 1];
 981 |         uint32_t complen;
 982 | 
 983 |         if (i == 0)
 984 |             complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.size();
 985 |         else if (i == num_chunks - 1)
 986 |             complen = (uint32_t)data.size() - offsets[i - 1];
 987 |         else
 988 |             complen = offsets[i] - offsets[i - 1];
 989 | 
 990 |         if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) {
 991 |             // stored uncompressed
 992 |             memcpy(ret.data() + (i * chunk_size), data.data() + off, complen);
 993 |         } else {
 994 |             auto err = xpress_decompress(&ctx, data.data() + off, complen, ret.data() + (i * chunk_size),
 995 |                                          (size_t)(i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size));
 996 | 
 997 |             if (err != 0) {
 998 |                 do_print("xpress_decompress failed\n");
 999 |                 bs->FreePool(ino.data);
1000 |                 ino.data = nullptr;
1001 |                 return EFI_INVALID_PARAMETER;
1002 |             }
1003 |         }
1004 |     }
1005 | 
1006 |     return EFI_SUCCESS;
1007 | }
1008 | 
1009 | static EFI_STATUS handle_wof(inode& ino, span<const uint8_t> rp, span<const uint8_t> wof) {
1010 |     if (rp.size() < offsetof(reparse_point_header, DataBuffer)) {
1011 |         do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n");
1012 |         return EFI_INVALID_PARAMETER;
1013 |     }
1014 | 
1015 |     const auto& rph = *(reparse_point_header*)rp.data();
1016 | 
1017 |     if (rp.size() < offsetof(reparse_point_header, DataBuffer) + rph.ReparseDataLength) {
1018 |         do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n");
1019 |         return EFI_INVALID_PARAMETER;
1020 |     }
1021 | 
1022 |     if (rph.ReparseDataLength < sizeof(wof_external_info)) {
1023 |         do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n");
1024 |         return EFI_INVALID_PARAMETER;
1025 |     }
1026 | 
1027 |     const auto& wofei = *(wof_external_info*)rph.DataBuffer;
1028 | 
1029 |     if (wofei.Version != WOF_CURRENT_VERSION) {
1030 |         do_print("Unsupported WOF version\n");
1031 |         return EFI_INVALID_PARAMETER;
1032 |     }
1033 | 
1034 |     if (wofei.Provider == WOF_PROVIDER_WIM) {
1035 |         do_print("Unsupported WOF provider WOF_PROVIDER_WIM\n");
1036 |         return EFI_INVALID_PARAMETER;
1037 |     } else if (wofei.Provider != WOF_PROVIDER_FILE) {
1038 |         do_print("Unsupported WOF provider\n");
1039 |         return EFI_INVALID_PARAMETER;
1040 |     }
1041 | 
1042 |     if (rph.ReparseDataLength < sizeof(wof_external_info) + sizeof(file_provider_external_info_v0)) {
1043 |         do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n");
1044 |         return EFI_INVALID_PARAMETER;
1045 |     }
1046 | 
1047 |     const auto& fpei = *(file_provider_external_info_v0*)((uint8_t*)&wofei + sizeof(wofei));
1048 | 
1049 |     if (fpei.Version != FILE_PROVIDER_CURRENT_VERSION) {
1050 |         do_print("Unsupported FILE_PROVIDER_EXTERNAL_INFO version\n");
1051 |         return EFI_INVALID_PARAMETER;
1052 |     }
1053 | 
1054 |     switch (fpei.Algorithm) {
1055 |         case FILE_PROVIDER_COMPRESSION_XPRESS4K:
1056 |             return do_xpress_decompress(ino, wof, 4096);
1057 | 
1058 |         case FILE_PROVIDER_COMPRESSION_LZX:
1059 |             do_print("FIXME - FILE_PROVIDER_COMPRESSION_LZX\n");
1060 |             return EFI_INVALID_PARAMETER;
1061 | 
1062 |         case FILE_PROVIDER_COMPRESSION_XPRESS8K:
1063 |             return do_xpress_decompress(ino, wof, 8192);
1064 | 
1065 |         case FILE_PROVIDER_COMPRESSION_XPRESS16K:
1066 |             return do_xpress_decompress(ino, wof, 16384);
1067 | 
1068 |         default:
1069 |             do_print("Unrecognized WIM compression algorithm\n");
1070 |             return EFI_INVALID_PARAMETER;
1071 |     }
1072 | }
1073 | 
1074 | static EFI_STATUS read_file(inode& ino, UINTN* BufferSize, VOID* Buffer) {
1075 |     EFI_STATUS Status, Status2;
1076 |     uint64_t start, end;
1077 | 
1078 |     if (ino.position >= ino.size || *BufferSize == 0) {
1079 |         *BufferSize = 0;
1080 |         return EFI_SUCCESS;
1081 |     }
1082 | 
1083 |     if (ino.position >= ino.vdl) {
1084 |         UINTN to_read = *BufferSize;
1085 | 
1086 |         if (to_read > ino.size - ino.position)
1087 |             to_read = ino.size - ino.position;
1088 | 
1089 |         memset(Buffer, 0, to_read);
1090 | 
1091 |         *BufferSize = to_read;
1092 | 
1093 |         return EFI_SUCCESS;
1094 |     }
1095 | 
1096 |     if (!ino.data_loaded) {
1097 |         FILE_RECORD_SEGMENT_HEADER* file;
1098 |         uint8_t* wof_data = nullptr;
1099 |         size_t wof_len = 0;
1100 |         uint8_t* rp_data = nullptr;
1101 |         size_t rp_len = 0;
1102 | 
1103 |         Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file);
1104 |         if (EFI_ERROR(Status)) {
1105 |             do_print_error("AllocatePool", Status);
1106 |             return Status;
1107 |         }
1108 | 
1109 |         Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size,
1110 |                                     (uint8_t*)file, ino.vol.file_record_size);
1111 |         if (EFI_ERROR(Status)) {
1112 |             do_print_error("read_from_mappings", Status);
1113 |             bs->FreePool(file);
1114 |             return Status;
1115 |         }
1116 | 
1117 |         if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1118 |             do_print("Signature was not FILE\n");
1119 |             bs->FreePool(file);
1120 |             return EFI_INVALID_PARAMETER;
1121 |         }
1122 | 
1123 |         Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size,
1124 |                                 ino.vol.boot_sector->BytesPerSector);
1125 | 
1126 |         if (EFI_ERROR(Status)) {
1127 |             do_print_error("process_fixups", Status);
1128 |             bs->FreePool(file);
1129 |             return Status;
1130 |         }
1131 | 
1132 |         Status = EFI_SUCCESS;
1133 | 
1134 |         Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view data, u16string_view att_name) -> bool {
1135 |             switch (att.TypeCode) {
1136 |                 case ntfs_attribute::DATA:
1137 |                     if (att_name.empty()) {
1138 |                         switch (att.FormCode) {
1139 |                             case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM:
1140 |                                 Status = read_mappings(ino.vol, att, &ino.data_mappings);
1141 |                                 if (EFI_ERROR(Status))
1142 |                                     do_print_error("read_mappings", Status);
1143 |                                 break;
1144 | 
1145 |                             case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1146 |                                 Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&ino.data);
1147 |                                 if (EFI_ERROR(Status)) {
1148 |                                     do_print_error("AllocatePool", Status);
1149 |                                     break;
1150 |                                 }
1151 | 
1152 |                                 memcpy(ino.data, data.data(), data.size());
1153 |                                 break;
1154 |                         }
1155 |                     } else if (att_name == u"WofCompressedData") {
1156 |                         switch (att.FormCode) {
1157 |                             case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1158 |                                 uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster;
1159 | 
1160 |                                 wof_len = att.Form.Nonresident.FileSize;
1161 | 
1162 |                                 if (wof_len == 0)
1163 |                                     break;
1164 | 
1165 |                                 Status = bs->AllocatePool(EfiBootServicesData, sector_align(wof_len, cluster_size), (void**)&wof_data);
1166 |                                 if (EFI_ERROR(Status)) {
1167 |                                     do_print_error("AllocatePool", Status);
1168 |                                     break;
1169 |                                 }
1170 | 
1171 |                                 Status = read_nonresident_attribute(ino.vol, att, span(wof_data, sector_align(wof_len, cluster_size)));
1172 |                                 if (EFI_ERROR(Status)) {
1173 |                                     do_print_error("read_nonresident_attribute", Status);
1174 |                                     bs->FreePool(wof_data);
1175 |                                     wof_data = nullptr;
1176 |                                     break;
1177 |                                 }
1178 | 
1179 |                                 break;
1180 |                             }
1181 | 
1182 |                             case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1183 |                                 Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&wof_data);
1184 |                                 if (EFI_ERROR(Status)) {
1185 |                                     do_print_error("AllocatePool", Status);
1186 |                                     break;
1187 |                                 }
1188 | 
1189 |                                 memcpy(wof_data, data.data(), data.size());
1190 |                                 wof_len = data.size();
1191 | 
1192 |                                 break;
1193 |                         }
1194 |                     }
1195 | 
1196 |                     if (EFI_ERROR(Status))
1197 |                         return false;
1198 |                 break;
1199 | 
1200 |                 case ntfs_attribute::REPARSE_POINT:
1201 |                     if (att_name.empty()) {
1202 |                         switch (att.FormCode) {
1203 |                             case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1204 |                                 uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster;
1205 | 
1206 |                                 rp_len = att.Form.Nonresident.FileSize;
1207 | 
1208 |                                 if (rp_len == 0)
1209 |                                     break;
1210 | 
1211 |                                 Status = bs->AllocatePool(EfiBootServicesData, sector_align(rp_len, cluster_size), (void**)&rp_data);
1212 |                                 if (EFI_ERROR(Status)) {
1213 |                                     do_print_error("AllocatePool", Status);
1214 |                                     break;
1215 |                                 }
1216 | 
1217 |                                 Status = read_nonresident_attribute(ino.vol, att, span(rp_data, sector_align(rp_len, cluster_size)));
1218 |                                 if (EFI_ERROR(Status)) {
1219 |                                     do_print_error("read_nonresident_attribute", Status);
1220 |                                     bs->FreePool(rp_data);
1221 |                                     rp_data = nullptr;
1222 |                                     break;
1223 |                                 }
1224 | 
1225 |                                 break;
1226 |                             }
1227 | 
1228 |                             case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1229 |                                 Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&rp_data);
1230 |                                 if (EFI_ERROR(Status)) {
1231 |                                     do_print_error("AllocatePool", Status);
1232 |                                     break;
1233 |                                 }
1234 | 
1235 |                                 memcpy(rp_data, data.data(), data.size());
1236 |                                 rp_len = data.size();
1237 | 
1238 |                                 break;
1239 |                         }
1240 |                     }
1241 |                     break;
1242 | 
1243 |                 default:
1244 |                     break;
1245 |             }
1246 | 
1247 |             return true;
1248 |         });
1249 | 
1250 |         bs->FreePool(file);
1251 | 
1252 |         if (rp_data) {
1253 |             if (rp_len > sizeof(uint32_t) && *(uint32_t*)rp_data == IO_REPARSE_TAG_WOF) {
1254 |                 Status = handle_wof(ino, span(rp_data, rp_len), span(wof_data, wof_len));
1255 |                 if (EFI_ERROR(Status)) {
1256 |                     do_print_error("handle_wof", Status);
1257 |                     bs->FreePool(rp_data);
1258 | 
1259 |                     if (wof_data)
1260 |                         bs->FreePool(wof_data);
1261 | 
1262 |                     return Status;
1263 |                 }
1264 |             }
1265 | 
1266 |             bs->FreePool(rp_data);
1267 |         }
1268 | 
1269 |         if (wof_data)
1270 |             bs->FreePool(wof_data);
1271 | 
1272 |         if (EFI_ERROR(Status2)) {
1273 |             do_print_error("loop_through_atts", Status2);
1274 |             return Status2;
1275 |         }
1276 | 
1277 |         if (EFI_ERROR(Status))
1278 |             return Status;
1279 | 
1280 |         ino.data_loaded = true;
1281 |     }
1282 | 
1283 |     start = ino.position;
1284 |     end = ino.position + *BufferSize;
1285 | 
1286 |     if (end > ino.size)
1287 |         end = ino.size;
1288 | 
1289 |     if (ino.data)
1290 |         memcpy(Buffer, ino.data + start, end - start);
1291 |     else {
1292 |         uint64_t start_aligned, valid_end, end_aligned;
1293 |         uint8_t* tmp = nullptr;
1294 | 
1295 |         valid_end = end;
1296 | 
1297 |         if (valid_end > ino.vdl)
1298 |             valid_end = ino.vdl;
1299 | 
1300 |         start_aligned = start & ~(ino.vol.boot_sector->BytesPerSector - 1);
1301 |         end_aligned = sector_align(valid_end, ino.vol.boot_sector->BytesPerSector);
1302 | 
1303 |         if (start_aligned != start || end_aligned != valid_end) {
1304 |             Status = bs->AllocatePool(EfiBootServicesData, end_aligned - start_aligned, (void**)&tmp);
1305 |             if (EFI_ERROR(Status)) {
1306 |                 do_print_error("AllocatePool", Status);
1307 |                 return Status;
1308 |             }
1309 |         }
1310 | 
1311 |         // FIXME - LZNT1 compressed data
1312 | 
1313 |         Status = read_from_mappings(ino.vol, &ino.data_mappings, start_aligned,
1314 |                                     tmp ? tmp : (uint8_t*)Buffer, end_aligned - start_aligned);
1315 |         if (EFI_ERROR(Status)) {
1316 |             do_print_error("read_from_mappings", Status);
1317 | 
1318 |             if (tmp)
1319 |                 bs->FreePool(tmp);
1320 | 
1321 |             return Status;
1322 |         }
1323 | 
1324 |         if (tmp) {
1325 |             memcpy(Buffer, tmp + start - start_aligned, valid_end - start);
1326 |             bs->FreePool(tmp);
1327 |         }
1328 | 
1329 |         if (valid_end < end)
1330 |             memset((uint8_t*)Buffer + valid_end - start, 0, end - valid_end);
1331 |     }
1332 | 
1333 |     ino.position = end;
1334 |     *BufferSize = end - start;
1335 | 
1336 |     return EFI_SUCCESS;
1337 | }
1338 | 
1339 | static EFI_STATUS EFIAPI file_read(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) {
1340 |     EFI_STATUS Status;
1341 |     inode* ino = _CR(File, inode, proto);
1342 | 
1343 |     if (!ino->inode_loaded) {
1344 |         Status = load_inode(*ino);
1345 |         if (EFI_ERROR(Status)) {
1346 |             do_print_error("load_inode", Status);
1347 |             return Status;
1348 |         }
1349 |     }
1350 | 
1351 |     if (ino->is_dir)
1352 |         return read_dir(*ino, BufferSize, Buffer);
1353 |     else
1354 |         return read_file(*ino, BufferSize, Buffer);
1355 | }
1356 | 
1357 | static EFI_STATUS EFIAPI file_write(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) {
1358 |     UNUSED(File);
1359 |     UNUSED(BufferSize);
1360 |     UNUSED(Buffer);
1361 | 
1362 |     return EFI_UNSUPPORTED;
1363 | }
1364 | 
1365 | static EFI_STATUS EFIAPI file_set_position(struct _EFI_FILE_HANDLE* File, UINT64 Position) {
1366 |     EFI_STATUS Status;
1367 |     inode* ino = _CR(File, inode, proto);
1368 | 
1369 |     if (!ino->inode_loaded) {
1370 |         Status = load_inode(*ino);
1371 |         if (EFI_ERROR(Status)) {
1372 |             do_print_error("load_inode", Status);
1373 |             return Status;
1374 |         }
1375 |     }
1376 | 
1377 |     if (ino->is_dir) {
1378 |         if (Position != 0)
1379 |             return EFI_UNSUPPORTED;
1380 | 
1381 |         ino->position = 0;
1382 | 
1383 |         while (!IsListEmpty(&ino->levels)) {
1384 |             auto l = _CR(ino->levels.Flink, btree_level, list_entry);
1385 |             RemoveEntryList(&l->list_entry);
1386 |             bs->FreePool(l);
1387 |         }
1388 |     } else {
1389 |         if (Position == 0xffffffffffffffff)
1390 |             ino->position = ino->size;
1391 |         else
1392 |             ino->position = Position;
1393 |     }
1394 | 
1395 |     return EFI_SUCCESS;
1396 | }
1397 | 
1398 | static EFI_STATUS EFIAPI file_get_position(struct _EFI_FILE_HANDLE* File, UINT64* Position) {
1399 |     inode* ino = _CR(File, inode, proto);
1400 | 
1401 |     if (ino->is_dir)
1402 |         return EFI_UNSUPPORTED;
1403 | 
1404 |     *Position = ino->position;
1405 | 
1406 |     return EFI_SUCCESS;
1407 | }
1408 | 
1409 | static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record,
1410 |                                     invocable<const ATTRIBUTE_RECORD_HEADER&, string_view, u16string_view> auto func) {
1411 |     EFI_STATUS Status;
1412 |     auto att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
1413 |     size_t offset = file_record->FirstAttributeOffset;
1414 |     uint8_t* attlist = nullptr;
1415 |     size_t attlist_size;
1416 | 
1417 |     while (true) {
1418 |         if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1419 |             break;
1420 | 
1421 |         if (att->TypeCode == ntfs_attribute::ATTRIBUTE_LIST) {
1422 |             switch (att->FormCode) {
1423 |                 case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1424 |                     uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
1425 |                     LIST_ENTRY mappings;
1426 | 
1427 |                     if (att->Form.Nonresident.FileSize == 0)
1428 |                         break;
1429 | 
1430 |                     attlist_size = att->Form.Nonresident.FileSize;
1431 | 
1432 |                     Status = bs->AllocatePool(EfiBootServicesData, sector_align(attlist_size, cluster_size), (void**)&attlist);
1433 |                     if (EFI_ERROR(Status)) {
1434 |                         do_print_error("AllocatePool", Status);
1435 |                         return Status;
1436 |                     }
1437 | 
1438 |                     InitializeListHead(&mappings);
1439 | 
1440 |                     Status = read_mappings(vol, *att, &mappings);
1441 |                     if (EFI_ERROR(Status)) {
1442 |                         bs->FreePool(attlist);
1443 |                         do_print_error("read_mappings", Status);
1444 |                         return Status;
1445 |                     }
1446 | 
1447 |                     Status = read_from_mappings(vol, &mappings, 0, attlist, sector_align(attlist_size, cluster_size));
1448 |                     if (EFI_ERROR(Status)) {
1449 |                         while (!IsListEmpty(&mappings)) {
1450 |                             mapping* m = _CR(mappings.Flink, mapping, list_entry);
1451 |                             RemoveEntryList(&m->list_entry);
1452 |                             bs->FreePool(m);
1453 |                         }
1454 | 
1455 |                         bs->FreePool(attlist);
1456 |                         do_print_error("read_from_mappings", Status);
1457 |                         return Status;
1458 |                     }
1459 | 
1460 |                     while (!IsListEmpty(&mappings)) {
1461 |                         mapping* m = _CR(mappings.Flink, mapping, list_entry);
1462 |                         RemoveEntryList(&m->list_entry);
1463 |                         bs->FreePool(m);
1464 |                     }
1465 | 
1466 |                     break;
1467 |                 }
1468 | 
1469 |                 case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1470 |                     if (att->Form.Resident.ValueLength == 0)
1471 |                         break;
1472 | 
1473 |                     attlist_size = att->Form.Resident.ValueLength;
1474 | 
1475 |                     Status = bs->AllocatePool(EfiBootServicesData, attlist_size, (void**)&attlist);
1476 |                     if (EFI_ERROR(Status)) {
1477 |                         do_print_error("AllocatePool", Status);
1478 |                         return Status;
1479 |                     }
1480 | 
1481 |                     memcpy(attlist, (uint8_t*)att + att->Form.Resident.ValueOffset, attlist_size);
1482 |                 break;
1483 |             }
1484 | 
1485 |             break;
1486 |         }
1487 | 
1488 |         offset += att->RecordLength;
1489 |         att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
1490 |     }
1491 | 
1492 |     if (attlist) {
1493 |         {
1494 |             auto ent = (const attribute_list_entry*)attlist;
1495 |             size_t left = attlist_size;
1496 | 
1497 |             while (true) {
1498 |                 uint64_t file_reference = ent->file_reference.SegmentNumber;
1499 | 
1500 |                 if (file_reference == inode) { // contained elsewhere in this inode
1501 |                     att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
1502 |                     offset = file_record->FirstAttributeOffset;
1503 | 
1504 |                     while (true) {
1505 |                         if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1506 |                             break;
1507 | 
1508 |                         if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
1509 |                             if (att->NameLength == 0 || !memcmp((uint8_t*)file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
1510 |                                 string_view data;
1511 |                                 u16string_view name;
1512 | 
1513 |                                 if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1514 |                                     data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1515 | 
1516 |                                 if (att->NameLength != 0)
1517 |                                     name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
1518 | 
1519 |                                 if (!func(*att, data, name)) {
1520 |                                     bs->FreePool(attlist);
1521 |                                     return EFI_SUCCESS;
1522 |                                 }
1523 | 
1524 |                                 break;
1525 |                             }
1526 |                         }
1527 | 
1528 |                         offset += att->RecordLength;
1529 |                         att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
1530 |                     }
1531 |                 }
1532 | 
1533 |                 if (left <= ent->record_length)
1534 |                     break;
1535 | 
1536 |                 left -= ent->record_length;
1537 |                 ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1538 |             }
1539 |         }
1540 | 
1541 |         while (true) {
1542 |             auto ent = (attribute_list_entry*)attlist;
1543 |             size_t left = attlist_size;
1544 |             optional<uint64_t> ref;
1545 |             FILE_RECORD_SEGMENT_HEADER* file2 = nullptr;
1546 | 
1547 |             while (true) {
1548 |                 uint64_t file_reference = ent->file_reference.SegmentNumber;
1549 | 
1550 |                 // skip entries already handled
1551 |                 if (file_reference == inode) {
1552 |                     if (left <= ent->record_length)
1553 |                         break;
1554 | 
1555 |                     left -= ent->record_length;
1556 |                     ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1557 |                     continue;
1558 |                 }
1559 | 
1560 |                 if (ref.has_value() && *ref != file_reference) {
1561 |                     if (left <= ent->record_length)
1562 |                         break;
1563 | 
1564 |                     left -= ent->record_length;
1565 |                     ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1566 |                     continue;
1567 |                 }
1568 | 
1569 |                 if (!ref.has_value()) {
1570 |                     ref = file_reference;
1571 | 
1572 |                     Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file2);
1573 |                     if (EFI_ERROR(Status)) {
1574 |                         do_print_error("AllocatePool", Status);
1575 |                         bs->FreePool(attlist);
1576 |                         return Status;
1577 |                     }
1578 | 
1579 |                     Status = read_from_mappings(vol, &vol.mft_mappings, file_reference * vol.file_record_size,
1580 |                                                 (uint8_t*)file2, vol.file_record_size);
1581 |                     if (EFI_ERROR(Status)) {
1582 |                         do_print_error("read_from_mappings", Status);
1583 |                         bs->FreePool(file2);
1584 |                         bs->FreePool(attlist);
1585 |                         return Status;
1586 |                     }
1587 | 
1588 |                     if (file2->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1589 |                         do_print("Signature was not FILE\n");
1590 |                         bs->FreePool(file2);
1591 |                         bs->FreePool(attlist);
1592 |                         return EFI_INVALID_PARAMETER;
1593 |                     }
1594 | 
1595 |                     Status = process_fixups(&file2->MultiSectorHeader, vol.file_record_size,
1596 |                                             vol.boot_sector->BytesPerSector);
1597 | 
1598 |                     if (EFI_ERROR(Status)) {
1599 |                         do_print_error("process_fixups", Status);
1600 |                         bs->FreePool(file2);
1601 |                         bs->FreePool(attlist);
1602 |                         return Status;
1603 |                     }
1604 |                 }
1605 | 
1606 |                 att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file2 + file2->FirstAttributeOffset);
1607 |                 offset = file2->FirstAttributeOffset;
1608 | 
1609 |                 while (true) {
1610 |                     if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1611 |                         break;
1612 | 
1613 |                     if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
1614 |                         if (att->NameLength == 0 || !memcmp((uint8_t*)file2 + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
1615 |                             string_view data;
1616 |                             u16string_view name;
1617 | 
1618 |                             if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1619 |                                 data = string_view((const char*)file2 + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1620 | 
1621 |                             if (att->NameLength != 0)
1622 |                                 name = u16string_view((char16_t*)((uint8_t*)file2 + offset + att->NameOffset), att->NameLength);
1623 | 
1624 |                             if (!func(*att, data, name)) {
1625 |                                 bs->FreePool(file2);
1626 |                                 bs->FreePool(attlist);
1627 |                                 return EFI_SUCCESS;
1628 |                             }
1629 | 
1630 |                             break;
1631 |                         }
1632 |                     }
1633 | 
1634 |                     offset += att->RecordLength;
1635 |                     att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
1636 |                 }
1637 | 
1638 |                 // don't process this again
1639 |                 ent->file_reference.SegmentNumber = inode;
1640 | 
1641 |                 if (left <= ent->record_length)
1642 |                     break;
1643 | 
1644 |                 left -= ent->record_length;
1645 |                 ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1646 |             }
1647 | 
1648 |             if (file2)
1649 |                 bs->FreePool(file2);
1650 | 
1651 |             if (!ref.has_value()) {
1652 |                 bs->FreePool(attlist);
1653 |                 return EFI_SUCCESS;
1654 |             }
1655 |         }
1656 | 
1657 |         bs->FreePool(attlist);
1658 | 
1659 |         return EFI_SUCCESS;
1660 |     }
1661 | 
1662 |     att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)file_record + file_record->FirstAttributeOffset);
1663 |     offset = file_record->FirstAttributeOffset;
1664 | 
1665 |     while (true) {
1666 |         if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1667 |             break;
1668 | 
1669 |         string_view data;
1670 |         u16string_view name;
1671 | 
1672 |         if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1673 |             data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1674 | 
1675 |         if (att->NameLength != 0)
1676 |             name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
1677 | 
1678 |         if (!func(*att, data, name))
1679 |             return EFI_SUCCESS;
1680 | 
1681 |         offset += att->RecordLength;
1682 |         att = reinterpret_cast<const ATTRIBUTE_RECORD_HEADER*>((uint8_t*)att + att->RecordLength);
1683 |     }
1684 | 
1685 |     return EFI_SUCCESS;
1686 | }
1687 | 
1688 | static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att, LIST_ENTRY* mappings) {
1689 |     EFI_STATUS Status;
1690 |     uint64_t next_vcn, current_lcn = 0, current_vcn;
1691 |     uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
1692 |     uint8_t* stream;
1693 |     uint64_t max_cluster;
1694 | 
1695 |     if (att.FormCode != NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
1696 |         do_print("Cannot read mappings for attribute that is not non-resident\n");
1697 |         return EFI_INVALID_PARAMETER;
1698 |     }
1699 | 
1700 |     if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
1701 |         do_print("Cannot read encrypted data\n");
1702 |         return EFI_INVALID_PARAMETER;
1703 |     }
1704 | 
1705 |     if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK) {
1706 |         do_print("Compression not yet supported\n");
1707 |         return EFI_INVALID_PARAMETER;
1708 |     }
1709 | 
1710 |     next_vcn = att.Form.Nonresident.LowestVcn;
1711 |     stream = (uint8_t*)&att + att.Form.Nonresident.MappingPairsOffset;
1712 | 
1713 |     max_cluster = att.Form.Nonresident.ValidDataLength / cluster_size;
1714 | 
1715 |     if (att.Form.Nonresident.ValidDataLength & (cluster_size - 1))
1716 |         max_cluster++;
1717 | 
1718 |     if (max_cluster == 0)
1719 |         return EFI_SUCCESS;
1720 | 
1721 |     while (true) {
1722 |         uint64_t v, l;
1723 |         int64_t v_val, l_val;
1724 |         mapping* m;
1725 | 
1726 |         current_vcn = next_vcn;
1727 | 
1728 |         if (*stream == 0)
1729 |             break;
1730 | 
1731 |         v = *stream & 0xf;
1732 |         l = *stream >> 4;
1733 | 
1734 |         stream++;
1735 | 
1736 |         if (v > 8)
1737 |             return EFI_INVALID_PARAMETER;
1738 | 
1739 |         if (l > 8)
1740 |             return EFI_INVALID_PARAMETER;
1741 | 
1742 |         // FIXME - do we need to make sure that int64_t pointers don't go past end of buffer?
1743 | 
1744 |         v_val = *(int64_t*)stream;
1745 |         v_val &= (1ull << (v * 8)) - 1;
1746 | 
1747 |         if ((uint64_t)v_val & (1ull << ((v * 8) - 1))) // sign-extend if negative
1748 |             v_val |= 0xffffffffffffffff & ~((1ull << (v * 8)) - 1);
1749 | 
1750 |         stream += v;
1751 | 
1752 |         next_vcn += v_val;
1753 | 
1754 |         Status = bs->AllocatePool(EfiBootServicesData, sizeof(mapping), (void**)&m);
1755 |         if (EFI_ERROR(Status)) {
1756 |             do_print_error("AllocatePool", Status);
1757 |             return Status;
1758 |         }
1759 | 
1760 |         if (l != 0) {
1761 |             l_val = *(int64_t*)stream;
1762 |             l_val &= (1ull << (l * 8)) - 1;
1763 | 
1764 |             if ((uint64_t)l_val & (1ull << ((l * 8) - 1))) // sign-extend if negative
1765 |                 l_val |= 0xffffffffffffffff & ~((1ull << (l * 8)) - 1);
1766 | 
1767 |             stream += l;
1768 | 
1769 |             current_lcn += l_val;
1770 | 
1771 |             if (next_vcn > max_cluster)
1772 |                 next_vcn = max_cluster;
1773 | 
1774 |             m->lcn = current_lcn;
1775 |         } else
1776 |             m->lcn = 0;
1777 | 
1778 |         m->vcn = current_vcn;
1779 |         m->length = next_vcn - current_vcn;
1780 | 
1781 |         InsertTailList(mappings, &m->list_entry);
1782 | 
1783 |         if (next_vcn == max_cluster)
1784 |             break;
1785 |     }
1786 | 
1787 |     return EFI_SUCCESS;
1788 | }
1789 | 
1790 | static EFI_STATUS load_inode(inode& ino) {
1791 |     EFI_STATUS Status, Status2;
1792 |     FILE_RECORD_SEGMENT_HEADER* file;
1793 | 
1794 |     InitializeListHead(&ino.index_mappings);
1795 |     InitializeListHead(&ino.levels);
1796 |     InitializeListHead(&ino.data_mappings);
1797 | 
1798 |     Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file);
1799 |     if (EFI_ERROR(Status)) {
1800 |         do_print_error("AllocatePool", Status);
1801 |         return Status;
1802 |     }
1803 | 
1804 |     Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size,
1805 |                                 (uint8_t*)file, ino.vol.file_record_size);
1806 |     if (EFI_ERROR(Status)) {
1807 |         bs->FreePool(file);
1808 |         do_print_error("read_from_mappings", Status);
1809 |         return Status;
1810 |     }
1811 | 
1812 |     if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1813 |         do_print("Signature was not FILE\n");
1814 |         bs->FreePool(file);
1815 |         return EFI_INVALID_PARAMETER;
1816 |     }
1817 | 
1818 |     Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size,
1819 |                             ino.vol.boot_sector->BytesPerSector);
1820 | 
1821 |     if (EFI_ERROR(Status)) {
1822 |         bs->FreePool(file);
1823 |         do_print_error("process_fixups", Status);
1824 |         return Status;
1825 |     }
1826 | 
1827 |     memset(&ino.standard_info, 0, sizeof(STANDARD_INFORMATION));
1828 | 
1829 |     Status = EFI_SUCCESS;
1830 | 
1831 |     Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
1832 |         switch (att.TypeCode) {
1833 |             case ntfs_attribute::STANDARD_INFORMATION:
1834 |                 if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
1835 |                     size_t to_copy = res_data.size();
1836 | 
1837 |                     if (to_copy > sizeof(STANDARD_INFORMATION))
1838 |                         to_copy = sizeof(STANDARD_INFORMATION);
1839 | 
1840 |                     memcpy(&ino.standard_info, res_data.data(), to_copy);
1841 |                 }
1842 |             break;
1843 | 
1844 |             case ntfs_attribute::FILE_NAME:
1845 |                 if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
1846 |                     const auto& fn = *(FILE_NAME*)res_data.data();
1847 | 
1848 |                     if (res_data.size() >= offsetof(FILE_NAME, EaSize))
1849 |                         ino.is_dir = fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT;
1850 |                 }
1851 | 
1852 |             break;
1853 | 
1854 |             case ntfs_attribute::INDEX_ALLOCATION:
1855 |                 if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
1856 |                     ino.size = att.Form.Nonresident.FileSize;
1857 |                     ino.phys_size = att.Form.Nonresident.AllocatedLength;
1858 | 
1859 |                     Status = read_mappings(ino.vol, att, &ino.index_mappings);
1860 |                     if (EFI_ERROR(Status)) {
1861 |                         do_print_error("read_mappings", Status);
1862 |                         return false;
1863 |                     }
1864 |                 }
1865 |             break;
1866 | 
1867 |             case ntfs_attribute::INDEX_ROOT:
1868 |                 if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ino.index_root) {
1869 |                     Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ino.index_root);
1870 |                     if (EFI_ERROR(Status)) {
1871 |                         do_print_error("AllocatePool", Status);
1872 |                         return false;
1873 |                     }
1874 | 
1875 |                     memcpy(ino.index_root, res_data.data(), res_data.size());
1876 |                 }
1877 |             break;
1878 | 
1879 |             case ntfs_attribute::DATA:
1880 |                 if (att_name.empty()) {
1881 |                     switch (att.FormCode) {
1882 |                         case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM:
1883 |                             ino.size = att.Form.Nonresident.FileSize;
1884 |                             ino.phys_size = att.Form.Nonresident.AllocatedLength;
1885 |                             ino.vdl = att.Form.Nonresident.ValidDataLength;
1886 |                         break;
1887 | 
1888 |                         case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1889 |                             ino.size = ino.phys_size = ino.vdl = att.Form.Resident.ValueLength;
1890 |                         break;
1891 |                     }
1892 |                 }
1893 |             break;
1894 | 
1895 |             default:
1896 |                 break;
1897 |         }
1898 | 
1899 |         return true;
1900 |     });
1901 | 
1902 |     if (EFI_ERROR(Status2)) {
1903 |         do_print_error("loop_through_atts", Status2);
1904 |         Status = Status2;
1905 |     }
1906 | 
1907 |     if (EFI_ERROR(Status)) {
1908 |         if (ino.index_root) {
1909 |             bs->FreePool(ino.index_root);
1910 |             ino.index_root = nullptr;
1911 |         }
1912 | 
1913 |         bs->FreePool(file);
1914 |         return Status;
1915 |     }
1916 | 
1917 |     ino.inode_loaded = true;
1918 | 
1919 |     bs->FreePool(file);
1920 | 
1921 |     return EFI_SUCCESS;
1922 | }
1923 | 
1924 | static EFI_STATUS get_inode_file_info(inode& ino, UINTN* BufferSize, VOID* Buffer) {
1925 |     EFI_STATUS Status;
1926 |     unsigned int size = offsetof(EFI_FILE_INFO, FileName[0]) + sizeof(CHAR16);
1927 |     EFI_FILE_INFO* info = (EFI_FILE_INFO*)Buffer;
1928 |     u16string_view name;
1929 | 
1930 |     if (ino.name) {
1931 |         name = u16string_view(ino.name, ino.name_len);
1932 | 
1933 |         if (auto bs = name.rfind(u'\\'); bs != u16string_view::npos)
1934 |             name = u16string_view(name.data() + bs + 1, name.size() - bs - 1);
1935 | 
1936 |         size += name.size() * sizeof(char16_t);
1937 |     }
1938 | 
1939 |     if (*BufferSize < size) {
1940 |         *BufferSize = size;
1941 |         return EFI_BUFFER_TOO_SMALL;
1942 |     }
1943 | 
1944 |     if (!ino.inode_loaded) {
1945 |         Status = load_inode(ino);
1946 |         if (EFI_ERROR(Status)) {
1947 |             do_print_error("load_inode", Status);
1948 |             return Status;
1949 |         }
1950 |     }
1951 | 
1952 |     info->Size = size;
1953 |     info->FileSize = ino.size;
1954 |     info->PhysicalSize = ino.phys_size;
1955 |     win_time_to_efi(ino.standard_info.CreationTime, &info->CreateTime);
1956 |     win_time_to_efi(ino.standard_info.LastAccessTime, &info->LastAccessTime);
1957 |     win_time_to_efi(ino.standard_info.LastWriteTime, &info->ModificationTime);
1958 |     info->Attribute = win_attributes_to_efi(ino.standard_info.FileAttributes, ino.is_dir);
1959 | 
1960 |     if (!name.empty()) {
1961 |         memcpy(info->FileName, name.data(), name.size() * sizeof(char16_t));
1962 |         info->FileName[name.size()] = 0;
1963 |     } else
1964 |         info->FileName[0] = 0;
1965 | 
1966 |     return EFI_SUCCESS;
1967 | }
1968 | 
1969 | static EFI_STATUS EFIAPI file_get_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN* BufferSize, VOID* Buffer) {
1970 |     inode* ino = _CR(File, inode, proto);
1971 |     EFI_GUID guid = EFI_FILE_INFO_ID;
1972 | 
1973 |     // FIXME - EFI_FILE_SYSTEM_INFO
1974 | 
1975 |     if (memcmp(InformationType, &guid, sizeof(EFI_GUID)))
1976 |         return EFI_UNSUPPORTED;
1977 | 
1978 |     return get_inode_file_info(*ino, BufferSize, Buffer);
1979 | }
1980 | 
1981 | static EFI_STATUS EFIAPI file_set_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN BufferSize, VOID* Buffer) {
1982 |     UNUSED(File);
1983 |     UNUSED(InformationType);
1984 |     UNUSED(BufferSize);
1985 |     UNUSED(Buffer);
1986 | 
1987 |     return EFI_UNSUPPORTED;
1988 | }
1989 | 
1990 | static EFI_STATUS file_flush(struct _EFI_FILE_HANDLE* File) {
1991 |     UNUSED(File);
1992 | 
1993 |     // nop
1994 | 
1995 |     return EFI_SUCCESS;
1996 | }
1997 | 
1998 | static void populate_file_handle(EFI_FILE_PROTOCOL* h) {
1999 |     h->Revision = EFI_FILE_PROTOCOL_REVISION;
2000 |     h->Open = file_open;
2001 |     h->Close = file_close;
2002 |     h->Delete = file_delete;
2003 |     h->Read = file_read;
2004 |     h->Write = file_write;
2005 |     h->GetPosition = file_get_position;
2006 |     h->SetPosition = file_set_position;
2007 |     h->GetInfo = file_get_info;
2008 |     h->SetInfo = file_set_info;
2009 |     h->Flush = file_flush;
2010 | }
2011 | 
2012 | static EFI_STATUS EFIAPI open_volume(EFI_SIMPLE_FILE_SYSTEM_PROTOCOL* This, EFI_FILE_PROTOCOL** Root) {
2013 |     EFI_STATUS Status;
2014 |     volume* vol = _CR(This, volume, proto);
2015 |     inode* ino;
2016 | 
2017 |     Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino);
2018 |     if (EFI_ERROR(Status)) {
2019 |         do_print_error("AllocatePool", Status);
2020 |         return Status;
2021 |     }
2022 | 
2023 |     memset(ino, 0, sizeof(inode));
2024 | 
2025 |     new (ino) inode(*vol);
2026 | 
2027 |     populate_file_handle(&ino->proto);
2028 | 
2029 |     ino->ino = NTFS_ROOT_DIR_INODE;
2030 | 
2031 |     *Root = &ino->proto;
2032 | 
2033 |     return EFI_SUCCESS;
2034 | }
2035 | 
2036 | static EFI_STATUS read_mft(volume& vol) {
2037 |     EFI_STATUS Status, Status2;
2038 |     FILE_RECORD_SEGMENT_HEADER* mft;
2039 | 
2040 |     Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&mft);
2041 |     if (EFI_ERROR(Status)) {
2042 |         do_print_error("AllocatePool", Status);
2043 |         return Status;
2044 |     }
2045 | 
2046 |     Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId,
2047 |                                     (vol.boot_sector->MFT * vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster) / vol.block->Media->BlockSize,
2048 |                                     vol.file_record_size, mft);
2049 |     if (EFI_ERROR(Status)) {
2050 |         bs->FreePool(mft);
2051 |         do_print_error("ReadBlocks", Status);
2052 |         return Status;
2053 |     }
2054 | 
2055 |     if (mft->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
2056 |         do_print("Signature was not FILE\n");
2057 |         bs->FreePool(mft);
2058 |         return EFI_INVALID_PARAMETER;
2059 |     }
2060 | 
2061 |     Status = process_fixups(&mft->MultiSectorHeader, vol.file_record_size,
2062 |                             vol.boot_sector->BytesPerSector);
2063 |     if (EFI_ERROR(Status)) {
2064 |         bs->FreePool(mft);
2065 |         do_print_error("process_fixups", Status);
2066 |         return Status;
2067 |     }
2068 | 
2069 |     // read DATA mappings
2070 | 
2071 |     Status = EFI_INVALID_PARAMETER;
2072 | 
2073 |     Status2 = loop_through_atts(vol, NTFS_MFT_INODE, mft, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
2074 |         if (att.TypeCode == ntfs_attribute::DATA && att_name.empty()) {
2075 |             Status = read_mappings(vol, att, &vol.mft_mappings);
2076 |             if (EFI_ERROR(Status))
2077 |                 do_print_error("read_mappings", Status);
2078 | 
2079 |             return false;
2080 |         }
2081 | 
2082 |         return true;
2083 |     });
2084 | 
2085 |     bs->FreePool(mft);
2086 | 
2087 |     if (EFI_ERROR(Status2)) {
2088 |         do_print_error("loop_through_atts", Status2);
2089 |         return Status2;
2090 |     }
2091 | 
2092 |     return Status;
2093 | }
2094 | 
2095 | volume::~volume() {
2096 |     while (!IsListEmpty(&mft_mappings)) {
2097 |         mapping* m = _CR(mft_mappings.Flink, mapping, list_entry);
2098 |         RemoveEntryList(&m->list_entry);
2099 |         bs->FreePool(m);
2100 |     }
2101 | 
2102 |     bs->FreePool(boot_sector);
2103 | }
2104 | 
2105 | static EFI_STATUS read_upcase(volume& vol) {
2106 |     EFI_STATUS Status, Status2;
2107 |     FILE_RECORD_SEGMENT_HEADER* file;
2108 |     LIST_ENTRY mappings;
2109 |     uint64_t size;
2110 | 
2111 |     InitializeListHead(&mappings);
2112 | 
2113 |     Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file);
2114 |     if (EFI_ERROR(Status)) {
2115 |         do_print_error("AllocatePool", Status);
2116 |         return Status;
2117 |     }
2118 | 
2119 |     Status = read_from_mappings(vol, &vol.mft_mappings, NTFS_UPCASE_INODE * vol.file_record_size,
2120 |                                 (uint8_t*)file, vol.file_record_size);
2121 |     if (EFI_ERROR(Status)) {
2122 |         bs->FreePool(file);
2123 |         do_print_error("read_from_mappings", Status);
2124 |         return Status;
2125 |     }
2126 | 
2127 |     if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
2128 |         do_print("Signature was not FILE\n");
2129 |         bs->FreePool(file);
2130 |         return EFI_INVALID_PARAMETER;
2131 |     }
2132 | 
2133 |     Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size,
2134 |                             vol.boot_sector->BytesPerSector);
2135 | 
2136 |     if (EFI_ERROR(Status)) {
2137 |         bs->FreePool(file);
2138 |         do_print_error("process_fixups", Status);
2139 |         return Status;
2140 |     }
2141 | 
2142 |     Status2 = loop_through_atts(vol, NTFS_UPCASE_INODE, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
2143 |         switch (att.TypeCode) {
2144 |             case ntfs_attribute::DATA:
2145 |                 // assuming that $UpCase DATA can never be resident
2146 |                 if (att_name.empty() && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
2147 |                     size = att.Form.Nonresident.AllocatedLength;
2148 |                     Status = read_mappings(vol, att, &mappings);
2149 | 
2150 |                     if (EFI_ERROR(Status))
2151 |                         do_print_error("read_mappings", Status);
2152 | 
2153 |                     return false;
2154 |                 }
2155 |             break;
2156 | 
2157 |             default:
2158 |                 break;
2159 |         }
2160 | 
2161 |         return true;
2162 |     });
2163 | 
2164 |     if (EFI_ERROR(Status2)) {
2165 |         do_print_error("loop_through_atts", Status2);
2166 |         return Status2;
2167 |     }
2168 | 
2169 |     if (EFI_ERROR(Status))
2170 |         return Status;
2171 | 
2172 |     Status = read_from_mappings(vol, &mappings, 0, (uint8_t*)vol.upcase, min(size, (uint64_t)sizeof(vol.upcase)));
2173 | 
2174 |     if (EFI_ERROR(Status))
2175 |         do_print_error("read_from_mappings", Status);
2176 | 
2177 |     while (!IsListEmpty(&mappings)) {
2178 |         mapping* m = _CR(mappings.Flink, mapping, list_entry);
2179 |         RemoveEntryList(&m->list_entry);
2180 |         bs->FreePool(m);
2181 |     }
2182 | 
2183 |     return Status;
2184 | }
2185 | 
2186 | static EFI_STATUS EFIAPI get_arc_name(EFI_QUIBBLE_PROTOCOL* This, char* ArcName, UINTN* ArcNameLen) {
2187 |     UNUSED(This);
2188 |     UNUSED(ArcName);
2189 |     UNUSED(ArcNameLen);
2190 | 
2191 |     return EFI_UNSUPPORTED;
2192 | }
2193 | 
2194 | static EFI_STATUS get_driver_name(EFI_QUIBBLE_PROTOCOL* This, CHAR16* DriverName, UINTN* DriverNameLen) {
2195 |     static const char16_t name[] = u"ntfs";
2196 | 
2197 |     UNUSED(This);
2198 | 
2199 |     if (*DriverNameLen < sizeof(name)) {
2200 |         *DriverNameLen = sizeof(name);
2201 |         return EFI_BUFFER_TOO_SMALL;
2202 |     }
2203 | 
2204 |     *DriverNameLen = sizeof(name);
2205 | 
2206 |     memcpy(DriverName, name, sizeof(name));
2207 | 
2208 |     return EFI_SUCCESS;
2209 | }
2210 | 
2211 | static EFI_STATUS EFIAPI drv_start(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
2212 |                                    EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) {
2213 |     EFI_STATUS Status;
2214 |     EFI_GUID disk_guid = EFI_DISK_IO_PROTOCOL_GUID;
2215 |     EFI_GUID block_guid = EFI_BLOCK_IO_PROTOCOL_GUID;
2216 |     EFI_GUID fs_guid = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID;
2217 |     EFI_GUID quibble_guid = EFI_QUIBBLE_PROTOCOL_GUID;
2218 |     EFI_BLOCK_IO_PROTOCOL* block;
2219 |     uint32_t sblen;
2220 |     NTFS_BOOT_SECTOR* sb;
2221 |     EFI_DISK_IO_PROTOCOL* disk_io;
2222 |     volume* vol;
2223 | 
2224 |     UNUSED(RemainingDevicePath);
2225 | 
2226 |     Status = bs->OpenProtocol(ControllerHandle, &block_guid, (void**)&block, This->DriverBindingHandle,
2227 |                               ControllerHandle, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
2228 |     if (EFI_ERROR(Status))
2229 |         return Status;
2230 | 
2231 |     if (block->Media->BlockSize == 0) {
2232 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2233 |         return EFI_UNSUPPORTED;
2234 |     }
2235 | 
2236 |     Status = bs->OpenProtocol(ControllerHandle, &disk_guid, (void**)&disk_io, This->DriverBindingHandle,
2237 |                               ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER);
2238 |     if (EFI_ERROR(Status)) {
2239 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2240 |         return Status;
2241 |     }
2242 | 
2243 |     // FIXME - FAT driver also claims DISK_IO 2 protocol - do we need to?
2244 | 
2245 |     sblen = sector_align(sizeof(NTFS_BOOT_SECTOR), block->Media->BlockSize);
2246 | 
2247 |     Status = bs->AllocatePool(EfiBootServicesData, sblen, (void**)&sb);
2248 |     if (EFI_ERROR(Status)) {
2249 |         do_print_error("AllocatePool", Status);
2250 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2251 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2252 |         return Status;
2253 |     }
2254 | 
2255 |     // read superblock
2256 | 
2257 |     Status = block->ReadBlocks(block, block->Media->MediaId, 0, sblen, sb);
2258 |     if (EFI_ERROR(Status)) {
2259 |         bs->FreePool(sb);
2260 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2261 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2262 |         return Status;
2263 |     }
2264 | 
2265 |     if (memcmp(sb->FsName, NTFS_FS_NAME, sizeof(NTFS_FS_NAME) - 1)) { // not NTFS
2266 |         bs->FreePool(sb);
2267 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2268 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2269 |         return EFI_UNSUPPORTED;
2270 |     }
2271 | 
2272 |     Status = bs->AllocatePool(EfiBootServicesData, sizeof(volume), (void**)&vol);
2273 |     if (EFI_ERROR(Status)) {
2274 |         do_print_error("AllocatePool", Status);
2275 |         bs->FreePool(sb);
2276 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2277 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2278 |         return Status;
2279 |     }
2280 | 
2281 |     memset(vol, 0, sizeof(volume));
2282 | 
2283 |     vol->proto.Revision = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_REVISION;
2284 |     vol->proto.OpenVolume = open_volume;
2285 |     vol->boot_sector = sb;
2286 |     vol->controller = ControllerHandle;
2287 |     vol->block = block;
2288 |     vol->disk_io = disk_io;
2289 | 
2290 |     if (sb->ClustersPerMFTRecord < 0)
2291 |         vol->file_record_size = 1ull << -sb->ClustersPerMFTRecord;
2292 |     else
2293 |         vol->file_record_size = (uint64_t)sb->BytesPerSector * (uint64_t)sb->SectorsPerCluster * (uint64_t)sb->ClustersPerMFTRecord;
2294 | 
2295 |     InitializeListHead(&vol->mft_mappings);
2296 | 
2297 |     Status = read_mft(*vol);
2298 |     if (EFI_ERROR(Status)) {
2299 |         do_print_error("read_mft", Status);
2300 |         vol->volume::~volume();
2301 |         bs->FreePool(vol);
2302 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2303 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2304 |         return Status;
2305 |     }
2306 | 
2307 |     Status = read_upcase(*vol);
2308 |     if (EFI_ERROR(Status)) {
2309 |         do_print_error("read_upcase", Status);
2310 |         vol->volume::~volume();
2311 |         bs->FreePool(vol);
2312 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2313 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2314 |         return Status;
2315 |     }
2316 | 
2317 |     vol->quibble_proto.GetArcName = get_arc_name;
2318 |     vol->quibble_proto.GetWindowsDriverName = get_driver_name;
2319 | 
2320 |     Status = bs->InstallMultipleProtocolInterfaces(&ControllerHandle, &fs_guid, &vol->proto,
2321 |                                                    &quibble_guid, &vol->quibble_proto, nullptr);
2322 |     if (EFI_ERROR(Status)) {
2323 |         do_print_error("InstallMultipleProtocolInterfaces", Status);
2324 |         vol->volume::~volume();
2325 |         bs->FreePool(vol);
2326 |         bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2327 |         bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2328 |         return Status;
2329 |     }
2330 | 
2331 |     return EFI_SUCCESS;
2332 | }
2333 | 
2334 | static EFI_STATUS EFIAPI drv_stop(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
2335 |                                   UINTN NumberOfChildren, EFI_HANDLE* ChildHandleBuffer) {
2336 |     UNUSED(This);
2337 |     UNUSED(ControllerHandle);
2338 |     UNUSED(NumberOfChildren);
2339 |     UNUSED(ChildHandleBuffer);
2340 | 
2341 |     // FIXME - make this work(?)
2342 | 
2343 |     return EFI_INVALID_PARAMETER;
2344 | }
2345 | 
2346 | static void get_info_protocol(EFI_HANDLE image_handle) {
2347 |     EFI_GUID guid = EFI_QUIBBLE_INFO_PROTOCOL_GUID;
2348 |     EFI_HANDLE* handles = NULL;
2349 |     UINTN count;
2350 |     EFI_STATUS Status;
2351 | 
2352 |     Status = bs->LocateHandleBuffer(ByProtocol, &guid, NULL, &count, &handles);
2353 |     if (EFI_ERROR(Status))
2354 |         return;
2355 | 
2356 |     if (count == 0) {
2357 |         bs->FreePool(handles);
2358 |         return;
2359 |     }
2360 | 
2361 |     for (unsigned int i = 0; i < count; i++) {
2362 |         Status = bs->OpenProtocol(handles[i], &guid, (void**)&info_proto, image_handle, NULL,
2363 |                                   EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL);
2364 |         if (EFI_ERROR(Status))
2365 |             continue;
2366 | 
2367 |         break;
2368 |     }
2369 | 
2370 |     bs->FreePool(handles);
2371 | }
2372 | 
2373 | extern "C"
2374 | EFI_STATUS EFIAPI efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE* SystemTable) {
2375 |     EFI_STATUS Status;
2376 |     EFI_GUID guid = EFI_DRIVER_BINDING_PROTOCOL_GUID;
2377 | 
2378 |     systable = SystemTable;
2379 |     bs = SystemTable->BootServices;
2380 | 
2381 |     get_info_protocol(ImageHandle);
2382 | 
2383 |     drvbind.Supported = drv_supported;
2384 |     drvbind.Start = drv_start;
2385 |     drvbind.Stop = drv_stop;
2386 |     drvbind.Version = 0x10;
2387 |     drvbind.ImageHandle = ImageHandle;
2388 |     drvbind.DriverBindingHandle = ImageHandle;
2389 | 
2390 |     Status = bs->InstallProtocolInterface(&drvbind.DriverBindingHandle, &guid,
2391 |                                           EFI_NATIVE_INTERFACE, &drvbind);
2392 |     if (EFI_ERROR(Status)) {
2393 |         do_print_error("InstallProtocolInterface", Status);
2394 |         return Status;
2395 |     }
2396 | 
2397 |     return EFI_SUCCESS;
2398 | }
2399 | 


--------------------------------------------------------------------------------
/src/ntfs.h:
--------------------------------------------------------------------------------
  1 | /* Copyright (c) Mark Harmstone 2023
  2 |  *
  3 |  * This file is part of ntfs-efi.
  4 |  *
  5 |  * ntfs-efi is free software: you can redistribute it and/or modify
  6 |  * it under the terms of the GNU General Public Licence as published by
  7 |  * the Free Software Foundation, either version 2 of the Licence, or
  8 |  * (at your option) any later version.
  9 |  *
 10 |  * ntfs-efi is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 |  * GNU General Public Licence for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU General Public Licence
 16 |  * along with ntfs-efi.  If not, see <http://www.gnu.org/licenses/>. */
 17 | 
 18 | #pragma once
 19 | 
 20 | enum class ntfs_attribute : uint32_t { // 32-bit attribute type codes; used by ATTRIBUTE_RECORD_HEADER::TypeCode, index_root::attribute_type and attribute_list_entry::type
 21 |     STANDARD_INFORMATION = 0x10, // codes are spaced 0x10 apart
 22 |     ATTRIBUTE_LIST = 0x20,
 23 |     FILE_NAME = 0x30,
 24 |     VOLUME_VERSION = 0x40,
 25 |     SECURITY_DESCRIPTOR = 0x50,
 26 |     VOLUME_NAME = 0x60,
 27 |     VOLUME_INFORMATION = 0x70,
 28 |     DATA = 0x80,
 29 |     INDEX_ROOT = 0x90,
 30 |     INDEX_ALLOCATION = 0xA0,
 31 |     BITMAP = 0xB0,
 32 |     REPARSE_POINT = 0xC0,
 33 |     EA_INFORMATION = 0xD0,
 34 |     EA = 0xE0,
 35 |     PROPERTY_SET = 0xF0,
 36 |     LOGGED_UTILITY_STREAM = 0x100, // highest code defined here
 37 | };
 38 | 
 39 | enum class NTFS_ATTRIBUTE_FORM : uint8_t { // one-byte value stored in ATTRIBUTE_RECORD_HEADER::FormCode
 40 |     RESIDENT_FORM = 0, // presumably selects Form.Resident — confirm against the readers in ntfs.cpp
 41 |     NONRESIDENT_FORM = 1 // presumably selects Form.Nonresident — confirm
 42 | };
 43 | 
 44 | #pragma pack(push,1)
 45 | 
 46 | struct NTFS_BOOT_SECTOR { // boot sector fields; declared under #pragma pack(push,1), so offsets below have no padding (0x54 bytes total)
 47 |     uint8_t Jmp[3]; // offset 0x00
 48 |     uint8_t FsName[8]; // offset 0x03; 8 bytes, same length as NTFS_FS_NAME without its terminator
 49 |     uint16_t BytesPerSector; // offset 0x0B
 50 |     uint8_t SectorsPerCluster; // offset 0x0D
 51 |     uint16_t ReservedSectors; // offset 0x0E
 52 |     uint8_t Unused1[5]; // offset 0x10
 53 |     uint8_t Media; // offset 0x15
 54 |     uint8_t Unused2[2]; // offset 0x16
 55 |     uint16_t SectorsPerTrack; // offset 0x18
 56 |     uint16_t Heads; // offset 0x1A
 57 |     uint32_t HiddenSectors; // offset 0x1C
 58 |     uint32_t Unused3; // offset 0x20
 59 |     uint32_t Unknown; // offset 0x24
 60 |     uint64_t TotalSectors; // offset 0x28
 61 |     uint64_t MFT; // offset 0x30; NOTE(review): presumably a cluster number, not a byte offset — confirm
 62 |     uint64_t MFTMirr; // offset 0x38
 63 |     int8_t ClustersPerMFTRecord; // offset 0x40; signed — NOTE(review): negative values presumably encode a power-of-two byte size, confirm in ntfs.cpp
 64 |     uint8_t Padding1[3]; // offset 0x41
 65 |     int8_t ClustersPerIndexRecord; // offset 0x44; signed, see note on ClustersPerMFTRecord
 66 |     uint8_t Padding2[3]; // offset 0x45
 67 |     uint64_t SerialNumber; // offset 0x48
 68 |     uint32_t Checksum; // offset 0x50
 69 | };
 70 | 
 71 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header
 72 | struct MULTI_SECTOR_HEADER { // 8-byte header that is the first member of FILE_RECORD_SEGMENT_HEADER and index_record
 73 |     uint32_t Signature; // presumably compared against NTFS_FILE_SIGNATURE / INDEX_RECORD_MAGIC — confirm in ntfs.cpp
 74 |     uint16_t UpdateSequenceArrayOffset; // NOTE(review): presumably a byte offset from the start of the record — confirm
 75 |     uint16_t UpdateSequenceArraySize; // NOTE(review): unit (entries vs bytes) not visible here — confirm
 76 | };
 77 | 
 78 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference
 79 | struct MFT_SEGMENT_REFERENCE { // 48 + 16 bits: both bit-fields share one uint64_t, so the struct is 8 bytes
 80 |     uint64_t SegmentNumber : 48; // bit-field placement is compiler-defined; assumes the low 48 bits on the MinGW targets — TODO confirm
 81 |     uint64_t SequenceNumber : 16; // remaining 16 bits of the same 64-bit unit
 82 | };
 83 | 
 84 | // based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and
 85 | // http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html
 86 | struct FILE_RECORD_SEGMENT_HEADER { // packed header of a file record; 0x2A bytes with the fields below
 87 |     MULTI_SECTOR_HEADER MultiSectorHeader; // offset 0x00
 88 |     uint64_t LogFileSequenceNumber; // offset 0x08
 89 |     uint16_t SequenceNumber; // offset 0x10
 90 |     uint16_t HardLinkCount; // offset 0x12
 91 |     uint16_t FirstAttributeOffset; // offset 0x14; presumably a byte offset from the start of the record to the first ATTRIBUTE_RECORD_HEADER — confirm
 92 |     uint16_t Flags; // offset 0x16
 93 |     uint32_t EntryUsedSize; // offset 0x18
 94 |     uint32_t EntryAllocatedSize; // offset 0x1C
 95 |     MFT_SEGMENT_REFERENCE BaseFileRecordSegment; // offset 0x20
 96 |     uint16_t NextAttributeID; // offset 0x28
 97 | };
 98 | 
 99 | struct ATTRIBUTE_RECORD_HEADER { // packed attribute header: 16 common bytes followed by a form-specific union
100 |     enum ntfs_attribute TypeCode; // offset 0x00
101 |     uint16_t RecordLength; // offset 0x04
102 |     uint16_t Unknown; // offset 0x06
103 |     enum NTFS_ATTRIBUTE_FORM FormCode; // offset 0x08
104 |     uint8_t NameLength; // offset 0x09
105 |     uint16_t NameOffset; // offset 0x0A
106 |     uint16_t Flags; // offset 0x0C; see the ATTRIBUTE_FLAG_* defines in this header
107 |     uint16_t Instance; // offset 0x0E
108 |     union { // offset 0x10; both layouts overlap from here
109 |         struct {
110 |             uint32_t ValueLength; // offset 0x10
111 |             uint16_t ValueOffset; // offset 0x14; NOTE(review): presumably relative to the start of this header — confirm
112 |             uint8_t Reserved[2]; // offset 0x16
113 |         } Resident;
114 |         struct {
115 |             uint64_t LowestVcn; // offset 0x10
116 |             uint64_t HighestVcn; // offset 0x18
117 |             uint16_t MappingPairsOffset; // offset 0x20
118 |             uint16_t CompressionUnit; // offset 0x22
119 |             uint32_t Padding; // offset 0x24
120 |             uint64_t AllocatedLength; // offset 0x28
121 |             uint64_t FileSize; // offset 0x30
122 |             uint64_t ValidDataLength; // offset 0x38
123 |             uint64_t TotalAllocated; // offset 0x40; NOTE(review): may only be present for some attributes — confirm before reading
124 |         } Nonresident;
125 |     } Form;
126 | };
127 | 
128 | // https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html
129 | 
130 | struct STANDARD_INFORMATION { // packed, 72 bytes with every field below
131 |     int64_t CreationTime; // signed 64-bit timestamps; unit/epoch not shown here — presumably Windows FILETIME, confirm
132 |     int64_t LastAccessTime;
133 |     int64_t LastWriteTime;
134 |     int64_t ChangeTime;
135 |     uint32_t FileAttributes; // presumably FILE_ATTRIBUTE_* bits — confirm
136 |     uint32_t MaximumVersions;
137 |     uint32_t VersionNumber;
138 |     uint32_t ClassId;
139 |     uint32_t OwnerId; // NOTE(review): fields from here on may be absent in shorter on-disk records — check the attribute length before use
140 |     uint32_t SecurityId;
141 |     uint64_t QuotaCharged;
142 |     uint64_t USN;
143 | };
144 | 
145 | // https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html
146 | 
147 | struct index_node_header { // 16-byte header embedded in both index_root and index_record
148 |     uint32_t first_entry; // presumably a byte offset to the first index_entry, relative to this header — confirm
149 |     uint32_t total_size;
150 |     uint32_t allocated_size;
151 |     uint32_t flags;
152 | };
153 | 
154 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html
155 | 
156 | #define INDEX_ENTRY_SUBNODE     1 // bit 0 of index_entry::flags
157 | #define INDEX_ENTRY_LAST        2 // bit 1 of index_entry::flags
158 | 
159 | struct index_entry { // 16-byte fixed part of an index entry
160 |     MFT_SEGMENT_REFERENCE file_reference;
161 |     uint16_t entry_length; // presumably the byte length of the whole entry, used to step to the next one — confirm
162 |     uint16_t stream_length;
163 |     uint32_t flags; // INDEX_ENTRY_* bits
164 | };
165 | 
166 | // https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html
167 | 
168 | struct index_root { // packed: 16 bytes of fields, then the node header, then the entries
169 |     enum ntfs_attribute attribute_type;
170 |     uint32_t collation_rule;
171 |     uint32_t bytes_per_index_record;
172 |     uint8_t clusters_per_index_record; // unsigned here, unlike the signed NTFS_BOOT_SECTOR::ClustersPerIndexRecord
173 |     uint8_t padding[3];
174 |     index_node_header node_header; // offset 0x10
175 |     index_entry entries[1]; // declared with one element; presumably a variable-length run of entries — confirm
176 | };
177 | 
178 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html
179 | 
180 | struct index_record { // packed, 42 bytes with the fields below
181 |     MULTI_SECTOR_HEADER MultiSectorHeader; // offset 0x00
182 |     uint64_t sequence_number; // offset 0x08
183 |     uint64_t vcn; // offset 0x10
184 |     index_node_header header; // offset 0x18
185 |     uint16_t update_sequence; // offset 0x28
186 | };
187 | 
188 | #define INDEX_RECORD_MAGIC 0x58444e49 // "INDX" (the ASCII bytes read as a little-endian uint32_t)
189 | 
190 | // https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html
191 | 
192 | enum class file_name_type : uint8_t { // one-byte value stored in FILE_NAME::Namespace
193 |     POSIX = 0,
194 |     WINDOWS = 1,
195 |     DOS = 2,
196 |     WINDOWS_AND_DOS = 3 // numerically WINDOWS | DOS
197 | };
198 | 
199 | struct FILE_NAME { // packed; the fixed part is 0x42 bytes, followed by the name
200 |     MFT_SEGMENT_REFERENCE Parent; // offset 0x00
201 |     int64_t CreationTime; // offset 0x08
202 |     int64_t LastAccessTime; // offset 0x10
203 |     int64_t LastWriteTime; // offset 0x18
204 |     int64_t ChangeTime; // offset 0x20
205 |     uint64_t AllocationSize; // offset 0x28
206 |     uint64_t EndOfFile; // offset 0x30
207 |     uint32_t FileAttributes; // offset 0x38
208 |     uint32_t EaSize; // offset 0x3C
209 |     uint8_t FileNameLength; // offset 0x40; presumably counted in char16_t units, not bytes — confirm
210 |     file_name_type Namespace; // offset 0x41
211 |     char16_t FileName[1]; // offset 0x42; declared with one element, presumably FileNameLength units follow — confirm
212 | };
213 | 
214 | // https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html
215 | 
216 | struct attribute_list_entry { // packed, 26 bytes with the fields below
217 |     enum ntfs_attribute type; // offset 0x00
218 |     uint16_t record_length; // offset 0x04; presumably the byte length of this entry, used to reach the next one — confirm
219 |     uint8_t name_length; // offset 0x06
220 |     uint8_t name_offset; // offset 0x07
221 |     uint64_t starting_vcn; // offset 0x08
222 |     MFT_SEGMENT_REFERENCE file_reference; // offset 0x10
223 |     uint16_t instance; // offset 0x18
224 | };
225 | 
226 | struct reparse_point_header {  // edited form of REPARSE_DATA_BUFFER
227 |     uint32_t ReparseTag; // e.g. IO_REPARSE_TAG_WOF, defined in this header
228 |     uint16_t ReparseDataLength; // presumably the number of bytes in DataBuffer — confirm
229 |     uint16_t Reserved;
230 |     uint8_t DataBuffer[1]; // offset 0x08; declared with one element, tag-specific payload presumably follows — confirm
231 | };
232 | 
233 | static const uint32_t WOF_CURRENT_VERSION = 1; // presumably the expected wof_external_info::Version — confirm
234 | 
235 | static const uint32_t WOF_PROVIDER_WIM = 1; // values for wof_external_info::Provider, presumably — confirm
236 | static const uint32_t WOF_PROVIDER_FILE = 2;
237 | 
238 | struct wof_external_info { // WOF_EXTERNAL_INFO in winioctl.h
239 |     uint32_t Version; // offset 0x00
240 |     uint32_t Provider; // offset 0x04
241 | };
242 | 
243 | static const uint32_t FILE_PROVIDER_CURRENT_VERSION = 1; // presumably the expected file_provider_external_info_v0::Version — confirm
244 | 
245 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS4K = 0; // values for file_provider_external_info_v0::Algorithm, presumably — confirm
246 | static const uint32_t FILE_PROVIDER_COMPRESSION_LZX = 1;
247 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS8K = 2; // note the numeric order is 4K, LZX, 8K, 16K
248 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS16K = 3;
249 | 
250 | struct file_provider_external_info_v0 { // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h
251 |     uint32_t Version; // offset 0x00
252 |     uint32_t Algorithm; // offset 0x04
253 | };
254 | 
255 | #pragma pack(pop)
256 | 
257 | #define NTFS_FS_NAME "NTFS    " // 8 characters (four trailing spaces), the same length as NTFS_BOOT_SECTOR::FsName
258 | 
259 | #define NTFS_MFT_INODE          0 // fixed file record numbers
260 | #define NTFS_ROOT_DIR_INODE     5
261 | #define NTFS_UPCASE_INODE       10
262 | 
263 | #define NTFS_FILE_SIGNATURE     0x454c4946 // "FILE" (the ASCII bytes read as a little-endian uint32_t)
264 | 
265 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header
266 | #define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff // low byte of ATTRIBUTE_RECORD_HEADER::Flags
267 | #define ATTRIBUTE_FLAG_ENCRYPTED 0x4000 // bit 14 of ATTRIBUTE_RECORD_HEADER::Flags
268 | 
269 | #define FILE_ATTRIBUTE_READONLY             0x00000001
270 | #define FILE_ATTRIBUTE_HIDDEN               0x00000002
271 | #define FILE_ATTRIBUTE_SYSTEM               0x00000004
272 | #define FILE_ATTRIBUTE_DIRECTORY            0x00000010
273 | #define FILE_ATTRIBUTE_ARCHIVE              0x00000020
274 | #define FILE_ATTRIBUTE_DIRECTORY_MFT        0x10000000 // NOTE(review): presumably how a directory is flagged in on-disk FileAttributes, as opposed to FILE_ATTRIBUTE_DIRECTORY — confirm
275 | 
276 | static const uint32_t IO_REPARSE_TAG_WOF = 0x80000017; // value for reparse_point_header::ReparseTag
277 | 


--------------------------------------------------------------------------------
/src/quibbleproto.h:
--------------------------------------------------------------------------------
 1 | /* Copyright (c) Mark Harmstone 2023
 2 |  *
 3 |  * This file is part of ntfs-efi.
 4 |  *
 5 |  * ntfs-efi is free software: you can redistribute it and/or modify
 6 |  * it under the terms of the GNU General Public Licence as published by
 7 |  * the Free Software Foundation, either version 2 of the Licence, or
 8 |  * (at your option) any later version.
 9 |  *
10 |  * ntfs-efi is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |  * GNU General Public Licence for more details.
14 |  *
15 |  * You should have received a copy of the GNU General Public Licence
16 |  * along with ntfs-efi.  If not, see <http://www.gnu.org/licenses/>. */
17 | 
18 | #pragma once
19 | 
20 | #define EFI_QUIBBLE_PROTOCOL_GUID { 0x98BCC8FF, 0xD212, 0x4B09, {0x84, 0x0C, 0x43, 0x19, 0xAD, 0x2E, 0xD3, 0x6A } } // GUID initializer for EFI_QUIBBLE_PROTOCOL
21 | 
22 | typedef struct _EFI_QUIBBLE_PROTOCOL EFI_QUIBBLE_PROTOCOL; // forward declaration so the function typedefs below can take a This pointer
23 | 
24 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_ARC_NAME) ( // writes a narrow-character name into a caller-supplied buffer
25 |     IN EFI_QUIBBLE_PROTOCOL* This,
26 |     OUT char* ArcName,
27 |     IN OUT UINTN* ArcNameLen // IN OUT: presumably buffer size on entry and length used/required on return — confirm
28 | );
29 | 
30 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME) ( // same shape as GetArcName but with a CHAR16 buffer
31 |     IN EFI_QUIBBLE_PROTOCOL* This,
32 |     OUT CHAR16* DriverName,
33 |     IN OUT UINTN* DriverNameLen // NOTE(review): unit (bytes vs CHAR16 units) is not visible here — confirm
34 | );
35 | 
36 | typedef struct _EFI_QUIBBLE_PROTOCOL { // protocol interface: a table of the two function pointers above
37 |     EFI_QUIBBLE_GET_ARC_NAME GetArcName;
38 |     EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME GetWindowsDriverName;
39 | } EFI_QUIBBLE_PROTOCOL;
40 | 
41 | #define EFI_OPEN_SUBVOL_GUID { 0x5861E4D5, 0xC7F1, 0x4932, {0xA0, 0x81, 0xF2, 0x2A, 0xAE, 0x8A, 0x82, 0x98 } } // GUID initializer for EFI_OPEN_SUBVOL_PROTOCOL
42 | 
43 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL EFI_OPEN_SUBVOL_PROTOCOL; // forward declaration for the This parameter below
44 | 
45 | typedef EFI_STATUS (EFIAPI* EFI_OPEN_SUBVOL_FUNC) ( // takes a 64-bit subvolume identifier and returns a file handle through File
46 |     IN EFI_OPEN_SUBVOL_PROTOCOL* This,
47 |     IN UINT64 Subvol,
48 |     OUT EFI_FILE_HANDLE* File
49 | );
50 | 
51 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL { // protocol interface with a single entry point
52 |     EFI_OPEN_SUBVOL_FUNC OpenSubvol;
53 | } EFI_OPEN_SUBVOL_PROTOCOL;
54 | 
55 | #define EFI_QUIBBLE_INFO_PROTOCOL_GUID { 0x89498E00, 0xAE8F, 0x4B23, {0x86, 0x11, 0x71, 0x2A, 0xE1, 0x2F, 0xC8, 0xD9 } } // GUID initializer for EFI_QUIBBLE_INFO_PROTOCOL
56 | 
57 | typedef void (EFIAPI* EFI_QUIBBLE_INFO_PRINT) ( // takes a narrow C string and returns nothing; unlike the other protocols there is no This parameter
58 |     IN const char* s
59 | );
60 | 
61 | typedef struct _EFI_QUIBBLE_INFO_PROTOCOL { // protocol interface with a single Print entry point
62 |     EFI_QUIBBLE_INFO_PRINT Print;
63 | } EFI_QUIBBLE_INFO_PROTOCOL;
64 | 


--------------------------------------------------------------------------------