├── .gitignore ├── COPYING ├── Makefile.am ├── README.md ├── configure.ac └── src ├── aligned_malloc.c ├── common_defs.h ├── decompress_common.c ├── decompress_common.h ├── lzx_common.c ├── lzx_common.h ├── lzx_constants.h ├── lzx_decompress.c ├── plugin.c ├── system_compression.c ├── system_compression.h ├── xpress_constants.h └── xpress_decompress.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.la 2 | *.lo 3 | *.o 4 | /.libs 5 | /Makefile 6 | /Makefile.in 7 | /aclocal.m4 8 | /autom4te.cache/ 9 | /build-aux/ 10 | /config.h 11 | /config.h.in 12 | /config.h.in~ 13 | /config.log 14 | /config.status 15 | /configure 16 | /libtool 17 | /m4/ 18 | /src/.deps/ 19 | /src/.dirstamp 20 | /stamp-h1 21 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 309 | 310 | 311 | Also add information on how to contact you by electronic and paper mail. 312 | 313 | If the program is interactive, make it output a short notice like this 314 | when it starts in an interactive mode: 315 | 316 | Gnomovision version 69, Copyright (C) year name of author 317 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 318 | This is free software, and you are welcome to redistribute it 319 | under certain conditions; type `show c' for details. 320 | 321 | The hypothetical commands `show w' and `show c' should show the appropriate 322 | parts of the General Public License. Of course, the commands you use may 323 | be called something other than `show w' and `show c'; they could even be 324 | mouse-clicks or menu items--whatever suits your program. 325 | 326 | You should also get your employer (if you work as a programmer) or your 327 | school, if any, to sign a "copyright disclaimer" for the program, if 328 | necessary. Here is a sample; alter the names: 329 | 330 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 331 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 332 | 333 | , 1 April 1989 334 | Ty Coon, President of Vice 335 | 336 | This General Public License does not permit incorporating your program into 337 | proprietary programs. If your program is a subroutine library, you may 338 | consider it more useful to permit linking proprietary applications with the 339 | library. If this is what you want to do, use the GNU Library General 340 | Public License instead of this License. 
341 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | EXTRA_DIST = README.md COPYING 4 | 5 | plugindir = $(libdir)/ntfs-3g 6 | 7 | plugin_LTLIBRARIES = ntfs-plugin-80000017.la 8 | 9 | ntfs_plugin_80000017_la_SOURCES = \ 10 | src/aligned_malloc.c \ 11 | src/common_defs.h \ 12 | src/decompress_common.c \ 13 | src/decompress_common.h \ 14 | src/lzx_common.c \ 15 | src/lzx_common.h \ 16 | src/lzx_constants.h \ 17 | src/lzx_decompress.c \ 18 | src/plugin.c \ 19 | src/system_compression.c \ 20 | src/system_compression.h \ 21 | src/xpress_constants.h \ 22 | src/xpress_decompress.c 23 | 24 | ntfs_plugin_80000017_la_LDFLAGS = -module -shared -avoid-version 25 | ntfs_plugin_80000017_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 26 | ntfs_plugin_80000017_la_CFLAGS = $(LIBNTFS_3G_CFLAGS) -std=gnu99 27 | ntfs_plugin_80000017_la_LIBADD = $(LIBNTFS_3G_LIBS) 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | System compression, also known as "Compact OS", is a Windows feature that allows 4 | rarely modified files to be compressed using the XPRESS or LZX compression 5 | formats. It is not built directly into NTFS but rather is implemented using 6 | reparse points. This feature appeared in Windows 10 and it appears that many 7 | Windows 10 systems have been using it by default. 8 | 9 | This repository contains a plugin which enables the NTFS-3G FUSE driver to 10 | transparently read from system-compressed files. It must be built against 11 | NTFS-3G version 2017.3.23 or later, since that was the first stable version to 12 | include support for reparse point plugins. 13 | 14 | Currently, only reading is supported. 
Compressing an existing file may be done 15 | by using the "compact" utility on Windows, with one of the options below 16 | ("xpress4k" is the weakest and fastest, "lzx" is the strongest and slowest): 17 | 18 | /exe:xpress4k 19 | /exe:xpress8k 20 | /exe:xpress16k 21 | /exe:lzx 22 | 23 | # Installation 24 | 25 | First, either download and extract the latest release tarball from 26 | https://github.com/ebiggers/ntfs-3g-system-compression/releases, or clone the 27 | git repository. If you're building from the git repository, you'll need to 28 | generate the `configure` script by running `autoreconf -i`. This requires 29 | autoconf, automake, libtool, and pkg-config. 30 | 31 | The plugin can then be built by running `./configure && make`. The build system 32 | must be able to find the NTFS-3G library and headers as well as the FUSE 33 | headers. Depending on the operating system, this may require that the 34 | "ntfs-3g-dev" and "libfuse-dev" (or similarly named) packages be installed. 35 | pkg-config must also be installed. 36 | 37 | After compiling, run `make install` to install the plugin to the NTFS-3G plugin 38 | directory, which will be a subdirectory "ntfs-3g" of the system library 39 | directory (`$libdir`). An example full path to the installed plugin is 40 | `/usr/lib/ntfs-3g/ntfs-plugin-80000017.so`. It may differ slightly on different 41 | platforms. `make install` will create the plugin directory if it does not 42 | already exist. 43 | 44 | # Implementation note 45 | 46 | The XPRESS and LZX compression formats used in system-compressed files are 47 | identical to the formats used in Windows Imaging (WIM) archives. Therefore, for 48 | the system compression plugin I borrowed the XPRESS and LZX decompressors I had 49 | already written for the wimlib project (https://wimlib.net/). I made some 50 | slight modifications for integration purposes, and I relicensed the files that 51 | used the LGPLv3+ license to GPLv2+ for compatibility with NTFS-3G's license. 
52 | 53 | # Notices 54 | 55 | The NTFS-3G system compression plugin was written by Eric Biggers, with 56 | contributions from Jean-Pierre André. You can contact the author at 57 | ebiggers3@gmail.com. 58 | 59 | This software may be redistributed and/or modified under the terms of the GNU 60 | General Public License as published by the Free Software Foundation, either 61 | version 2 of the License, or (at your option) any later version. There is NO 62 | WARRANTY, to the extent permitted by law. See the COPYING file for details. 63 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([ntfs-3g-system-compression], [1.0], [ebiggers3@gmail.com]) 2 | 3 | AC_CONFIG_SRCDIR([src/plugin.c]) 4 | AC_CONFIG_MACRO_DIR([m4]) 5 | AC_CONFIG_AUX_DIR([build-aux]) 6 | AM_INIT_AUTOMAKE([-Wall -Werror subdir-objects foreign]) 7 | AM_SILENT_RULES([yes]) 8 | m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) 9 | 10 | AC_PROG_CC 11 | AC_C_BIGENDIAN 12 | 13 | LT_INIT([dlopen]) 14 | 15 | AC_CONFIG_HEADERS([config.h]) 16 | AC_CONFIG_FILES([Makefile]) 17 | 18 | AC_CHECK_HEADERS([errno.h \ 19 | limits.h \ 20 | stdarg.h \ 21 | stddef.h \ 22 | stdlib.h \ 23 | string.h \ 24 | sys/types.h \ 25 | time.h]) 26 | 27 | PKG_CHECK_MODULES([LIBNTFS_3G], [libntfs-3g >= 2017.3.23], [], 28 | [AC_MSG_ERROR(["Unable to find libntfs-3g"])]) 29 | PKG_CHECK_MODULES([FUSE], [fuse >= 2.6.0], [], 30 | [AC_MSG_ERROR(["Unable to find fuse"])]) 31 | AC_OUTPUT 32 | -------------------------------------------------------------------------------- /src/aligned_malloc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * aligned_malloc.c - aligned memory allocation 3 | * 4 | * This file provides portable aligned memory allocation functions that only use 5 | * malloc() and free(). This avoids portability problems with posix_memalign(), 6 | * aligned_alloc(), etc.
/*
 * Allocate 'size' bytes whose address is a multiple of 'alignment'.
 *
 * 'alignment' must be a nonzero power of two.  The memory comes from a single
 * malloc() call that is over-sized by (alignment - 1) + sizeof(size_t) bytes.
 * The distance from the start of that malloc() block to the returned pointer is
 * stored in the size_t located immediately before the returned pointer, which
 * is how aligned_free() finds the block to release.
 *
 * Returns the aligned pointer, or NULL if 'alignment' is not a nonzero power of
 * two, if the padded request does not fit in a size_t, or if malloc() fails.
 * The result must be released with aligned_free(), not free().
 */
void *
aligned_malloc(size_t size, size_t alignment)
{
	const uintptr_t mask = alignment - 1;
	const size_t header_size = sizeof(size_t);
	const size_t size_max = (size_t)-1;
	char *raw_ptr;
	char *ptr;

	/* The mask-based rounding below only works for powers of two. */
	if (alignment == 0 || (alignment & mask) != 0)
		return NULL;

	/*
	 * Reject requests for which mask + header_size + size would wrap
	 * around; otherwise malloc() would be asked for a tiny block and the
	 * caller would write past its end.
	 */
	if ((size_t)mask > size_max - header_size ||
	    size > size_max - header_size - (size_t)mask)
		return NULL;

	raw_ptr = malloc((size_t)mask + header_size + size);
	if (!raw_ptr)
		return NULL;

	/* Leave room for the offset header, then round up to the alignment. */
	ptr = raw_ptr + header_size;
	ptr = (char *)(((uintptr_t)ptr + mask) & ~mask);

	/* Remember how far the aligned pointer is from the malloc() block. */
	*((size_t *)ptr - 1) = (size_t)(ptr - raw_ptr);
	return ptr;
}

/*
 * Release memory obtained from aligned_malloc().
 *
 * Reads the offset stored just before 'ptr' to recover the pointer originally
 * returned by malloc(), then frees it.  A NULL 'ptr' is ignored; the check is
 * required here because the offset header must not be read through NULL.
 */
void
aligned_free(void *ptr)
{
	if (ptr)
		free((char *)ptr - *((size_t *)ptr - 1));
}
/*
 * Type of a machine word.  'unsigned long' would be logical, but that is only
 * 32 bits on x86_64 Windows.  The same applies to 'uint_fast32_t'.  So the best
 * we can do without a bunch of #ifdefs appears to be 'size_t'.
 */
typedef size_t machine_word_t;

/* Size of a machine word in bytes and in bits (assuming 8-bit bytes). */
#define WORDBYTES	sizeof(machine_word_t)
#define WORDBITS	(8 * WORDBYTES)

/* ========================================================================== */
/*                       Compiler-specific definitions                        */
/* ========================================================================== */

/*
 * Definitions built on GNU C extensions.  Anything defined here suppresses the
 * corresponding '#ifndef'-guarded portable fallback elsewhere in this header.
 */
#ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */
/* Request inlining regardless of the optimizer's own heuristics. */
#  define forceinline		inline __attribute__((always_inline))
/* Branch-prediction hints; '!!' normalizes the expression to 0 or 1. */
#  define likely(expr)		__builtin_expect(!!(expr), 1)
#  define unlikely(expr)	__builtin_expect(!!(expr), 0)
/* Align the annotated variable or type on an n-byte boundary. */
#  define _aligned_attribute(n)	__attribute__((aligned(n)))
/*
 * Bit scans implemented with compiler builtins: bsr* gives the index of the
 * most significant 1 bit, bsf* the index of the least significant 1 bit.
 * The GCC manual documents these builtins as undefined for an argument of 0.
 */
#  define bsr32(n)		(31 - __builtin_clz(n))
#  define bsr64(n)		(63 - __builtin_clzll(n))
#  define bsf32(n)		__builtin_ctz(n)
#  define bsf64(n)		__builtin_ctzll(n)
/*
 * min()/max() written as statement expressions so that each argument is
 * evaluated exactly once, even if it has side effects.
 */
#  ifndef min
#    define min(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
			(_a < _b) ? _a : _b; })
#  endif
#  ifndef max
#    define max(a, b)  ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
			(_a > _b) ? _a : _b; })
#  endif

/*
 * DEFINE_UNALIGNED_TYPE(type) - define 'struct type_unaligned', a packed
 * struct holding a single 'type', together with load_type_unaligned(p) and
 * store_type_unaligned(val, p), which read and write a 'type' at address 'p'
 * by accessing the member of that packed struct.
 */
#  define DEFINE_UNALIGNED_TYPE(type)				\
struct type##_unaligned {					\
	type v;							\
} __attribute__((packed));					\
								\
static inline type						\
load_##type##_unaligned(const void *p)				\
{								\
	return ((const struct type##_unaligned *)p)->v;		\
}								\
								\
static inline void						\
store_##type##_unaligned(type val, void *p)			\
{								\
	((struct type##_unaligned *)p)->v = val;		\
}

#endif /* __GNUC__ */
/* Declare that the annotated function should always be inlined.  This might be
 * desirable in highly tuned code, e.g. compression codecs */
#ifndef forceinline
#  define forceinline inline
#endif

/* Hint that the expression is usually true */
#ifndef likely
#  define likely(expr)		(expr)
#endif

/* Hint that the expression is usually false */
#ifndef unlikely
#  define unlikely(expr)	(expr)
#endif

/* Declare that the annotated variable, or variables of the annotated type, are
 * to be aligned on n-byte boundaries */
#ifndef _aligned_attribute
#  define _aligned_attribute(n)
#endif

/* min() and max() macros.  NOTE: these portable fallbacks evaluate one of
 * their arguments twice, so the arguments must not have side effects. */
#ifndef min
#  define min(a, b)	((a) < (b) ? (a) : (b))
#endif
#ifndef max
#  define max(a, b)	((a) > (b) ? (a) : (b))
#endif

/* STATIC_ASSERT() - verify the truth of an expression at compilation time.
 * The array length is 1 when 'expr' is true and -1 (a compile error) when it is
 * false. */
#define STATIC_ASSERT(expr)	((void)sizeof(char[1 - 2 * !(expr)]))

/* STATIC_ASSERT_ZERO() - verify the truth of an expression at compilation time
 * and also produce a result of value '0' to be used in constant expressions.
 *
 * Like STATIC_ASSERT(), the array length is 1 when 'expr' is true and -1 when
 * it is false; subtracting 1 from its size yields the promised 0.  A
 * zero-length array is deliberately avoided: it is a GNU extension rather than
 * ISO C, so it may be rejected by exactly the compilers this portable section
 * is meant for. */
#define STATIC_ASSERT_ZERO(expr) ((int)sizeof(char[1 - 2 * !(expr)]) - 1)

/* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
 * can be performed efficiently on the target platform. */
#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
#  define UNALIGNED_ACCESS_IS_FAST 1
#else
#  define UNALIGNED_ACCESS_IS_FAST 0
#endif
It's portable and may be the best we can 119 | * do for a fallback... 120 | */ 121 | #define DEFINE_UNALIGNED_TYPE(type) \ 122 | \ 123 | static forceinline type \ 124 | load_##type##_unaligned(const void *p) \ 125 | { \ 126 | type v; \ 127 | memcpy(&v, p, sizeof(v)); \ 128 | return v; \ 129 | } \ 130 | \ 131 | static forceinline void \ 132 | store_##type##_unaligned(type v, void *p) \ 133 | { \ 134 | memcpy(p, &v, sizeof(v)); \ 135 | } 136 | 137 | #endif /* !DEFINE_UNALIGNED_TYPE */ 138 | 139 | 140 | /* ========================================================================== */ 141 | /* Unaligned memory accesses */ 142 | /* ========================================================================== */ 143 | 144 | DEFINE_UNALIGNED_TYPE(le16); 145 | DEFINE_UNALIGNED_TYPE(le32); 146 | DEFINE_UNALIGNED_TYPE(machine_word_t); 147 | 148 | #define load_word_unaligned load_machine_word_t_unaligned 149 | #define store_word_unaligned store_machine_word_t_unaligned 150 | 151 | static inline u16 152 | get_unaligned_le16(const u8 *p) 153 | { 154 | if (UNALIGNED_ACCESS_IS_FAST) 155 | return le16_to_cpu(load_le16_unaligned(p)); 156 | else 157 | return ((u16)p[1] << 8) | p[0]; 158 | } 159 | 160 | static inline u32 161 | get_unaligned_le32(const u8 *p) 162 | { 163 | if (UNALIGNED_ACCESS_IS_FAST) 164 | return le32_to_cpu(load_le32_unaligned(p)); 165 | else 166 | return ((u32)p[3] << 24) | ((u32)p[2] << 16) | 167 | ((u32)p[1] << 8) | p[0]; 168 | } 169 | 170 | static inline void 171 | put_unaligned_le16(u16 v, u8 *p) 172 | { 173 | if (UNALIGNED_ACCESS_IS_FAST) { 174 | store_le16_unaligned(cpu_to_le16(v), p); 175 | } else { 176 | p[0] = (u8)(v >> 0); 177 | p[1] = (u8)(v >> 8); 178 | } 179 | } 180 | 181 | static inline void 182 | put_unaligned_le32(u32 v, u8 *p) 183 | { 184 | if (UNALIGNED_ACCESS_IS_FAST) { 185 | store_le32_unaligned(cpu_to_le32(v), p); 186 | } else { 187 | p[0] = (u8)(v >> 0); 188 | p[1] = (u8)(v >> 8); 189 | p[2] = (u8)(v >> 16); 190 | p[3] = (u8)(v >> 24); 191 | } 
192 | } 193 | 194 | /* ========================================================================== */ 195 | /* Bit scan functions */ 196 | /* ========================================================================== */ 197 | 198 | /* 199 | * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least 200 | * significant end) of the *most* significant 1 bit in the input value. The 201 | * input value must be nonzero! 202 | */ 203 | 204 | #ifndef bsr32 205 | static forceinline unsigned 206 | bsr32(u32 v) 207 | { 208 | unsigned bit = 0; 209 | while ((v >>= 1) != 0) 210 | bit++; 211 | return bit; 212 | } 213 | #endif 214 | 215 | #ifndef bsr64 216 | static forceinline unsigned 217 | bsr64(u64 v) 218 | { 219 | unsigned bit = 0; 220 | while ((v >>= 1) != 0) 221 | bit++; 222 | return bit; 223 | } 224 | #endif 225 | 226 | static forceinline unsigned 227 | bsrw(machine_word_t v) 228 | { 229 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 230 | if (WORDBITS == 32) 231 | return bsr32(v); 232 | else 233 | return bsr64(v); 234 | } 235 | 236 | /* 237 | * Bit Scan Forward (BSF) - find the 0-based index (relative to the least 238 | * significant end) of the *least* significant 1 bit in the input value. The 239 | * input value must be nonzero! 240 | */ 241 | 242 | #ifndef bsf32 243 | static forceinline unsigned 244 | bsf32(u32 v) 245 | { 246 | unsigned bit; 247 | for (bit = 0; !(v & 1); bit++, v >>= 1) 248 | ; 249 | return bit; 250 | } 251 | #endif 252 | 253 | #ifndef bsf64 254 | static forceinline unsigned 255 | bsf64(u64 v) 256 | { 257 | unsigned bit; 258 | for (bit = 0; !(v & 1); bit++, v >>= 1) 259 | ; 260 | return bit; 261 | } 262 | #endif 263 | 264 | static forceinline unsigned 265 | bsfw(machine_word_t v) 266 | { 267 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 268 | if (WORDBITS == 32) 269 | return bsf32(v); 270 | else 271 | return bsf64(v); 272 | } 273 | 274 | /* Return the log base 2 of 'n', rounded up to the nearest integer. 
*/ 275 | static forceinline unsigned 276 | ilog2_ceil(size_t n) 277 | { 278 | if (n <= 1) 279 | return 0; 280 | return 1 + bsrw(n - 1); 281 | } 282 | 283 | /* ========================================================================== */ 284 | /* Aligned memory allocation */ 285 | /* ========================================================================== */ 286 | 287 | extern void *aligned_malloc(size_t size, size_t alignment); 288 | extern void aligned_free(void *ptr); 289 | 290 | #endif /* _COMMON_DEFS_H */ 291 | -------------------------------------------------------------------------------- /src/decompress_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * decompress_common.c 3 | * 4 | * Code for decompression shared among multiple compression formats. 5 | * 6 | * Copyright 2022 Eric Biggers 7 | * 8 | * Permission is hereby granted, free of charge, to any person 9 | * obtaining a copy of this software and associated documentation 10 | * files (the "Software"), to deal in the Software without 11 | * restriction, including without limitation the rights to use, 12 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following 15 | * conditions: 16 | * 17 | * The above copyright notice and this permission notice shall be 18 | * included in all copies or substantial portions of the Software. 19 | * 20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23 | * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27 | * OTHER DEALINGS IN THE SOFTWARE. 28 | */ 29 | 30 | #ifdef HAVE_CONFIG_H 31 | # include "config.h" 32 | #endif 33 | 34 | #include 35 | 36 | #ifdef __SSE2__ 37 | # include 38 | #endif 39 | 40 | #include "decompress_common.h" 41 | 42 | /* 43 | * make_huffman_decode_table() - 44 | * 45 | * Given an alphabet of symbols and the length of each symbol's codeword in a 46 | * canonical prefix code, build a table for quickly decoding symbols that were 47 | * encoded with that code. 48 | * 49 | * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols 50 | * such that no whole codeword is a prefix of any other. A prefix code might be 51 | * a _Huffman code_, which means that it is an optimum prefix code for a given 52 | * list of symbol frequencies and was generated by the Huffman algorithm. 53 | * Although the prefix codes processed here will ordinarily be "Huffman codes", 54 | * strictly speaking the decoder cannot know whether a given code was actually 55 | * generated by the Huffman algorithm or not. 56 | * 57 | * A prefix code is _canonical_ if and only if a longer codeword never 58 | * lexicographically precedes a shorter codeword, and the lexicographic ordering 59 | * of codewords of equal length is the same as the lexicographic ordering of the 60 | * corresponding symbols. The advantage of using a canonical prefix code is 61 | * that the codewords can be reconstructed from only the symbol => codeword 62 | * length mapping. This eliminates the need to transmit the codewords 63 | * explicitly. Instead, they can be enumerated in lexicographic order after 64 | * sorting the symbols primarily by increasing codeword length and secondarily 65 | * by increasing symbol value. 
66 | * 67 | * However, the decoder's real goal is to decode symbols with the code, not just 68 | * generate the list of codewords. Consequently, this function directly builds 69 | * a table for efficiently decoding symbols using the code. The basic idea is 70 | * that given the next 'max_codeword_len' bits of input, the decoder can look up 71 | * the next decoded symbol by indexing a table containing '2^max_codeword_len' 72 | * entries. A codeword with length 'max_codeword_len' will have exactly one 73 | * entry in this table, whereas a codeword shorter than 'max_codeword_len' will 74 | * have multiple entries in this table. Precisely, a codeword of length 'n' 75 | * will have '2^(max_codeword_len - n)' entries. The index of each such entry, 76 | * considered as a bitstring of length 'max_codeword_len', will contain the 77 | * corresponding codeword as a prefix. 78 | * 79 | * That's the basic idea, but we extend it in two ways: 80 | * 81 | * - Often the maximum codeword length is too long for it to be efficient to 82 | * build the full decode table whenever a new code is used. Instead, we build 83 | * a "root" table using only '2^table_bits' entries, where 'table_bits <= 84 | * max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a 85 | * symbol directly (for codewords not longer than 'table_bits'), or the index 86 | * of a subtable which must be indexed with additional bits of input to fully 87 | * decode the symbol (for codewords longer than 'table_bits'). 88 | * 89 | * - Whenever the decoder decodes a symbol, it needs to know the codeword length 90 | * so that it can remove the appropriate number of input bits. The obvious 91 | * solution would be to simply retain the codeword lengths array and use the 92 | * decoded symbol as an index into it. However, that would require two array 93 | * accesses when decoding each symbol. Our strategy is to instead store the 94 | * codeword length directly in the decode table entry along with the symbol. 
95 | * 96 | * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table 97 | * entries, and see read_huffsym() for full details on how symbols are decoded. 98 | * 99 | * @decode_table: 100 | * The array in which to build the decode table. This must have been 101 | * declared by the DECODE_TABLE() macro. This may alias @lens, since all 102 | * @lens are consumed before the decode table is written to. 103 | * 104 | * @num_syms: 105 | * The number of symbols in the alphabet. 106 | * 107 | * @table_bits: 108 | * The log base 2 of the number of entries in the root table. 109 | * 110 | * @lens: 111 | * An array of length @num_syms, indexed by symbol, that gives the length 112 | * of the codeword, in bits, for each symbol. The length can be 0, which 113 | * means that the symbol does not have a codeword assigned. In addition, 114 | * @lens may alias @decode_table, as noted above. 115 | * 116 | * @max_codeword_len: 117 | * The maximum codeword length permitted for this code. All entries in 118 | * 'lens' must be less than or equal to this value. 119 | * 120 | * @working_space 121 | * A temporary array that was declared with DECODE_TABLE_WORKING_SPACE(). 122 | * 123 | * Returns 0 on success, or -1 if the lengths do not form a valid prefix code. 124 | */ 125 | int 126 | make_huffman_decode_table(u16 decode_table[], unsigned num_syms, 127 | unsigned table_bits, const u8 lens[], 128 | unsigned max_codeword_len, u16 working_space[]) 129 | { 130 | u16 * const len_counts = &working_space[0]; 131 | u16 * const offsets = &working_space[1 * (max_codeword_len + 1)]; 132 | u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; 133 | s32 remainder = 1; 134 | void *entry_ptr = decode_table; 135 | unsigned codeword_len = 1; 136 | unsigned sym_idx; 137 | unsigned codeword; 138 | unsigned subtable_pos; 139 | unsigned subtable_bits; 140 | unsigned subtable_prefix; 141 | 142 | /* Count how many codewords have each length, including 0. 
*/ 143 | for (unsigned len = 0; len <= max_codeword_len; len++) 144 | len_counts[len] = 0; 145 | for (unsigned sym = 0; sym < num_syms; sym++) 146 | len_counts[lens[sym]]++; 147 | 148 | /* It is already guaranteed that all lengths are <= max_codeword_len, 149 | * but it cannot be assumed they form a complete prefix code. A 150 | * codeword of length n should require a proportion of the codespace 151 | * equaling (1/2)^n. The code is complete if and only if, by this 152 | * measure, the codespace is exactly filled by the lengths. */ 153 | for (unsigned len = 1; len <= max_codeword_len; len++) { 154 | remainder = (remainder << 1) - len_counts[len]; 155 | /* Do the lengths overflow the codespace? */ 156 | if (unlikely(remainder < 0)) 157 | return -1; 158 | } 159 | 160 | if (remainder != 0) { 161 | /* The lengths do not fill the codespace; that is, they form an 162 | * incomplete code. This is permitted only if the code is empty 163 | * (contains no symbols). */ 164 | 165 | if (unlikely(remainder != 1U << max_codeword_len)) 166 | return -1; 167 | 168 | /* The code is empty. When processing a well-formed stream, the 169 | * decode table need not be initialized in this case. However, 170 | * we cannot assume the stream is well-formed, so we must 171 | * initialize the decode table anyway. Setting all entries to 0 172 | * makes the decode table always produce symbol '0' without 173 | * consuming any bits, which is good enough. */ 174 | memset(decode_table, 0, sizeof(decode_table[0]) << table_bits); 175 | return 0; 176 | } 177 | 178 | /* Sort the symbols primarily by increasing codeword length and 179 | * secondarily by increasing symbol value. */ 180 | 181 | /* Initialize 'offsets' so that 'offsets[len]' is the number of 182 | * codewords shorter than 'len' bits, including length 0. 
*/ 183 | offsets[0] = 0; 184 | for (unsigned len = 0; len < max_codeword_len; len++) 185 | offsets[len + 1] = offsets[len] + len_counts[len]; 186 | 187 | /* Use the 'offsets' array to sort the symbols. */ 188 | for (unsigned sym = 0; sym < num_syms; sym++) 189 | sorted_syms[offsets[lens[sym]]++] = sym; 190 | 191 | /* 192 | * Fill the root table entries for codewords no longer than table_bits. 193 | * 194 | * The table will start with entries for the shortest codeword(s), which 195 | * will have the most entries. From there, the number of entries per 196 | * codeword will decrease. As an optimization, we may begin filling 197 | * entries with SSE2 vector accesses (8 entries/store), then change to 198 | * word accesses (2 or 4 entries/store), then change to 16-bit accesses 199 | * (1 entry/store). 200 | */ 201 | sym_idx = offsets[0]; 202 | 203 | #ifdef __SSE2__ 204 | /* Fill entries one 128-bit vector (8 entries) at a time. */ 205 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 206 | (sizeof(__m128i) / sizeof(decode_table[0])); 207 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 208 | { 209 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 210 | for (; sym_idx < end_sym_idx; sym_idx++) { 211 | /* Note: unlike in the "word" version below, the __m128i 212 | * type already has __attribute__((may_alias)), so using 213 | * it to access an array of u16 will not violate strict 214 | * aliasing. */ 215 | __m128i v = _mm_set1_epi16( 216 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 217 | codeword_len)); 218 | unsigned n = stores_per_loop; 219 | do { 220 | *(__m128i *)entry_ptr = v; 221 | entry_ptr += sizeof(v); 222 | } while (--n); 223 | } 224 | } 225 | #endif /* __SSE2__ */ 226 | 227 | #ifdef __GNUC__ 228 | /* Fill entries one word (2 or 4 entries) at a time. 
*/ 229 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) / 230 | (WORDBYTES / sizeof(decode_table[0])); 231 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 232 | { 233 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 234 | for (; sym_idx < end_sym_idx; sym_idx++) { 235 | 236 | /* Accessing the array of u16 as u32 or u64 would 237 | * violate strict aliasing and would require compiling 238 | * the code with -fno-strict-aliasing to guarantee 239 | * correctness. To work around this problem, use the 240 | * gcc 'may_alias' extension. */ 241 | typedef machine_word_t 242 | __attribute__((may_alias)) aliased_word_t; 243 | aliased_word_t v = repeat_u16( 244 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 245 | codeword_len)); 246 | unsigned n = stores_per_loop; 247 | do { 248 | *(aliased_word_t *)entry_ptr = v; 249 | entry_ptr += sizeof(v); 250 | } while (--n); 251 | } 252 | } 253 | #endif /* __GNUC__ */ 254 | 255 | /* Fill entries one at a time. */ 256 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)); 257 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) 258 | { 259 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len]; 260 | for (; sym_idx < end_sym_idx; sym_idx++) { 261 | u16 v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 262 | codeword_len); 263 | unsigned n = stores_per_loop; 264 | do { 265 | *(u16 *)entry_ptr = v; 266 | entry_ptr += sizeof(v); 267 | } while (--n); 268 | } 269 | } 270 | 271 | /* If all symbols were processed, then no subtables are required. */ 272 | if (sym_idx == num_syms) 273 | return 0; 274 | 275 | /* At least one subtable is required. Process the remaining symbols. 
*/ 276 | codeword = ((u16 *)entry_ptr - decode_table) << 1; 277 | subtable_pos = 1U << table_bits; 278 | subtable_bits = table_bits; 279 | subtable_prefix = -1; 280 | do { 281 | while (len_counts[codeword_len] == 0) { 282 | codeword_len++; 283 | codeword <<= 1; 284 | } 285 | 286 | unsigned prefix = codeword >> (codeword_len - table_bits); 287 | 288 | /* Start a new subtable if the first 'table_bits' bits of the 289 | * codeword don't match the prefix for the previous subtable, or 290 | * if this will be the first subtable. */ 291 | if (prefix != subtable_prefix) { 292 | 293 | subtable_prefix = prefix; 294 | 295 | /* 296 | * Calculate the subtable length. If the codeword 297 | * length exceeds 'table_bits' by n, then the subtable 298 | * needs at least 2^n entries. But it may need more; if 299 | * there are fewer than 2^n codewords of length 300 | * 'table_bits + n' remaining, then n will need to be 301 | * incremented to bring in longer codewords until the 302 | * subtable can be filled completely. Note that it 303 | * always will, eventually, be possible to fill the 304 | * subtable, since it was previously verified that the 305 | * code is complete. 306 | */ 307 | subtable_bits = codeword_len - table_bits; 308 | remainder = (s32)1 << subtable_bits; 309 | for (;;) { 310 | remainder -= len_counts[table_bits + 311 | subtable_bits]; 312 | if (remainder <= 0) 313 | break; 314 | subtable_bits++; 315 | remainder <<= 1; 316 | } 317 | 318 | /* Create the entry that points from the root table to 319 | * the subtable. This entry contains the index of the 320 | * start of the subtable and the number of bits with 321 | * which the subtable is indexed (the log base 2 of the 322 | * number of entries it contains). */ 323 | decode_table[subtable_prefix] = 324 | MAKE_DECODE_TABLE_ENTRY(subtable_pos, 325 | subtable_bits); 326 | } 327 | 328 | /* Fill the subtable entries for this symbol. 
*/ 329 | u16 entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx], 330 | codeword_len - table_bits); 331 | unsigned n = 1U << (subtable_bits - (codeword_len - 332 | table_bits)); 333 | do { 334 | decode_table[subtable_pos++] = entry; 335 | } while (--n); 336 | 337 | len_counts[codeword_len]--; 338 | codeword++; 339 | } while (++sym_idx < num_syms); 340 | 341 | return 0; 342 | } 343 | -------------------------------------------------------------------------------- /src/decompress_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * decompress_common.h 3 | * 4 | * Header for decompression code shared by multiple compression formats. 5 | * 6 | * Copyright 2022 Eric Biggers 7 | * 8 | * Permission is hereby granted, free of charge, to any person 9 | * obtaining a copy of this software and associated documentation 10 | * files (the "Software"), to deal in the Software without 11 | * restriction, including without limitation the rights to use, 12 | * copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | * copies of the Software, and to permit persons to whom the 14 | * Software is furnished to do so, subject to the following 15 | * conditions: 16 | * 17 | * The above copyright notice and this permission notice shall be 18 | * included in all copies or substantial portions of the Software. 19 | * 20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 21 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 22 | * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 23 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 | * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 25 | * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 27 | * OTHER DEALINGS IN THE SOFTWARE. 
28 | */ 29 | 30 | #ifndef _DECOMPRESS_COMMON_H 31 | #define _DECOMPRESS_COMMON_H 32 | 33 | #include 34 | #include 35 | 36 | #include "common_defs.h" 37 | 38 | /******************************************************************************/ 39 | /* Input bitstream for XPRESS and LZX */ 40 | /*----------------------------------------------------------------------------*/ 41 | 42 | /* Structure that encapsulates a block of in-memory data being interpreted as a 43 | * stream of bits, optionally with interwoven literal bytes. Bits are assumed 44 | * to be stored in little endian 16-bit coding units, with the bits ordered high 45 | * to low. */ 46 | struct input_bitstream { 47 | 48 | /* Bits that have been read from the input buffer. The bits are 49 | * left-justified; the next bit is always bit 31. */ 50 | u32 bitbuf; 51 | 52 | /* Number of bits currently held in @bitbuf. */ 53 | u32 bitsleft; 54 | 55 | /* Pointer to the next byte to be retrieved from the input buffer. */ 56 | const u8 *next; 57 | 58 | /* Pointer past the end of the input buffer. */ 59 | const u8 *end; 60 | }; 61 | 62 | /* Initialize a bitstream to read from the specified input buffer. */ 63 | static forceinline void 64 | init_input_bitstream(struct input_bitstream *is, const void *buffer, u32 size) 65 | { 66 | is->bitbuf = 0; 67 | is->bitsleft = 0; 68 | is->next = buffer; 69 | is->end = is->next + size; 70 | } 71 | 72 | /* Note: for performance reasons, the following methods don't return error codes 73 | * to the caller if the input buffer is overrun. Instead, they just assume that 74 | * all overrun data is zeroes. This has no effect on well-formed compressed 75 | * data. The only disadvantage is that bad compressed data may go undetected, 76 | * but even this is irrelevant if higher level code checksums the uncompressed 77 | * data anyway. */ 78 | 79 | /* Ensure the bit buffer variable for the bitstream contains at least @num_bits 80 | * bits. 
Following this, bitstream_peek_bits() and/or bitstream_remove_bits() 81 | * may be called on the bitstream to peek or remove up to @num_bits bits. */ 82 | static forceinline void 83 | bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits) 84 | { 85 | /* This currently works for at most 17 bits. */ 86 | 87 | if (is->bitsleft >= num_bits) 88 | return; 89 | 90 | if (unlikely(is->end - is->next < 2)) 91 | goto overflow; 92 | 93 | is->bitbuf |= (u32)get_unaligned_le16(is->next) << (16 - is->bitsleft); 94 | is->next += 2; 95 | is->bitsleft += 16; 96 | 97 | if (unlikely(num_bits == 17 && is->bitsleft == 16)) { 98 | if (unlikely(is->end - is->next < 2)) 99 | goto overflow; 100 | 101 | is->bitbuf |= (u32)get_unaligned_le16(is->next); 102 | is->next += 2; 103 | is->bitsleft = 32; 104 | } 105 | 106 | return; 107 | 108 | overflow: 109 | is->bitsleft = 32; 110 | } 111 | 112 | /* Return the next @num_bits bits from the bitstream, without removing them. 113 | * There must be at least @num_bits remaining in the buffer variable, from a 114 | * previous call to bitstream_ensure_bits(). */ 115 | static forceinline u32 116 | bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits) 117 | { 118 | return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1); 119 | } 120 | 121 | /* Remove @num_bits from the bitstream. There must be at least @num_bits 122 | * remaining in the buffer variable, from a previous call to 123 | * bitstream_ensure_bits(). */ 124 | static forceinline void 125 | bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits) 126 | { 127 | is->bitbuf <<= num_bits; 128 | is->bitsleft -= num_bits; 129 | } 130 | 131 | /* Remove and return @num_bits bits from the bitstream. There must be at least 132 | * @num_bits remaining in the buffer variable, from a previous call to 133 | * bitstream_ensure_bits(). 
*/ 134 | static forceinline u32 135 | bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits) 136 | { 137 | u32 bits = bitstream_peek_bits(is, num_bits); 138 | bitstream_remove_bits(is, num_bits); 139 | return bits; 140 | } 141 | 142 | /* Read and return the next @num_bits bits from the bitstream. */ 143 | static forceinline u32 144 | bitstream_read_bits(struct input_bitstream *is, unsigned num_bits) 145 | { 146 | bitstream_ensure_bits(is, num_bits); 147 | return bitstream_pop_bits(is, num_bits); 148 | } 149 | 150 | /* Read and return the next literal byte embedded in the bitstream. */ 151 | static forceinline u8 152 | bitstream_read_byte(struct input_bitstream *is) 153 | { 154 | if (unlikely(is->end == is->next)) 155 | return 0; 156 | return *is->next++; 157 | } 158 | 159 | /* Read and return the next 16-bit integer embedded in the bitstream. */ 160 | static forceinline u16 161 | bitstream_read_u16(struct input_bitstream *is) 162 | { 163 | u16 v; 164 | 165 | if (unlikely(is->end - is->next < 2)) 166 | return 0; 167 | v = get_unaligned_le16(is->next); 168 | is->next += 2; 169 | return v; 170 | } 171 | 172 | /* Read and return the next 32-bit integer embedded in the bitstream. */ 173 | static forceinline u32 174 | bitstream_read_u32(struct input_bitstream *is) 175 | { 176 | u32 v; 177 | 178 | if (unlikely(is->end - is->next < 4)) 179 | return 0; 180 | v = get_unaligned_le32(is->next); 181 | is->next += 4; 182 | return v; 183 | } 184 | 185 | /* Read into @dst_buffer an array of literal bytes embedded in the bitstream. 186 | * Return 0 if there were enough bytes remaining in the input, otherwise -1. */ 187 | static forceinline int 188 | bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count) 189 | { 190 | if (unlikely(is->end - is->next < count)) 191 | return -1; 192 | memcpy(dst_buffer, is->next, count); 193 | is->next += count; 194 | return 0; 195 | } 196 | 197 | /* Align the input bitstream on a coding-unit boundary. 
*/ 198 | static forceinline void 199 | bitstream_align(struct input_bitstream *is) 200 | { 201 | is->bitsleft = 0; 202 | is->bitbuf = 0; 203 | } 204 | 205 | /******************************************************************************/ 206 | /* Huffman decoding */ 207 | /*----------------------------------------------------------------------------*/ 208 | 209 | /* 210 | * Required alignment for the Huffman decode tables. We require this alignment 211 | * so that we can fill the entries with vector or word instructions and not have 212 | * to deal with misaligned buffers. 213 | */ 214 | #define DECODE_TABLE_ALIGNMENT 16 215 | 216 | /* 217 | * Each decode table entry is 16 bits divided into two fields: 'symbol' (high 12 218 | * bits) and 'length' (low 4 bits). The precise meaning of these fields depends 219 | * on the type of entry: 220 | * 221 | * Root table entries which are *not* subtable pointers: 222 | * symbol: symbol to decode 223 | * length: codeword length in bits 224 | * 225 | * Root table entries which are subtable pointers: 226 | * symbol: index of start of subtable 227 | * length: number of bits with which the subtable is indexed 228 | * 229 | * Subtable entries: 230 | * symbol: symbol to decode 231 | * length: codeword length in bits, minus the number of bits with which the 232 | * root table is indexed 233 | */ 234 | #define DECODE_TABLE_SYMBOL_SHIFT 4 235 | #define DECODE_TABLE_MAX_SYMBOL ((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1) 236 | #define DECODE_TABLE_MAX_LENGTH ((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1) 237 | #define DECODE_TABLE_LENGTH_MASK DECODE_TABLE_MAX_LENGTH 238 | #define MAKE_DECODE_TABLE_ENTRY(symbol, length) \ 239 | (((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length)) 240 | 241 | /* 242 | * Read and return the next Huffman-encoded symbol from the given bitstream 243 | * using the given decode table. 244 | * 245 | * If the input data is exhausted, then the Huffman symbol will be decoded as if 246 | * the missing bits were all zeroes. 
247 | * 248 | * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in 249 | * lzms_decompress.c; keep them in sync! 250 | */ 251 | static forceinline unsigned 252 | read_huffsym(struct input_bitstream *is, const u16 decode_table[], 253 | unsigned table_bits, unsigned max_codeword_len) 254 | { 255 | unsigned entry; 256 | unsigned symbol; 257 | unsigned length; 258 | 259 | /* Preload the bitbuffer with 'max_codeword_len' bits so that we're 260 | * guaranteed to be able to fully decode a codeword. */ 261 | bitstream_ensure_bits(is, max_codeword_len); 262 | 263 | /* Index the root table by the next 'table_bits' bits of input. */ 264 | entry = decode_table[bitstream_peek_bits(is, table_bits)]; 265 | 266 | /* Extract the "symbol" and "length" from the entry. */ 267 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; 268 | length = entry & DECODE_TABLE_LENGTH_MASK; 269 | 270 | /* If the root table is indexed by the full 'max_codeword_len' bits, 271 | * then there cannot be any subtables, and this will be known at compile 272 | * time. Otherwise, we must check whether the decoded symbol is really 273 | * a subtable pointer. If so, we must discard the bits with which the 274 | * root table was indexed, then index the subtable by the next 'length' 275 | * bits of input to get the real entry. */ 276 | if (max_codeword_len > table_bits && 277 | entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT))) 278 | { 279 | /* Subtable required */ 280 | bitstream_remove_bits(is, table_bits); 281 | entry = decode_table[symbol + bitstream_peek_bits(is, length)]; 282 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT; 283 | length = entry & DECODE_TABLE_LENGTH_MASK; 284 | } 285 | 286 | /* Discard the bits (or the remaining bits, if a subtable was required) 287 | * of the codeword. */ 288 | bitstream_remove_bits(is, length); 289 | 290 | /* Return the decoded symbol. 
*/ 291 | return symbol; 292 | } 293 | 294 | /* 295 | * The DECODE_TABLE_ENOUGH() macro evaluates to the maximum number of decode 296 | * table entries, including all subtable entries, that may be required for 297 | * decoding a given Huffman code. This depends on three parameters: 298 | * 299 | * num_syms: the maximum number of symbols in the code 300 | * table_bits: the number of bits with which the root table will be indexed 301 | * max_codeword_len: the maximum allowed codeword length in the code 302 | * 303 | * Given these parameters, the utility program 'enough' from zlib, when passed 304 | * the three arguments 'num_syms', 'table_bits', and 'max_codeword_len', will 305 | * compute the maximum number of entries required. This has already been done 306 | * for the combinations we need and incorporated into the macro below so that 307 | * the mapping can be done at compilation time. If an unknown combination is 308 | * used, then a compilation error will result. To fix this, use 'enough' to 309 | * find the missing value and add it below. If that still doesn't fix the 310 | * compilation error, then most likely a constraint would be violated by the 311 | * requested parameters, so they cannot be used, at least without other changes 312 | * to the decode table --- see DECODE_TABLE_SIZE(). 313 | */ 314 | #define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \ 315 | ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 15) ? 128 : \ 316 | ((num_syms) == 8 && (table_bits) == 5 && (max_codeword_len) == 7) ? 36 : \ 317 | ((num_syms) == 8 && (table_bits) == 6 && (max_codeword_len) == 7) ? 66 : \ 318 | ((num_syms) == 8 && (table_bits) == 7 && (max_codeword_len) == 7) ? 128 : \ 319 | ((num_syms) == 20 && (table_bits) == 5 && (max_codeword_len) == 15) ? 1062 : \ 320 | ((num_syms) == 20 && (table_bits) == 6 && (max_codeword_len) == 15) ? 582 : \ 321 | ((num_syms) == 20 && (table_bits) == 7 && (max_codeword_len) == 15) ? 
390 : \ 322 | ((num_syms) == 54 && (table_bits) == 9 && (max_codeword_len) == 15) ? 618 : \ 323 | ((num_syms) == 54 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1098 : \ 324 | ((num_syms) == 249 && (table_bits) == 9 && (max_codeword_len) == 16) ? 878 : \ 325 | ((num_syms) == 249 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1326 : \ 326 | ((num_syms) == 249 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2318 : \ 327 | ((num_syms) == 256 && (table_bits) == 9 && (max_codeword_len) == 15) ? 822 : \ 328 | ((num_syms) == 256 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1302 : \ 329 | ((num_syms) == 256 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2310 : \ 330 | ((num_syms) == 512 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1558 : \ 331 | ((num_syms) == 512 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2566 : \ 332 | ((num_syms) == 512 && (table_bits) == 12 && (max_codeword_len) == 15) ? 4606 : \ 333 | ((num_syms) == 656 && (table_bits) == 10 && (max_codeword_len) == 16) ? 1734 : \ 334 | ((num_syms) == 656 && (table_bits) == 11 && (max_codeword_len) == 16) ? 2726 : \ 335 | ((num_syms) == 656 && (table_bits) == 12 && (max_codeword_len) == 16) ? 4758 : \ 336 | ((num_syms) == 799 && (table_bits) == 9 && (max_codeword_len) == 15) ? 1366 : \ 337 | ((num_syms) == 799 && (table_bits) == 10 && (max_codeword_len) == 15) ? 1846 : \ 338 | ((num_syms) == 799 && (table_bits) == 11 && (max_codeword_len) == 15) ? 2854 : \ 339 | -1) 340 | 341 | /* Wrapper around DECODE_TABLE_ENOUGH() that does additional compile-time 342 | * validation. */ 343 | #define DECODE_TABLE_SIZE(num_syms, table_bits, max_codeword_len) ( \ 344 | \ 345 | /* All values must be positive. */ \ 346 | STATIC_ASSERT_ZERO((num_syms) > 0) + \ 347 | STATIC_ASSERT_ZERO((table_bits) > 0) + \ 348 | STATIC_ASSERT_ZERO((max_codeword_len) > 0) + \ 349 | \ 350 | /* There cannot be more symbols than possible codewords. 
*/ \ 351 | STATIC_ASSERT_ZERO((num_syms) <= 1U << (max_codeword_len)) + \ 352 | \ 353 | /* There is no reason for the root table to be indexed with 354 | * more bits than the maximum codeword length. */ \ 355 | STATIC_ASSERT_ZERO((table_bits) <= (max_codeword_len)) + \ 356 | \ 357 | /* The maximum symbol value must fit in the 'symbol' field. */ \ 358 | STATIC_ASSERT_ZERO((num_syms) - 1 <= DECODE_TABLE_MAX_SYMBOL) + \ 359 | \ 360 | /* The maximum codeword length in the root table must fit in 361 | * the 'length' field. */ \ 362 | STATIC_ASSERT_ZERO((table_bits) <= DECODE_TABLE_MAX_LENGTH) + \ 363 | \ 364 | /* The maximum codeword length in a subtable must fit in the 365 | * 'length' field. */ \ 366 | STATIC_ASSERT_ZERO((max_codeword_len) - (table_bits) <= \ 367 | DECODE_TABLE_MAX_LENGTH) + \ 368 | \ 369 | /* The minimum subtable index must be greater than the maximum 370 | * symbol value. If this were not the case, then there would 371 | * be no way to tell whether a given root table entry is a 372 | * "subtable pointer" or not. (An alternate solution would be 373 | * to reserve a flag bit specifically for this purpose.) */ \ 374 | STATIC_ASSERT_ZERO((1U << table_bits) > (num_syms) - 1) + \ 375 | \ 376 | /* The needed 'enough' value must have been defined. */ \ 377 | STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \ 378 | (num_syms), (table_bits), \ 379 | (max_codeword_len)) > 0) + \ 380 | \ 381 | /* The maximum subtable index must fit in the 'symbol' field. */\ 382 | STATIC_ASSERT_ZERO(DECODE_TABLE_ENOUGH( \ 383 | (num_syms), (table_bits), \ 384 | (max_codeword_len)) - 1 <= \ 385 | DECODE_TABLE_MAX_SYMBOL) + \ 386 | \ 387 | /* Finally, make the macro evaluate to the needed maximum 388 | * number of decode table entries. */ \ 389 | DECODE_TABLE_ENOUGH((num_syms), (table_bits), \ 390 | (max_codeword_len)) \ 391 | ) 392 | 393 | /* 394 | * Declare the decode table for a Huffman code, given several compile-time 395 | * constants that describe the code. 
See DECODE_TABLE_ENOUGH() for details. 396 | * 397 | * Decode tables must be aligned to a DECODE_TABLE_ALIGNMENT-byte boundary. 398 | * This implies that if a decode table is nested inside a dynamically allocated 399 | * structure, then the outer structure must be allocated on a 400 | * DECODE_TABLE_ALIGNMENT-byte aligned boundary as well. 401 | */ 402 | #define DECODE_TABLE(name, num_syms, table_bits, max_codeword_len) \ 403 | u16 name[DECODE_TABLE_SIZE((num_syms), (table_bits), \ 404 | (max_codeword_len))] \ 405 | _aligned_attribute(DECODE_TABLE_ALIGNMENT) 406 | 407 | /* 408 | * Declare the temporary "working_space" array needed for building the decode 409 | * table for a Huffman code. 410 | */ 411 | #define DECODE_TABLE_WORKING_SPACE(name, num_syms, max_codeword_len) \ 412 | u16 name[2 * ((max_codeword_len) + 1) + (num_syms)]; 413 | 414 | extern int 415 | make_huffman_decode_table(u16 decode_table[], unsigned num_syms, 416 | unsigned table_bits, const u8 lens[], 417 | unsigned max_codeword_len, u16 working_space[]); 418 | 419 | /******************************************************************************/ 420 | /* LZ match copying */ 421 | /*----------------------------------------------------------------------------*/ 422 | 423 | static forceinline void 424 | copy_word_unaligned(const void *src, void *dst) 425 | { 426 | store_word_unaligned(load_word_unaligned(src), dst); 427 | } 428 | 429 | static forceinline machine_word_t 430 | repeat_u16(u16 b) 431 | { 432 | machine_word_t v = b; 433 | 434 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); 435 | v |= v << 16; 436 | v |= v << ((WORDBITS == 64) ? 32 : 0); 437 | return v; 438 | } 439 | 440 | static forceinline machine_word_t 441 | repeat_byte(u8 b) 442 | { 443 | return repeat_u16(((u16)b << 8) | b); 444 | } 445 | 446 | /* 447 | * Copy an LZ77 match of 'length' bytes from the match source at 'out_next - 448 | * offset' to the match destination at 'out_next'. The source and destination 449 | * may overlap. 
450 | * 451 | * This handles validating the length and offset. It is validated that the 452 | * beginning of the match source is '>= out_begin' and that end of the match 453 | * destination is '<= out_end'. The return value is 0 if the match was valid 454 | * (and was copied), otherwise -1. 455 | * 456 | * 'min_length' is a hint which specifies the minimum possible match length. 457 | * This should be a compile-time constant. 458 | */ 459 | static forceinline int 460 | lz_copy(u32 length, u32 offset, u8 *out_begin, u8 *out_next, u8 *out_end, 461 | u32 min_length) 462 | { 463 | const u8 *src; 464 | u8 *end; 465 | 466 | /* Validate the offset. */ 467 | if (unlikely(offset > out_next - out_begin)) 468 | return -1; 469 | 470 | /* 471 | * Fast path: copy a match which is no longer than a few words, is not 472 | * overlapped such that copying a word at a time would produce incorrect 473 | * results, and is not too close to the end of the buffer. Note that 474 | * this might copy more than the length of the match, but that's okay in 475 | * this scenario. 476 | */ 477 | src = out_next - offset; 478 | if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES && 479 | offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES) 480 | { 481 | copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0); 482 | copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1); 483 | copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2); 484 | return 0; 485 | } 486 | 487 | /* Validate the length. This isn't needed in the fast path above, due 488 | * to the additional conditions tested, but we do need it here. */ 489 | if (unlikely(length > out_end - out_next)) 490 | return -1; 491 | end = out_next + length; 492 | 493 | /* 494 | * Try to copy one word at a time. On i386 and x86_64 this is faster 495 | * than copying one byte at a time, unless the data is near-random and 496 | * all the matches have very short lengths. 
Note that since this 497 | * requires unaligned memory accesses, it won't necessarily be faster on 498 | * every architecture. 499 | * 500 | * Also note that we might copy more than the length of the match. For 501 | * example, if a word is 8 bytes and the match is of length 5, then 502 | * we'll simply copy 8 bytes. This is okay as long as we don't write 503 | * beyond the end of the output buffer, hence the check for (out_end - 504 | * end >= WORDBYTES - 1). 505 | */ 506 | if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1)) 507 | { 508 | if (offset >= WORDBYTES) { 509 | /* The source and destination words don't overlap. */ 510 | do { 511 | copy_word_unaligned(src, out_next); 512 | src += WORDBYTES; 513 | out_next += WORDBYTES; 514 | } while (out_next < end); 515 | return 0; 516 | } else if (offset == 1) { 517 | /* Offset 1 matches are equivalent to run-length 518 | * encoding of the previous byte. This case is common 519 | * if the data contains many repeated bytes. */ 520 | machine_word_t v = repeat_byte(*(out_next - 1)); 521 | do { 522 | store_word_unaligned(v, out_next); 523 | src += WORDBYTES; 524 | out_next += WORDBYTES; 525 | } while (out_next < end); 526 | return 0; 527 | } 528 | /* 529 | * We don't bother with special cases for other 'offset < 530 | * WORDBYTES', which are usually rarer than 'offset == 1'. 531 | * Extra checks will just slow things down. Actually, it's 532 | * possible to handle all the 'offset < WORDBYTES' cases using 533 | * the same code, but it still becomes more complicated doesn't 534 | * seem any faster overall; it definitely slows down the more 535 | * common 'offset == 1' case. 536 | */ 537 | } 538 | 539 | /* Fall back to a bytewise copy. 
*/ 540 | if (min_length >= 2) 541 | *out_next++ = *src++; 542 | if (min_length >= 3) 543 | *out_next++ = *src++; 544 | if (min_length >= 4) 545 | *out_next++ = *src++; 546 | do { 547 | *out_next++ = *src++; 548 | } while (out_next != end); 549 | return 0; 550 | } 551 | 552 | #endif /* _DECOMPRESS_COMMON_H */ 553 | -------------------------------------------------------------------------------- /src/lzx_common.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_common.c - Common code for LZX compression and decompression. 3 | */ 4 | 5 | /* 6 | * Copyright (C) 2012-2016 Eric Biggers 7 | * 8 | * This program is free software: you can redistribute it and/or modify it under 9 | * the terms of the GNU General Public License as published by the Free Software 10 | * Foundation, either version 2 of the License, or (at your option) any later 11 | * version. 12 | * 13 | * This program is distributed in the hope that it will be useful, but WITHOUT 14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 | * details. 17 | * 18 | * You should have received a copy of the GNU General Public License along with 19 | * this program. If not, see . 20 | */ 21 | 22 | #ifdef HAVE_CONFIG_H 23 | # include "config.h" 24 | #endif 25 | 26 | #include 27 | 28 | #ifdef __SSE2__ 29 | # include 30 | #endif 31 | 32 | #ifdef __AVX2__ 33 | # include 34 | #endif 35 | 36 | #include "common_defs.h" 37 | #include "lzx_common.h" 38 | 39 | /* Mapping: offset slot => first match offset that uses that offset slot. 40 | * The offset slots for repeat offsets map to "fake" offsets < 1. 
*/ 41 | const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = { 42 | -2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */ 43 | 4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */ 44 | 30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */ 45 | 190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */ 46 | 1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */ 47 | 6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */ 48 | 32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */ 49 | 196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */ 50 | 786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */ 51 | 1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */ 52 | 2097150 /* extra */ 53 | }; 54 | 55 | /* Mapping: offset slot => how many extra bits must be read and added to the 56 | * corresponding offset slot base to decode the match offset. */ 57 | const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = { 58 | 0 , 0 , 0 , 0 , 1 , 59 | 1 , 2 , 2 , 3 , 3 , 60 | 4 , 4 , 5 , 5 , 6 , 61 | 6 , 7 , 7 , 8 , 8 , 62 | 9 , 9 , 10, 10, 11, 63 | 11, 12, 12, 13, 13, 64 | 14, 14, 15, 15, 16, 65 | 16, 17, 17, 17, 17, 66 | 17, 17, 17, 17, 17, 67 | 17, 17, 17, 17, 17, 68 | }; 69 | 70 | /* Round the specified buffer size up to the next valid LZX window size, and 71 | * return its order (log2). Or, if the buffer size is 0 or greater than the 72 | * largest valid LZX window size, return 0. */ 73 | unsigned 74 | lzx_get_window_order(size_t max_bufsize) 75 | { 76 | if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE) 77 | return 0; 78 | 79 | return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER); 80 | } 81 | 82 | /* Given a valid LZX window order, return the number of symbols that will exist 83 | * in the main Huffman code. */ 84 | unsigned 85 | lzx_get_num_main_syms(unsigned window_order) 86 | { 87 | /* Note: one would expect that the maximum match offset would be 88 | * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two 89 | * bytes were to match the last two bytes. 
However, the format 90 | * disallows this case. This reduces the number of needed offset slots 91 | * by 1. */ 92 | u32 window_size = (u32)1 << window_order; 93 | u32 max_offset = window_size - LZX_MIN_MATCH_LEN - 1; 94 | unsigned num_offset_slots = 30; 95 | while (max_offset >= lzx_offset_slot_base[num_offset_slots]) 96 | num_offset_slots++; 97 | 98 | return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS); 99 | } 100 | 101 | static void 102 | do_translate_target(void *target, s32 input_pos) 103 | { 104 | s32 abs_offset, rel_offset; 105 | 106 | rel_offset = get_unaligned_le32(target); 107 | if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) { 108 | if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) { 109 | /* "good translation" */ 110 | abs_offset = rel_offset + input_pos; 111 | } else { 112 | /* "compensating translation" */ 113 | abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE; 114 | } 115 | put_unaligned_le32(abs_offset, target); 116 | } 117 | } 118 | 119 | static void 120 | undo_translate_target(void *target, s32 input_pos) 121 | { 122 | s32 abs_offset, rel_offset; 123 | 124 | abs_offset = get_unaligned_le32(target); 125 | if (abs_offset >= 0) { 126 | if (abs_offset < LZX_WIM_MAGIC_FILESIZE) { 127 | /* "good translation" */ 128 | rel_offset = abs_offset - input_pos; 129 | put_unaligned_le32(rel_offset, target); 130 | } 131 | } else { 132 | if (abs_offset >= -input_pos) { 133 | /* "compensating translation" */ 134 | rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE; 135 | put_unaligned_le32(rel_offset, target); 136 | } 137 | } 138 | } 139 | 140 | /* 141 | * Do or undo the 'E8' preprocessing used in LZX. Before compression, the 142 | * uncompressed data is preprocessed by changing the targets of x86 CALL 143 | * instructions from relative offsets to absolute offsets. After decompression, 144 | * the translation is undone by changing the targets of x86 CALL instructions 145 | * from absolute offsets to relative offsets. 
146 | * 147 | * Note that despite its intent, E8 preprocessing can be done on any data even 148 | * if it is not actually x86 machine code. In fact, E8 preprocessing appears to 149 | * always be used in LZX-compressed resources in WIM files; there is no bit to 150 | * indicate whether it is used or not, unlike in the LZX compressed format as 151 | * used in cabinet files, where a bit is reserved for that purpose. 152 | * 153 | * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data, 154 | * which really means the 5-byte call instruction cannot start in the last 10 155 | * bytes of the uncompressed data. This is one of the errors in the LZX 156 | * documentation. 157 | * 158 | * E8 preprocessing does not appear to be disabled after the 32768th chunk of a 159 | * WIM resource, which apparently is another difference from the LZX compression 160 | * used in cabinet files. 161 | * 162 | * E8 processing is supposed to take the file size as a parameter, as it is used 163 | * in calculating the translated jump targets. But in WIM files, this file size 164 | * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000). * * For each E8 byte that is selected for translation, @process_target is called with a pointer to the 4 bytes that follow the E8 byte and with the offset of the E8 byte from the start of @data. Buffers of 10 bytes or fewer are left unmodified. 165 | */ 166 | static void 167 | lzx_e8_filter(u8 *data, u32 size, void (*process_target)(void *, s32)) 168 | { 169 | 170 | #if !defined(__SSE2__) && !defined(__AVX2__) 171 | /* 172 | * A worthwhile optimization is to push the end-of-buffer check into the 173 | * relatively rare E8 case. This is possible if we replace the last six 174 | * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte 175 | * before reaching end-of-buffer. In addition, this scheme guarantees 176 | * that no translation can begin following an E8 byte in the last 10 177 | * bytes because a 4-byte offset containing E8 as its high byte is a 178 | * large negative number that is not valid for translation. That is 179 | * exactly what we need. 
180 | */ 181 | u8 *tail; 182 | u8 saved_bytes[6]; 183 | u8 *p; 184 | 185 | if (size <= 10) 186 | return; 187 | 188 | tail = &data[size - 6]; 189 | memcpy(saved_bytes, tail, 6); 190 | memset(tail, 0xE8, 6); 191 | p = data; 192 | for (;;) { 193 | while (*p != 0xE8) 194 | p++; 195 | if (p >= tail) 196 | break; 197 | (*process_target)(p + 1, p - data); 198 | p += 5; 199 | } 200 | memcpy(tail, saved_bytes, 6); 201 | #else 202 | /* SSE2 or AVX-2 optimized version for x86_64 */ 203 | 204 | u8 *p = data; 205 | u64 valid_mask = ~0; 206 | 207 | if (size <= 10) 208 | return; 209 | #ifdef __AVX2__ 210 | # define ALIGNMENT_REQUIRED 32 211 | #else 212 | # define ALIGNMENT_REQUIRED 16 213 | #endif 214 | 215 | /* Process one byte at a time until the pointer is properly aligned. */ 216 | while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) { 217 | if (p >= data + size - 10) 218 | return; 219 | if (*p == 0xE8 && (valid_mask & 1)) { 220 | (*process_target)(p + 1, p - data); 221 | valid_mask &= ~0x1F; 222 | } 223 | p++; 224 | valid_mask >>= 1; 225 | valid_mask |= (u64)1 << 63; 226 | } 227 | 228 | if (data + size - p >= 64) { 229 | 230 | /* Vectorized processing */ 231 | 232 | /* Note: we use a "trap" E8 byte to eliminate the need to check 233 | * for end-of-buffer in the inner loop. This byte is carefully 234 | * positioned so that it will never be changed by a previous 235 | * translation before it is detected. 
*/ 236 | 237 | u8 *trap = p + ((data + size - p) & ~31) - 32 + 4; 238 | u8 saved_byte = *trap; 239 | *trap = 0xE8; 240 | 241 | for (;;) { 242 | u32 e8_mask; 243 | u8 *orig_p = p; 244 | #ifdef __AVX2__ 245 | const __m256i e8_bytes = _mm256_set1_epi8(0xE8); 246 | for (;;) { 247 | __m256i bytes = *(const __m256i *)p; 248 | __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes); 249 | e8_mask = _mm256_movemask_epi8(cmpresult); 250 | if (e8_mask) 251 | break; 252 | p += 32; 253 | } 254 | #else 255 | const __m128i e8_bytes = _mm_set1_epi8(0xE8); 256 | for (;;) { 257 | /* Read the next 32 bytes of data and test them 258 | * for E8 bytes. */ 259 | __m128i bytes1 = *(const __m128i *)p; 260 | __m128i bytes2 = *(const __m128i *)(p + 16); 261 | __m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes); 262 | __m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes); 263 | u32 mask1 = _mm_movemask_epi8(cmpresult1); 264 | u32 mask2 = _mm_movemask_epi8(cmpresult2); 265 | /* The masks have a bit set for each E8 byte. 266 | * We stay in this fast inner loop as long as 267 | * there are no E8 bytes. */ 268 | if (mask1 | mask2) { 269 | e8_mask = mask1 | (mask2 << 16); 270 | break; 271 | } 272 | p += 32; 273 | } 274 | #endif 275 | 276 | /* Did we pass over data with no E8 bytes? */ 277 | if (p != orig_p) 278 | valid_mask = ~0; 279 | 280 | /* Are we nearing end-of-buffer? */ 281 | if (p == trap - 4) 282 | break; 283 | 284 | /* Process the E8 bytes. However, the AND with 285 | * 'valid_mask' ensures we never process an E8 byte that 286 | * was itself part of a translation target. */ 287 | while ((e8_mask &= valid_mask)) { 288 | unsigned bit = bsf32(e8_mask); 289 | (*process_target)(p + bit + 1, p + bit - data); 290 | valid_mask &= ~((u64)0x1F << bit); 291 | } 292 | 293 | valid_mask >>= 32; 294 | valid_mask |= 0xFFFFFFFF00000000; 295 | p += 32; 296 | } 297 | 298 | *trap = saved_byte; 299 | } 300 | 301 | /* Approaching the end of the buffer; process one byte at a time. 
*/ 302 | while (p < data + size - 10) { 303 | if (*p == 0xE8 && (valid_mask & 1)) { 304 | (*process_target)(p + 1, p - data); 305 | valid_mask &= ~0x1F; 306 | } 307 | p++; 308 | valid_mask >>= 1; 309 | valid_mask |= (u64)1 << 63; 310 | } 311 | #endif /* __SSE2__ || __AVX2__ */ 312 | } 313 | /* Run the E8 filter over @data in place with do_translate_target(), which rewrites translatable relative CALL targets as absolute ones. */ 314 | void 315 | lzx_preprocess(u8 *data, u32 size) 316 | { 317 | lzx_e8_filter(data, size, do_translate_target); 318 | } 319 | /* Run the E8 filter over @data in place with undo_translate_target(), which rewrites translated absolute CALL targets back to relative ones. */ 320 | void 321 | lzx_postprocess(u8 *data, u32 size) 322 | { 323 | lzx_e8_filter(data, size, undo_translate_target); 324 | } 325 | -------------------------------------------------------------------------------- /src/lzx_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_common.h 3 | * 4 | * Declarations shared between LZX compression and decompression. 5 | */ 6 | 7 | #ifndef _LZX_COMMON_H 8 | #define _LZX_COMMON_H 9 | 10 | #include "lzx_constants.h" 11 | #include "common_defs.h" 12 | 13 | extern const s32 lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1]; 14 | 15 | extern const u8 lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; 16 | 17 | extern unsigned 18 | lzx_get_window_order(size_t max_bufsize); 19 | 20 | extern unsigned 21 | lzx_get_num_main_syms(unsigned window_order); 22 | 23 | extern void 24 | lzx_preprocess(u8 *data, u32 size); 25 | 26 | extern void 27 | lzx_postprocess(u8 *data, u32 size); 28 | 29 | #endif /* _LZX_COMMON_H */ 30 | -------------------------------------------------------------------------------- /src/lzx_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_constants.h 3 | * 4 | * Constants for the LZX compression format. 
5 | */ 6 | 7 | #ifndef _LZX_CONSTANTS_H 8 | #define _LZX_CONSTANTS_H 9 | 10 | /* Number of literal byte values. */ 11 | #define LZX_NUM_CHARS 256 12 | 13 | /* The smallest and largest allowed match lengths. 
*/ 14 | #define LZX_MIN_MATCH_LEN 2 15 | #define LZX_MAX_MATCH_LEN 257 16 | 17 | /* Number of distinct match lengths that can be represented. */ 18 | #define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) 19 | 20 | /* Number of match lengths for which no length symbol is required. */ 21 | #define LZX_NUM_PRIMARY_LENS 7 22 | #define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1) 23 | 24 | /* Valid values of the 3-bit block type field. */ 25 | #define LZX_BLOCKTYPE_VERBATIM 1 26 | #define LZX_BLOCKTYPE_ALIGNED 2 27 | #define LZX_BLOCKTYPE_UNCOMPRESSED 3 28 | 29 | /* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum 30 | * sizes of the sliding window. The corresponding '*_ORDER' constants are the base-2 logarithms of those sizes. */ 31 | #define LZX_MIN_WINDOW_ORDER 15 32 | #define LZX_MAX_WINDOW_ORDER 21 33 | #define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */ 34 | #define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */ 35 | 36 | /* Maximum number of offset slots. (The actual number of offset slots depends 37 | * on the window size.) */ 38 | #define LZX_MAX_OFFSET_SLOTS 50 39 | 40 | /* Maximum number of symbols in the main code. (The actual number of symbols in 41 | * the main code depends on the window size.) */ 42 | #define LZX_MAINCODE_MAX_NUM_SYMBOLS \ 43 | (LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS)) 44 | 45 | /* Number of symbols in the length code. */ 46 | #define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS) 47 | 48 | /* Number of symbols in the pre-code. */ 49 | #define LZX_PRECODE_NUM_SYMBOLS 20 50 | 51 | /* Number of bits in which each pre-code codeword length is represented. */ 52 | #define LZX_PRECODE_ELEMENT_SIZE 4 53 | 54 | /* Number of low-order bits of each match offset that are entropy-encoded in 55 | * aligned offset blocks. */ 56 | #define LZX_NUM_ALIGNED_OFFSET_BITS 3 57 | 58 | /* Number of symbols in the aligned offset code. 
*/ 59 | #define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS) 60 | 61 | /* Mask for the match offset bits that are entropy-encoded in aligned offset 62 | * blocks. */ 63 | #define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1) 64 | 65 | /* Number of bits in which each aligned offset codeword length is represented. */ 66 | #define LZX_ALIGNEDCODE_ELEMENT_SIZE 3 67 | 68 | /* The first offset slot which requires an aligned offset symbol in aligned 69 | * offset blocks. */ 70 | #define LZX_MIN_ALIGNED_OFFSET_SLOT 8 71 | 72 | /* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */ 73 | #define LZX_MIN_ALIGNED_OFFSET 14 74 | 75 | /* The maximum number of extra offset bits in verbatim blocks. (One would need 76 | * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset 77 | * bits in *aligned* blocks.) */ 78 | #define LZX_MAX_NUM_EXTRA_BITS 17 79 | 80 | /* Maximum lengths (in bits) for length-limited Huffman code construction. The pre-code and aligned offset limits are the largest values representable in their element sizes. */ 81 | #define LZX_MAX_MAIN_CODEWORD_LEN 16 82 | #define LZX_MAX_LEN_CODEWORD_LEN 16 83 | #define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1) 84 | #define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1) 85 | 86 | /* For LZX-compressed blocks in WIM resources, this value is always used as the 87 | * filesize parameter for the call instruction (0xe8 byte) preprocessing, even 88 | * though the blocks themselves are not this size, and the size of the actual 89 | * file resource in the WIM file is very likely to be something entirely 90 | * different as well. */ 91 | #define LZX_WIM_MAGIC_FILESIZE 12000000 92 | 93 | /* Assumed LZX block size when the encoded block size begins with a 0 bit. 94 | * This is probably WIM-specific. */ 95 | #define LZX_DEFAULT_BLOCK_SIZE 32768 96 | 97 | /* Number of offsets in the recent (or "repeat") offsets queue. 
*/ 98 | #define LZX_NUM_RECENT_OFFSETS 3 99 | 100 | /* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */ 101 | #define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1) 102 | 103 | #endif /* _LZX_CONSTANTS_H */ 104 | -------------------------------------------------------------------------------- /src/lzx_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * lzx_decompress.c 3 | * 4 | * A decompressor for the LZX compression format, as used in WIM files. 5 | */ 6 | 7 | /* 8 | * Copyright (C) 2012-2016 Eric Biggers 9 | * 10 | * This program is free software: you can redistribute it and/or modify it under 11 | * the terms of the GNU General Public License as published by the Free Software 12 | * Foundation, either version 2 of the License, or (at your option) any later 13 | * version. 14 | * 15 | * This program is distributed in the hope that it will be useful, but WITHOUT 16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 18 | * details. 19 | * 20 | * You should have received a copy of the GNU General Public License along with 21 | * this program. If not, see . 22 | */ 23 | 24 | /* 25 | * LZX is an LZ77 and Huffman-code based compression format that has many 26 | * similarities to DEFLATE (the format used by zlib/gzip). The compression 27 | * ratio is as good or better than DEFLATE. See lzx_compress.c for a format 28 | * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a 29 | * historical overview. Here I make some pragmatic notes. 30 | * 31 | * The old specification for LZX is the document "Microsoft LZX Data Compression 32 | * Format" (1997). It defines the LZX format as used in cabinet files. Allowed 33 | * window sizes are 2^n where 15 <= n <= 21. However, this document contains 34 | * several errors, so don't read too much into it... 
35 | * 36 | * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA 37 | * Compression and Decompression" (2014). It defines the LZX format as used by 38 | * Microsoft's binary patcher. It corrects several errors in the 1997 document 39 | * and extends the format in several ways --- namely, optional reference data, 40 | * up to 2^25 byte windows, and longer match lengths. 41 | * 42 | * WIM files use a more restricted form of LZX. No LZX DELTA extensions are 43 | * present, the window is not "sliding", E8 preprocessing is done 44 | * unconditionally with a fixed file size, and the maximum window size is always 45 | * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource). 46 | * This code is primarily intended to implement this form of LZX. But although 47 | * not compatible with WIMGAPI, this code also supports maximum window sizes up 48 | * to 2^21 bytes. 49 | * 50 | * TODO: Add support for window sizes up to 2^25 bytes. 51 | */ 52 | 53 | #ifdef HAVE_CONFIG_H 54 | # include "config.h" 55 | #endif 56 | 57 | #include 58 | 59 | #include "decompress_common.h" 60 | #include "lzx_common.h" 61 | #include "system_compression.h" 62 | 63 | /* These values are chosen for fast decompression. 
*/ 64 | #define LZX_MAINCODE_TABLEBITS 11 65 | #define LZX_LENCODE_TABLEBITS 9 66 | #define LZX_PRECODE_TABLEBITS 6 67 | #define LZX_ALIGNEDCODE_TABLEBITS 7 68 | /* Number of extra entries appended to maincode_lens[] and lencode_lens[] in struct lzx_decompressor. */ 69 | #define LZX_READ_LENS_MAX_OVERRUN 50 70 | /* State of an LZX decompressor: for each Huffman code a decode table, its codeword lengths and table-building working space, plus the window parameters. */ 71 | struct lzx_decompressor { 72 | 73 | DECODE_TABLE(maincode_decode_table, LZX_MAINCODE_MAX_NUM_SYMBOLS, 74 | LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); 75 | u8 maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 76 | 77 | DECODE_TABLE(lencode_decode_table, LZX_LENCODE_NUM_SYMBOLS, 78 | LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); 79 | u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; 80 | /* The aligned offset codeword lengths overlay the aligned offset decode table. */ 81 | union { 82 | DECODE_TABLE(alignedcode_decode_table, LZX_ALIGNEDCODE_NUM_SYMBOLS, 83 | LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN); 84 | u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; 85 | }; 86 | /* The precode decode table, the precode codeword lengths and extra_offset_bits[] overlay one another. */ 87 | union { 88 | DECODE_TABLE(precode_decode_table, LZX_PRECODE_NUM_SYMBOLS, 89 | LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); 90 | u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; 91 | u8 extra_offset_bits[LZX_MAX_OFFSET_SLOTS]; 92 | }; 93 | /* The working spaces for the four codes overlay one another. 
*/ 94 | union { 95 | DECODE_TABLE_WORKING_SPACE(maincode_working_space, 96 | LZX_MAINCODE_MAX_NUM_SYMBOLS, 97 | LZX_MAX_MAIN_CODEWORD_LEN); 98 | DECODE_TABLE_WORKING_SPACE(lencode_working_space, 99 | LZX_LENCODE_NUM_SYMBOLS, 100 | LZX_MAX_LEN_CODEWORD_LEN); 101 | DECODE_TABLE_WORKING_SPACE(alignedcode_working_space, 102 | LZX_ALIGNEDCODE_NUM_SYMBOLS, 103 | LZX_MAX_ALIGNED_CODEWORD_LEN); 104 | DECODE_TABLE_WORKING_SPACE(precode_working_space, 105 | LZX_PRECODE_NUM_SYMBOLS, 106 | LZX_MAX_PRE_CODEWORD_LEN); 107 | }; 108 | 109 | unsigned window_order; 110 | unsigned num_main_syms; 111 | 112 | /* Like lzx_extra_offset_bits[], but does not include the entropy-coded 113 | * bits of aligned offset blocks */ 114 | u8 extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS]; 115 | 116 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); 117 | 118 | /* Read a Huffman-encoded symbol using 
the precode. */ 119 | static forceinline unsigned 120 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is) 121 | { 122 | return read_huffsym(is, d->precode_decode_table, 123 | LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); 124 | } 125 | 126 | /* Read a Huffman-encoded symbol using the main code. */ 127 | static forceinline unsigned 128 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is) 129 | { 130 | return read_huffsym(is, d->maincode_decode_table, 131 | LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); 132 | } 133 | 134 | /* Read a Huffman-encoded symbol using the length code. */ 135 | static forceinline unsigned 136 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is) 137 | { 138 | return read_huffsym(is, d->lencode_decode_table, 139 | LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); 140 | } 141 | 142 | /* Read a Huffman-encoded symbol using the aligned offset code. */ 143 | static forceinline unsigned 144 | read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is) 145 | { 146 | return read_huffsym(is, d->alignedcode_decode_table, 147 | LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN); 148 | } 149 | 150 | /* 151 | * Read a precode from the compressed input bitstream, then use it to decode 152 | * @num_lens codeword length values and write them to @lens. 153 | */ 154 | static int 155 | lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is, 156 | u8 *lens, unsigned num_lens) 157 | { 158 | u8 *len_ptr = lens; 159 | u8 *lens_end = lens + num_lens; 160 | 161 | /* Read the lengths of the precode codewords. These are stored 162 | * explicitly. */ 163 | for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) { 164 | d->precode_lens[i] = 165 | bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE); 166 | } 167 | 168 | /* Build the decoding table for the precode. 
*/ 169 | if (make_huffman_decode_table(d->precode_decode_table, 170 | LZX_PRECODE_NUM_SYMBOLS, 171 | LZX_PRECODE_TABLEBITS, 172 | d->precode_lens, 173 | LZX_MAX_PRE_CODEWORD_LEN, 174 | d->precode_working_space)) 175 | return -1; 176 | 177 | /* Decode the codeword lengths. */ 178 | do { 179 | unsigned presym; 180 | u8 len; 181 | 182 | /* Read the next precode symbol. */ 183 | presym = read_presym(d, is); 184 | if (presym < 17) { 185 | /* Difference from old length */ 186 | len = *len_ptr - presym; 187 | if ((s8)len < 0) 188 | len += 17; 189 | *len_ptr++ = len; 190 | } else { 191 | /* Special RLE values */ 192 | 193 | unsigned run_len; 194 | 195 | if (presym == 17) { 196 | /* Run of 0's */ 197 | run_len = 4 + bitstream_read_bits(is, 4); 198 | len = 0; 199 | } else if (presym == 18) { 200 | /* Longer run of 0's */ 201 | run_len = 20 + bitstream_read_bits(is, 5); 202 | len = 0; 203 | } else { 204 | /* Run of identical lengths */ 205 | run_len = 4 + bitstream_read_bits(is, 1); 206 | presym = read_presym(d, is); 207 | if (unlikely(presym > 17)) 208 | return -1; 209 | len = *len_ptr - presym; 210 | if ((s8)len < 0) 211 | len += 17; 212 | } 213 | 214 | do { 215 | *len_ptr++ = len; 216 | } while (--run_len); 217 | /* 218 | * The worst case overrun is when presym == 18, 219 | * run_len == 20 + 31, and only 1 length was remaining. 220 | * So LZX_READ_LENS_MAX_OVERRUN == 50. 221 | * 222 | * Overrun while reading the first half of maincode_lens 223 | * can corrupt the previous values in the second half. 224 | * This doesn't really matter because the resulting 225 | * lengths will still be in range, and data that 226 | * generates overruns is invalid anyway. 227 | */ 228 | } 229 | } while (len_ptr < lens_end); 230 | 231 | return 0; 232 | } 233 | 234 | /* 235 | * Read the header of an LZX block. For all block types, the block type and 236 | * size is saved in *block_type_ret and *block_size_ret, respectively. For 237 | * compressed blocks, the codeword lengths are also saved. 
For uncompressed 238 | * blocks, the recent offsets queue is also updated. 239 | */ 240 | static int 241 | lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is, 242 | u32 recent_offsets[], int *block_type_ret, 243 | u32 *block_size_ret) 244 | { 245 | int block_type; 246 | u32 block_size; 247 | 248 | bitstream_ensure_bits(is, 4); 249 | 250 | /* Read the block type. */ 251 | block_type = bitstream_pop_bits(is, 3); 252 | 253 | /* Read the block size. */ 254 | if (bitstream_pop_bits(is, 1)) { 255 | block_size = LZX_DEFAULT_BLOCK_SIZE; 256 | } else { 257 | block_size = bitstream_read_bits(is, 16); 258 | if (d->window_order >= 16) { 259 | block_size <<= 8; 260 | block_size |= bitstream_read_bits(is, 8); 261 | } 262 | } 263 | 264 | switch (block_type) { 265 | 266 | case LZX_BLOCKTYPE_ALIGNED: 267 | 268 | /* Read the aligned offset codeword lengths. */ 269 | 270 | for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { 271 | d->alignedcode_lens[i] = 272 | bitstream_read_bits(is, 273 | LZX_ALIGNEDCODE_ELEMENT_SIZE); 274 | } 275 | 276 | /* Fall though, since the rest of the header for aligned offset 277 | * blocks is the same as that for verbatim blocks. */ 278 | 279 | case LZX_BLOCKTYPE_VERBATIM: 280 | 281 | /* Read the main codeword lengths, which are divided into two 282 | * parts: literal symbols and match headers. */ 283 | 284 | if (lzx_read_codeword_lens(d, is, d->maincode_lens, 285 | LZX_NUM_CHARS)) 286 | return -1; 287 | 288 | if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS, 289 | d->num_main_syms - LZX_NUM_CHARS)) 290 | return -1; 291 | 292 | 293 | /* Read the length codeword lengths. 
*/ 294 | 295 | if (lzx_read_codeword_lens(d, is, d->lencode_lens, 296 | LZX_LENCODE_NUM_SYMBOLS)) 297 | return -1; 298 | 299 | break; 300 | 301 | case LZX_BLOCKTYPE_UNCOMPRESSED: 302 | /* 303 | * The header of an uncompressed block contains new values for 304 | * the recent offsets queue, starting on the next 16-bit 305 | * boundary in the bitstream. Careful: if the stream is 306 | * *already* aligned, the correct thing to do is to throw away 307 | * the next 16 bits (this is probably a mistake in the format). 308 | */ 309 | bitstream_ensure_bits(is, 1); 310 | bitstream_align(is); 311 | recent_offsets[0] = bitstream_read_u32(is); 312 | recent_offsets[1] = bitstream_read_u32(is); 313 | recent_offsets[2] = bitstream_read_u32(is); 314 | 315 | /* Offsets of 0 are invalid. */ 316 | if (recent_offsets[0] == 0 || recent_offsets[1] == 0 || 317 | recent_offsets[2] == 0) 318 | return -1; 319 | break; 320 | 321 | default: 322 | /* Unrecognized block type. */ 323 | return -1; 324 | } 325 | 326 | *block_type_ret = block_type; 327 | *block_size_ret = block_size; 328 | return 0; 329 | } 330 | 331 | /* Decompress a block of LZX-compressed data. */ 332 | static int 333 | lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is, 334 | int block_type, u32 block_size, 335 | u8 * const out_begin, u8 *out_next, u32 recent_offsets[]) 336 | { 337 | u8 * const block_end = out_next + block_size; 338 | unsigned min_aligned_offset_slot; 339 | 340 | /* 341 | * Build the Huffman decode tables. We always need to build the main 342 | * and length decode tables. For aligned blocks we additionally need to 343 | * build the aligned offset decode table. 
344 | */ 345 | 346 | if (make_huffman_decode_table(d->maincode_decode_table, 347 | d->num_main_syms, 348 | LZX_MAINCODE_TABLEBITS, 349 | d->maincode_lens, 350 | LZX_MAX_MAIN_CODEWORD_LEN, 351 | d->maincode_working_space)) 352 | return -1; 353 | 354 | if (make_huffman_decode_table(d->lencode_decode_table, 355 | LZX_LENCODE_NUM_SYMBOLS, 356 | LZX_LENCODE_TABLEBITS, 357 | d->lencode_lens, 358 | LZX_MAX_LEN_CODEWORD_LEN, 359 | d->lencode_working_space)) 360 | return -1; 361 | 362 | if (block_type == LZX_BLOCKTYPE_ALIGNED) { 363 | if (make_huffman_decode_table(d->alignedcode_decode_table, 364 | LZX_ALIGNEDCODE_NUM_SYMBOLS, 365 | LZX_ALIGNEDCODE_TABLEBITS, 366 | d->alignedcode_lens, 367 | LZX_MAX_ALIGNED_CODEWORD_LEN, 368 | d->alignedcode_working_space)) 369 | return -1; 370 | min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; 371 | memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned, 372 | sizeof(lzx_extra_offset_bits)); 373 | } else { 374 | min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS; 375 | memcpy(d->extra_offset_bits, lzx_extra_offset_bits, 376 | sizeof(lzx_extra_offset_bits)); 377 | } 378 | 379 | /* Decode the literals and matches. */ 380 | 381 | do { 382 | unsigned mainsym; 383 | unsigned length; 384 | u32 offset; 385 | unsigned offset_slot; 386 | 387 | mainsym = read_mainsym(d, is); 388 | if (mainsym < LZX_NUM_CHARS) { 389 | /* Literal */ 390 | *out_next++ = mainsym; 391 | continue; 392 | } 393 | 394 | /* Match */ 395 | 396 | /* Decode the length header and offset slot. */ 397 | STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0); 398 | length = mainsym % LZX_NUM_LEN_HEADERS; 399 | offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS; 400 | 401 | /* If needed, read a length symbol to decode the full length. 
*/ 402 | if (length == LZX_NUM_PRIMARY_LENS) 403 | length += read_lensym(d, is); 404 | length += LZX_MIN_MATCH_LEN; 405 | 406 | if (offset_slot < LZX_NUM_RECENT_OFFSETS) { 407 | /* Repeat offset */ 408 | 409 | /* Note: This isn't a real LRU queue, since using the R2 410 | * offset doesn't bump the R1 offset down to R2. */ 411 | offset = recent_offsets[offset_slot]; 412 | recent_offsets[offset_slot] = recent_offsets[0]; 413 | } else { 414 | /* Explicit offset */ 415 | offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]); 416 | if (offset_slot >= min_aligned_offset_slot) { 417 | offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) | 418 | read_alignedsym(d, is); 419 | } 420 | offset += lzx_offset_slot_base[offset_slot]; 421 | 422 | /* Update the match offset LRU queue. */ 423 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3); 424 | recent_offsets[2] = recent_offsets[1]; 425 | recent_offsets[1] = recent_offsets[0]; 426 | } 427 | recent_offsets[0] = offset; 428 | 429 | /* Validate the match and copy it to the current position. */ 430 | if (unlikely(lz_copy(length, offset, out_begin, 431 | out_next, block_end, LZX_MIN_MATCH_LEN))) 432 | return -1; 433 | out_next += length; 434 | } while (out_next != block_end); 435 | 436 | return 0; 437 | } 438 | 439 | int 440 | lzx_decompress(struct lzx_decompressor *restrict d, 441 | const void *restrict compressed_data, size_t compressed_size, 442 | void *restrict uncompressed_data, size_t uncompressed_size) 443 | { 444 | u8 * const out_begin = uncompressed_data; 445 | u8 *out_next = out_begin; 446 | u8 * const out_end = out_begin + uncompressed_size; 447 | struct input_bitstream is; 448 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3); 449 | u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1}; 450 | unsigned may_have_e8_byte = 0; 451 | 452 | init_input_bitstream(&is, compressed_data, compressed_size); 453 | 454 | /* Codeword lengths begin as all 0's for delta encoding purposes. 
*/ 455 | memset(d->maincode_lens, 0, d->num_main_syms); 456 | memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS); 457 | 458 | /* Decompress blocks until we have all the uncompressed data. */ 459 | 460 | while (out_next != out_end) { 461 | int block_type; 462 | u32 block_size; 463 | 464 | if (lzx_read_block_header(d, &is, recent_offsets, 465 | &block_type, &block_size)) 466 | return -1; 467 | 468 | if (block_size < 1 || block_size > out_end - out_next) 469 | return -1; 470 | 471 | if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) { 472 | 473 | /* Compressed block */ 474 | if (lzx_decompress_block(d, &is, block_type, block_size, 475 | out_begin, out_next, 476 | recent_offsets)) 477 | return -1; 478 | 479 | /* If the first E8 byte was in this block, then it must 480 | * have been encoded as a literal using mainsym E8. */ 481 | may_have_e8_byte |= d->maincode_lens[0xE8]; 482 | } else { 483 | 484 | /* Uncompressed block */ 485 | if (bitstream_read_bytes(&is, out_next, block_size)) 486 | return -1; 487 | 488 | /* Re-align the bitstream if needed. */ 489 | if (block_size & 1) 490 | bitstream_read_byte(&is); 491 | 492 | /* There may have been an E8 byte in the block. */ 493 | may_have_e8_byte = 1; 494 | } 495 | out_next += block_size; 496 | } 497 | 498 | /* Postprocess the data unless it cannot possibly contain E8 bytes. 
*/ 499 | if (may_have_e8_byte) 500 | lzx_postprocess(uncompressed_data, uncompressed_size); 501 | 502 | return 0; 503 | } 504 | 505 | struct lzx_decompressor * 506 | lzx_allocate_decompressor(size_t max_block_size) 507 | { 508 | unsigned window_order; 509 | struct lzx_decompressor *d; 510 | 511 | window_order = lzx_get_window_order(max_block_size); 512 | if (window_order == 0) { 513 | errno = EINVAL; 514 | return NULL; 515 | } 516 | 517 | d = aligned_malloc(sizeof(*d), DECODE_TABLE_ALIGNMENT); 518 | if (!d) 519 | return NULL; 520 | 521 | d->window_order = window_order; 522 | d->num_main_syms = lzx_get_num_main_syms(window_order); 523 | 524 | /* Initialize 'd->extra_offset_bits_minus_aligned'. */ 525 | STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) == 526 | sizeof(lzx_extra_offset_bits)); 527 | STATIC_ASSERT(sizeof(d->extra_offset_bits) == 528 | sizeof(lzx_extra_offset_bits)); 529 | memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits, 530 | sizeof(lzx_extra_offset_bits)); 531 | for (unsigned offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT; 532 | offset_slot < LZX_MAX_OFFSET_SLOTS; offset_slot++) 533 | { 534 | d->extra_offset_bits_minus_aligned[offset_slot] -= 535 | LZX_NUM_ALIGNED_OFFSET_BITS; 536 | } 537 | 538 | return d; 539 | } 540 | 541 | void 542 | lzx_free_decompressor(struct lzx_decompressor *d) 543 | { 544 | aligned_free(d); 545 | } 546 | -------------------------------------------------------------------------------- /src/plugin.c: -------------------------------------------------------------------------------- 1 | /* 2 | * plugin.c - NTFS-3G system compression plugin 3 | * 4 | * Copyright (C) 2015 Jean-Pierre Andre 5 | * Copyright (C) 2015-2016 Eric Biggers 6 | * 7 | * This program is free software: you can redistribute it and/or modify it under 8 | * the terms of the GNU General Public License as published by the Free Software 9 | * Foundation, either version 2 of the License, or (at your option) any later 10 | * version. 
*
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"

/* Although fuse.h is only needed for 'struct fuse_file_info', we still need to
 * request a specific FUSE API version.  (It's required on FreeBSD, and it's
 * probably a good idea to request the same version used by NTFS-3G anyway.) */
#define FUSE_USE_VERSION 26
#include <fuse.h>

#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

/* NOTE(review): the names of the next two headers were lost when this text was
 * extracted; they are restored from the NTFS-3G types this file uses
 * (ntfs_inode, struct plugin_operations) -- confirm against upstream. */
#include <ntfs-3g/inode.h>
#include <ntfs-3g/plugin.h>

#include "system_compression.h"

/*
 * For each open file description for a system-compressed file, we cache an
 * ntfs_system_decompression_ctx for the file in the FUSE file handle.
 *
 * A decompression context includes a decompressor, cached data, and cached
 * metadata.  It does not include an open ntfs_inode for the file or an open
 * ntfs_attr for the file's compressed stream.  This is necessary because
 * NTFS-3G is not guaranteed to keep the inode open the whole time the file is
 * open.  Indeed, NTFS-3G may close an inode after a read request and re-open it
 * for the next one, though it does maintain an open inode cache.
 *
 * As a result of the decompression context caching, the results of reads from a
 * system-compressed file that has been written to since being opened for
 * reading are unspecified.  Stale data might be returned.  Currently, this
 * doesn't matter because this plugin blocks writes to system-compressed files.
 * (It might still be possible for adventurous users to play with the
 * WofCompressedData named data stream directly.)
54 | */ 55 | #define DECOMPRESSION_CTX(fi) \ 56 | ((struct ntfs_system_decompression_ctx *)(uintptr_t)((fi)->fh)) 57 | 58 | static int compressed_getattr(ntfs_inode *ni, const REPARSE_POINT *reparse, 59 | struct stat *stbuf) 60 | { 61 | s64 compressed_size = ntfs_get_system_compressed_file_size(ni, reparse); 62 | 63 | if (compressed_size >= 0) { 64 | /* System-compressed file */ 65 | stbuf->st_size = ni->data_size; 66 | stbuf->st_blocks = (compressed_size + 511) >> 9; 67 | stbuf->st_mode = S_IFREG | 0555; 68 | return 0; 69 | } 70 | 71 | /* Not a system compressed file, or another error occurred */ 72 | return -errno; 73 | } 74 | 75 | static int compressed_open(ntfs_inode *ni, const REPARSE_POINT *reparse, 76 | struct fuse_file_info *fi) 77 | { 78 | struct ntfs_system_decompression_ctx *dctx; 79 | 80 | if ((fi->flags & O_ACCMODE) != O_RDONLY) 81 | return -EOPNOTSUPP; 82 | 83 | dctx = ntfs_open_system_decompression_ctx(ni, reparse); 84 | if (!dctx) 85 | return -errno; 86 | 87 | fi->fh = (uintptr_t)dctx; 88 | return 0; 89 | } 90 | 91 | static int compressed_release(ntfs_inode *ni __attribute__((unused)), 92 | const REPARSE_POINT *reparse __attribute__((unused)), 93 | struct fuse_file_info *fi) 94 | { 95 | ntfs_close_system_decompression_ctx(DECOMPRESSION_CTX(fi)); 96 | return 0; 97 | } 98 | 99 | static int compressed_read(ntfs_inode *ni, const REPARSE_POINT *reparse, 100 | char *buf, size_t size, off_t offset, 101 | struct fuse_file_info *fi) 102 | { 103 | ssize_t res; 104 | 105 | res = ntfs_read_system_compressed_data(DECOMPRESSION_CTX(fi), ni, 106 | offset, size, buf); 107 | if (res < 0) 108 | return -errno; 109 | return res; 110 | } 111 | 112 | static const struct plugin_operations ops = { 113 | .getattr = compressed_getattr, 114 | .open = compressed_open, 115 | .release = compressed_release, 116 | .read = compressed_read, 117 | }; 118 | 119 | const struct plugin_operations *init(le32 tag) 120 | { 121 | if (tag == IO_REPARSE_TAG_WOF) 122 | return &ops; 123 | errno 
= EINVAL; 124 | return NULL; 125 | } 126 | -------------------------------------------------------------------------------- /src/system_compression.c: -------------------------------------------------------------------------------- 1 | /* 2 | * system_compression.c - Support for reading System Compressed files 3 | * 4 | * Copyright (C) 2015-2016 Eric Biggers 5 | * 6 | * This program is free software: you can redistribute it and/or modify it under 7 | * the terms of the GNU General Public License as published by the Free Software 8 | * Foundation, either version 2 of the License, or (at your option) any later 9 | * version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 14 | * details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program. If not, see . 18 | */ 19 | 20 | /* 21 | * Windows 10 introduced a new filesystem compression feature: System 22 | * Compression, also called "Compact OS". The feature allows rarely modified 23 | * files to be compressed more heavily than is possible with regular NTFS 24 | * compression (which uses the LZNT1 algorithm with 4096-byte chunks). 25 | * System-compressed files can only be read, not written; on Windows, if a 26 | * program attempts to write to such a file, it is automatically decompressed 27 | * and turned into an ordinary uncompressed file. 28 | * 29 | * Rather than building it directly into NTFS, Microsoft implemented this new 30 | * compression mode using the Windows Overlay Filesystem (WOF) filter driver 31 | * that was added in Windows 8.1. 
A system-compressed file contains the 32 | * following NTFS attributes: 33 | * 34 | * - A reparse point attribute in the format WOF_FILE_PROVIDER_REPARSE_POINT_V1, 35 | * documented below 36 | * - A sparse unnamed data attribute, containing all zero bytes, with data size 37 | * equal to the uncompressed file size 38 | * - A data attribute named "WofCompressedData" containing the compressed data 39 | * of the file. 40 | * 41 | * The compressed data contains a series of chunks, each of which decompresses 42 | * to a known size determined by the compression format specified in the reparse 43 | * point. The last chunk can be an exception, since it decompresses to whatever 44 | * size remains. Chunks that did not compress to less than their original size 45 | * are stored uncompressed. The compressed chunks are concatenated in order and 46 | * are prefixed by a table of 4-byte (for files < 4 GiB in size uncompressed) or 47 | * 8-byte (for files >= 4 GiB in size uncompressed) little endian numbers which 48 | * give the offset of each compressed chunk from the end of the table. Since 49 | * every chunk can be decompressed independently and its location can be 50 | * discovered from the chunk offset table, "random access" reads are possible 51 | * with chunk granularity. Writes are not possible, in general, without 52 | * rewriting the entire file. 
53 | */ 54 | 55 | #ifdef HAVE_CONFIG_H 56 | #include "config.h" 57 | #endif 58 | 59 | #include 60 | #include 61 | #include 62 | #include 63 | 64 | #include 65 | #include 66 | #include 67 | 68 | #include "system_compression.h" 69 | 70 | /******************************************************************************/ 71 | 72 | /* Known values of the WOF protocol / reparse point format */ 73 | typedef enum { 74 | WOF_CURRENT_VERSION = const_cpu_to_le32(1), 75 | } WOF_VERSION; 76 | 77 | /* Known WOF providers */ 78 | typedef enum { 79 | /* WIM backing provider ("WIMBoot") */ 80 | WOF_PROVIDER_WIM = const_cpu_to_le32(1), 81 | 82 | /* System compressed file provider */ 83 | WOF_PROVIDER_FILE = const_cpu_to_le32(2), 84 | } WOF_PROVIDER; 85 | 86 | /* Known versions of the compressed file provider */ 87 | typedef enum { 88 | WOF_FILE_PROVIDER_CURRENT_VERSION = const_cpu_to_le32(1), 89 | } WOF_FILE_PROVIDER_VERSION; 90 | 91 | /* Information needed to specify a WOF provider */ 92 | typedef struct { 93 | le32 version; 94 | le32 provider; 95 | } __attribute__((packed)) WOF_EXTERNAL_INFO; 96 | 97 | /* Metadata for the compressed file provider --- indicates how the file 98 | * is compressed */ 99 | typedef struct { 100 | le32 version; 101 | le32 compression_format; 102 | } __attribute__((packed)) WOF_FILE_PROVIDER_EXTERNAL_INFO_V1; 103 | 104 | /* Format of the reparse point attribute of system compressed files */ 105 | typedef struct { 106 | /* The reparse point header. This indicates that the reparse point is 107 | * supposed to be interpreted by the WOF filter driver. */ 108 | REPARSE_POINT reparse; 109 | 110 | /* The WOF provider specification. This indicates the "provider" that 111 | * the WOF filter driver is supposed to hand control to. 
*/ 112 | WOF_EXTERNAL_INFO wof; 113 | 114 | /* The metadata specific to the compressed file "provider" */ 115 | WOF_FILE_PROVIDER_EXTERNAL_INFO_V1 file; 116 | 117 | } __attribute__((packed)) WOF_FILE_PROVIDER_REPARSE_POINT_V1; 118 | 119 | /* The available compression formats for system compressed files */ 120 | typedef enum { 121 | FORMAT_XPRESS4K = const_cpu_to_le32(0), 122 | FORMAT_LZX = const_cpu_to_le32(1), 123 | FORMAT_XPRESS8K = const_cpu_to_le32(2), 124 | FORMAT_XPRESS16K = const_cpu_to_le32(3), 125 | } WOF_FILE_PROVIDER_COMPRESSION_FORMAT; 126 | 127 | /* "WofCompressedData": the name of the named data stream which contains the 128 | * compressed data of a system compressed file */ 129 | static ntfschar compressed_stream_name[] = { 130 | const_cpu_to_le16('W'), const_cpu_to_le16('o'), 131 | const_cpu_to_le16('f'), const_cpu_to_le16('C'), 132 | const_cpu_to_le16('o'), const_cpu_to_le16('m'), 133 | const_cpu_to_le16('p'), const_cpu_to_le16('r'), 134 | const_cpu_to_le16('e'), const_cpu_to_le16('s'), 135 | const_cpu_to_le16('s'), const_cpu_to_le16('e'), 136 | const_cpu_to_le16('d'), const_cpu_to_le16('D'), 137 | const_cpu_to_le16('a'), const_cpu_to_le16('t'), 138 | const_cpu_to_le16('a'), 139 | }; 140 | 141 | /******************************************************************************/ 142 | 143 | /* The maximum number of chunk offsets that may be cached at any one time. This 144 | * is purely an implementation detail, and this number can be changed. The 145 | * minimum possible value is 2, and the maximum possible value is UINT32_MAX 146 | * divided by the maximum chunk size. 
*/ 147 | #define NUM_CHUNK_OFFSETS 128 148 | 149 | /* A special marker value not used by any chunk index */ 150 | #define INVALID_CHUNK_INDEX UINT64_MAX 151 | 152 | /* A decompression context for a system compressed file */ 153 | struct ntfs_system_decompression_ctx { 154 | 155 | /* The compression format of the file */ 156 | WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; 157 | 158 | /* The decompressor for the file */ 159 | void *decompressor; 160 | 161 | /* The uncompressed size of the file in bytes */ 162 | u64 uncompressed_size; 163 | 164 | /* The compressed size of the file in bytes */ 165 | u64 compressed_size; 166 | 167 | /* The number of chunks into which the file is divided */ 168 | u64 num_chunks; 169 | 170 | /* The base 2 logarithm of chunk_size */ 171 | u32 chunk_order; 172 | 173 | /* The uncompressed chunk size in bytes. All chunks have this 174 | * uncompressed size except possibly the last. */ 175 | u32 chunk_size; 176 | 177 | /* 178 | * The chunk offsets cache. If 'base_chunk_idx == INVALID_CHUNK_INDEX', 179 | * then the cache is empty. Otherwise, 'base_chunk_idx' is the 0-based 180 | * index of the chunk that has its offset cached in 'chunk_offsets[0]'. 181 | * The offsets of the subsequent chunks follow until either the array is 182 | * full or the offset of the file's last chunk has been cached. There 183 | * is an extra entry at end-of-file which contains the end-of-file 184 | * offset. All offsets are stored relative to 'base_chunk_offset'. 185 | */ 186 | u64 base_chunk_idx; 187 | u64 base_chunk_offset; 188 | u32 chunk_offsets[NUM_CHUNK_OFFSETS]; 189 | 190 | /* A temporary buffer used to hold the compressed chunk currently being 191 | * decompressed or the chunk offset data currently being parsed. */ 192 | void *temp_buffer; 193 | 194 | /* 195 | * A cache for the most recently decompressed chunk. 
'cached_chunk' is 196 | * a buffer which, if 'cached_chunk_idx != INVALID_CHUNK_INDEX', 197 | * contains the uncompressed data of the chunk with index 198 | * 'cached_chunk_idx'. 199 | * 200 | * This cache is intended to prevent adjacent reads with lengths shorter 201 | * than the chunk size from causing redundant chunk decompressions. 202 | * It's not intended to be a general purpose data cache. 203 | */ 204 | void *cached_chunk; 205 | u64 cached_chunk_idx; 206 | }; 207 | 208 | static int allocate_decompressor(struct ntfs_system_decompression_ctx *ctx) 209 | { 210 | if (ctx->format == FORMAT_LZX) 211 | ctx->decompressor = lzx_allocate_decompressor(32768); 212 | else 213 | ctx->decompressor = xpress_allocate_decompressor(); 214 | if (!ctx->decompressor) 215 | return -1; 216 | return 0; 217 | } 218 | 219 | static void free_decompressor(struct ntfs_system_decompression_ctx *ctx) 220 | { 221 | if (ctx->format == FORMAT_LZX) 222 | lzx_free_decompressor(ctx->decompressor); 223 | else 224 | xpress_free_decompressor(ctx->decompressor); 225 | } 226 | 227 | static int decompress(struct ntfs_system_decompression_ctx *ctx, 228 | const void *compressed_data, size_t compressed_size, 229 | void *uncompressed_data, size_t uncompressed_size) 230 | { 231 | if (ctx->format == FORMAT_LZX) 232 | return lzx_decompress(ctx->decompressor, 233 | compressed_data, compressed_size, 234 | uncompressed_data, uncompressed_size); 235 | else 236 | return xpress_decompress(ctx->decompressor, 237 | compressed_data, compressed_size, 238 | uncompressed_data, uncompressed_size); 239 | } 240 | 241 | static int get_compression_format(ntfs_inode *ni, const REPARSE_POINT *reparse, 242 | WOF_FILE_PROVIDER_COMPRESSION_FORMAT *format_ret) 243 | { 244 | WOF_FILE_PROVIDER_REPARSE_POINT_V1 *rp; 245 | s64 rpbuflen; 246 | int ret; 247 | 248 | if (!ni) { 249 | errno = EINVAL; 250 | return -1; 251 | } 252 | 253 | /* Is this a reparse point file? 
*/ 254 | if (!(ni->flags & FILE_ATTR_REPARSE_POINT)) { 255 | errno = EOPNOTSUPP; 256 | return -1; 257 | } 258 | 259 | /* Read the reparse point if not done already. */ 260 | if (reparse) { 261 | rp = (WOF_FILE_PROVIDER_REPARSE_POINT_V1 *)reparse; 262 | rpbuflen = sizeof(REPARSE_POINT) + 263 | le16_to_cpu(reparse->reparse_data_length); 264 | } else { 265 | rp = ntfs_attr_readall(ni, AT_REPARSE_POINT, AT_UNNAMED, 0, 266 | &rpbuflen); 267 | if (!rp) 268 | return -1; 269 | } 270 | 271 | /* Does the reparse point indicate a system compressed file? */ 272 | if (rpbuflen >= (s64)sizeof(WOF_FILE_PROVIDER_REPARSE_POINT_V1) && 273 | rp->reparse.reparse_tag == IO_REPARSE_TAG_WOF && 274 | rp->wof.version == WOF_CURRENT_VERSION && 275 | rp->wof.provider == WOF_PROVIDER_FILE && 276 | rp->file.version == WOF_FILE_PROVIDER_CURRENT_VERSION && 277 | (rp->file.compression_format == FORMAT_XPRESS4K || 278 | rp->file.compression_format == FORMAT_XPRESS8K || 279 | rp->file.compression_format == FORMAT_XPRESS16K || 280 | rp->file.compression_format == FORMAT_LZX)) 281 | { 282 | /* Yes, it's a system compressed file. Save the compression 283 | * format identifier. */ 284 | *format_ret = rp->file.compression_format; 285 | ret = 0; 286 | } else { 287 | /* No, it's not a system compressed file. */ 288 | errno = EOPNOTSUPP; 289 | ret = -1; 290 | } 291 | 292 | if ((const REPARSE_POINT *)rp != reparse) 293 | free(rp); 294 | return ret; 295 | } 296 | 297 | static u32 get_chunk_order(WOF_FILE_PROVIDER_COMPRESSION_FORMAT format) 298 | { 299 | switch (format) { 300 | case FORMAT_XPRESS4K: 301 | return 12; 302 | case FORMAT_XPRESS8K: 303 | return 13; 304 | case FORMAT_XPRESS16K: 305 | return 14; 306 | case FORMAT_LZX: 307 | return 15; 308 | } 309 | /* Not reached */ 310 | return 0; 311 | } 312 | 313 | /* 314 | * Get the compressed size of a system compressed file. This is the size of its 315 | * WofCompressedData stream. 
316 | */ 317 | static s64 get_compressed_size(ntfs_inode *ni) 318 | { 319 | ntfs_attr_search_ctx *actx; 320 | s64 ret; 321 | 322 | actx = ntfs_attr_get_search_ctx(ni, NULL); 323 | if (!actx) 324 | return -1; 325 | 326 | ret = ntfs_attr_lookup(AT_DATA, compressed_stream_name, 327 | sizeof(compressed_stream_name) / 328 | sizeof(compressed_stream_name[0]), 329 | CASE_SENSITIVE, 0, NULL, 0, actx); 330 | if (!ret) 331 | ret = ntfs_get_attribute_value_length(actx->attr); 332 | 333 | ntfs_attr_put_search_ctx(actx); 334 | return ret; 335 | } 336 | 337 | /* 338 | * ntfs_get_system_compressed_file_size - Return the compressed size of a system 339 | * compressed file 340 | * 341 | * @ni: The NTFS inode for the file 342 | * @reparse: (Optional) the contents of the file's reparse point attribute 343 | * 344 | * On success, return the compressed size in bytes. On failure, return -1 and 345 | * set errno. If the file is not a system compressed file, return -1 and set 346 | * errno to EOPNOTSUPP. 347 | */ 348 | s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni, 349 | const REPARSE_POINT *reparse) 350 | { 351 | WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; 352 | 353 | /* Verify this is a system compressed file. */ 354 | if (get_compression_format(ni, reparse, &format)) 355 | return -1; 356 | 357 | return get_compressed_size(ni); 358 | } 359 | 360 | /* 361 | * ntfs_open_system_decompression_ctx - Prepare to read a system-compressed file 362 | * 363 | * @ni: The NTFS inode for the file 364 | * @reparse: (Optional) the contents of the file's reparse point attribute 365 | * 366 | * On success, return a pointer to the decompression context. On failure, 367 | * return NULL and set errno. If the file is not a system-compressed file, 368 | * return NULL and set errno to EOPNOTSUPP. 
369 | */ 370 | struct ntfs_system_decompression_ctx * 371 | ntfs_open_system_decompression_ctx(ntfs_inode *ni, const REPARSE_POINT *reparse) 372 | { 373 | WOF_FILE_PROVIDER_COMPRESSION_FORMAT format; 374 | struct ntfs_system_decompression_ctx *ctx; 375 | s64 csize; 376 | 377 | /* Get the compression format. This also validates that the file really 378 | * is a system-compressed file. */ 379 | if (get_compression_format(ni, reparse, &format)) 380 | goto err; 381 | 382 | /* Allocate the decompression context. */ 383 | ctx = ntfs_malloc(sizeof(struct ntfs_system_decompression_ctx)); 384 | if (!ctx) 385 | goto err; 386 | 387 | /* Allocate the decompressor. */ 388 | ctx->format = format; 389 | if (allocate_decompressor(ctx)) 390 | goto err_free_ctx; 391 | 392 | /* Determine the compressed size of the file. */ 393 | csize = get_compressed_size(ni); 394 | if (csize < 0) 395 | goto err_free_decompressor; 396 | ctx->compressed_size = csize; 397 | 398 | /* The uncompressed size of a system-compressed file is the size of its 399 | * unnamed data stream, which should be sparse so that it consumes no 400 | * disk space (though we don't rely on it being sparse). */ 401 | ctx->uncompressed_size = ni->data_size; 402 | 403 | /* Get the chunk size, which depends on the compression format. */ 404 | ctx->chunk_order = get_chunk_order(ctx->format); 405 | ctx->chunk_size = (u32)1 << ctx->chunk_order; 406 | 407 | /* Compute the number of chunks into which the file is divided. */ 408 | ctx->num_chunks = (ctx->uncompressed_size + 409 | ctx->chunk_size - 1) >> ctx->chunk_order; 410 | 411 | /* Initially, no chunk offsets are cached. */ 412 | ctx->base_chunk_idx = INVALID_CHUNK_INDEX; 413 | 414 | /* Allocate buffers for chunk data. 
*/ 415 | ctx->temp_buffer = ntfs_malloc(max(ctx->chunk_size, 416 | NUM_CHUNK_OFFSETS * sizeof(u64))); 417 | ctx->cached_chunk = ntfs_malloc(ctx->chunk_size); 418 | ctx->cached_chunk_idx = INVALID_CHUNK_INDEX; 419 | if (!ctx->temp_buffer || !ctx->cached_chunk) 420 | goto err_free_buffers; 421 | 422 | return ctx; 423 | 424 | err_free_buffers: 425 | free(ctx->cached_chunk); 426 | free(ctx->temp_buffer); 427 | err_free_decompressor: 428 | free_decompressor(ctx); 429 | err_free_ctx: 430 | free(ctx); 431 | err: 432 | return NULL; 433 | } 434 | 435 | /* Retrieve the stored offset and size of a chunk stored in the compressed file 436 | * stream. */ 437 | static int get_chunk_location(struct ntfs_system_decompression_ctx *ctx, 438 | ntfs_attr *na, u64 chunk_idx, 439 | u64 *offset_ret, u32 *stored_size_ret) 440 | { 441 | size_t cache_idx; 442 | 443 | /* To get the stored size of the chunk, we need its offset and the next 444 | * chunk's offset. Use the cached values if possible; otherwise load 445 | * the needed offsets into the cache. To reduce the number of chunk 446 | * table reads that may be required later, also load some extra. */ 447 | if (chunk_idx < ctx->base_chunk_idx || 448 | chunk_idx + 1 >= ctx->base_chunk_idx + NUM_CHUNK_OFFSETS) 449 | { 450 | const u64 start_chunk = chunk_idx; 451 | const u64 end_chunk = 452 | chunk_idx + min(NUM_CHUNK_OFFSETS - 1, 453 | ctx->num_chunks - chunk_idx); 454 | const int entry_shift = 455 | (ctx->uncompressed_size <= UINT32_MAX) ? 2 : 3; 456 | le32 * const offsets32 = ctx->temp_buffer; 457 | le64 * const offsets64 = ctx->temp_buffer; 458 | u64 first_entry_to_read; 459 | size_t num_entries_to_read; 460 | size_t i, j; 461 | s64 res; 462 | 463 | num_entries_to_read = end_chunk - start_chunk; 464 | 465 | /* The first chunk has no explicit chunk table entry. 
*/ 466 | if (start_chunk == 0) { 467 | num_entries_to_read--; 468 | first_entry_to_read = 0; 469 | } else { 470 | first_entry_to_read = start_chunk - 1; 471 | } 472 | 473 | if (end_chunk != ctx->num_chunks) 474 | num_entries_to_read++; 475 | 476 | /* Read the chunk table entries into a temporary buffer. */ 477 | res = ntfs_attr_pread(na, first_entry_to_read << entry_shift, 478 | num_entries_to_read << entry_shift, 479 | ctx->temp_buffer); 480 | 481 | if ((u64)res != num_entries_to_read << entry_shift) { 482 | if (res >= 0) 483 | errno = EINVAL; 484 | ctx->base_chunk_idx = INVALID_CHUNK_INDEX; 485 | return -1; 486 | } 487 | 488 | /* Prepare the cached chunk offsets. */ 489 | 490 | i = 0; 491 | if (start_chunk == 0) { 492 | /* Implicit first entry */ 493 | ctx->chunk_offsets[i++] = 0; 494 | ctx->base_chunk_offset = 0; 495 | } else { 496 | if (entry_shift == 3) { 497 | ctx->base_chunk_offset = 498 | le64_to_cpu(offsets64[0]); 499 | } else { 500 | ctx->base_chunk_offset = 501 | le32_to_cpu(offsets32[0]); 502 | } 503 | } 504 | 505 | if (entry_shift == 3) { 506 | /* 64-bit entries (huge file) */ 507 | for (j = 0; j < num_entries_to_read; j++) { 508 | ctx->chunk_offsets[i++] = 509 | le64_to_cpu(offsets64[j]) - 510 | ctx->base_chunk_offset; 511 | } 512 | } else { 513 | /* 32-bit entries */ 514 | for (j = 0; j < num_entries_to_read; j++) { 515 | ctx->chunk_offsets[i++] = 516 | le32_to_cpu(offsets32[j]) - 517 | ctx->base_chunk_offset; 518 | } 519 | } 520 | 521 | /* Account for the chunk table itself. 
*/ 522 | ctx->base_chunk_offset += (ctx->num_chunks - 1) << entry_shift; 523 | 524 | if (end_chunk == ctx->num_chunks) { 525 | /* Implicit last entry */ 526 | ctx->chunk_offsets[i] = ctx->compressed_size - 527 | ctx->base_chunk_offset; 528 | } 529 | 530 | ctx->base_chunk_idx = start_chunk; 531 | } 532 | 533 | cache_idx = chunk_idx - ctx->base_chunk_idx; 534 | *offset_ret = ctx->base_chunk_offset + ctx->chunk_offsets[cache_idx]; 535 | *stored_size_ret = ctx->chunk_offsets[cache_idx + 1] - 536 | ctx->chunk_offsets[cache_idx]; 537 | return 0; 538 | } 539 | 540 | /* Retrieve into @buffer the uncompressed data of chunk @chunk_idx. */ 541 | static int read_and_decompress_chunk(struct ntfs_system_decompression_ctx *ctx, 542 | ntfs_attr *na, u64 chunk_idx, void *buffer) 543 | { 544 | u64 offset; 545 | u32 stored_size; 546 | u32 uncompressed_size; 547 | void *read_buffer; 548 | s64 res; 549 | 550 | /* Get the location of the chunk data as stored in the file. */ 551 | if (get_chunk_location(ctx, na, chunk_idx, &offset, &stored_size)) 552 | return -1; 553 | 554 | /* All chunks decompress to 'chunk_size' bytes except possibly the last, 555 | * which decompresses to whatever remains. */ 556 | if (chunk_idx == ctx->num_chunks - 1) 557 | uncompressed_size = ((ctx->uncompressed_size - 1) & 558 | (ctx->chunk_size - 1)) + 1; 559 | else 560 | uncompressed_size = ctx->chunk_size; 561 | 562 | /* Forbid strange compressed sizes. */ 563 | if (stored_size <= 0 || stored_size > uncompressed_size) { 564 | errno = EINVAL; 565 | return -1; 566 | } 567 | 568 | /* Chunks that didn't compress to less than their original size are 569 | * stored uncompressed. */ 570 | if (stored_size == uncompressed_size) { 571 | /* Chunk is stored uncompressed */ 572 | read_buffer = buffer; 573 | } else { 574 | /* Chunk is stored compressed */ 575 | read_buffer = ctx->temp_buffer; 576 | } 577 | 578 | /* Read the stored chunk data. 
*/ 579 | res = ntfs_attr_pread(na, offset, stored_size, read_buffer); 580 | if (res != stored_size) { 581 | if (res >= 0) 582 | errno = EINVAL; 583 | return -1; 584 | } 585 | 586 | /* If the chunk was stored uncompressed, then we're done. */ 587 | if (read_buffer == buffer) 588 | return 0; 589 | 590 | /* The chunk was stored compressed. Decompress its data. */ 591 | if (decompress(ctx, read_buffer, stored_size, 592 | buffer, uncompressed_size)) { 593 | errno = EINVAL; 594 | return -1; 595 | } 596 | 597 | return 0; 598 | } 599 | 600 | /* Retrieve a pointer to the uncompressed data of the specified chunk. On 601 | * failure, return NULL and set errno. */ 602 | static const void *get_chunk_data(struct ntfs_system_decompression_ctx *ctx, 603 | ntfs_attr *na, u64 chunk_idx) 604 | { 605 | if (chunk_idx != ctx->cached_chunk_idx) { 606 | ctx->cached_chunk_idx = INVALID_CHUNK_INDEX; 607 | if (read_and_decompress_chunk(ctx, na, chunk_idx, 608 | ctx->cached_chunk)) 609 | return NULL; 610 | ctx->cached_chunk_idx = chunk_idx; 611 | } 612 | return ctx->cached_chunk; 613 | } 614 | 615 | /* 616 | * ntfs_read_system_compressed_data - Read data from a system-compressed file 617 | * 618 | * @ctx: The decompression context for the file 619 | * @ni: The NTFS inode for the file 620 | * @pos: The byte offset into the uncompressed data to read from 621 | * @count: The number of bytes of uncompressed data to read 622 | * @buf: The buffer into which to read the data 623 | * 624 | * On full or partial success, return the number of bytes read (0 indicates 625 | * end-of-file). On complete failure, return -1 and set errno. 
626 | */ 627 | ssize_t ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx, 628 | ntfs_inode *ni, s64 pos, size_t count, 629 | void *buf) 630 | { 631 | u64 offset; 632 | ntfs_attr *na; 633 | u8 *p; 634 | u8 *end_p; 635 | u64 chunk_idx; 636 | u32 offset_in_chunk; 637 | u32 chunk_size; 638 | 639 | if (!ctx || !ni || pos < 0) { 640 | errno = EINVAL; 641 | return -1; 642 | } 643 | 644 | offset = (u64)pos; 645 | if (offset >= ctx->uncompressed_size) 646 | return 0; 647 | 648 | count = min(count, ctx->uncompressed_size - offset); 649 | if (!count) 650 | return 0; 651 | 652 | na = ntfs_attr_open(ni, AT_DATA, compressed_stream_name, 653 | sizeof(compressed_stream_name) / 654 | sizeof(compressed_stream_name[0])); 655 | if (!na) 656 | return -1; 657 | 658 | p = buf; 659 | end_p = p + count; 660 | chunk_idx = offset >> ctx->chunk_order; 661 | offset_in_chunk = offset & (ctx->chunk_size - 1); 662 | chunk_size = ctx->chunk_size; 663 | do { 664 | u32 len_to_copy; 665 | const u8 *chunk; 666 | 667 | if (chunk_idx == ctx->num_chunks - 1) 668 | chunk_size = ((ctx->uncompressed_size - 1) & 669 | (ctx->chunk_size - 1)) + 1; 670 | 671 | len_to_copy = min((size_t)(end_p - p), 672 | chunk_size - offset_in_chunk); 673 | 674 | chunk = get_chunk_data(ctx, na, chunk_idx); 675 | if (!chunk) 676 | break; 677 | 678 | memcpy(p, &chunk[offset_in_chunk], len_to_copy); 679 | 680 | p += len_to_copy; 681 | chunk_idx++; 682 | offset_in_chunk = 0; 683 | } while (p != end_p); 684 | 685 | ntfs_attr_close(na); 686 | 687 | return (p == buf) ? 
-1 : p - (u8 *)buf; 688 | } 689 | 690 | /* 691 | * ntfs_close_system_decompression_ctx - Close a system-compressed file 692 | */ 693 | void ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx) 694 | { 695 | if (ctx) { 696 | free(ctx->cached_chunk); 697 | free(ctx->temp_buffer); 698 | free_decompressor(ctx); 699 | free(ctx); 700 | } 701 | } 702 | -------------------------------------------------------------------------------- /src/system_compression.h: -------------------------------------------------------------------------------- 1 | /* 2 | * system_compression.h - declarations for accessing System Compressed files 3 | * 4 | * Copyright (C) 2015 Eric Biggers 5 | * 6 | * This program is free software: you can redistribute it and/or modify it under 7 | * the terms of the GNU General Public License as published by the Free Software 8 | * Foundation, either version 2 of the License, or (at your option) any later 9 | * version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 14 | * details. 15 | * 16 | * You should have received a copy of the GNU General Public License along with 17 | * this program. If not, see . 
18 | */ 19 | 20 | #ifndef _NTFS_SYSTEM_COMPRESSION_H 21 | #define _NTFS_SYSTEM_COMPRESSION_H 22 | 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | 29 | /* System compressed file access */ 30 | 31 | struct ntfs_system_decompression_ctx; 32 | 33 | extern s64 ntfs_get_system_compressed_file_size(ntfs_inode *ni, 34 | const REPARSE_POINT *reparse); 35 | 36 | extern struct ntfs_system_decompression_ctx * 37 | ntfs_open_system_decompression_ctx(ntfs_inode *ni, 38 | const REPARSE_POINT *reparse); 39 | 40 | extern ssize_t 41 | ntfs_read_system_compressed_data(struct ntfs_system_decompression_ctx *ctx, 42 | ntfs_inode *ni, s64 pos, size_t count, 43 | void *buf); 44 | 45 | extern void 46 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx); 47 | 48 | /* XPRESS decompression */ 49 | 50 | struct xpress_decompressor; 51 | 52 | extern struct xpress_decompressor *xpress_allocate_decompressor(void); 53 | 54 | extern int xpress_decompress(struct xpress_decompressor *decompressor, 55 | const void *compressed_data, size_t compressed_size, 56 | void *uncompressed_data, size_t uncompressed_size); 57 | 58 | extern void xpress_free_decompressor(struct xpress_decompressor *decompressor); 59 | 60 | /* LZX decompression */ 61 | 62 | struct lzx_decompressor; 63 | 64 | extern struct lzx_decompressor * 65 | lzx_allocate_decompressor(size_t max_block_size); 66 | 67 | extern int lzx_decompress(struct lzx_decompressor *decompressor, 68 | const void *compressed_data, size_t compressed_size, 69 | void *uncompressed_data, size_t uncompressed_size); 70 | 71 | extern void lzx_free_decompressor(struct lzx_decompressor *decompressor); 72 | 73 | #endif /* _NTFS_SYSTEM_COMPRESSION_H */ 74 | -------------------------------------------------------------------------------- /src/xpress_constants.h: -------------------------------------------------------------------------------- 1 | /* 2 | * xpress_constants.h 3 | * 4 | * Constants for the XPRESS compression format. 
5 | */ 6 | 7 | #ifndef _XPRESS_CONSTANTS_H 8 | #define _XPRESS_CONSTANTS_H 9 | 10 | #define XPRESS_NUM_CHARS 256 11 | #define XPRESS_NUM_SYMBOLS 512 12 | #define XPRESS_MAX_CODEWORD_LEN 15 13 | 14 | #define XPRESS_END_OF_DATA 256 15 | 16 | #define XPRESS_MIN_OFFSET 1 17 | #define XPRESS_MAX_OFFSET 65535 18 | 19 | #define XPRESS_MIN_MATCH_LEN 3 20 | #define XPRESS_MAX_MATCH_LEN 65538 21 | 22 | #endif /* _XPRESS_CONSTANTS_H */ 23 | -------------------------------------------------------------------------------- /src/xpress_decompress.c: -------------------------------------------------------------------------------- 1 | /* 2 | * xpress_decompress.c 3 | * 4 | * A decompressor for the XPRESS compression format (Huffman variant). 5 | */ 6 | 7 | /* 8 | * 9 | * Copyright (C) 2012-2016 Eric Biggers 10 | * 11 | * This program is free software: you can redistribute it and/or modify it under 12 | * the terms of the GNU General Public License as published by the Free Software 13 | * Foundation, either version 2 of the License, or (at your option) any later 14 | * version. 15 | * 16 | * This program is distributed in the hope that it will be useful, but WITHOUT 17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 19 | * details. 20 | * 21 | * You should have received a copy of the GNU General Public License along with 22 | * this program. If not, see . 23 | */ 24 | 25 | 26 | /* 27 | * The XPRESS compression format is an LZ77 and Huffman-code based algorithm. 28 | * That means it is fairly similar to LZX compression, but XPRESS is simpler, so 29 | * it is a little faster to compress and decompress. 30 | * 31 | * The XPRESS compression format is mostly documented in a file called "[MS-XCA] 32 | * Xpress Compression Algorithm". 
In the MSDN library, it can currently be 33 | * found under Open Specifications => Protocols => Windows Protocols => Windows 34 | * Server Protocols => [MS-XCA] Xpress Compression Algorithm. The format in 35 | * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm" 36 | * (there apparently are some other versions of XPRESS as well). 37 | * 38 | * If you are already familiar with the LZ77 algorithm and Huffman coding, the 39 | * XPRESS format is fairly simple. The compressed data begins with 256 bytes 40 | * that contain 512 4-bit integers that are the lengths of the symbols in the 41 | * Huffman code used for match/literal headers. In contrast with more 42 | * complicated formats such as DEFLATE and LZX, this is the only Huffman code 43 | * that is used for the entirety of the XPRESS compressed data, and the codeword 44 | * lengths are not encoded with a pretree. 45 | * 46 | * The rest of the compressed data consists of Huffman-encoded symbols. Values 0 through 47 | * 255 represent the corresponding literal bytes. Values 256 through 511 48 | * represent matches and may require extra bits or bytes to be read to get the 49 | * match offset and match length. 50 | * 51 | * The trickiest part is probably the way in which literal bytes for match 52 | * lengths are interleaved in the bitstream. 53 | * 54 | * Also, a caveat--- according to Microsoft's documentation for XPRESS, 55 | * 56 | * "Some implementation of the decompression algorithm expect an extra 57 | * symbol to mark the end of the data. Specifically, some implementations 58 | * fail during decompression if the Huffman symbol 256 is not found after 59 | * the actual data." 60 | * 61 | * This is the case with Microsoft's implementation in WIMGAPI, for example. So 62 | * although our implementation doesn't currently check for this extra symbol, 63 | * compressors would be wise to add it.
64 | */ 65 | 66 | #ifdef HAVE_CONFIG_H 67 | # include "config.h" 68 | #endif 69 | 70 | #include "decompress_common.h" 71 | #include "system_compression.h" 72 | #include "xpress_constants.h" 73 | 74 | /* This value is chosen for fast decompression. */ 75 | #define XPRESS_TABLEBITS 11 76 | 77 | struct xpress_decompressor { 78 | union { 79 | DECODE_TABLE(decode_table, XPRESS_NUM_SYMBOLS, 80 | XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); 81 | u8 lens[XPRESS_NUM_SYMBOLS]; 82 | }; 83 | DECODE_TABLE_WORKING_SPACE(working_space, XPRESS_NUM_SYMBOLS, 84 | XPRESS_MAX_CODEWORD_LEN); 85 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT); 86 | 87 | int 88 | xpress_decompress(struct xpress_decompressor *restrict d, 89 | const void *restrict compressed_data, size_t compressed_size, 90 | void *restrict uncompressed_data, size_t uncompressed_size) 91 | { 92 | const u8 * const in_begin = compressed_data; 93 | u8 * const out_begin = uncompressed_data; 94 | u8 *out_next = out_begin; 95 | u8 * const out_end = out_begin + uncompressed_size; 96 | struct input_bitstream is; 97 | 98 | /* Read the Huffman codeword lengths. */ 99 | if (compressed_size < XPRESS_NUM_SYMBOLS / 2) 100 | return -1; 101 | for (int i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { 102 | d->lens[2 * i + 0] = in_begin[i] & 0xf; 103 | d->lens[2 * i + 1] = in_begin[i] >> 4; 104 | } 105 | 106 | /* Build a decoding table for the Huffman code. */ 107 | if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, 108 | XPRESS_TABLEBITS, d->lens, 109 | XPRESS_MAX_CODEWORD_LEN, 110 | d->working_space)) 111 | return -1; 112 | 113 | /* Decode the matches and literals. 
*/ 114 | 115 | init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, 116 | compressed_size - XPRESS_NUM_SYMBOLS / 2); 117 | 118 | while (out_next != out_end) { 119 | unsigned sym; 120 | unsigned log2_offset; 121 | u32 length; 122 | u32 offset; 123 | 124 | sym = read_huffsym(&is, d->decode_table, 125 | XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); 126 | if (sym < XPRESS_NUM_CHARS) { 127 | /* Literal */ 128 | *out_next++ = sym; 129 | } else { 130 | /* Match */ 131 | length = sym & 0xf; 132 | log2_offset = (sym >> 4) & 0xf; 133 | 134 | bitstream_ensure_bits(&is, 16); 135 | 136 | offset = ((u32)1 << log2_offset) | 137 | bitstream_pop_bits(&is, log2_offset); 138 | 139 | if (length == 0xf) { 140 | length += bitstream_read_byte(&is); 141 | if (length == 0xf + 0xff) 142 | length = bitstream_read_u16(&is); 143 | } 144 | length += XPRESS_MIN_MATCH_LEN; 145 | 146 | if (unlikely(lz_copy(length, offset, 147 | out_begin, out_next, out_end, 148 | XPRESS_MIN_MATCH_LEN))) 149 | return -1; 150 | 151 | out_next += length; 152 | } 153 | } 154 | return 0; 155 | } 156 | 157 | struct xpress_decompressor * 158 | xpress_allocate_decompressor(void) 159 | { 160 | return aligned_malloc(sizeof(struct xpress_decompressor), 161 | DECODE_TABLE_ALIGNMENT); 162 | } 163 | 164 | void 165 | xpress_free_decompressor(struct xpress_decompressor *d) 166 | { 167 | aligned_free(d); 168 | } 169 | --------------------------------------------------------------------------------