├── mingw-x86.cmake
├── mingw-amd64.cmake
├── src
├── ebiggers
│ ├── lzx_common.h
│ ├── system_compression.h
│ ├── lzx_constants.h
│ ├── xpress_decompress.c
│ ├── common_defs.h
│ ├── lzx_common.c
│ ├── decompress_common.c
│ ├── lzx_decompress.c
│ └── decompress_common.h
├── misc.h
├── quibbleproto.h
├── misc.cpp
├── ntfs.h
└── ntfs.cpp
├── README.md
├── .github
└── workflows
│ ├── build-x86.yml
│ └── build-amd64.yml
├── CMakeLists.txt
└── LICENCE
/mingw-x86.cmake:
--------------------------------------------------------------------------------
1 | # CMake toolchain file: cross-compile Windows/X86 binaries with the
2 | # i686 MinGW-w64 toolchain.
3 | set(CMAKE_SYSTEM_NAME Windows)
4 | # CMakeLists.txt compares this value against "X86" to pick include paths and
5 | # the entry-point symbol, so keep the spelling.
6 | set(CMAKE_SYSTEM_PROCESSOR X86)
7 |
8 | # Cross compilers. The resource compiler is named explicitly so that this file
9 | # agrees with mingw-amd64.cmake.
10 | set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
11 | set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
12 | set(CMAKE_RC_COMPILER i686-w64-mingw32-windres)
13 |
14 | set(CMAKE_EXE_LINKER_FLAGS "-static")
15 |
16 | # Root of the target environment: libraries and headers are searched only
17 | # below it, programs are never searched there.
18 | set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
19 |
20 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
21 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
22 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
23 |
--------------------------------------------------------------------------------
/mingw-amd64.cmake:
--------------------------------------------------------------------------------
1 | # CMake toolchain file: cross-compile Windows/AMD64 binaries with the
2 | # x86_64 MinGW-w64 toolchain.
3 | set(CMAKE_SYSTEM_NAME Windows)
4 | set(CMAKE_SYSTEM_PROCESSOR "AMD64")
5 |
6 | # Cross compilers and resource compiler.
7 | set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
8 | set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
9 | set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
10 |
11 | set(CMAKE_EXE_LINKER_FLAGS "-static")
12 |
13 | # Root of the target environment: libraries and headers are searched only
14 | # below it, programs are never searched there.
15 | set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
16 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
17 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
18 | set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
19 |
--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * lzx_common.h
3 | *
4 | * Declarations shared between LZX compression and decompression.
5 | */
6 |
7 | #ifndef _LZX_COMMON_H
8 | #define _LZX_COMMON_H
9 |
10 | #include "lzx_constants.h"
11 | #include "common_defs.h"
12 | /* Table with LZX_MAX_OFFSET_SLOTS + 1 entries; presumably the base match offset of each offset slot - confirm in lzx_common.c. */
13 | extern const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1];
14 | /* Table with one entry per offset slot; presumably the count of extra offset bits for that slot - confirm in lzx_common.c. */
15 | extern const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
16 | /* Presumably maps a maximum buffer size to an LZX window order - implementation not visible here. */
17 | extern unsigned
18 | lzx_get_window_order(size_t max_bufsize);
19 | /* Presumably the number of main-code symbols for 'window_order' (lzx_constants.h notes that this count depends on the window size). */
20 | extern unsigned
21 | lzx_get_num_main_syms(unsigned window_order);
22 | /* Operates in place on 'size' bytes at 'data'; presumably the call-instruction (0xe8) preprocessing mentioned in lzx_constants.h - confirm. */
23 | extern void
24 | lzx_preprocess(uint8_t *data, uint32_t size);
25 | /* Operates in place on 'size' bytes at 'data'; presumably the inverse of lzx_preprocess() - confirm. */
26 | extern void
27 | lzx_postprocess(uint8_t *data, uint32_t size);
28 |
29 | #endif /* _LZX_COMMON_H */
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ntfs-efi
2 | ---------
3 |
4 | ntfs-efi is an NTFS filesystem driver for EFI. It is intended for use with the free
5 | Windows bootloader [Quibble](https://github.com/maharmstone/quibble), but you
6 | should be able to use it for anything EFI-related.
7 |
8 | Thanks to [Eric Biggers](https://github.com/ebiggers), who [successfully reverse-engineered](https://github.com/ebiggers/ntfs-3g-system-compression/) Windows 10's
9 | "WOF compressed data", and whose code I've used here.
10 |
11 | Changelog
12 | ---------
13 |
14 | * 20231107
15 | * Fixed memcpy miscompilation bug
16 |
17 | * 20230328
18 | * Initial release
19 |
20 | To do
21 | -----
22 |
23 | * LZX WOF compression
24 | * LZNT1 compression
25 | * Hide special files in root
26 | * Free space, volume label, etc.
27 | * Symlinks
28 | * Case-sensitive directories
29 |
--------------------------------------------------------------------------------
/src/misc.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) Mark Harmstone 2023
2 | *
3 | * This file is part of ntfs-efi.
4 | *
5 | * ntfs-efi is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public Licence as published by
7 | * the Free Software Foundation, either version 2 of the Licence, or
8 | * (at your option) any later version.
9 | *
10 | * ntfs-efi is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public Licence for more details.
14 | *
15 | * You should have received a copy of the GNU General Public Licence
16 | * along with ntfs-efi. If not, see <https://www.gnu.org/licenses/>. */
17 |
18 | #pragma once
19 |
20 | #include <efi.h>
21 |
22 | /* Takes an EFI status code and returns a C string for it. The implementation
23 |  * is not in this header; presumably the string is a human-readable name for
24 |  * the status - confirm in misc.cpp. */
25 | const char* error_string(EFI_STATUS Status);
26 |
27 | /* Project-provided stpcpy. NOTE(review): presumably follows the POSIX
28 |  * contract (copy 'src' including its terminator to 'dest' and return a
29 |  * pointer to the terminator in 'dest') - confirm in misc.cpp. */
30 | char* stpcpy(char* dest, const char* src);
31 |
--------------------------------------------------------------------------------
/.github/workflows/build-x86.yml:
--------------------------------------------------------------------------------
1 | name: build x86
2 | on: [push]  # runs on every push
3 | jobs:
4 | x86:
5 | runs-on: ubuntu-rolling  # NOTE(review): presumably a custom/self-hosted runner label - confirm
6 | steps:
7 | - run: apt-get update
8 | - run: apt-get install -y g++ git cmake nodejs g++-mingw-w64-i686 gnu-efi  # i686 MinGW cross toolchain plus gnu-efi
9 | - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV
10 | - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA}
11 | - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }}
12 | - run: mkdir -p install/debug  # artifact layout: install/ (release) and install/debug/
13 | - run: |  # Debug configuration -> install/debug/ntfs.efi
14 | cmake -DCMAKE_BUILD_TYPE=Debug \
15 | -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \
16 | -S ${SHORT_SHA} -B debug-work && \
17 | cmake --build debug-work --parallel `nproc` && \
18 | cp debug-work/ntfs.efi install/debug/
19 | - run: |  # RelWithDebInfo configuration -> install/ntfs.efi
20 | cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
21 | -DCMAKE_TOOLCHAIN_FILE=mingw-x86.cmake \
22 | -S ${SHORT_SHA} -B release-work && \
23 | cmake --build release-work --parallel `nproc` && \
24 | cp release-work/ntfs.efi install/
25 | - uses: actions/upload-artifact@v3
26 | with:
27 | name: ${{ github.sha }}
28 | overwrite: true  # NOTE(review): confirm that upload-artifact v3 accepts an `overwrite` input
29 | path: |
30 | install
31 |
--------------------------------------------------------------------------------
/.github/workflows/build-amd64.yml:
--------------------------------------------------------------------------------
1 | name: build amd64
2 | on: [push]  # runs on every push
3 | jobs:
4 | amd64:
5 | runs-on: ubuntu-rolling  # NOTE(review): presumably a custom/self-hosted runner label - confirm
6 | steps:
7 | - run: apt-get update
8 | - run: apt-get install -y g++ git cmake nodejs g++-mingw-w64-x86-64 gnu-efi  # x86_64 MinGW cross toolchain plus gnu-efi
9 | - run: echo "SHORT_SHA=`echo ${{ github.sha }} | cut -c1-8`" >> $GITHUB_ENV
10 | - run: git clone ${{ github.server_url }}/${{ github.repository }} ${SHORT_SHA}
11 | - run: cd ${SHORT_SHA} && git checkout ${{ github.sha }}
12 | - run: mkdir -p install/debug  # artifact layout: install/ (release) and install/debug/
13 | - run: |  # Debug configuration -> install/debug/ntfs.efi
14 | cmake -DCMAKE_BUILD_TYPE=Debug \
15 | -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \
16 | -S ${SHORT_SHA} -B debug-work && \
17 | cmake --build debug-work --parallel `nproc` && \
18 | cp debug-work/ntfs.efi install/debug/
19 | - run: |  # RelWithDebInfo configuration -> install/ntfs.efi
20 | cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \
21 | -DCMAKE_TOOLCHAIN_FILE=mingw-amd64.cmake \
22 | -S ${SHORT_SHA} -B release-work && \
23 | cmake --build release-work --parallel `nproc` && \
24 | cp release-work/ntfs.efi install/
25 | - uses: actions/upload-artifact@v3
26 | with:
27 | name: ${{ github.sha }}
28 | overwrite: true  # NOTE(review): confirm that upload-artifact v3 accepts an `overwrite` input
29 | path: |
30 | install
31 |
--------------------------------------------------------------------------------
/src/quibbleproto.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) Mark Harmstone 2023
2 | *
3 | * This file is part of ntfs-efi.
4 | *
5 | * ntfs-efi is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public Licence as published by
7 | * the Free Software Foundation, either version 2 of the Licence, or
8 | * (at your option) any later version.
9 | *
10 | * ntfs-efi is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public Licence for more details.
14 | *
15 | * You should have received a copy of the GNU General Public Licence
16 | * along with ntfs-efi. If not, see <https://www.gnu.org/licenses/>. */
17 |
18 | #pragma once
19 | /* GUID value paired by name with EFI_QUIBBLE_PROTOCOL below. */
20 | #define EFI_QUIBBLE_PROTOCOL_GUID { 0x98BCC8FF, 0xD212, 0x4B09, {0x84, 0x0C, 0x43, 0x19, 0xAD, 0x2E, 0xD3, 0x6A } }
21 | /* Forward declaration so the callback typedefs below can take a pointer to the protocol. */
22 | typedef struct _EFI_QUIBBLE_PROTOCOL EFI_QUIBBLE_PROTOCOL;
23 | /* Callback type: takes the protocol instance, an output char buffer and an in/out length. Presumably reports an ARC name - semantics are defined by the protocol provider; confirm there. */
24 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_ARC_NAME) (
25 | IN EFI_QUIBBLE_PROTOCOL* This,
26 | OUT char* ArcName,
27 | IN OUT UINTN* ArcNameLen
28 | );
29 | /* Callback type: same shape as above but with a CHAR16 output buffer. Presumably reports a Windows driver name - confirm with the protocol provider. */
30 | typedef EFI_STATUS (EFIAPI* EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME) (
31 | IN EFI_QUIBBLE_PROTOCOL* This,
32 | OUT CHAR16* DriverName,
33 | IN OUT UINTN* DriverNameLen
34 | );
35 | /* Protocol interface: a table holding the two callbacks declared above. */
36 | typedef struct _EFI_QUIBBLE_PROTOCOL {
37 | EFI_QUIBBLE_GET_ARC_NAME GetArcName;
38 | EFI_QUIBBLE_GET_WINDOWS_DRIVER_NAME GetWindowsDriverName;
39 | } EFI_QUIBBLE_PROTOCOL;
40 | /* GUID value paired by name with EFI_OPEN_SUBVOL_PROTOCOL below. */
41 | #define EFI_OPEN_SUBVOL_GUID { 0x5861E4D5, 0xC7F1, 0x4932, {0xA0, 0x81, 0xF2, 0x2A, 0xAE, 0x8A, 0x82, 0x98 } }
42 | /* Forward declaration for use in the callback typedef below. */
43 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL EFI_OPEN_SUBVOL_PROTOCOL;
44 | /* Callback type: takes the protocol instance and a UINT64 'Subvol' value, and outputs an EFI_FILE_HANDLE through 'File'. */
45 | typedef EFI_STATUS (EFIAPI* EFI_OPEN_SUBVOL_FUNC) (
46 | IN EFI_OPEN_SUBVOL_PROTOCOL* This,
47 | IN UINT64 Subvol,
48 | OUT EFI_FILE_HANDLE* File
49 | );
50 | /* Protocol interface holding the single OpenSubvol callback. */
51 | typedef struct _EFI_OPEN_SUBVOL_PROTOCOL {
52 | EFI_OPEN_SUBVOL_FUNC OpenSubvol;
53 | } EFI_OPEN_SUBVOL_PROTOCOL;
54 | /* GUID value paired by name with EFI_QUIBBLE_INFO_PROTOCOL below. */
55 | #define EFI_QUIBBLE_INFO_PROTOCOL_GUID { 0x89498E00, 0xAE8F, 0x4B23, {0x86, 0x11, 0x71, 0x2A, 0xE1, 0x2F, 0xC8, 0xD9 } }
56 | /* Callback type: takes a const char* 's' and returns nothing; presumably prints the string - confirm with the protocol provider. */
57 | typedef void (EFIAPI* EFI_QUIBBLE_INFO_PRINT) (
58 | IN const char* s
59 | );
60 | /* Protocol interface holding the single Print callback. */
61 | typedef struct _EFI_QUIBBLE_INFO_PROTOCOL {
62 | EFI_QUIBBLE_INFO_PRINT Print;
63 | } EFI_QUIBBLE_INFO_PROTOCOL;
64 |
--------------------------------------------------------------------------------
/src/ebiggers/system_compression.h:
--------------------------------------------------------------------------------
1 | /*
2 | * system_compression.h - declarations for accessing System Compressed files
3 | *
4 | * Copyright (C) 2015 Eric Biggers
5 | *
6 | * This program is free software: you can redistribute it and/or modify it under
7 | * the terms of the GNU General Public License as published by the Free Software
8 | * Foundation, either version 2 of the License, or (at your option) any later
9 | * version.
10 | *
11 | * This program is distributed in the hope that it will be useful, but WITHOUT
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 | * details.
15 | *
16 | * You should have received a copy of the GNU General Public License along with
17 | * this program. If not, see <https://www.gnu.org/licenses/>.
18 | */
19 |
20 | #pragma once
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 | #include <stdbool.h>
27 | #include <stddef.h>
28 | #include <stdint.h>
29 |
30 | /* System compressed file access */
31 |
32 | struct ntfs_system_decompression_ctx;
33 |
34 | extern void
35 | ntfs_close_system_decompression_ctx(struct ntfs_system_decompression_ctx *ctx);
36 |
37 | /* XPRESS decompression */
38 |
39 | #define XPRESS_NUM_CHARS 256
40 | #define XPRESS_NUM_SYMBOLS 512
41 | #define XPRESS_MAX_CODEWORD_LEN 15
42 |
43 | #define XPRESS_MIN_MATCH_LEN 3
44 |
45 | #define DECODE_TABLE_ALIGNMENT 16
46 |
47 | /*
48 |  * State for one XPRESS decompressor.
49 |  *
50 |  * 'decode_table' and 'lens' share storage through the anonymous union.
51 |  * xpress_decompress() fills 'lens' from the input and then passes 'lens',
52 |  * 'decode_table' and 'working_space' to make_huffman_decode_table().
53 |  */
54 | struct xpress_decompressor {
55 | 	union {
56 | 		uint16_t decode_table[2566] __attribute__((aligned(DECODE_TABLE_ALIGNMENT)));
57 | 		uint8_t lens[XPRESS_NUM_SYMBOLS];
58 | 	};
59 | 	uint16_t working_space[2 * (XPRESS_MAX_CODEWORD_LEN + 1) + XPRESS_NUM_SYMBOLS];
60 | } __attribute__((aligned(DECODE_TABLE_ALIGNMENT)));
61 |
62 | extern struct xpress_decompressor *xpress_allocate_decompressor(void);
63 |
64 | /*
65 |  * Decompress 'compressed_size' bytes at 'compressed_data' into exactly
66 |  * 'uncompressed_size' bytes at 'uncompressed_data'.  The definition in
67 |  * xpress_decompress.c returns 0 on success and -1 on failure.
68 |  */
69 | extern int xpress_decompress(struct xpress_decompressor *decompressor,
70 | 			     const void *compressed_data, size_t compressed_size,
71 | 			     void *uncompressed_data, size_t uncompressed_size);
72 |
73 | extern void xpress_free_decompressor(struct xpress_decompressor *decompressor);
74 |
75 | /* LZX decompression */
76 |
77 | struct lzx_decompressor;
78 |
79 | extern bool
80 | lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d);
81 |
82 | /* Same parameter layout as xpress_decompress(); the return convention is set
83 |  * by lzx_decompress.c, which is not visible from this header. */
84 | extern int lzx_decompress(struct lzx_decompressor *decompressor,
85 | 			  const void *compressed_data, size_t compressed_size,
86 | 			  void *uncompressed_data, size_t uncompressed_size);
87 |
88 | #ifdef __cplusplus
89 | }
90 | #endif
91 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.14)
2 |
3 | project(ntfs-uefi)
4 |
5 | set(CMAKE_CXX_STANDARD 20)
6 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
7 |
8 | if(MSVC)
9 |     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /ENTRY:efi_main")
10 |     add_compile_options("/GS-")
11 |
12 |     # Strip the /RTC* run-time checks from the C *and* C++ flag sets: the
13 |     # target is built from both languages, and an image with a custom entry
14 |     # point presumably cannot rely on the runtime support those checks need.
15 |     foreach(flags_var CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG)
16 |         string(REGEX REPLACE "/RTC(su|[1su])" "" ${flags_var} "${${flags_var}}")
17 |     endforeach()
18 |
19 |     # work around bug in Visual Studio
20 |     if("${MSVC_CXX_ARCHITECTURE_ID}" STREQUAL "X86")
21 |         set(CMAKE_SYSTEM_PROCESSOR "X86")
22 |     endif()
23 | else()
24 |     add_compile_options(-fno-stack-check -fno-stack-protector -mno-stack-arg-probe)
25 | endif()
26 |
27 | add_executable(ntfs
28 |     src/ntfs.cpp
29 |     src/misc.cpp
30 |     src/ebiggers/decompress_common.c
31 |     src/ebiggers/lzx_common.c
32 |     src/ebiggers/lzx_decompress.c
33 |     src/ebiggers/xpress_decompress.c
34 | )
35 |
36 | set_target_properties(ntfs PROPERTIES SUFFIX ".efi")
37 |
38 | # gnu-efi headers: the common directory plus the one for the target processor.
39 | # The comparisons are quoted so that an empty CMAKE_SYSTEM_PROCESSOR does not
40 | # turn into a malformed if() expression.
41 | target_include_directories(ntfs PRIVATE /usr/include/efi)
42 | if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
43 |     target_include_directories(ntfs PRIVATE /usr/include/efi/x86_64)
44 | elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
45 |     target_include_directories(ntfs PRIVATE /usr/include/efi/ia32)
46 | endif()
47 |
48 | if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
49 |     target_compile_options(ntfs PRIVATE "-ffreestanding")
50 |     target_compile_options(ntfs PRIVATE "-fno-stack-protector")
51 |     target_compile_options(ntfs PRIVATE "-fno-stack-check")
52 |     target_compile_options(ntfs PRIVATE "-mno-stack-arg-probe")
53 |
54 |     target_link_options(ntfs PRIVATE "-nostartfiles")
55 |     target_link_options(ntfs PRIVATE "-shared")
56 |
57 |     # Same PE subsystem either way; Clang is given the name, GNU the number.
58 |     if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
59 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,efi_boot_service_driver")
60 |     else()
61 |         target_link_options(ntfs PRIVATE "-Wl,--subsystem,11")
62 |     endif()
63 |
64 |     # The entry symbol carries a leading underscore on X86 only.
65 |     if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "X86")
66 |         target_link_options(ntfs PRIVATE "-e_efi_main")
67 |     else()
68 |         target_link_options(ntfs PRIVATE "-eefi_main")
69 |     endif()
70 | elseif(MSVC)
71 |     target_link_options(ntfs PRIVATE "/SUBSYSTEM:EFI_BOOT_SERVICE_DRIVER")
72 |     target_compile_options(ntfs PRIVATE "/Oi-")
73 | endif()
74 |
75 | # Warning flags, selected per compiler at generate time.
76 | target_compile_options(ntfs PRIVATE
77 |     "$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wall;-Wextra;-Wno-address-of-packed-member;-Werror=pointer-arith;-fno-exceptions>"
78 |     "$<$<CXX_COMPILER_ID:MSVC>:/W4;/Oi->"
79 | )
80 |
--------------------------------------------------------------------------------
/src/ebiggers/lzx_constants.h:
--------------------------------------------------------------------------------
1 | /*
2 | * lzx_constants.h
3 | *
4 | * Constants for the LZX compression format.
5 | */
6 |
7 | #ifndef _LZX_CONSTANTS_H
8 | #define _LZX_CONSTANTS_H
9 |
10 | /* Number of literal byte values. */
11 | #define LZX_NUM_CHARS 256
12 |
13 | /* The smallest and largest allowed match lengths. */
14 | #define LZX_MIN_MATCH_LEN 2
15 | #define LZX_MAX_MATCH_LEN 257
16 |
17 | /* Number of distinct match lengths that can be represented. */
18 | #define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
19 |
20 | /* Number of match lengths for which no length symbol is required. */
21 | #define LZX_NUM_PRIMARY_LENS 7
22 | #define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
23 |
24 | /* Valid values of the 3-bit block type field. */
25 | #define LZX_BLOCKTYPE_VERBATIM 1
26 | #define LZX_BLOCKTYPE_ALIGNED 2
27 | #define LZX_BLOCKTYPE_UNCOMPRESSED 3
28 |
29 | /* 'LZX_MIN_WINDOW_SIZE' and 'LZX_MAX_WINDOW_SIZE' are the minimum and maximum
30 | * sizes of the sliding window. */
31 | #define LZX_MIN_WINDOW_ORDER 15
32 | #define LZX_MAX_WINDOW_ORDER 21
33 | #define LZX_MIN_WINDOW_SIZE (1UL << LZX_MIN_WINDOW_ORDER) /* 32768 */
34 | #define LZX_MAX_WINDOW_SIZE (1UL << LZX_MAX_WINDOW_ORDER) /* 2097152 */
35 |
36 | /* Maximum number of offset slots. (The actual number of offset slots depends
37 | * on the window size.) */
38 | #define LZX_MAX_OFFSET_SLOTS 50
39 |
40 | /* Maximum number of symbols in the main code. (The actual number of symbols in
41 | * the main code depends on the window size.) */
42 | #define LZX_MAINCODE_MAX_NUM_SYMBOLS \
43 | (LZX_NUM_CHARS + (LZX_MAX_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
44 |
45 | /* Number of symbols in the length code. */
46 | #define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
47 |
48 | /* Number of symbols in the pre-code. */
49 | #define LZX_PRECODE_NUM_SYMBOLS 20
50 |
51 | /* Number of bits in which each pre-code codeword length is represented. */
52 | #define LZX_PRECODE_ELEMENT_SIZE 4
53 |
54 | /* Number of low-order bits of each match offset that are entropy-encoded in
55 | * aligned offset blocks. */
56 | #define LZX_NUM_ALIGNED_OFFSET_BITS 3
57 |
58 | /* Number of symbols in the aligned offset code. */
59 | #define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
60 |
61 | /* Mask for the match offset bits that are entropy-encoded in aligned offset
62 | * blocks. */
63 | #define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
64 |
65 | /* Number of bits in which each aligned offset codeword length is represented. */
66 | #define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
67 |
68 | /* The first offset slot which requires an aligned offset symbol in aligned
69 | * offset blocks. */
70 | #define LZX_MIN_ALIGNED_OFFSET_SLOT 8
71 |
72 | /* The offset slot base for LZX_MIN_ALIGNED_OFFSET_SLOT. */
73 | #define LZX_MIN_ALIGNED_OFFSET 14
74 |
75 | /* The maximum number of extra offset bits in verbatim blocks. (One would need
76 | * to subtract LZX_NUM_ALIGNED_OFFSET_BITS to get the number of extra offset
77 | * bits in *aligned* blocks.) */
78 | #define LZX_MAX_NUM_EXTRA_BITS 17
79 |
80 | /* Maximum lengths (in bits) for length-limited Huffman code construction. */
81 | #define LZX_MAX_MAIN_CODEWORD_LEN 16
82 | #define LZX_MAX_LEN_CODEWORD_LEN 16
83 | #define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
84 | #define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
85 |
86 | /* For LZX-compressed blocks in WIM resources, this value is always used as the
87 | * filesize parameter for the call instruction (0xe8 byte) preprocessing, even
88 | * though the blocks themselves are not this size, and the size of the actual
89 | * file resource in the WIM file is very likely to be something entirely
90 | * different as well. */
91 | #define LZX_WIM_MAGIC_FILESIZE 12000000
92 |
93 | /* Assumed LZX block size when the encoded block size begins with a 0 bit.
94 | * This is probably WIM-specific. */
95 | #define LZX_DEFAULT_BLOCK_SIZE 32768
96 |
97 | /* Number of offsets in the recent (or "repeat") offsets queue. */
98 | #define LZX_NUM_RECENT_OFFSETS 3
99 |
100 | /* An offset of n bytes is actually encoded as (n + LZX_OFFSET_ADJUSTMENT). */
101 | #define LZX_OFFSET_ADJUSTMENT (LZX_NUM_RECENT_OFFSETS - 1)
102 |
103 | #endif /* _LZX_CONSTANTS_H */
104 |
--------------------------------------------------------------------------------
/src/ebiggers/xpress_decompress.c:
--------------------------------------------------------------------------------
1 | /*
2 | * xpress_decompress.c
3 | *
4 | * A decompressor for the XPRESS compression format (Huffman variant).
5 | */
6 |
7 | /*
8 | *
9 | * Copyright (C) 2012-2016 Eric Biggers
10 | *
11 | * This program is free software: you can redistribute it and/or modify it under
12 | * the terms of the GNU General Public License as published by the Free Software
13 | * Foundation, either version 2 of the License, or (at your option) any later
14 | * version.
15 | *
16 | * This program is distributed in the hope that it will be useful, but WITHOUT
17 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
19 | * details.
20 | *
21 | * You should have received a copy of the GNU General Public License along with
22 | * this program. If not, see <https://www.gnu.org/licenses/>.
23 | */
24 |
25 |
26 | /*
27 | * The XPRESS compression format is an LZ77 and Huffman-code based algorithm.
28 | * That means it is fairly similar to LZX compression, but XPRESS is simpler, so
29 | * it is a little faster to compress and decompress.
30 | *
31 | * The XPRESS compression format is mostly documented in a file called "[MS-XCA]
32 | * Xpress Compression Algorithm". In the MSDN library, it can currently be
33 | * found under Open Specifications => Protocols => Windows Protocols => Windows
34 | * Server Protocols => [MS-XCA] Xpress Compression Algorithm". The format in
35 | * WIMs is specifically the algorithm labeled as the "LZ77+Huffman Algorithm"
36 | * (there apparently are some other versions of XPRESS as well).
37 | *
38 | * If you are already familiar with the LZ77 algorithm and Huffman coding, the
39 | * XPRESS format is fairly simple. The compressed data begins with 256 bytes
40 | * that contain 512 4-bit integers that are the lengths of the symbols in the
41 | * Huffman code used for match/literal headers. In contrast with more
42 | * complicated formats such as DEFLATE and LZX, this is the only Huffman code
43 | * that is used for the entirety of the XPRESS compressed data, and the codeword
44 | * lengths are not encoded with a pretree.
45 | *
46 | * The rest of the compressed data is Huffman-encoded symbols. Values 0 through
47 | * 255 represent the corresponding literal bytes. Values 256 through 511
48 | * represent matches and may require extra bits or bytes to be read to get the
49 | * match offset and match length.
50 | *
51 | * The trickiest part is probably the way in which literal bytes for match
52 | * lengths are interleaved in the bitstream.
53 | *
54 | * Also, a caveat--- according to Microsoft's documentation for XPRESS,
55 | *
56 | * "Some implementation of the decompression algorithm expect an extra
57 | * symbol to mark the end of the data. Specifically, some implementations
58 | * fail during decompression if the Huffman symbol 256 is not found after
59 | * the actual data."
60 | *
61 | * This is the case with Microsoft's implementation in WIMGAPI, for example. So
62 | * although our implementation doesn't currently check for this extra symbol,
63 | * compressors would be wise to add it.
64 | */
65 |
66 | #ifdef HAVE_CONFIG_H
67 | # include "config.h"
68 | #endif
69 |
70 | #include "decompress_common.h"
71 | #include "system_compression.h"
72 |
73 | /* This value is chosen for fast decompression. */
74 | #define XPRESS_TABLEBITS 11
75 |
76 | /*
77 |  * Decompress one XPRESS (Huffman variant) buffer.
78 |  *
79 |  * The first XPRESS_NUM_SYMBOLS / 2 bytes of 'compressed_data' hold the 4-bit
80 |  * codeword lengths (two per byte, low nibble first); the remainder is the
81 |  * bitstream of literals and matches.  Decoding runs until exactly
82 |  * 'uncompressed_size' bytes have been written to 'uncompressed_data'.
83 |  *
84 |  * Returns 0 on success, or -1 if the input is shorter than the length table,
85 |  * if make_huffman_decode_table() returns nonzero, or if lz_copy() returns
86 |  * nonzero for a match.
87 |  */
88 | int
89 | xpress_decompress(struct xpress_decompressor * d,
90 | 		  const void *compressed_data, size_t compressed_size,
91 | 		  void *uncompressed_data, size_t uncompressed_size)
92 | {
93 | 	const size_t lens_bytes = XPRESS_NUM_SYMBOLS / 2;
94 | 	const uint8_t * const src = compressed_data;
95 | 	uint8_t * const dst_start = uncompressed_data;
96 | 	uint8_t * const dst_limit = dst_start + uncompressed_size;
97 | 	uint8_t *dst = dst_start;
98 | 	struct input_bitstream is;
99 |
100 | 	/* The codeword length table must be fully present. */
101 | 	if (compressed_size < lens_bytes)
102 | 		return -1;
103 |
104 | 	/* Unpack two 4-bit codeword lengths from every header byte. */
105 | 	for (size_t i = 0; i < lens_bytes; i++) {
106 | 		const uint8_t packed = src[i];
107 |
108 | 		d->lens[2 * i + 0] = packed & 0xf;
109 | 		d->lens[2 * i + 1] = packed >> 4;
110 | 	}
111 |
112 | 	/* Turn the lengths into a decoding table. */
113 | 	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
114 | 				      XPRESS_TABLEBITS, d->lens,
115 | 				      XPRESS_MAX_CODEWORD_LEN,
116 | 				      d->working_space))
117 | 		return -1;
118 |
119 | 	/* Everything after the length table is the symbol bitstream. */
120 | 	init_input_bitstream(&is, src + lens_bytes,
121 | 			     compressed_size - lens_bytes);
122 |
123 | 	while (dst != dst_limit) {
124 | 		const unsigned sym = read_huffsym(&is, d->decode_table,
125 | 						  XPRESS_TABLEBITS,
126 | 						  XPRESS_MAX_CODEWORD_LEN);
127 |
128 | 		/* Symbols below XPRESS_NUM_CHARS are literal bytes. */
129 | 		if (sym < XPRESS_NUM_CHARS) {
130 | 			*dst++ = sym;
131 | 			continue;
132 | 		}
133 |
134 | 		/* Match: low nibble is the length header, next nibble is
135 | 		 * log2 of the offset. */
136 | 		uint32_t match_len = sym & 0xf;
137 | 		const unsigned offset_bits = (sym >> 4) & 0xf;
138 |
139 | 		bitstream_ensure_bits(&is, 16);
140 |
141 | 		const uint32_t match_offset = ((uint32_t)1 << offset_bits) |
142 | 					      bitstream_pop_bits(&is, offset_bits);
143 |
144 | 		/* A saturated header is extended by a byte, and a saturated
145 | 		 * byte is replaced by a 16-bit length. */
146 | 		if (match_len == 0xf) {
147 | 			match_len += bitstream_read_byte(&is);
148 | 			if (match_len == 0xf + 0xff)
149 | 				match_len = bitstream_read_u16(&is);
150 | 		}
151 | 		match_len += XPRESS_MIN_MATCH_LEN;
152 |
153 | 		if (unlikely(lz_copy(match_len, match_offset,
154 | 				     dst_start, dst, dst_limit,
155 | 				     XPRESS_MIN_MATCH_LEN)))
156 | 			return -1;
157 |
158 | 		dst += match_len;
159 | 	}
160 | 	return 0;
161 | }
162 |
--------------------------------------------------------------------------------
/src/ebiggers/common_defs.h:
--------------------------------------------------------------------------------
1 | #ifndef _COMMON_DEFS_H
2 | #define _COMMON_DEFS_H
3 |
4 | #include <stddef.h> /* size_t, used for machine_word_t */
5 | #include <stdint.h> /* uint32_t / uint64_t, used by the bit-scan helpers */
6 | /* ========================================================================== */
7 | /* Type definitions */
8 | /* ========================================================================== */
9 |
10 | /*
11 | * Type of a machine word. 'unsigned long' would be logical, but that is only
12 | * 32 bits on x86_64 Windows. The same applies to 'uint_fast32_t'. So the best
13 | * we can do without a bunch of #ifdefs appears to be 'size_t'.
14 | */
15 | typedef size_t machine_word_t;
16 |
17 | #define WORDBYTES sizeof(machine_word_t)
18 | #define WORDBITS (8 * WORDBYTES)
19 |
20 | /* ========================================================================== */
21 | /* Compiler-specific definitions */
22 | /* ========================================================================== */
23 |
24 | #ifdef __GNUC__ /* GCC, or GCC-compatible compiler such as clang */
25 | # define forceinline inline __attribute__((always_inline))
26 | # define likely(expr) __builtin_expect(!!(expr), 1)
27 | # define unlikely(expr) __builtin_expect(!!(expr), 0)
28 | # define _aligned_attribute(n) __attribute__((aligned(n)))
29 | # define bsr32(n) (31 - __builtin_clz(n))
30 | # define bsr64(n) (63 - __builtin_clzll(n))
31 | # define bsf32(n) __builtin_ctz(n)
32 | # define bsf64(n) __builtin_ctzll(n)
33 | # ifndef min
34 | # define min(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
35 | (_a < _b) ? _a : _b; })
36 | # endif
37 | # ifndef max
38 | # define max(a, b) ({ __typeof__(a) _a = (a); __typeof__(b) _b = (b); \
39 | (_a > _b) ? _a : _b; })
40 | # endif
41 |
42 | # define DEFINE_UNALIGNED_TYPE(type) \
43 | struct type##_unaligned { \
44 | type v; \
45 | } __attribute__((packed)); \
46 | \
47 | static inline type \
48 | load_##type##_unaligned(const void *p) \
49 | { \
50 | return ((const struct type##_unaligned *)p)->v; \
51 | } \
52 | \
53 | static inline void \
54 | store_##type##_unaligned(type val, void *p) \
55 | { \
56 | ((struct type##_unaligned *)p)->v = val; \
57 | }
58 |
59 | #endif /* __GNUC__ */
60 |
61 | /* Declare that the annotated function should always be inlined. This might be
62 | * desirable in highly tuned code, e.g. compression codecs */
63 | #ifndef forceinline
64 | # define forceinline inline
65 | #endif
66 |
67 | /* Hint that the expression is usually true */
68 | #ifndef likely
69 | # define likely(expr) (expr)
70 | #endif
71 |
72 | /* Hint that the expression is usually false */
73 | #ifndef unlikely
74 | # define unlikely(expr) (expr)
75 | #endif
76 |
77 | /* Declare that the annotated variable, or variables of the annotated type, are
78 | * to be aligned on n-byte boundaries */
79 | #ifndef _aligned_attribute
80 | # define _aligned_attribute(n)
81 | #endif
82 |
83 | /* min() and max() macros */
84 | #ifndef min
85 | # define min(a, b) ((a) < (b) ? (a) : (b))
86 | #endif
87 | #ifndef max
88 | # define max(a, b) ((a) > (b) ? (a) : (b))
89 | #endif
90 |
91 | /* STATIC_ASSERT() - verify the truth of an expression at compilation time */
92 | #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)]))
93 |
94 | /* UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses
95 | * can be performed efficiently on the target platform. */
96 | #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED)
97 | # define UNALIGNED_ACCESS_IS_FAST 1
98 | #else
99 | # define UNALIGNED_ACCESS_IS_FAST 0
100 | #endif
101 |
102 | /*
103 | * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type',
104 | * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions
105 | * which load and store variables of type 'type' from/to unaligned memory
106 | * addresses.
107 | */
108 | #ifndef DEFINE_UNALIGNED_TYPE
109 | /* Portable fallback, used when no compiler-specific definition exists. */
110 | #include <string.h>
111 | /*
112 | * Although memcpy() may seem inefficient, it *usually* gets optimized
113 | * appropriately by modern compilers. It's portable and may be the best we can
114 | * do for a fallback...
115 | */
116 | #define DEFINE_UNALIGNED_TYPE(type) \
117 | \
118 | static forceinline type \
119 | load_##type##_unaligned(const void *p) \
120 | { \
121 | 	type v; \
122 | 	memcpy(&v, p, sizeof(v)); \
123 | 	return v; \
124 | } \
125 | \
126 | static forceinline void \
127 | store_##type##_unaligned(type v, void *p) \
128 | { \
129 | 	memcpy(p, &v, sizeof(v)); \
130 | }
131 |
132 | #endif /* !DEFINE_UNALIGNED_TYPE */
133 |
134 |
135 | /* ========================================================================== */
136 | /* Unaligned memory accesses */
137 | /* ========================================================================== */
138 |
139 | #define load_word_unaligned load_machine_word_t_unaligned
140 | #define store_word_unaligned store_machine_word_t_unaligned
141 |
142 | /* ========================================================================== */
143 | /* Bit scan functions */
144 | /* ========================================================================== */
145 |
146 | /*
147 | * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least
148 | * significant end) of the *most* significant 1 bit in the input value. The
149 | * input value must be nonzero!
150 | */
151 |
152 | #ifndef bsr32
153 | static forceinline unsigned
154 | bsr32(uint32_t v)
155 | {
156 | 	unsigned index = 0; /* one increment per right shift that leaves v nonzero */
157 | 	for (; v > 1; v >>= 1)
158 | 		index++;
159 | 	return index;
160 | }
161 | #endif
162 |
163 | #ifndef bsr64
164 | static forceinline unsigned
165 | bsr64(uint64_t v)
166 | {
167 | 	unsigned index = 0; /* one increment per right shift that leaves v nonzero */
168 | 	for (; v > 1; v >>= 1)
169 | 		index++;
170 | 	return index;
171 | }
172 | #endif
173 |
174 | static forceinline unsigned
175 | bsrw(machine_word_t v)
176 | {
177 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
178 | 	/* Use the scan that matches the machine word width. */
179 | 	if (WORDBITS != 32)
180 | 		return bsr64(v);
181 | 	return bsr32(v);
182 | }
183 |
184 | /*
185 | * Bit Scan Forward (BSF) - find the 0-based index (relative to the least
186 | * significant end) of the *least* significant 1 bit in the input value. The
187 | * input value must be nonzero!
188 | */
189 |
190 | #ifndef bsf32
191 | static forceinline unsigned
192 | bsf32(uint32_t v)
193 | {
194 | 	unsigned index = 0; /* number of zero bits shifted out so far */
195 | 	for (; (v & 1) == 0; v >>= 1)
196 | 		index++;
197 | 	return index;
198 | }
199 | #endif
200 |
201 | #ifndef bsf64
202 | static forceinline unsigned
203 | bsf64(uint64_t v)
204 | {
205 | 	unsigned index = 0; /* number of zero bits shifted out so far */
206 | 	for (; (v & 1) == 0; v >>= 1)
207 | 		index++;
208 | 	return index;
209 | }
210 | #endif
211 |
212 | static forceinline unsigned
213 | bsfw(machine_word_t v)
214 | {
215 | 	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
216 | 	/* Use the scan that matches the machine word width. */
217 | 	if (WORDBITS != 32)
218 | 		return bsf64(v);
219 | 	return bsf32(v);
220 | }
221 |
222 | /* Return the log base 2 of 'n', rounded up to the nearest integer. */
223 | static forceinline unsigned
224 | ilog2_ceil(size_t n)
225 | {
226 | if (n <= 1)
227 | return 0;
228 | return 1 + bsrw(n - 1);
229 | }
230 |
231 | #endif /* _COMMON_DEFS_H */
232 |
--------------------------------------------------------------------------------
/src/misc.cpp:
--------------------------------------------------------------------------------
1 | /* Copyright (c) Mark Harmstone 2023
2 | *
3 | * This file is part of ntfs-efi.
4 | *
5 | * ntfs-efi is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public Licence as published by
7 | * the Free Software Foundation, either version 2 of the Licence, or
8 | * (at your option) any later version.
9 | *
10 | * ntfs-efi is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public Licence for more details.
14 | *
15 | * You should have received a copy of the GNU General Public Licence
16 | * along with ntfs-efi. If not, see <http://www.gnu.org/licenses/>. */
17 |
18 | #include "misc.h"
19 | #include
20 | #include
21 |
// Fills the first n bytes at s with the low 8 bits of c and returns s.
// Whole machine words (64-bit when __INTPTR_WIDTH__ is 64, otherwise 32-bit)
// are written first, then any remaining bytes one at a time.
extern "C"
void* memset(void* s, int c, size_t n) {
#if __INTPTR_WIDTH__ == 64
    typedef uint64_t fill_word;
#else
    typedef uint32_t fill_word;
#endif

    uint8_t* cursor = (uint8_t*)s;
    const uint8_t byte = (uint8_t)c;

    // Replicate the fill byte into every byte lane of one word.
    fill_word pattern = 0;

    for (size_t lane = 0; lane < sizeof(fill_word); lane++) {
        pattern = (fill_word)((pattern << 8) | byte);
    }

    // FIXME - faster if we make sure we're aligned (also in memcpy)?

    for (; n >= sizeof(fill_word); n -= sizeof(fill_word)) {
        *(fill_word*)cursor = pattern;
        cursor += sizeof(fill_word);
    }

    // Trailing bytes that do not make up a full word.
    for (; n > 0; n--) {
        *cursor = byte;
        cursor++;
    }

    return s;
}
71 |
// Compares the first n bytes of s1 and s2 as unsigned bytes.
// Returns 0 if they are equal, 1 if the first differing byte is greater in
// s1, and -1 if it is greater in s2.
//
// Whole words are used only to skip over equal data. The ordering is always
// decided by the byte loop: comparing native words numerically would, on a
// little-endian machine, rank by the *last* byte of the word rather than the
// first differing one and so return the wrong sign.
extern "C"
int memcmp(const void* s1, const void* s2, size_t n) {
    const uint8_t* p1 = (const uint8_t*)s1;
    const uint8_t* p2 = (const uint8_t*)s2;

#if __INTPTR_WIDTH__ == 64
    while (n >= sizeof(uint64_t) && *(const uint64_t*)p1 == *(const uint64_t*)p2) {
        p1 += sizeof(uint64_t);
        p2 += sizeof(uint64_t);
        n -= sizeof(uint64_t);
    }
#endif

    while (n >= sizeof(uint32_t) && *(const uint32_t*)p1 == *(const uint32_t*)p2) {
        p1 += sizeof(uint32_t);
        p2 += sizeof(uint32_t);
        n -= sizeof(uint32_t);
    }

    // Either fewer than four bytes remain or the next word differs; in both
    // cases the first mismatching byte (if any) is found here.
    while (n > 0) {
        if (*p1 != *p2)
            return *p1 > *p2 ? 1 : -1;

        p1++;
        p2++;
        n--;
    }

    return 0;
}
114 |
// Copies n bytes from src to dest and returns dest.
// The copy proceeds in progressively narrower units: 64-bit words (only when
// __INTPTR_WIDTH__ is 64), then 32-bit, then 16-bit, then single bytes.
extern "C"
void* memcpy(void* dest, const void* src, size_t n) {
    uint8_t* out = (uint8_t*)dest;
    const uint8_t* in = (const uint8_t*)src;

#if __INTPTR_WIDTH__ == 64
    for (; n >= sizeof(uint64_t); n -= sizeof(uint64_t)) {
        *(uint64_t*)out = *(const uint64_t*)in;
        out += sizeof(uint64_t);
        in += sizeof(uint64_t);
    }
#endif

    for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t)) {
        *(uint32_t*)out = *(const uint32_t*)in;
        out += sizeof(uint32_t);
        in += sizeof(uint32_t);
    }

    for (; n >= sizeof(uint16_t); n -= sizeof(uint16_t)) {
        *(uint16_t*)out = *(const uint16_t*)in;
        out += sizeof(uint16_t);
        in += sizeof(uint16_t);
    }

    for (; n >= sizeof(uint8_t); n -= sizeof(uint8_t)) {
        *out = *in;
        out += sizeof(uint8_t);
        in += sizeof(uint8_t);
    }

    return dest;
}
159 |
160 | const char* error_string(EFI_STATUS Status) {
161 | switch (Status) {
162 | case EFI_SUCCESS:
163 | return "EFI_SUCCESS";
164 |
165 | case EFI_LOAD_ERROR:
166 | return "EFI_LOAD_ERROR";
167 |
168 | case EFI_INVALID_PARAMETER:
169 | return "EFI_INVALID_PARAMETER";
170 |
171 | case EFI_UNSUPPORTED:
172 | return "EFI_UNSUPPORTED";
173 |
174 | case EFI_BAD_BUFFER_SIZE:
175 | return "EFI_BAD_BUFFER_SIZE";
176 |
177 | case EFI_BUFFER_TOO_SMALL:
178 | return "EFI_BUFFER_TOO_SMALL";
179 |
180 | case EFI_NOT_READY:
181 | return "EFI_NOT_READY";
182 |
183 | case EFI_DEVICE_ERROR:
184 | return "EFI_DEVICE_ERROR";
185 |
186 | case EFI_WRITE_PROTECTED:
187 | return "EFI_WRITE_PROTECTED";
188 |
189 | case EFI_OUT_OF_RESOURCES:
190 | return "EFI_OUT_OF_RESOURCES";
191 |
192 | case EFI_VOLUME_CORRUPTED:
193 | return "EFI_VOLUME_CORRUPTED";
194 |
195 | case EFI_VOLUME_FULL:
196 | return "EFI_VOLUME_FULL";
197 |
198 | case EFI_NO_MEDIA:
199 | return "EFI_NO_MEDIA";
200 |
201 | case EFI_MEDIA_CHANGED:
202 | return "EFI_MEDIA_CHANGED";
203 |
204 | case EFI_NOT_FOUND:
205 | return "EFI_NOT_FOUND";
206 |
207 | case EFI_ACCESS_DENIED:
208 | return "EFI_ACCESS_DENIED";
209 |
210 | case EFI_NO_RESPONSE:
211 | return "EFI_NO_RESPONSE";
212 |
213 | case EFI_NO_MAPPING:
214 | return "EFI_NO_MAPPING";
215 |
216 | case EFI_TIMEOUT:
217 | return "EFI_TIMEOUT";
218 |
219 | case EFI_NOT_STARTED:
220 | return "EFI_NOT_STARTED";
221 |
222 | case EFI_ALREADY_STARTED:
223 | return "EFI_ALREADY_STARTED";
224 |
225 | case EFI_ABORTED:
226 | return "EFI_ABORTED";
227 |
228 | case EFI_ICMP_ERROR:
229 | return "EFI_ICMP_ERROR";
230 |
231 | case EFI_TFTP_ERROR:
232 | return "EFI_TFTP_ERROR";
233 |
234 | case EFI_PROTOCOL_ERROR:
235 | return "EFI_PROTOCOL_ERROR";
236 |
237 | case EFI_INCOMPATIBLE_VERSION:
238 | return "EFI_INCOMPATIBLE_VERSION";
239 |
240 | case EFI_SECURITY_VIOLATION:
241 | return "EFI_SECURITY_VIOLATION";
242 |
243 | case EFI_CRC_ERROR:
244 | return "EFI_CRC_ERROR";
245 |
246 | case EFI_END_OF_MEDIA:
247 | return "EFI_END_OF_MEDIA";
248 |
249 | case EFI_END_OF_FILE:
250 | return "EFI_END_OF_FILE";
251 |
252 | case EFI_INVALID_LANGUAGE:
253 | return "EFI_INVALID_LANGUAGE";
254 |
255 | case EFI_COMPROMISED_DATA:
256 | return "EFI_COMPROMISED_DATA";
257 |
258 | default:
259 | return "(unknown error)";
260 | }
261 | }
262 |
// Copies the NUL-terminated string src (terminator included) to dest and
// returns a pointer to the terminator written into dest.
char* stpcpy(char* dest, const char* src) {
    // Each iteration stores one character; the loop stops right after the
    // terminator has been stored, leaving dest pointing at it.
    while ((*dest = *src) != 0) {
        dest++;
        src++;
    }

    return dest;
}
274 |
--------------------------------------------------------------------------------
/src/ntfs.h:
--------------------------------------------------------------------------------
1 | /* Copyright (c) Mark Harmstone 2023
2 | *
3 | * This file is part of ntfs-efi.
4 | *
5 | * ntfs-efi is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public Licence as published by
7 | * the Free Software Foundation, either version 2 of the Licence, or
8 | * (at your option) any later version.
9 | *
10 | * ntfs-efi is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public Licence for more details.
14 | *
15 | * You should have received a copy of the GNU General Public Licence
16 | * along with ntfs-efi. If not, see <http://www.gnu.org/licenses/>. */
17 |
18 | #pragma once
19 |
20 | enum class ntfs_attribute : uint32_t { // attribute type codes, stored as 32-bit values; used by ATTRIBUTE_RECORD_HEADER, index_root and attribute_list_entry
21 | STANDARD_INFORMATION = 0x10,
22 | ATTRIBUTE_LIST = 0x20,
23 | FILE_NAME = 0x30,
24 | VOLUME_VERSION = 0x40,
25 | SECURITY_DESCRIPTOR = 0x50,
26 | VOLUME_NAME = 0x60,
27 | VOLUME_INFORMATION = 0x70,
28 | DATA = 0x80,
29 | INDEX_ROOT = 0x90,
30 | INDEX_ALLOCATION = 0xA0,
31 | BITMAP = 0xB0,
32 | REPARSE_POINT = 0xC0,
33 | EA_INFORMATION = 0xD0,
34 | EA = 0xE0,
35 | PROPERTY_SET = 0xF0,
36 | LOGGED_UTILITY_STREAM = 0x100, // the only code that does not fit in one byte
37 | };
38 |
39 | enum class NTFS_ATTRIBUTE_FORM : uint8_t { // one-byte form code held in ATTRIBUTE_RECORD_HEADER::FormCode
40 | RESIDENT_FORM = 0, // presumably pairs with the Form.Resident union member - TODO confirm
41 | NONRESIDENT_FORM = 1 // presumably pairs with the Form.Nonresident union member - TODO confirm
42 | };
43 |
44 | #pragma pack(push,1)
45 |
46 | struct NTFS_BOOT_SECTOR { // boot sector field layout; 84 bytes under the enclosing #pragma pack(push,1)
47 | uint8_t Jmp[3];
48 | uint8_t FsName[8]; // presumably compared against NTFS_FS_NAME - TODO confirm
49 | uint16_t BytesPerSector;
50 | uint8_t SectorsPerCluster;
51 | uint16_t ReservedSectors;
52 | uint8_t Unused1[5];
53 | uint8_t Media;
54 | uint8_t Unused2[2];
55 | uint16_t SectorsPerTrack;
56 | uint16_t Heads;
57 | uint32_t HiddenSectors;
58 | uint32_t Unused3;
59 | uint32_t Unknown;
60 | uint64_t TotalSectors;
61 | uint64_t MFT;
62 | uint64_t MFTMirr;
63 | int8_t ClustersPerMFTRecord; // signed, unlike the other counts here; NOTE(review): confirm how negative values are interpreted by the reader
64 | uint8_t Padding1[3];
65 | int8_t ClustersPerIndexRecord; // signed as well; same caveat as ClustersPerMFTRecord
66 | uint8_t Padding2[3];
67 | uint64_t SerialNumber;
68 | uint32_t Checksum;
69 | };
70 |
71 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/multi-sector-header
72 | struct MULTI_SECTOR_HEADER { // 8-byte header embedded first in FILE_RECORD_SEGMENT_HEADER and index_record
73 | uint32_t Signature; // presumably NTFS_FILE_SIGNATURE or INDEX_RECORD_MAGIC depending on the container - TODO confirm
74 | uint16_t UpdateSequenceArrayOffset;
75 | uint16_t UpdateSequenceArraySize;
76 | };
77 |
78 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/mft-segment-reference
79 | struct MFT_SEGMENT_REFERENCE { // two bit-fields of 48 + 16 bits sharing one uint64_t
80 | uint64_t SegmentNumber : 48;
81 | uint64_t SequenceNumber : 16;
82 | };
83 |
84 | // based on https://docs.microsoft.com/en-us/windows/win32/devnotes/file-record-segment-header and
85 | // http://www.cse.scu.edu/~tschwarz/coen252_07Fall/Lectures/NTFS.html
86 | struct FILE_RECORD_SEGMENT_HEADER { // fixed-size header of a file record; 42 bytes under pack(1)
87 | MULTI_SECTOR_HEADER MultiSectorHeader; // presumably carries NTFS_FILE_SIGNATURE - TODO confirm
88 | uint64_t LogFileSequenceNumber;
89 | uint16_t SequenceNumber;
90 | uint16_t HardLinkCount;
91 | uint16_t FirstAttributeOffset; // NOTE(review): presumably a byte offset from the start of this header - confirm against the reader
92 | uint16_t Flags;
93 | uint32_t EntryUsedSize;
94 | uint32_t EntryAllocatedSize;
95 | MFT_SEGMENT_REFERENCE BaseFileRecordSegment;
96 | uint16_t NextAttributeID;
97 | };
98 |
99 | struct ATTRIBUTE_RECORD_HEADER { // 16 common bytes followed by a form-specific union
100 | enum ntfs_attribute TypeCode;
101 | uint16_t RecordLength;
102 | uint16_t Unknown;
103 | enum NTFS_ATTRIBUTE_FORM FormCode; // presumably selects which member of Form is meaningful - TODO confirm
104 | uint8_t NameLength;
105 | uint16_t NameOffset;
106 | uint16_t Flags; // presumably tested with the ATTRIBUTE_FLAG_* masks defined later in this header - TODO confirm
107 | uint16_t Instance;
108 | union {
109 | struct { // 8 bytes
110 | uint32_t ValueLength;
111 | uint16_t ValueOffset;
112 | uint8_t Reserved[2];
113 | } Resident;
114 | struct { // 56 bytes; this member determines the size of the union
115 | uint64_t LowestVcn;
116 | uint64_t HighestVcn;
117 | uint16_t MappingPairsOffset;
118 | uint16_t CompressionUnit;
119 | uint32_t Padding;
120 | uint64_t AllocatedLength;
121 | uint64_t FileSize;
122 | uint64_t ValidDataLength;
123 | uint64_t TotalAllocated;
124 | } Nonresident;
125 | } Form;
126 | };
127 |
128 | // https://flatcap.org/linux-ntfs/ntfs/attributes/standard_information.html
129 |
130 | struct STANDARD_INFORMATION { // 72 bytes under pack(1); shares its name with ntfs_attribute::STANDARD_INFORMATION
131 | int64_t CreationTime; // the four timestamps are signed 64-bit values; units are not established in this header
132 | int64_t LastAccessTime;
133 | int64_t LastWriteTime;
134 | int64_t ChangeTime;
135 | uint32_t FileAttributes; // presumably a combination of the FILE_ATTRIBUTE_* bits defined later in this header - TODO confirm
136 | uint32_t MaximumVersions;
137 | uint32_t VersionNumber;
138 | uint32_t ClassId;
139 | uint32_t OwnerId;
140 | uint32_t SecurityId;
141 | uint64_t QuotaCharged;
142 | uint64_t USN;
143 | };
144 |
145 | // https://flatcap.org/linux-ntfs/ntfs/concepts/node_header.html
146 |
147 | struct index_node_header { // 16-byte header embedded in both index_root and index_record
148 | uint32_t first_entry; // NOTE(review): presumably an offset to the first index_entry - confirm what it is relative to
149 | uint32_t total_size;
150 | uint32_t allocated_size;
151 | uint32_t flags;
152 | };
153 |
154 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_entry.html
155 |
156 | #define INDEX_ENTRY_SUBNODE 1
157 | #define INDEX_ENTRY_LAST 2
158 |
159 | struct index_entry { // 16-byte fixed part of an index entry
160 | MFT_SEGMENT_REFERENCE file_reference;
161 | uint16_t entry_length;
162 | uint16_t stream_length;
163 | uint32_t flags; // presumably a combination of INDEX_ENTRY_SUBNODE / INDEX_ENTRY_LAST - TODO confirm
164 | };
165 |
166 | // https://flatcap.org/linux-ntfs/ntfs/attributes/index_root.html
167 |
168 | struct index_root { // 16 bytes of index parameters, then a node header and the first entry
169 | enum ntfs_attribute attribute_type;
170 | uint32_t collation_rule;
171 | uint32_t bytes_per_index_record;
172 | uint8_t clusters_per_index_record;
173 | uint8_t padding[3];
174 | index_node_header node_header;
175 | index_entry entries[1]; // declared with one element; presumably the start of a variable-length run - TODO confirm
176 | };
177 |
178 | // https://flatcap.org/linux-ntfs/ntfs/concepts/index_record.html
179 |
180 | struct index_record { // 42-byte header of an index record under pack(1)
181 | MULTI_SECTOR_HEADER MultiSectorHeader; // presumably carries INDEX_RECORD_MAGIC - TODO confirm
182 | uint64_t sequence_number;
183 | uint64_t vcn;
184 | index_node_header header;
185 | uint16_t update_sequence;
186 | };
187 |
188 | #define INDEX_RECORD_MAGIC 0x58444e49 // "INDX"
189 |
190 | // https://flatcap.org/linux-ntfs/ntfs/attributes/file_name.html
191 |
192 | enum class file_name_type : uint8_t { // one-byte namespace tag stored in FILE_NAME::Namespace
193 | POSIX = 0,
194 | WINDOWS = 1,
195 | DOS = 2,
196 | WINDOWS_AND_DOS = 3 // numerically equal to WINDOWS | DOS
197 | };
198 |
199 | struct FILE_NAME { // 66 fixed bytes under pack(1), followed by the name characters
200 | MFT_SEGMENT_REFERENCE Parent;
201 | int64_t CreationTime;
202 | int64_t LastAccessTime;
203 | int64_t LastWriteTime;
204 | int64_t ChangeTime;
205 | uint64_t AllocationSize;
206 | uint64_t EndOfFile;
207 | uint32_t FileAttributes; // presumably FILE_ATTRIBUTE_* bits - TODO confirm
208 | uint32_t EaSize;
209 | uint8_t FileNameLength; // NOTE(review): presumably counted in char16_t units rather than bytes - confirm against the reader
210 | file_name_type Namespace;
211 | char16_t FileName[1]; // declared with one element; presumably FileNameLength characters follow - TODO confirm
212 | };
213 |
214 | // https://flatcap.org/linux-ntfs/ntfs/attributes/attribute_list.html
215 |
216 | struct attribute_list_entry { // 26 fixed bytes under pack(1)
217 | enum ntfs_attribute type;
218 | uint16_t record_length;
219 | uint8_t name_length;
220 | uint8_t name_offset;
221 | uint64_t starting_vcn;
222 | MFT_SEGMENT_REFERENCE file_reference; // presumably the file record that holds the attribute - TODO confirm
223 | uint16_t instance;
224 | };
225 |
226 | struct reparse_point_header { // edited form of REPARSE_DATA_BUFFER
227 | uint32_t ReparseTag; // e.g. IO_REPARSE_TAG_WOF, defined at the end of this header
228 | uint16_t ReparseDataLength; // NOTE(review): presumably the byte length of DataBuffer - confirm
229 | uint16_t Reserved;
230 | uint8_t DataBuffer[1]; // declared with one element; presumably the start of the tag-specific payload - TODO confirm
231 | };
232 |
233 | static const uint32_t WOF_CURRENT_VERSION = 1;
234 |
235 | static const uint32_t WOF_PROVIDER_WIM = 1;
236 | static const uint32_t WOF_PROVIDER_FILE = 2;
237 |
238 | struct wof_external_info { // WOF_EXTERNAL_INFO in winioctl.h
239 | uint32_t Version; // presumably compared with WOF_CURRENT_VERSION - TODO confirm
240 | uint32_t Provider; // presumably one of WOF_PROVIDER_WIM / WOF_PROVIDER_FILE - TODO confirm
241 | };
242 |
243 | static const uint32_t FILE_PROVIDER_CURRENT_VERSION = 1;
244 |
245 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS4K = 0;
246 | static const uint32_t FILE_PROVIDER_COMPRESSION_LZX = 1;
247 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS8K = 2;
248 | static const uint32_t FILE_PROVIDER_COMPRESSION_XPRESS16K = 3;
249 |
250 | struct file_provider_external_info_v0 { // FILE_PROVIDER_EXTERNAL_INFO_V0 in winioctl.h
251 | uint32_t Version; // presumably compared with FILE_PROVIDER_CURRENT_VERSION - TODO confirm
252 | uint32_t Algorithm; // presumably one of the FILE_PROVIDER_COMPRESSION_* values - TODO confirm
253 | };
254 |
255 | #pragma pack(pop)
256 |
257 | #define NTFS_FS_NAME "NTFS "
258 |
259 | #define NTFS_MFT_INODE 0
260 | #define NTFS_ROOT_DIR_INODE 5
261 | #define NTFS_UPCASE_INODE 10
262 |
263 | #define NTFS_FILE_SIGNATURE 0x454c4946 // "FILE"
264 |
265 | // https://docs.microsoft.com/en-us/windows/win32/devnotes/attribute-record-header
266 | #define ATTRIBUTE_FLAG_COMPRESSION_MASK 0x00ff
267 | #define ATTRIBUTE_FLAG_ENCRYPTED 0x4000
268 |
269 | #define FILE_ATTRIBUTE_READONLY 0x00000001
270 | #define FILE_ATTRIBUTE_HIDDEN 0x00000002
271 | #define FILE_ATTRIBUTE_SYSTEM 0x00000004
272 | #define FILE_ATTRIBUTE_DIRECTORY 0x00000010
273 | #define FILE_ATTRIBUTE_ARCHIVE 0x00000020
274 | #define FILE_ATTRIBUTE_DIRECTORY_MFT 0x10000000
275 |
276 | static const uint32_t IO_REPARSE_TAG_WOF = 0x80000017;
277 |
--------------------------------------------------------------------------------
/src/ebiggers/lzx_common.c:
--------------------------------------------------------------------------------
1 | /*
2 | * lzx_common.c - Common code for LZX compression and decompression.
3 | */
4 |
5 | /*
6 | * Copyright (C) 2012-2016 Eric Biggers
7 | *
8 | * This program is free software: you can redistribute it and/or modify it under
9 | * the terms of the GNU General Public License as published by the Free Software
10 | * Foundation, either version 2 of the License, or (at your option) any later
11 | * version.
12 | *
13 | * This program is distributed in the hope that it will be useful, but WITHOUT
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
16 | * details.
17 | *
18 | * You should have received a copy of the GNU General Public License along with
19 | * this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | #ifdef HAVE_CONFIG_H
23 | # include "config.h"
24 | #endif
25 |
26 | #include
27 |
28 | #ifdef __SSE2__
29 | # include
30 | #endif
31 |
32 | #ifdef __AVX2__
33 | # include
34 | #endif
35 |
36 | #include "common_defs.h"
37 | #include "lzx_common.h"
38 |
39 | /* Mapping: offset slot => first match offset that uses that offset slot.
40 | * The offset slots for repeat offsets map to "fake" offsets < 1. */
41 | const int32_t lzx_offset_slot_base[LZX_MAX_OFFSET_SLOTS + 1] = {
42 | -2 , -1 , 0 , 1 , 2 , /* 0 --- 4 */
43 | 4 , 6 , 10 , 14 , 22 , /* 5 --- 9 */
44 | 30 , 46 , 62 , 94 , 126 , /* 10 --- 14 */
45 | 190 , 254 , 382 , 510 , 766 , /* 15 --- 19 */
46 | 1022 , 1534 , 2046 , 3070 , 4094 , /* 20 --- 24 */
47 | 6142 , 8190 , 12286 , 16382 , 24574 , /* 25 --- 29 */
48 | 32766 , 49150 , 65534 , 98302 , 131070 , /* 30 --- 34 */
49 | 196606 , 262142 , 393214 , 524286 , 655358 , /* 35 --- 39 */
50 | 786430 , 917502 , 1048574, 1179646, 1310718, /* 40 --- 44 */
51 | 1441790, 1572862, 1703934, 1835006, 1966078, /* 45 --- 49 */
52 | 2097150 /* extra: base of slot 49 plus 2^17, i.e. one past the last offset slot 49 covers; lets lzx_get_num_main_syms() compare against entry [slot] without a bounds check */
53 | };
54 |
55 | /* Mapping: offset slot => how many extra bits must be read and added to the
56 | * corresponding offset slot base to decode the match offset. */
57 | const uint8_t lzx_extra_offset_bits[LZX_MAX_OFFSET_SLOTS] = {
58 | 0 , 0 , 0 , 0 , 1 , /* 0 --- 4 */
59 | 1 , 2 , 2 , 3 , 3 , /* 5 --- 9 */
60 | 4 , 4 , 5 , 5 , 6 , /* 10 --- 14 */
61 | 6 , 7 , 7 , 8 , 8 , /* 15 --- 19 */
62 | 9 , 9 , 10, 10, 11, /* 20 --- 24 */
63 | 11, 12, 12, 13, 13, /* 25 --- 29 */
64 | 14, 14, 15, 15, 16, /* 30 --- 34 */
65 | 16, 17, 17, 17, 17, /* 35 --- 39; the count stays at 17 from slot 36 onwards */
66 | 17, 17, 17, 17, 17, /* 40 --- 44 */
67 | 17, 17, 17, 17, 17, /* 45 --- 49 */
68 | };
69 |
70 | /* Round the specified buffer size up to the next valid LZX window size, and
71 | * return its order (log2). Or, if the buffer size is 0 or greater than the
72 | * largest valid LZX window size, return 0. */
73 | unsigned
74 | lzx_get_window_order(size_t max_bufsize)
75 | {
76 | if (max_bufsize == 0 || max_bufsize > LZX_MAX_WINDOW_SIZE)
77 | return 0;
78 |
79 | return max(ilog2_ceil(max_bufsize), LZX_MIN_WINDOW_ORDER);
80 | }
81 |
82 | /* Given a valid LZX window order, return the number of symbols that will exist
83 | * in the main Huffman code. */
84 | unsigned
85 | lzx_get_num_main_syms(unsigned window_order)
86 | {
87 | /* Note: one would expect that the maximum match offset would be
88 | * 'window_size - LZX_MIN_MATCH_LEN', which would occur if the first two
89 | * bytes were to match the last two bytes. However, the format
90 | * disallows this case. This reduces the number of needed offset slots
91 | * by 1. */
92 | uint32_t window_size = (uint32_t)1 << window_order;
93 | uint32_t max_offset = window_size - LZX_MIN_MATCH_LEN - 1;
94 | unsigned num_offset_slots = 30;
95 | while (max_offset >= lzx_offset_slot_base[num_offset_slots])
96 | num_offset_slots++;
97 |
98 | return LZX_NUM_CHARS + (num_offset_slots * LZX_NUM_LEN_HEADERS);
99 | }
100 |
101 | static void
102 | do_translate_target(void *target, int32_t input_pos)
103 | {
104 | int32_t abs_offset, rel_offset;
105 |
106 | rel_offset = *(int32_t*)target;
107 | if (rel_offset >= -input_pos && rel_offset < LZX_WIM_MAGIC_FILESIZE) {
108 | if (rel_offset < LZX_WIM_MAGIC_FILESIZE - input_pos) {
109 | /* "good translation" */
110 | abs_offset = rel_offset + input_pos;
111 | } else {
112 | /* "compensating translation" */
113 | abs_offset = rel_offset - LZX_WIM_MAGIC_FILESIZE;
114 | }
115 | *(uint32_t*)target = abs_offset;
116 | }
117 | }
118 |
119 | static void
120 | undo_translate_target(void *target, int32_t input_pos)
121 | {
122 | int32_t abs_offset, rel_offset;
123 |
124 | abs_offset = *(int32_t*)target;
125 | if (abs_offset >= 0) {
126 | if (abs_offset < LZX_WIM_MAGIC_FILESIZE) {
127 | /* "good translation" */
128 | rel_offset = abs_offset - input_pos;
129 | *(uint32_t*)target = rel_offset;
130 | }
131 | } else {
132 | if (abs_offset >= -input_pos) {
133 | /* "compensating translation" */
134 | rel_offset = abs_offset + LZX_WIM_MAGIC_FILESIZE;
135 | *(uint32_t*)target = rel_offset;
136 | }
137 | }
138 | }
139 |
140 | /*
141 | * Do or undo the 'E8' preprocessing used in LZX. Before compression, the
142 | * uncompressed data is preprocessed by changing the targets of x86 CALL
143 | * instructions from relative offsets to absolute offsets. After decompression,
144 | * the translation is undone by changing the targets of x86 CALL instructions
145 | * from absolute offsets to relative offsets.
146 | *
147 | * Note that despite its intent, E8 preprocessing can be done on any data even
148 | * if it is not actually x86 machine code. In fact, E8 preprocessing appears to
149 | * always be used in LZX-compressed resources in WIM files; there is no bit to
150 | * indicate whether it is used or not, unlike in the LZX compressed format as
151 | * used in cabinet files, where a bit is reserved for that purpose.
152 | *
153 | * E8 preprocessing is disabled in the last 6 bytes of the uncompressed data,
154 | * which really means the 5-byte call instruction cannot start in the last 10
155 | * bytes of the uncompressed data. This is one of the errors in the LZX
156 | * documentation.
157 | *
158 | * E8 preprocessing does not appear to be disabled after the 32768th chunk of a
159 | * WIM resource, which apparently is another difference from the LZX compression
160 | * used in cabinet files.
161 | *
162 | * E8 processing is supposed to take the file size as a parameter, as it is used
163 | * in calculating the translated jump targets. But in WIM files, this file size
164 | * is always the same (LZX_WIM_MAGIC_FILESIZE == 12000000).
165 | */
166 | static void
167 | lzx_e8_filter(uint8_t *data, uint32_t size, void (*process_target)(void *, int32_t))
168 | { /* process_target is always invoked as (pointer to the byte after an E8, offset of that E8 byte within data) */
169 |
170 | #if !defined(__SSE2__) && !defined(__AVX2__)
171 | /*
172 | * A worthwhile optimization is to push the end-of-buffer check into the
173 | * relatively rare E8 case. This is possible if we replace the last six
174 | * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
175 | * before reaching end-of-buffer. In addition, this scheme guarantees
176 | * that no translation can begin following an E8 byte in the last 10
177 | * bytes because a 4-byte offset containing E8 as its high byte is a
178 | * large negative number that is not valid for translation. That is
179 | * exactly what we need.
180 | */
181 | uint8_t *tail;
182 | uint8_t saved_bytes[6];
183 | uint8_t *p;
184 |
185 | if (size <= 10)
186 | return; /* buffers of 10 bytes or fewer are never filtered */
187 |
188 | tail = &data[size - 6];
189 | memcpy(saved_bytes, tail, 6);
190 | memset(tail, 0xE8, 6);
191 | p = data;
192 | for (;;) {
193 | while (*p != 0xE8)
194 | p++;
195 | if (p >= tail) /* reached the sentinel E8 bytes written above */
196 | break;
197 | (*process_target)(p + 1, p - data);
198 | p += 5; /* step over the E8 byte and the 4 bytes handed to process_target */
199 | }
200 | memcpy(tail, saved_bytes, 6); /* put the original last six bytes back */
201 | #else
202 | /* SSE2 or AVX-2 optimized version for x86_64 */
203 |
204 | uint8_t *p = data;
205 | uint64_t valid_mask = ~0; /* bit k set => byte p[k] is still eligible to be treated as an E8 opcode */
206 |
207 | if (size <= 10)
208 | return; /* buffers of 10 bytes or fewer are never filtered */
209 | #ifdef __AVX2__
210 | # define ALIGNMENT_REQUIRED 32
211 | #else
212 | # define ALIGNMENT_REQUIRED 16
213 | #endif
214 |
215 | /* Process one byte at a time until the pointer is properly aligned. */
216 | while ((uintptr_t)p % ALIGNMENT_REQUIRED != 0) {
217 | if (p >= data + size - 10)
218 | return;
219 | if (*p == 0xE8 && (valid_mask & 1)) {
220 | (*process_target)(p + 1, p - data);
221 | valid_mask &= ~0x1F; /* the E8 byte and its 4 target bytes must not start another translation */
222 | }
223 | p++;
224 | valid_mask >>= 1; /* keep bit 0 aligned with the new *p */
225 | valid_mask |= (uint64_t)1 << 63;
226 | }
227 |
228 | if (data + size - p >= 64) {
229 |
230 | /* Vectorized processing */
231 |
232 | /* Note: we use a "trap" E8 byte to eliminate the need to check
233 | * for end-of-buffer in the inner loop. This byte is carefully
234 | * positioned so that it will never be changed by a previous
235 | * translation before it is detected. */
236 |
237 | uint8_t *trap = p + ((data + size - p) & ~31) - 32 + 4; /* 4 bytes into the last whole 32-byte chunk counted from p */
238 | uint8_t saved_byte = *trap;
239 | *trap = 0xE8;
240 |
241 | for (;;) {
242 | uint32_t e8_mask;
243 | uint8_t *orig_p = p;
244 | #ifdef __AVX2__
245 | const __m256i e8_bytes = _mm256_set1_epi8(0xE8);
246 | for (;;) {
247 | __m256i bytes = *(const __m256i *)p;
248 | __m256i cmpresult = _mm256_cmpeq_epi8(bytes, e8_bytes);
249 | e8_mask = _mm256_movemask_epi8(cmpresult);
250 | if (e8_mask)
251 | break;
252 | p += 32;
253 | }
254 | #else
255 | const __m128i e8_bytes = _mm_set1_epi8(0xE8);
256 | for (;;) {
257 | /* Read the next 32 bytes of data and test them
258 | * for E8 bytes. */
259 | __m128i bytes1 = *(const __m128i *)p;
260 | __m128i bytes2 = *(const __m128i *)(p + 16);
261 | __m128i cmpresult1 = _mm_cmpeq_epi8(bytes1, e8_bytes);
262 | __m128i cmpresult2 = _mm_cmpeq_epi8(bytes2, e8_bytes);
263 | uint32_t mask1 = _mm_movemask_epi8(cmpresult1);
264 | uint32_t mask2 = _mm_movemask_epi8(cmpresult2);
265 | /* The masks have a bit set for each E8 byte.
266 | * We stay in this fast inner loop as long as
267 | * there are no E8 bytes. */
268 | if (mask1 | mask2) {
269 | e8_mask = mask1 | (mask2 << 16);
270 | break;
271 | }
272 | p += 32;
273 | }
274 | #endif
275 |
276 | /* Did we pass over data with no E8 bytes? */
277 | if (p != orig_p)
278 | valid_mask = ~0;
279 |
280 | /* Are we nearing end-of-buffer? */
281 | if (p == trap - 4) /* p is at the start of the chunk that holds the trap byte */
282 | break;
283 |
284 | /* Process the E8 bytes. However, the AND with
285 | * 'valid_mask' ensures we never process an E8 byte that
286 | * was itself part of a translation target. */
287 | while ((e8_mask &= valid_mask)) {
288 | unsigned bit = bsf32(e8_mask);
289 | (*process_target)(p + bit + 1, p + bit - data);
290 | valid_mask &= ~((uint64_t)0x1F << bit); /* may reach past bit 31, i.e. into the next chunk */
291 | }
292 |
293 | valid_mask >>= 32; /* slide the eligibility window past the 32 bytes just handled */
294 | valid_mask |= 0xFFFFFFFF00000000;
295 | p += 32;
296 | }
297 |
298 | *trap = saved_byte; /* restore the byte that was overwritten by the trap */
299 | }
300 |
301 | /* Approaching the end of the buffer; process one byte a time. */
302 | while (p < data + size - 10) {
303 | if (*p == 0xE8 && (valid_mask & 1)) {
304 | (*process_target)(p + 1, p - data);
305 | valid_mask &= ~0x1F;
306 | }
307 | p++;
308 | valid_mask >>= 1;
309 | valid_mask |= (uint64_t)1 << 63;
310 | }
311 | #endif /* __SSE2__ || __AVX2__ */
312 | }
313 |
314 | void
315 | lzx_preprocess(uint8_t *data, uint32_t size)
316 | {
317 | lzx_e8_filter(data, size, do_translate_target);
318 | }
319 |
320 | void
321 | lzx_postprocess(uint8_t *data, uint32_t size)
322 | {
323 | lzx_e8_filter(data, size, undo_translate_target);
324 | }
325 |
--------------------------------------------------------------------------------
/src/ebiggers/decompress_common.c:
--------------------------------------------------------------------------------
1 | /*
2 | * decompress_common.c
3 | *
4 | * Code for decompression shared among multiple compression formats.
5 | *
6 | * The following copying information applies to this specific source code file:
7 | *
8 | * Written in 2012-2016 by Eric Biggers
9 | *
10 | * To the extent possible under law, the author(s) have dedicated all copyright
11 | * and related and neighboring rights to this software to the public domain
12 | * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
13 | * Dedication (the "CC0").
14 | *
15 | * This software is distributed in the hope that it will be useful, but WITHOUT
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 | * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
18 | *
19 | * You should have received a copy of the CC0 along with this software; if not
20 | * see <http://creativecommons.org/publicdomain/zero/1.0/>.
21 | */
22 |
23 | #ifdef HAVE_CONFIG_H
24 | # include "config.h"
25 | #endif
26 |
27 | #include
28 |
29 | #ifdef __SSE2__
30 | # include
31 | #endif
32 |
33 | #include "decompress_common.h"
34 |
35 | /*
36 | * make_huffman_decode_table() -
37 | *
38 | * Given an alphabet of symbols and the length of each symbol's codeword in a
39 | * canonical prefix code, build a table for quickly decoding symbols that were
40 | * encoded with that code.
41 | *
42 | * A _prefix code_ is an assignment of bitstrings called _codewords_ to symbols
43 | * such that no whole codeword is a prefix of any other. A prefix code might be
44 | * a _Huffman code_, which means that it is an optimum prefix code for a given
45 | * list of symbol frequencies and was generated by the Huffman algorithm.
46 | * Although the prefix codes processed here will ordinarily be "Huffman codes",
47 | * strictly speaking the decoder cannot know whether a given code was actually
48 | * generated by the Huffman algorithm or not.
49 | *
50 | * A prefix code is _canonical_ if and only if a longer codeword never
51 | * lexicographically precedes a shorter codeword, and the lexicographic ordering
52 | * of codewords of equal length is the same as the lexicographic ordering of the
53 | * corresponding symbols. The advantage of using a canonical prefix code is
54 | * that the codewords can be reconstructed from only the symbol => codeword
55 | * length mapping. This eliminates the need to transmit the codewords
56 | * explicitly. Instead, they can be enumerated in lexicographic order after
57 | * sorting the symbols primarily by increasing codeword length and secondarily
58 | * by increasing symbol value.
59 | *
60 | * However, the decoder's real goal is to decode symbols with the code, not just
61 | * generate the list of codewords. Consequently, this function directly builds
62 | * a table for efficiently decoding symbols using the code. The basic idea is
63 | * that given the next 'max_codeword_len' bits of input, the decoder can look up
64 | * the next decoded symbol by indexing a table containing '2^max_codeword_len'
65 | * entries. A codeword with length 'max_codeword_len' will have exactly one
66 | * entry in this table, whereas a codeword shorter than 'max_codeword_len' will
67 | * have multiple entries in this table. Precisely, a codeword of length 'n'
68 | * will have '2^(max_codeword_len - n)' entries. The index of each such entry,
69 | * considered as a bitstring of length 'max_codeword_len', will contain the
70 | * corresponding codeword as a prefix.
71 | *
72 | * That's the basic idea, but we extend it in two ways:
73 | *
74 | * - Often the maximum codeword length is too long for it to be efficient to
75 | * build the full decode table whenever a new code is used. Instead, we build
76 | * a "root" table using only '2^table_bits' entries, where 'table_bits <=
77 | * max_codeword_len'. Then, a lookup of 'table_bits' bits produces either a
78 | * symbol directly (for codewords not longer than 'table_bits'), or the index
79 | * of a subtable which must be indexed with additional bits of input to fully
80 | * decode the symbol (for codewords longer than 'table_bits').
81 | *
82 | * - Whenever the decoder decodes a symbol, it needs to know the codeword length
83 | * so that it can remove the appropriate number of input bits. The obvious
84 | * solution would be to simply retain the codeword lengths array and use the
85 | * decoded symbol as an index into it. However, that would require two array
86 | * accesses when decoding each symbol. Our strategy is to instead store the
87 | * codeword length directly in the decode table entry along with the symbol.
88 | *
89 | * See MAKE_DECODE_TABLE_ENTRY() for full details on the format of decode table
90 | * entries, and see read_huffsym() for full details on how symbols are decoded.
91 | *
92 | * @decode_table:
93 | * The array in which to build the decode table. This must have been
94 | * declared by the DECODE_TABLE() macro. This may alias @lens, since all
95 | * @lens are consumed before the decode table is written to.
96 | *
97 | * @num_syms:
98 | * The number of symbols in the alphabet.
99 | *
100 | * @table_bits:
101 | * The log base 2 of the number of entries in the root table.
102 | *
103 | * @lens:
104 | * An array of length @num_syms, indexed by symbol, that gives the length
105 | * of the codeword, in bits, for each symbol. The length can be 0, which
106 | * means that the symbol does not have a codeword assigned. In addition,
107 | * @lens may alias @decode_table, as noted above.
108 | *
109 | * @max_codeword_len:
110 | * The maximum codeword length permitted for this code. All entries in
111 | * 'lens' must be less than or equal to this value.
112 | *
113 | * @working_space
114 | * A temporary array that was declared with DECODE_TABLE_WORKING_SPACE().
115 | *
116 | * Returns 0 on success, or -1 if the lengths do not form a valid prefix code.
117 | */
118 | int
119 | make_huffman_decode_table(uint16_t decode_table[], unsigned num_syms,
120 | unsigned table_bits, const uint8_t lens[],
121 | unsigned max_codeword_len, uint16_t working_space[])
122 | {
123 | uint16_t * const len_counts = &working_space[0]; /* len_counts[n]: number of codewords of length n */
124 | uint16_t * const offsets = &working_space[1 * (max_codeword_len + 1)]; /* offsets[n]: next free slot in sorted_syms for length n */
125 | uint16_t * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; /* symbols ordered by (length, value) */
126 | int32_t remainder = 1; /* number of still-unused codewords of the length being examined */
127 | uint8_t *entry_ptr = (uint8_t*)decode_table; /* byte cursor over the root table */
128 | unsigned codeword_len = 1; /* codeword length currently being filled */
129 | unsigned sym_idx; /* index into sorted_syms */
130 | unsigned codeword; /* next codeword to assign (subtable phase only) */
131 | unsigned subtable_pos; /* index in decode_table of the next subtable entry */
132 | unsigned subtable_bits; /* log2 of the current subtable's size */
133 | unsigned subtable_prefix; /* root-table index that points at the current subtable */
134 |
135 | /* Count how many codewords have each length, including 0. */
136 | for (unsigned len = 0; len <= max_codeword_len; len++)
137 | len_counts[len] = 0;
138 | for (unsigned sym = 0; sym < num_syms; sym++)
139 | len_counts[lens[sym]]++;
140 |
141 | /* It is already guaranteed that all lengths are <= max_codeword_len,
142 | * but it cannot be assumed they form a complete prefix code. A
143 | * codeword of length n should require a proportion of the codespace
144 | * equaling (1/2)^n. The code is complete if and only if, by this
145 | * measure, the codespace is exactly filled by the lengths. */
146 | for (unsigned len = 1; len <= max_codeword_len; len++) {
147 | remainder = (remainder << 1) - len_counts[len];
148 | /* Do the lengths overflow the codespace? */
149 | if (unlikely(remainder < 0))
150 | return -1;
151 | }
152 |
153 | if (remainder != 0) {
154 | /* The lengths do not fill the codespace; that is, they form an
155 | * incomplete code. This is permitted only if the code is empty
156 | * (contains no symbols). */
157 |
158 | if (unlikely(remainder != 1U << max_codeword_len))
159 | return -1;
160 |
161 | /* The code is empty. When processing a well-formed stream, the
162 | * decode table need not be initialized in this case. However,
163 | * we cannot assume the stream is well-formed, so we must
164 | * initialize the decode table anyway. Setting all entries to 0
165 | * makes the decode table always produce symbol '0' without
166 | * consuming any bits, which is good enough. */
167 | memset(decode_table, 0, sizeof(decode_table[0]) << table_bits);
168 | return 0;
169 | }
170 |
171 | /* Sort the symbols primarily by increasing codeword length and
172 | * secondarily by increasing symbol value. */
173 |
174 | /* Initialize 'offsets' so that 'offsets[len]' is the number of
175 | * codewords shorter than 'len' bits, including length 0. */
176 | offsets[0] = 0;
177 | for (unsigned len = 0; len < max_codeword_len; len++)
178 | offsets[len + 1] = offsets[len] + len_counts[len];
179 |
180 | /* Use the 'offsets' array to sort the symbols. */
181 | for (unsigned sym = 0; sym < num_syms; sym++)
182 | sorted_syms[offsets[lens[sym]]++] = sym;
183 |
184 | /*
185 | * Fill the root table entries for codewords no longer than table_bits.
186 | *
187 | * The table will start with entries for the shortest codeword(s), which
188 | * will have the most entries. From there, the number of entries per
189 | * codeword will decrease. As an optimization, we may begin filling
190 | * entries with SSE2 vector accesses (8 entries/store), then change to
191 | * word accesses (2 or 4 entries/store), then change to 16-bit accesses
192 | * (1 entry/store).
193 | */
194 | sym_idx = offsets[0]; /* the sort advanced offsets[0] past every unused (length 0) symbol */
195 |
196 | #ifdef __SSE2__
197 | /* Fill entries one 128-bit vector (8 entries) at a time. */
198 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
199 | (sizeof(__m128i) / sizeof(decode_table[0]));
200 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
201 | {
202 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
203 | for (; sym_idx < end_sym_idx; sym_idx++) {
204 | /* Note: unlike in the "word" version below, the __m128i
205 | * type already has __attribute__((may_alias)), so using
206 | * it to access an array of u16 will not violate strict
207 | * aliasing. */
208 | __m128i v = _mm_set1_epi16(
209 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
210 | codeword_len));
211 | unsigned n = stores_per_loop;
212 | do {
213 | *(__m128i *)entry_ptr = v;
214 | entry_ptr += sizeof(v);
215 | } while (--n);
216 | }
217 | }
218 | #endif /* __SSE2__ */
219 |
220 | #ifdef __GNUC__
221 | /* Fill entries one word (2 or 4 entries) at a time. */
222 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len)) /
223 | (WORDBYTES / sizeof(decode_table[0]));
224 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
225 | {
226 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
227 | for (; sym_idx < end_sym_idx; sym_idx++) {
228 |
229 | /* Accessing the array of u16 as u32 or u64 would
230 | * violate strict aliasing and would require compiling
231 | * the code with -fno-strict-aliasing to guarantee
232 | * correctness. To work around this problem, use the
233 | * gcc 'may_alias' extension. */
234 | typedef machine_word_t
235 | __attribute__((may_alias)) aliased_word_t;
236 | aliased_word_t v = repeat_u16(
237 | MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
238 | codeword_len));
239 | unsigned n = stores_per_loop;
240 | do {
241 | *(aliased_word_t *)entry_ptr = v;
242 | entry_ptr += sizeof(v);
243 | } while (--n);
244 | }
245 | }
246 | #endif /* __GNUC__ */
247 |
248 | /* Fill entries one at a time. */
249 | for (unsigned stores_per_loop = (1U << (table_bits - codeword_len));
250 | stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1)
251 | {
252 | unsigned end_sym_idx = sym_idx + len_counts[codeword_len];
253 | for (; sym_idx < end_sym_idx; sym_idx++) {
254 | uint16_t v = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
255 | codeword_len);
256 | unsigned n = stores_per_loop;
257 | do {
258 | *(uint16_t *)entry_ptr = v;
259 | entry_ptr += sizeof(v);
260 | } while (--n);
261 | }
262 | }
263 |
264 | /* If all symbols were processed, then no subtables are required. */
265 | if (sym_idx == num_syms)
266 | return 0;
267 |
268 | /* At least one subtable is required. Process the remaining symbols. */
269 | codeword = ((uint16_t *)entry_ptr - decode_table) << 1; /* number of root entries written, shifted up by one bit */
270 | subtable_pos = 1U << table_bits;
271 | subtable_bits = table_bits;
272 | subtable_prefix = -1; /* no subtable started yet */
273 | do {
274 | while (len_counts[codeword_len] == 0) {
275 | codeword_len++;
276 | codeword <<= 1;
277 | }
278 |
279 | unsigned prefix = codeword >> (codeword_len - table_bits);
280 |
281 | /* Start a new subtable if the first 'table_bits' bits of the
282 | * codeword don't match the prefix for the previous subtable, or
283 | * if this will be the first subtable. */
284 | if (prefix != subtable_prefix) {
285 |
286 | subtable_prefix = prefix;
287 |
288 | /*
289 | * Calculate the subtable length. If the codeword
290 | * length exceeds 'table_bits' by n, then the subtable
291 | * needs at least 2^n entries. But it may need more; if
292 | * there are fewer than 2^n codewords of length
293 | * 'table_bits + n' remaining, then n will need to be
294 | * incremented to bring in longer codewords until the
295 | * subtable can be filled completely. Note that it
296 | * always will, eventually, be possible to fill the
297 | * subtable, since it was previously verified that the
298 | * code is complete.
299 | */
300 | subtable_bits = codeword_len - table_bits;
301 | remainder = (int32_t)1 << subtable_bits;
302 | for (;;) {
303 | remainder -= len_counts[table_bits +
304 | subtable_bits];
305 | if (remainder <= 0)
306 | break;
307 | subtable_bits++;
308 | remainder <<= 1;
309 | }
310 |
311 | /* Create the entry that points from the root table to
312 | * the subtable. This entry contains the index of the
313 | * start of the subtable and the number of bits with
314 | * which the subtable is indexed (the log base 2 of the
315 | * number of entries it contains). */
316 | decode_table[subtable_prefix] =
317 | MAKE_DECODE_TABLE_ENTRY(subtable_pos,
318 | subtable_bits);
319 | }
320 |
321 | /* Fill the subtable entries for this symbol. */
322 | uint16_t entry = MAKE_DECODE_TABLE_ENTRY(sorted_syms[sym_idx],
323 | codeword_len - table_bits);
324 | unsigned n = 1U << (subtable_bits - (codeword_len -
325 | table_bits));
326 | do {
327 | decode_table[subtable_pos++] = entry;
328 | } while (--n);
329 |
330 | len_counts[codeword_len]--;
331 | codeword++;
332 | } while (++sym_idx < num_syms);
333 |
334 | return 0;
335 | }
336 |
--------------------------------------------------------------------------------
/src/ebiggers/lzx_decompress.c:
--------------------------------------------------------------------------------
1 | /*
2 | * lzx_decompress.c
3 | *
4 | * A decompressor for the LZX compression format, as used in WIM files.
5 | */
6 |
7 | /*
8 | * Copyright (C) 2012-2016 Eric Biggers
9 | *
10 | * This program is free software: you can redistribute it and/or modify it under
11 | * the terms of the GNU General Public License as published by the Free Software
12 | * Foundation, either version 2 of the License, or (at your option) any later
13 | * version.
14 | *
15 | * This program is distributed in the hope that it will be useful, but WITHOUT
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 | * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 | * details.
19 | *
20 | * You should have received a copy of the GNU General Public License along with
21 | * this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | /*
25 | * LZX is an LZ77 and Huffman-code based compression format that has many
26 | * similarities to DEFLATE (the format used by zlib/gzip). The compression
27 | * ratio is as good or better than DEFLATE. See lzx_compress.c for a format
28 | * overview, and see https://en.wikipedia.org/wiki/LZX_(algorithm) for a
29 | * historical overview. Here I make some pragmatic notes.
30 | *
31 | * The old specification for LZX is the document "Microsoft LZX Data Compression
32 | * Format" (1997). It defines the LZX format as used in cabinet files. Allowed
33 | * window sizes are 2^n where 15 <= n <= 21. However, this document contains
34 | * several errors, so don't read too much into it...
35 | *
36 | * The new specification for LZX is the document "[MS-PATCH]: LZX DELTA
37 | * Compression and Decompression" (2014). It defines the LZX format as used by
38 | * Microsoft's binary patcher. It corrects several errors in the 1997 document
39 | * and extends the format in several ways --- namely, optional reference data,
40 | * up to 2^25 byte windows, and longer match lengths.
41 | *
42 | * WIM files use a more restricted form of LZX. No LZX DELTA extensions are
43 | * present, the window is not "sliding", E8 preprocessing is done
44 | * unconditionally with a fixed file size, and the maximum window size is always
45 | * 2^15 bytes (equal to the size of each "chunk" in a compressed WIM resource).
46 | * This code is primarily intended to implement this form of LZX. But although
47 | * not compatible with WIMGAPI, this code also supports maximum window sizes up
48 | * to 2^21 bytes.
49 | *
50 | * TODO: Add support for window sizes up to 2^25 bytes.
51 | */
52 |
53 | #ifdef HAVE_CONFIG_H
54 | # include "config.h"
55 | #endif
56 |
57 | #include
58 |
59 | #include "decompress_common.h"
60 | #include "lzx_common.h"
61 | #include "system_compression.h"
62 |
63 | /* Root decode table sizes (log2 of the entry count), chosen for fast decompression. */
64 | #define LZX_MAINCODE_TABLEBITS 11
65 | #define LZX_LENCODE_TABLEBITS 9
66 | #define LZX_PRECODE_TABLEBITS 6
67 | #define LZX_ALIGNEDCODE_TABLEBITS 7
68 | /* Entries a run may write past the end of a lens array; see lzx_read_codeword_lens(). */
69 | #define LZX_READ_LENS_MAX_OVERRUN 50
70 |
71 | struct lzx_decompressor { /* All state for one LZX decompressor; set up by lzx_init_decompressor(). */
72 | uint16_t maincode_decode_table[DECODE_TABLE_ENOUGH(LZX_MAINCODE_MAX_NUM_SYMBOLS, LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
73 | uint8_t maincode_lens[LZX_MAINCODE_MAX_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
74 |
75 | uint16_t lencode_decode_table[DECODE_TABLE_ENOUGH(LZX_LENCODE_NUM_SYMBOLS, LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
76 | uint8_t lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
77 |
78 | union { /* the lens are only needed until the table is built, so the two share storage */
79 | uint16_t alignedcode_decode_table[DECODE_TABLE_ENOUGH(LZX_ALIGNEDCODE_NUM_SYMBOLS, LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
80 | uint8_t alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
81 | };
82 |
83 | union { /* precode state is only used while reading a block header; extra_offset_bits reuses it while decoding the block */
84 | uint16_t precode_decode_table[DECODE_TABLE_ENOUGH(LZX_PRECODE_NUM_SYMBOLS, LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN)] _aligned_attribute(DECODE_TABLE_ALIGNMENT);
85 | uint8_t precode_lens[LZX_PRECODE_NUM_SYMBOLS];
86 | uint8_t extra_offset_bits[LZX_MAX_OFFSET_SLOTS];
87 | };
88 |
89 | union { /* scratch for make_huffman_decode_table(); the codes are built one at a time */
90 | uint16_t maincode_working_space[2 * (LZX_MAX_MAIN_CODEWORD_LEN + 1) + LZX_MAINCODE_MAX_NUM_SYMBOLS];
91 | uint16_t lencode_working_space[2 * (LZX_MAX_LEN_CODEWORD_LEN + 1) + LZX_LENCODE_NUM_SYMBOLS];
92 | uint16_t alignedcode_working_space[2 * (LZX_MAX_ALIGNED_CODEWORD_LEN + 1) + LZX_ALIGNEDCODE_NUM_SYMBOLS];
93 | uint16_t precode_working_space[2 * (LZX_MAX_PRE_CODEWORD_LEN + 1) + LZX_PRECODE_NUM_SYMBOLS];
94 | };
95 |
96 | unsigned window_order; /* result of lzx_get_window_order(max_block_size) */
97 | unsigned num_main_syms; /* result of lzx_get_num_main_syms(window_order) */
98 |
99 | /* Like lzx_extra_offset_bits[], but does not include the entropy-coded
100 | * bits of aligned offset blocks */
101 | uint8_t extra_offset_bits_minus_aligned[LZX_MAX_OFFSET_SLOTS];
102 | } _aligned_attribute(DECODE_TABLE_ALIGNMENT);
103 |
104 | /* Decode one symbol from @is with the precode table held in @d. */
105 | static forceinline unsigned
106 | read_presym(const struct lzx_decompressor *d, struct input_bitstream *is)
107 | {
108 | const uint16_t * const table = d->precode_decode_table;
109 | return read_huffsym(is, table, LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
110 | }
111 |
112 | /* Decode one symbol from @is with the main code table held in @d. */
113 | static forceinline unsigned
114 | read_mainsym(const struct lzx_decompressor *d, struct input_bitstream *is)
115 | {
116 | const uint16_t * const table = d->maincode_decode_table;
117 | return read_huffsym(is, table, LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
118 | }
119 |
120 | /* Decode one symbol from @is with the length code table held in @d. */
121 | static forceinline unsigned
122 | read_lensym(const struct lzx_decompressor *d, struct input_bitstream *is)
123 | {
124 | const uint16_t * const table = d->lencode_decode_table;
125 | return read_huffsym(is, table, LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
126 | }
127 |
128 | /* Decode one symbol from @is with the aligned offset code table held in @d. */
129 | static forceinline unsigned
130 | read_alignedsym(const struct lzx_decompressor *d, struct input_bitstream *is)
131 | {
132 | const uint16_t * const table = d->alignedcode_decode_table;
133 | return read_huffsym(is, table, LZX_ALIGNEDCODE_TABLEBITS, LZX_MAX_ALIGNED_CODEWORD_LEN);
134 | }
135 |
136 | /*
137 | * Read a precode from @is, then use it to decode @num_lens codeword lengths into
138 | * @lens, as deltas against the values already there. Returns 0, or -1 if invalid.
139 | */
140 | static int
141 | lzx_read_codeword_lens(struct lzx_decompressor *d, struct input_bitstream *is,
142 | uint8_t *lens, unsigned num_lens)
143 | {
144 | uint8_t *len_ptr = lens; /* next length to be written */
145 | uint8_t *lens_end = lens + num_lens; /* a run may write past this; see the overrun note below */
146 |
147 | /* Read the lengths of the precode codewords. These are stored
148 | * explicitly. */
149 | for (int i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
150 | d->precode_lens[i] =
151 | bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
152 | }
153 |
154 | /* Build the decoding table for the precode. */
155 | if (make_huffman_decode_table(d->precode_decode_table,
156 | LZX_PRECODE_NUM_SYMBOLS,
157 | LZX_PRECODE_TABLEBITS,
158 | d->precode_lens,
159 | LZX_MAX_PRE_CODEWORD_LEN,
160 | d->precode_working_space))
161 | return -1;
162 |
163 | /* Decode the codeword lengths. */
164 | do {
165 | unsigned presym;
166 | uint8_t len;
167 |
168 | /* Read the next precode symbol. */
169 | presym = read_presym(d, is);
170 | if (presym < 17) {
171 | /* Difference from old length */
172 | len = *len_ptr - presym;
173 | if ((int8_t)len < 0) /* wrap a negative difference back into range */
174 | len += 17;
175 | *len_ptr++ = len;
176 | } else {
177 | /* Special RLE values */
178 |
179 | unsigned run_len;
180 |
181 | if (presym == 17) {
182 | /* Run of 0's */
183 | run_len = 4 + bitstream_read_bits(is, 4);
184 | len = 0;
185 | } else if (presym == 18) {
186 | /* Longer run of 0's */
187 | run_len = 20 + bitstream_read_bits(is, 5);
188 | len = 0;
189 | } else {
190 | /* Run of identical lengths */
191 | run_len = 4 + bitstream_read_bits(is, 1);
192 | presym = read_presym(d, is);
193 | if (unlikely(presym > 17)) /* symbols above 17 are rejected here */
194 | return -1;
195 | len = *len_ptr - presym;
196 | if ((int8_t)len < 0)
197 | len += 17;
198 | }
199 |
200 | do {
201 | *len_ptr++ = len;
202 | } while (--run_len);
203 | /*
204 | * The worst case overrun is when presym == 18,
205 | * run_len == 20 + 31, and only 1 length was remaining.
206 | * So LZX_READ_LENS_MAX_OVERRUN == 50.
207 | *
208 | * Overrun while reading the first half of maincode_lens
209 | * can corrupt the previous values in the second half.
210 | * This doesn't really matter because the resulting
211 | * lengths will still be in range, and data that
212 | * generates overruns is invalid anyway.
213 | */
214 | }
215 | } while (len_ptr < lens_end);
216 |
217 | return 0;
218 | }
219 |
220 | /*
221 | * Read the header of an LZX block. On success (return value 0), the block type
222 | * and size are saved in *block_type_ret and *block_size_ret, respectively. For
223 | * compressed blocks, the codeword lengths are also saved in @d. For uncompressed
224 | * blocks, @recent_offsets is also updated. Returns -1 if the header is invalid.
225 | */
226 | static int
227 | lzx_read_block_header(struct lzx_decompressor *d, struct input_bitstream *is,
228 | uint32_t recent_offsets[], int *block_type_ret,
229 | uint32_t *block_size_ret)
230 | {
231 | int block_type;
232 | uint32_t block_size;
233 |
234 | bitstream_ensure_bits(is, 4); /* 3 bits of block type + 1 bit of size flag */
235 |
236 | /* Read the block type. */
237 | block_type = bitstream_pop_bits(is, 3);
238 |
239 | /* Read the block size. */
240 | if (bitstream_pop_bits(is, 1)) {
241 | block_size = LZX_DEFAULT_BLOCK_SIZE;
242 | } else {
243 | block_size = bitstream_read_bits(is, 16);
244 | if (d->window_order >= 16) { /* larger windows store 8 more size bits */
245 | block_size <<= 8;
246 | block_size |= bitstream_read_bits(is, 8);
247 | }
248 | }
249 |
250 | switch (block_type) {
251 |
252 | case LZX_BLOCKTYPE_ALIGNED:
253 |
254 | /* Read the aligned offset codeword lengths. */
255 |
256 | for (int i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
257 | d->alignedcode_lens[i] =
258 | bitstream_read_bits(is,
259 | LZX_ALIGNEDCODE_ELEMENT_SIZE);
260 | }
261 |
262 | /* Fall through, since the rest of the header for aligned offset
263 | * blocks is the same as that for verbatim blocks. */
264 |
265 | case LZX_BLOCKTYPE_VERBATIM:
266 |
267 | /* Read the main codeword lengths, which are divided into two
268 | * parts: literal symbols and match headers. */
269 |
270 | if (lzx_read_codeword_lens(d, is, d->maincode_lens,
271 | LZX_NUM_CHARS))
272 | return -1;
273 |
274 | if (lzx_read_codeword_lens(d, is, d->maincode_lens + LZX_NUM_CHARS,
275 | d->num_main_syms - LZX_NUM_CHARS))
276 | return -1;
277 |
278 |
279 | /* Read the length codeword lengths. */
280 |
281 | if (lzx_read_codeword_lens(d, is, d->lencode_lens,
282 | LZX_LENCODE_NUM_SYMBOLS))
283 | return -1;
284 |
285 | break;
286 |
287 | case LZX_BLOCKTYPE_UNCOMPRESSED:
288 | /*
289 | * The header of an uncompressed block contains new values for
290 | * the recent offsets queue, starting on the next 16-bit
291 | * boundary in the bitstream. Careful: if the stream is
292 | * *already* aligned, the correct thing to do is to throw away
293 | * the next 16 bits (this is probably a mistake in the format).
294 | */
295 | bitstream_ensure_bits(is, 1);
296 | bitstream_align(is);
297 | recent_offsets[0] = bitstream_read_u32(is);
298 | recent_offsets[1] = bitstream_read_u32(is);
299 | recent_offsets[2] = bitstream_read_u32(is);
300 |
301 | /* Offsets of 0 are invalid. */
302 | if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
303 | recent_offsets[2] == 0)
304 | return -1;
305 | break;
306 |
307 | default:
308 | /* Unrecognized block type. */
309 | return -1;
310 | }
311 |
312 | *block_type_ret = block_type;
313 | *block_size_ret = block_size;
314 | return 0;
315 | }
316 |
317 | /* Decompress one compressed LZX block of @block_size bytes to @out_next. Returns 0, or -1 on invalid data. */
318 | static int
319 | lzx_decompress_block(struct lzx_decompressor *d, struct input_bitstream *is,
320 | int block_type, uint32_t block_size,
321 | uint8_t * const out_begin, uint8_t *out_next, uint32_t recent_offsets[])
322 | {
323 | uint8_t * const block_end = out_next + block_size;
324 | unsigned min_aligned_offset_slot; /* first offset slot whose low bits use the aligned code */
325 |
326 | /*
327 | * Build the Huffman decode tables. We always need to build the main
328 | * and length decode tables. For aligned blocks we additionally need to
329 | * build the aligned offset decode table.
330 | */
331 |
332 | if (make_huffman_decode_table(d->maincode_decode_table,
333 | d->num_main_syms,
334 | LZX_MAINCODE_TABLEBITS,
335 | d->maincode_lens,
336 | LZX_MAX_MAIN_CODEWORD_LEN,
337 | d->maincode_working_space))
338 | return -1;
339 |
340 | if (make_huffman_decode_table(d->lencode_decode_table,
341 | LZX_LENCODE_NUM_SYMBOLS,
342 | LZX_LENCODE_TABLEBITS,
343 | d->lencode_lens,
344 | LZX_MAX_LEN_CODEWORD_LEN,
345 | d->lencode_working_space))
346 | return -1;
347 |
348 | if (block_type == LZX_BLOCKTYPE_ALIGNED) {
349 | if (make_huffman_decode_table(d->alignedcode_decode_table,
350 | LZX_ALIGNEDCODE_NUM_SYMBOLS,
351 | LZX_ALIGNEDCODE_TABLEBITS,
352 | d->alignedcode_lens,
353 | LZX_MAX_ALIGNED_CODEWORD_LEN,
354 | d->alignedcode_working_space))
355 | return -1;
356 | min_aligned_offset_slot = LZX_MIN_ALIGNED_OFFSET_SLOT;
357 | memcpy(d->extra_offset_bits, d->extra_offset_bits_minus_aligned,
358 | sizeof(lzx_extra_offset_bits));
359 | } else {
360 | min_aligned_offset_slot = LZX_MAX_OFFSET_SLOTS; /* disables the aligned-offset path below */
361 | memcpy(d->extra_offset_bits, lzx_extra_offset_bits,
362 | sizeof(lzx_extra_offset_bits));
363 | }
364 |
365 | /* Decode the literals and matches. */
366 |
367 | do {
368 | unsigned mainsym;
369 | unsigned length;
370 | uint32_t offset;
371 | unsigned offset_slot;
372 |
373 | mainsym = read_mainsym(d, is);
374 | if (mainsym < LZX_NUM_CHARS) {
375 | /* Literal */
376 | *out_next++ = mainsym;
377 | continue; /* jumps to the loop condition at the bottom */
378 | }
379 |
380 | /* Match */
381 |
382 | /* Decode the length header and offset slot. */
383 | STATIC_ASSERT(LZX_NUM_CHARS % LZX_NUM_LEN_HEADERS == 0);
384 | length = mainsym % LZX_NUM_LEN_HEADERS;
385 | offset_slot = (mainsym - LZX_NUM_CHARS) / LZX_NUM_LEN_HEADERS;
386 |
387 | /* If needed, read a length symbol to decode the full length. */
388 | if (length == LZX_NUM_PRIMARY_LENS)
389 | length += read_lensym(d, is);
390 | length += LZX_MIN_MATCH_LEN;
391 |
392 | if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
393 | /* Repeat offset */
394 |
395 | /* Note: This isn't a real LRU queue, since using the R2
396 | * offset doesn't bump the R1 offset down to R2. */
397 | offset = recent_offsets[offset_slot];
398 | recent_offsets[offset_slot] = recent_offsets[0];
399 | } else {
400 | /* Explicit offset */
401 | offset = bitstream_read_bits(is, d->extra_offset_bits[offset_slot]);
402 | if (offset_slot >= min_aligned_offset_slot) {
403 | offset = (offset << LZX_NUM_ALIGNED_OFFSET_BITS) |
404 | read_alignedsym(d, is);
405 | }
406 | offset += lzx_offset_slot_base[offset_slot];
407 |
408 | /* Update the match offset LRU queue. */
409 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
410 | recent_offsets[2] = recent_offsets[1];
411 | recent_offsets[1] = recent_offsets[0];
412 | }
413 | recent_offsets[0] = offset;
414 |
415 | /* Validate the match and copy it to the current position. */
416 | if (unlikely(lz_copy(length, offset, out_begin,
417 | out_next, block_end, LZX_MIN_MATCH_LEN)))
418 | return -1;
419 | out_next += length;
420 | } while (out_next != block_end);
421 |
422 | return 0;
423 | }
424 | /* Decompress LZX data into exactly @uncompressed_size bytes. Returns 0 on success, -1 on invalid data. */
425 | int
426 | lzx_decompress(struct lzx_decompressor *d,
427 | const void *compressed_data, size_t compressed_size,
428 | void *uncompressed_data, size_t uncompressed_size)
429 | {
430 | uint8_t * const out_begin = uncompressed_data;
431 | uint8_t *out_next = out_begin;
432 | uint8_t * const out_end = out_begin + uncompressed_size;
433 | struct input_bitstream is;
434 | STATIC_ASSERT(LZX_NUM_RECENT_OFFSETS == 3);
435 | uint32_t recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
436 | unsigned may_have_e8_byte = 0; /* nonzero once any block could have produced an E8 byte */
437 |
438 | init_input_bitstream(&is, compressed_data, compressed_size);
439 |
440 | /* Codeword lengths begin as all 0's for delta encoding purposes. */
441 | memset(d->maincode_lens, 0, d->num_main_syms);
442 | memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
443 |
444 | /* Decompress blocks until we have all the uncompressed data. */
445 |
446 | while (out_next != out_end) {
447 | int block_type;
448 | uint32_t block_size;
449 |
450 | if (lzx_read_block_header(d, &is, recent_offsets,
451 | &block_type, &block_size))
452 | return -1;
453 |
454 | if (block_size < 1 || block_size > out_end - out_next) /* empty block, or larger than the space left */
455 | return -1;
456 |
457 | if (likely(block_type != LZX_BLOCKTYPE_UNCOMPRESSED)) {
458 |
459 | /* Compressed block */
460 | if (lzx_decompress_block(d, &is, block_type, block_size,
461 | out_begin, out_next,
462 | recent_offsets))
463 | return -1;
464 |
465 | /* If the first E8 byte was in this block, then it must
466 | * have been encoded as a literal using mainsym E8. */
467 | may_have_e8_byte |= d->maincode_lens[0xE8];
468 | } else {
469 |
470 | /* Uncompressed block */
471 | if (bitstream_read_bytes(&is, out_next, block_size))
472 | return -1;
473 |
474 | /* Re-align the bitstream if needed. */
475 | if (block_size & 1)
476 | bitstream_read_byte(&is);
477 |
478 | /* There may have been an E8 byte in the block. */
479 | may_have_e8_byte = 1;
480 | }
481 | out_next += block_size;
482 | }
483 |
484 | /* Postprocess the data unless it cannot possibly contain E8 bytes. */
485 | if (may_have_e8_byte)
486 | lzx_postprocess(uncompressed_data, uncompressed_size);
487 |
488 | return 0;
489 | }
490 |
491 | bool
492 | lzx_init_decompressor(size_t max_block_size, struct lzx_decompressor *d)
493 | {
494 | /* A window order of 0 is treated as failure and leaves @d untouched. */
495 | const unsigned order = lzx_get_window_order(max_block_size);
496 | unsigned slot;
497 |
498 | if (!order)
499 | return false;
500 |
501 | d->window_order = order;
502 | d->num_main_syms = lzx_get_num_main_syms(order);
503 |
504 | /* Both per-decompressor tables must be the same size as the global one. */
505 | STATIC_ASSERT(sizeof(d->extra_offset_bits) ==
506 | sizeof(lzx_extra_offset_bits));
507 | STATIC_ASSERT(sizeof(d->extra_offset_bits_minus_aligned) ==
508 | sizeof(lzx_extra_offset_bits));
509 |
510 | /* Start from the standard extra-bit counts, then subtract the bits that
511 | * aligned offset blocks entropy-code, for every slot that has them. */
512 | memcpy(d->extra_offset_bits_minus_aligned, lzx_extra_offset_bits,
513 | sizeof(lzx_extra_offset_bits));
514 | for (slot = LZX_MIN_ALIGNED_OFFSET_SLOT; slot < LZX_MAX_OFFSET_SLOTS; slot++)
515 | d->extra_offset_bits_minus_aligned[slot] -= LZX_NUM_ALIGNED_OFFSET_BITS;
516 |
517 | return true;
518 | }
519 |
--------------------------------------------------------------------------------
/src/ebiggers/decompress_common.h:
--------------------------------------------------------------------------------
1 | /*
2 | * decompress_common.h
3 | *
4 | * Header for decompression code shared by multiple compression formats.
5 | *
6 | * The following copying information applies to this specific source code file:
7 | *
8 | * Written in 2012-2016 by Eric Biggers
9 | *
10 | * To the extent possible under law, the author(s) have dedicated all copyright
11 | * and related and neighboring rights to this software to the public domain
12 | * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
13 | * Dedication (the "CC0").
14 | *
15 | * This software is distributed in the hope that it will be useful, but WITHOUT
16 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 | * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
18 | *
19 | * You should have received a copy of the CC0 along with this software; if not
20 | * see <http://creativecommons.org/publicdomain/zero/1.0/>.
21 | */
22 |
23 | #ifndef _DECOMPRESS_COMMON_H
24 | #define _DECOMPRESS_COMMON_H
25 |
26 | #include
27 |
28 | #include "common_defs.h"
29 |
30 | /******************************************************************************/
31 | /* Input bitstream for XPRESS and LZX */
32 | /*----------------------------------------------------------------------------*/
33 |
34 | /* Structure that encapsulates a block of in-memory data being interpreted as a
35 | * stream of bits, optionally with interwoven literal bytes. Bits are assumed
36 | * to be stored in little endian 16-bit coding units, with the bits ordered high
37 | * to low. */
38 | struct input_bitstream {
39 |
40 | /* Bits that have been read from the input buffer. The bits are
41 | * left-justified; the next bit is always bit 31. */
42 | uint32_t bitbuf;
43 |
44 | /* Number of bits currently held in @bitbuf (forced to 32 when a refill runs out of input). */
45 | uint32_t bitsleft;
46 |
47 | /* Pointer to the next byte to be retrieved from the input buffer. */
48 | const uint8_t *next;
49 |
50 | /* Pointer past the end of the input buffer. */
51 | const uint8_t *end;
52 | };
53 |
54 | /* Point @is at the @size bytes starting at @buffer, with an empty bit buffer. */
55 | static forceinline void
56 | init_input_bitstream(struct input_bitstream *is, const void *buffer, uint32_t size)
57 | {
58 | const uint8_t * const begin = buffer;
59 | is->next = begin;
60 | is->end = begin + size;
61 | is->bitsleft = is->bitbuf = 0;
62 | }
63 |
64 | /* Note: for performance reasons, the following methods don't return error codes
65 | * to the caller if the input buffer is overrun. Instead, they just assume that
66 | * all overrun data is zeroes. This has no effect on well-formed compressed
67 | * data. The only disadvantage is that bad compressed data may go undetected,
68 | * but even this is irrelevant if higher level code checksums the uncompressed
69 | * data anyway. */
70 |
71 | /* Ensure the bit buffer variable for the bitstream contains at least @num_bits
72 | * bits (at most 17). Afterwards bitstream_peek_bits() / bitstream_remove_bits()
73 | * may be used on up to @num_bits bits. Past the end of input, zeroes are supplied. */
74 | static forceinline void
75 | bitstream_ensure_bits(struct input_bitstream *is, const unsigned num_bits)
76 | {
77 | /* Coding units are little endian and read bytewise: @next may be odd. */
78 |
79 | if (is->bitsleft >= num_bits)
80 | return;
81 |
82 | if (unlikely(is->end - is->next < 2))
83 | goto overflow;
84 |
85 | is->bitbuf |= (uint32_t)(is->next[0] | (is->next[1] << 8)) << (16 - is->bitsleft);
86 | is->next += 2;
87 | is->bitsleft += 16;
88 |
89 | if (unlikely(num_bits == 17 && is->bitsleft == 16)) { /* one unit was not enough */
90 | if (unlikely(is->end - is->next < 2))
91 | goto overflow;
92 |
93 | is->bitbuf |= (uint32_t)(is->next[0] | (is->next[1] << 8));
94 | is->next += 2;
95 | is->bitsleft = 32;
96 | }
97 |
98 | return;
99 |
100 | overflow: /* input exhausted: claim a full buffer so callers read the remaining bits as zeroes */
101 | is->bitsleft = 32;
102 | }
103 |
104 | /* Return the next @num_bits bits without removing them; bitstream_ensure_bits()
105 | * must already have buffered that many. The shift is done in two steps so that
106 | * @num_bits == 0 yields 0 rather than an undefined full-width shift. */
107 | static forceinline uint32_t
108 | bitstream_peek_bits(const struct input_bitstream *is, const unsigned num_bits)
109 | {
110 | return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
111 | }
112 |
113 | /* Discard the next @num_bits bits of the bitstream. The caller must already
114 | * have buffered at least that many bits with bitstream_ensure_bits(), since no
115 | * refill or underflow check is done here. */
116 | static forceinline void
117 | bitstream_remove_bits(struct input_bitstream *is, unsigned num_bits)
118 | {
119 | is->bitsleft -= num_bits;
120 | is->bitbuf <<= num_bits;
121 | }
122 |
123 | /* Remove and return @num_bits bits from the bitstream. There must be at least
124 | * @num_bits remaining in the buffer variable, from a previous call to
125 | * bitstream_ensure_bits(). */
126 | static forceinline uint32_t
127 | bitstream_pop_bits(struct input_bitstream *is, unsigned num_bits)
128 | {
129 | uint32_t bits = bitstream_peek_bits(is, num_bits);
130 | bitstream_remove_bits(is, num_bits);
131 | return bits;
132 | }
133 |
134 | /* Read and return the next @num_bits bits from the bitstream. */
135 | static forceinline uint32_t
136 | bitstream_read_bits(struct input_bitstream *is, unsigned num_bits)
137 | {
138 | bitstream_ensure_bits(is, num_bits);
139 | return bitstream_pop_bits(is, num_bits);
140 | }
141 |
142 | /* Read and return the next literal byte embedded in the bitstream. */
143 | static forceinline uint8_t
144 | bitstream_read_byte(struct input_bitstream *is)
145 | {
146 | if (unlikely(is->end == is->next))
147 | return 0;
148 | return *is->next++;
149 | }
150 |
151 | /* Read and return the next 16-bit integer embedded in the bitstream. */
152 | static forceinline uint16_t
153 | bitstream_read_u16(struct input_bitstream *is)
154 | {
155 | uint16_t v;
156 |
157 | if (unlikely(is->end - is->next < 2))
158 | return 0;
159 | v = *(uint16_t*)is->next;
160 | is->next += 2;
161 | return v;
162 | }
163 |
164 | /* Read and return the next 32-bit integer embedded in the bitstream. */
165 | static forceinline uint32_t
166 | bitstream_read_u32(struct input_bitstream *is)
167 | {
168 | uint32_t v;
169 |
170 | if (unlikely(is->end - is->next < 4))
171 | return 0;
172 | v = *(uint32_t*)is->next;
173 | is->next += 4;
174 | return v;
175 | }
176 |
177 | /* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
178 | * Return 0 if there were enough bytes remaining in the input, otherwise -1. */
179 | static forceinline int
180 | bitstream_read_bytes(struct input_bitstream *is, void *dst_buffer, size_t count)
181 | {
182 | if (unlikely(is->end - is->next < count))
183 | return -1;
184 | memcpy(dst_buffer, is->next, count);
185 | is->next += count;
186 | return 0;
187 | }
188 |
189 | /* Align the input bitstream on a coding-unit boundary. */
190 | static forceinline void
191 | bitstream_align(struct input_bitstream *is)
192 | {
193 | is->bitsleft = 0;
194 | is->bitbuf = 0;
195 | }
196 |
197 | /******************************************************************************/
198 | /* Huffman decoding */
199 | /*----------------------------------------------------------------------------*/
200 |
/*
 * Layout of a decode table entry.
 *
 * An entry is 16 bits wide and holds two fields: 'symbol' in the high 12 bits
 * and 'length' in the low 4 bits.  How the fields are interpreted depends on
 * where the entry lives:
 *
 *	Root table entry that is *not* a subtable pointer:
 *		symbol: the decoded symbol
 *		length: the codeword length in bits
 *
 *	Root table entry that *is* a subtable pointer:
 *		symbol: index at which the subtable starts
 *		length: number of bits used to index the subtable
 *
 *	Subtable entry:
 *		symbol: the decoded symbol
 *		length: the codeword length in bits, minus the number of bits
 *			used to index the root table
 */
#define DECODE_TABLE_SYMBOL_SHIFT	4
#define DECODE_TABLE_MAX_LENGTH		((1 << DECODE_TABLE_SYMBOL_SHIFT) - 1)
#define DECODE_TABLE_LENGTH_MASK	DECODE_TABLE_MAX_LENGTH
#define DECODE_TABLE_MAX_SYMBOL		((1 << (16 - DECODE_TABLE_SYMBOL_SHIFT)) - 1)

/* Pack a 'symbol' and a 'length' into a single decode table entry. */
#define MAKE_DECODE_TABLE_ENTRY(symbol, length)	\
	(((symbol) << DECODE_TABLE_SYMBOL_SHIFT) | (length))
225 |
226 | /*
227 | * Read and return the next Huffman-encoded symbol from the given bitstream
228 | * using the given decode table.
229 | *
230 | * If the input data is exhausted, then the Huffman symbol will be decoded as if
231 | * the missing bits were all zeroes.
232 | *
233 | * XXX: This is mostly duplicated in lzms_decode_huffman_symbol() in
234 | * lzms_decompress.c; keep them in sync!
235 | */
236 | static forceinline unsigned
237 | read_huffsym(struct input_bitstream *is, const uint16_t decode_table[],
238 | unsigned table_bits, unsigned max_codeword_len)
239 | {
240 | unsigned entry;
241 | unsigned symbol;
242 | unsigned length;
243 |
244 | /* Preload the bitbuffer with 'max_codeword_len' bits so that we're
245 | * guaranteed to be able to fully decode a codeword. */
246 | bitstream_ensure_bits(is, max_codeword_len);
247 |
248 | /* Index the root table by the next 'table_bits' bits of input. */
249 | entry = decode_table[bitstream_peek_bits(is, table_bits)];
250 |
251 | /* Extract the "symbol" and "length" from the entry. */
252 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
253 | length = entry & DECODE_TABLE_LENGTH_MASK;
254 |
255 | /* If the root table is indexed by the full 'max_codeword_len' bits,
256 | * then there cannot be any subtables, and this will be known at compile
257 | * time. Otherwise, we must check whether the decoded symbol is really
258 | * a subtable pointer. If so, we must discard the bits with which the
259 | * root table was indexed, then index the subtable by the next 'length'
260 | * bits of input to get the real entry. */
261 | if (max_codeword_len > table_bits &&
262 | entry >= (1U << (table_bits + DECODE_TABLE_SYMBOL_SHIFT)))
263 | {
264 | /* Subtable required */
265 | bitstream_remove_bits(is, table_bits);
266 | entry = decode_table[symbol + bitstream_peek_bits(is, length)];
267 | symbol = entry >> DECODE_TABLE_SYMBOL_SHIFT;
268 | length = entry & DECODE_TABLE_LENGTH_MASK;
269 | }
270 |
271 | /* Discard the bits (or the remaining bits, if a subtable was required)
272 | * of the codeword. */
273 | bitstream_remove_bits(is, length);
274 |
275 | /* Return the decoded symbol. */
276 | return symbol;
277 | }
278 |
/*
 * DECODE_TABLE_ENOUGH() evaluates to the maximum number of decode table
 * entries, subtable entries included, that decoding a given Huffman code may
 * require.  The result depends on three parameters:
 *
 *	num_syms: the maximum number of symbols in the code
 *	table_bits: the number of bits with which the root table will be indexed
 *	max_codeword_len: the maximum allowed codeword length in the code
 *
 * The values come from zlib's utility program 'enough', run with the three
 * arguments 'num_syms', 'table_bits' and 'max_codeword_len'.  The results for
 * the combinations in use are tabulated below so that the lookup happens at
 * compilation time.  An unknown combination results in a compilation error.
 * To fix that, compute the missing value with 'enough' and add it to the
 * table.  If the error persists, the requested parameters most likely violate
 * a constraint and cannot be used, at least not without other changes to the
 * decode table --- see DECODE_TABLE_SIZE().
 */

/* True when the requested parameters equal the tabulated triple (n, t, m). */
#define DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, n, t, m) \
	((num_syms) == (n) && (table_bits) == (t) && (max_codeword_len) == (m))

#define DECODE_TABLE_ENOUGH(num_syms, table_bits, max_codeword_len) ( \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,   8,  7, 15) ?  128 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,   8,  5,  7) ?   36 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,   8,  6,  7) ?   66 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,   8,  7,  7) ?  128 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,  20,  5, 15) ? 1062 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,  20,  6, 15) ?  582 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,  20,  7, 15) ?  390 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,  54,  9, 15) ?  618 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len,  54, 10, 15) ? 1098 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 249,  9, 16) ?  878 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 249, 10, 16) ? 1326 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 249, 11, 16) ? 2318 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 256,  9, 15) ?  822 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 256, 10, 15) ? 1302 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 256, 11, 15) ? 2310 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 512, 10, 15) ? 1558 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 512, 11, 15) ? 2566 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 512, 12, 15) ? 4606 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 656, 10, 16) ? 1734 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 656, 11, 16) ? 2726 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 656, 12, 16) ? 4758 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 799,  9, 15) ? 1366 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 799, 10, 15) ? 1846 : \
	DECODE_TABLE_PARAMS_EQ(num_syms, table_bits, max_codeword_len, 799, 11, 15) ? 2854 : \
	-1)
325 |
/*
 * Build into @decode_table the table consumed by read_huffsym().  This header
 * only declares the function; it is defined elsewhere.
 *
 * @num_syms, @table_bits and @max_codeword_len carry the same meaning as the
 * parameters of DECODE_TABLE_ENOUGH().
 *
 * NOTE(review): presumably @lens holds one codeword length per symbol and
 * @working_space is caller-provided scratch memory; the required sizes and
 * the meaning of the return value are not visible from this header -- confirm
 * against the definition.
 */
extern int
make_huffman_decode_table(uint16_t *decode_table,
			  unsigned num_syms,
			  unsigned table_bits,
			  const uint8_t *lens,
			  unsigned max_codeword_len,
			  uint16_t *working_space);
330 |
331 | /******************************************************************************/
332 | /* LZ match copying */
333 | /*----------------------------------------------------------------------------*/
334 |
335 | static forceinline void
336 | copy_word_unaligned(const void *src, void *dst)
337 | {
338 | *(machine_word_t*)dst = *(machine_word_t*)src;
339 | }
340 |
341 | static forceinline machine_word_t
342 | repeat_u16(uint16_t b)
343 | {
344 | machine_word_t v = b;
345 |
346 | STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);
347 | v |= v << 16;
348 | v |= v << ((WORDBITS == 64) ? 32 : 0);
349 | return v;
350 | }
351 |
352 | static forceinline machine_word_t
353 | repeat_byte(uint8_t b)
354 | {
355 | return repeat_u16(((uint16_t)b << 8) | b);
356 | }
357 |
358 | /*
359 | * Copy an LZ77 match of 'length' bytes from the match source at 'out_next -
360 | * offset' to the match destination at 'out_next'. The source and destination
361 | * may overlap.
362 | *
363 | * This handles validating the length and offset. It is validated that the
364 | * beginning of the match source is '>= out_begin' and that end of the match
365 | * destination is '<= out_end'. The return value is 0 if the match was valid
366 | * (and was copied), otherwise -1.
367 | *
368 | * 'min_length' is a hint which specifies the minimum possible match length.
369 | * This should be a compile-time constant.
370 | */
371 | static forceinline int
372 | lz_copy(uint32_t length, uint32_t offset, uint8_t *out_begin, uint8_t *out_next, uint8_t *out_end,
373 | uint32_t min_length)
374 | {
375 | const uint8_t *src;
376 | uint8_t *end;
377 |
378 | /* Validate the offset. */
379 | if (unlikely(offset > out_next - out_begin))
380 | return -1;
381 |
382 | /*
383 | * Fast path: copy a match which is no longer than a few words, is not
384 | * overlapped such that copying a word at a time would produce incorrect
385 | * results, and is not too close to the end of the buffer. Note that
386 | * this might copy more than the length of the match, but that's okay in
387 | * this scenario.
388 | */
389 | src = out_next - offset;
390 | if (UNALIGNED_ACCESS_IS_FAST && length <= 3 * WORDBYTES &&
391 | offset >= WORDBYTES && out_end - out_next >= 3 * WORDBYTES)
392 | {
393 | copy_word_unaligned(src + WORDBYTES*0, out_next + WORDBYTES*0);
394 | copy_word_unaligned(src + WORDBYTES*1, out_next + WORDBYTES*1);
395 | copy_word_unaligned(src + WORDBYTES*2, out_next + WORDBYTES*2);
396 | return 0;
397 | }
398 |
399 | /* Validate the length. This isn't needed in the fast path above, due
400 | * to the additional conditions tested, but we do need it here. */
401 | if (unlikely(length > out_end - out_next))
402 | return -1;
403 | end = out_next + length;
404 |
405 | /*
406 | * Try to copy one word at a time. On i386 and x86_64 this is faster
407 | * than copying one byte at a time, unless the data is near-random and
408 | * all the matches have very short lengths. Note that since this
409 | * requires unaligned memory accesses, it won't necessarily be faster on
410 | * every architecture.
411 | *
412 | * Also note that we might copy more than the length of the match. For
413 | * example, if a word is 8 bytes and the match is of length 5, then
414 | * we'll simply copy 8 bytes. This is okay as long as we don't write
415 | * beyond the end of the output buffer, hence the check for (out_end -
416 | * end >= WORDBYTES - 1).
417 | */
418 | if (UNALIGNED_ACCESS_IS_FAST && likely(out_end - end >= WORDBYTES - 1))
419 | {
420 | if (offset >= WORDBYTES) {
421 | /* The source and destination words don't overlap. */
422 | do {
423 | copy_word_unaligned(src, out_next);
424 | src += WORDBYTES;
425 | out_next += WORDBYTES;
426 | } while (out_next < end);
427 | return 0;
428 | } else if (offset == 1) {
429 | /* Offset 1 matches are equivalent to run-length
430 | * encoding of the previous byte. This case is common
431 | * if the data contains many repeated bytes. */
432 | machine_word_t v = repeat_byte(*(out_next - 1));
433 | do {
434 | *(machine_word_t*)out_next = v;
435 | src += WORDBYTES;
436 | out_next += WORDBYTES;
437 | } while (out_next < end);
438 | return 0;
439 | }
440 | /*
441 | * We don't bother with special cases for other 'offset <
442 | * WORDBYTES', which are usually rarer than 'offset == 1'.
443 | * Extra checks will just slow things down. Actually, it's
444 | * possible to handle all the 'offset < WORDBYTES' cases using
445 | * the same code, but it still becomes more complicated doesn't
446 | * seem any faster overall; it definitely slows down the more
447 | * common 'offset == 1' case.
448 | */
449 | }
450 |
451 | /* Fall back to a bytewise copy. */
452 | if (min_length >= 2)
453 | *out_next++ = *src++;
454 | if (min_length >= 3)
455 | *out_next++ = *src++;
456 | if (min_length >= 4)
457 | *out_next++ = *src++;
458 | do {
459 | *out_next++ = *src++;
460 | } while (out_next != end);
461 | return 0;
462 | }
463 |
464 | #endif /* _DECOMPRESS_COMMON_H */
465 |
--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | <one line to give the program's name and a brief idea of what it does.>
294 | Copyright (C) <year> <name of author>
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | <signature of Ty Coon>, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
--------------------------------------------------------------------------------
/src/ntfs.cpp:
--------------------------------------------------------------------------------
1 | /* Copyright (c) Mark Harmstone 2023
2 | *
3 | * This file is part of ntfs-efi.
4 | *
5 | * ntfs-efi is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU General Public Licence as published by
7 | * the Free Software Foundation, either version 2 of the Licence, or
8 | * (at your option) any later version.
9 | *
10 | * ntfs-efi is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU General Public Licence for more details.
14 | *
15 | * You should have received a copy of the GNU General Public Licence
16 | * along with ntfs-efi. If not, see <http://www.gnu.org/licenses/>. */
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include "ntfs.h"
26 | #include "misc.h"
27 | #include "quibbleproto.h"
28 | #include "ebiggers/system_compression.h"
29 |
// Evaluates x into a discarded void expression so an intentionally unused parameter does not warn.
#define UNUSED(x) (void)(x)
// Rounds n up to the next multiple of a, leaving n alone when it is already aligned.
// The mask arithmetic only yields a multiple of a when a is a power of two.
// Both arguments are evaluated more than once, so they must be free of side effects.
#define sector_align(n, a) ((n)&((a)-1)?(((n)+(a))&~((a)-1)):(n))
32 |
33 | using namespace std;
34 |
// One run of clusters belonging to an attribute, kept as a node in a LIST_ENTRY list.
// read_from_mappings() multiplies lcn, vcn and length by the cluster size to get byte offsets.
struct mapping {
    LIST_ENTRY list_entry; // linkage into the owning list; _CR() recovers the mapping from it
    uint64_t lcn; // first cluster of the run on the volume; read_from_mappings() treats 0 as a sparse run
    uint64_t vcn; // first cluster of the run within the attribute
    uint64_t length; // number of clusters in the run
};
41 |
// State kept for one NTFS volume.
struct volume {
    ~volume();

    EFI_SIMPLE_FILE_SYSTEM_PROTOCOL proto;
    EFI_QUIBBLE_PROTOCOL quibble_proto;
    NTFS_BOOT_SECTOR* boot_sector; // source of BytesPerSector and SectorsPerCluster for all offset arithmetic
    EFI_HANDLE controller;
    EFI_BLOCK_IO_PROTOCOL* block; // read_from_mappings() reads from the device through this
    EFI_DISK_IO_PROTOCOL* disk_io;
    uint64_t file_record_size; // size in bytes of one MFT file record
    LIST_ENTRY mft_mappings; // list of mapping nodes used when reading MFT records
    char16_t upcase[0x10000]; // per-code-unit table cmp_filenames() applies before comparing names
};
55 |
// State behind one open EFI_FILE_PROTOCOL handle.
// file_open() zero-fills the storage before constructing the object, so every member other
// than vol starts out as zero / nullptr / false.
struct inode {
    inode(volume& vol) : vol(vol) { }
    ~inode();

    EFI_FILE_PROTOCOL proto; // embedded protocol; _CR() recovers the inode from a pointer to it
    uint64_t ino; // MFT record number of the file or directory
    volume& vol; // volume this handle belongs to
    bool inode_loaded; // read_dir() calls load_inode() while this is false; the destructor skips
                       // index_root and the three lists when it is false
    STANDARD_INFORMATION standard_info;
    uint64_t size; // file size in bytes; also the size of the buffer do_xpress_decompress() allocates
    uint64_t phys_size;
    uint64_t vdl; // NOTE(review): presumably the valid data length - confirm against load_inode()
    uint64_t position; // current read position; read_dir() bumps it once per entry returned
    LIST_ENTRY index_mappings; // mapping nodes used to read index records of a directory
    index_root* index_root; // pool copy of the directory's index root, freed by the destructor
    LIST_ENTRY levels; // stack of btree_level nodes describing the directory walk in progress
    bool is_dir;
    size_t name_len; // length of name in char16_t units
    char16_t* name; // pool-allocated path without a terminator; nullptr for the root
    bool data_loaded;
    LIST_ENTRY data_mappings; // mapping nodes freed by the destructor
    uint8_t* data; // pool buffer of file contents (filled by do_xpress_decompress()), or nullptr
};
79 |
// One level of a directory B-tree walk, linked into inode::levels (deepest level at the tail).
struct btree_level {
    LIST_ENTRY list_entry; // linkage into inode::levels
    const index_entry* ent; // entry the walk is currently positioned at on this level
    uint8_t data[]; // the index record read for this level; read_dir() allocates the root level
                    // with no storage here because its entries live in inode::index_root
};
85 |
// Driver-wide state.
static EFI_SYSTEM_TABLE* systable;
static EFI_BOOT_SERVICES* bs; // boot services table used for pool allocation and protocol calls
static EFI_DRIVER_BINDING_PROTOCOL drvbind;
static EFI_QUIBBLE_INFO_PROTOCOL* info_proto = nullptr; // when non-null, do_print() sends messages through it

// Forward declarations for helpers that are called before they are defined.
static void populate_file_handle(EFI_FILE_PROTOCOL* h);
static EFI_STATUS load_inode(inode& ino);
static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset,
uint8_t* buf, uint64_t size);
static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length,
unsigned int sector_size);
static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att,
LIST_ENTRY* mappings);
// NOTE(review): the constraint on func appears to have lost its template arguments; callers
// pass a callable taking (const ATTRIBUTE_RECORD_HEADER&, string_view, u16string_view).
static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record,
invocable auto func);
101 |
102 | void do_print(const char* s) {
103 | if (info_proto)
104 | info_proto->Print(s);
105 | }
106 |
107 | void do_print_error(const char* func, EFI_STATUS Status) {
108 | char s[255], *p;
109 |
110 | p = stpcpy(s, func);
111 | p = stpcpy(p, " returned ");
112 | p = stpcpy(p, error_string(Status));
113 | p = stpcpy(p, "\n");
114 |
115 | do_print(s);
116 | }
117 |
118 | static EFI_STATUS drv_supported(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
119 | EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) {
120 | EFI_STATUS Status;
121 | EFI_DISK_IO_PROTOCOL* disk_io;
122 | EFI_GUID guid_disk = EFI_DISK_IO_PROTOCOL_GUID;
123 | EFI_GUID guid_block = EFI_BLOCK_IO_PROTOCOL_GUID;
124 |
125 | UNUSED(RemainingDevicePath);
126 |
127 | Status = bs->OpenProtocol(ControllerHandle, &guid_disk, (void**)&disk_io, This->DriverBindingHandle,
128 | ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER);
129 |
130 | if (EFI_ERROR(Status))
131 | return Status;
132 |
133 | bs->CloseProtocol(ControllerHandle, &guid_disk, This->DriverBindingHandle, ControllerHandle);
134 |
135 | return bs->OpenProtocol(ControllerHandle, &guid_block, NULL, This->DriverBindingHandle,
136 | ControllerHandle, EFI_OPEN_PROTOCOL_TEST_PROTOCOL);
137 | }
138 |
// Three-way comparison of two names after mapping every code unit through
// the upcase table.
//
// upcase   - table indexed by char16_t value
// fn1, fn2 - names to compare
//
// Returns -1 when fn1 sorts first, 1 when fn2 sorts first and 0 when they are
// equal; a name that is a strict prefix of the other sorts first.
static int cmp_filenames(const char16_t* upcase, u16string_view fn1, u16string_view fn2) {
    // FIXME - what about directories with case-sensitivity flag set?

    const size_t shared = fn1.size() < fn2.size() ? fn1.size() : fn2.size();

    for (size_t i = 0; i < shared; i++) {
        const char16_t a = upcase[fn1[i]];
        const char16_t b = upcase[fn2[i]];

        if (a != b)
            return a < b ? -1 : 1;
    }

    if (fn1.size() == fn2.size())
        return 0;

    return fn1.size() < fn2.size() ? -1 : 1;
}
163 |
164 | static EFI_STATUS find_file_in_dir(const volume& vol, uint64_t dir, u16string_view name, uint64_t* inode) {
165 | EFI_STATUS Status, Status2;
166 | FILE_RECORD_SEGMENT_HEADER* file;
167 | index_root* ir = nullptr;
168 | LIST_ENTRY index_mappings;
169 | const index_entry* ent;
170 | uint8_t* scratch = nullptr;
171 |
172 | InitializeListHead(&index_mappings);
173 |
174 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file);
175 | if (EFI_ERROR(Status)) {
176 | do_print_error("AllocatePool", Status);
177 | return Status;
178 | }
179 |
180 | Status = read_from_mappings(vol, &vol.mft_mappings, dir * vol.file_record_size,
181 | (uint8_t*)file, vol.file_record_size);
182 | if (EFI_ERROR(Status)) {
183 | bs->FreePool(file);
184 | do_print_error("read_from_mappings", Status);
185 | return Status;
186 | }
187 |
188 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
189 | do_print("Signature was not FILE\n");
190 | bs->FreePool(file);
191 | return EFI_INVALID_PARAMETER;
192 | }
193 |
194 | Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size,
195 | vol.boot_sector->BytesPerSector);
196 |
197 | if (EFI_ERROR(Status)) {
198 | do_print_error("process_fixups", Status);
199 | bs->FreePool(file);
200 | return Status;
201 | }
202 |
203 | Status2 = loop_through_atts(vol, dir, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
204 | switch (att.TypeCode) {
205 | case ntfs_attribute::INDEX_ALLOCATION:
206 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
207 | Status = read_mappings(vol, att, &index_mappings);
208 | if (EFI_ERROR(Status)) {
209 | do_print_error("read_mappings", Status);
210 | return false;
211 | }
212 | }
213 | break;
214 |
215 | case ntfs_attribute::INDEX_ROOT:
216 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ir) {
217 | Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ir);
218 | if (EFI_ERROR(Status)) {
219 | do_print_error("AllocatePool", Status);
220 | return false;
221 | }
222 |
223 | memcpy(ir, res_data.data(), res_data.size());
224 | }
225 | break;
226 |
227 | default:
228 | break;
229 | }
230 |
231 | return true;
232 | });
233 |
234 | if (EFI_ERROR(Status2)) {
235 | do_print_error("loop_through_atts", Status2);
236 | Status = Status2;
237 | }
238 |
239 | if (EFI_ERROR(Status))
240 | goto end;
241 |
242 | if (!ir) {
243 | Status = EFI_NOT_FOUND;
244 | goto end;
245 | }
246 |
247 | ent = reinterpret_cast((uint8_t*)&ir->node_header + ir->node_header.first_entry);
248 |
249 | while (true) {
250 | string_view data((const char*)ent + sizeof(index_entry), ent->stream_length);
251 |
252 | if (data.size() >= offsetof(FILE_NAME, FileName)) {
253 | const auto& fn = *(FILE_NAME*)data.data();
254 | u16string_view ent_name(fn.FileName, fn.FileNameLength);
255 |
256 | auto cmp = cmp_filenames(vol.upcase, name, ent_name);
257 |
258 | if (cmp == 0) { // found
259 | *inode = ent->file_reference.SegmentNumber;
260 | Status = EFI_SUCCESS;
261 | goto end;
262 | } else if (cmp == 1) { // skip to next
263 | ent = reinterpret_cast((uint8_t*)ent + ent->entry_length);
264 | continue;
265 | }
266 |
267 | if (cmp == -1 && !(ent->flags & INDEX_ENTRY_SUBNODE)) {
268 | Status = EFI_NOT_FOUND;
269 | goto end;
270 | }
271 | }
272 |
273 | if (ent->flags & INDEX_ENTRY_SUBNODE) { // if subnode, descend
274 | uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)ent + ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
275 |
276 | if (ir->bytes_per_index_record < vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster)
277 | vcn *= vol.boot_sector->BytesPerSector;
278 | else
279 | vcn *= (uint64_t)vol.boot_sector->BytesPerSector * (uint64_t)vol.boot_sector->SectorsPerCluster;
280 |
281 | if (!scratch) {
282 | Status = bs->AllocatePool(EfiBootServicesData, ir->bytes_per_index_record,
283 | (void**)&scratch);
284 | if (EFI_ERROR(Status)) {
285 | do_print_error("AllocatePool", Status);
286 | goto end;
287 | }
288 | }
289 |
290 | Status = read_from_mappings(vol, &index_mappings, vcn, scratch, ir->bytes_per_index_record);
291 | if (EFI_ERROR(Status)) {
292 | do_print_error("read_from_mappings", Status);
293 | goto end;
294 | }
295 |
296 | auto rec = reinterpret_cast(scratch);
297 |
298 | if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) {
299 | do_print("Signature was not INDX\n");
300 | Status = EFI_INVALID_PARAMETER;
301 | goto end;
302 | }
303 |
304 | Status = process_fixups(&rec->MultiSectorHeader, ir->bytes_per_index_record,
305 | vol.boot_sector->BytesPerSector);
306 | if (EFI_ERROR(Status)) {
307 | do_print_error("process_fixups", Status);
308 | goto end;
309 | }
310 |
311 | ent = reinterpret_cast((uint8_t*)&rec->header + rec->header.first_entry);
312 |
313 | continue;
314 | }
315 |
316 | if (ent->flags & INDEX_ENTRY_LAST) {
317 | Status = EFI_NOT_FOUND;
318 | goto end;
319 | }
320 |
321 | ent = reinterpret_cast((uint8_t*)ent + ent->entry_length);
322 | }
323 |
324 | end:
325 | if (ir)
326 | bs->FreePool(ir);
327 |
328 | if (scratch)
329 | bs->FreePool(scratch);
330 |
331 | bs->FreePool(file);
332 |
333 | return Status;
334 | }
335 |
// Counts the backslash-separated components of a path.
//
// An empty view has no components. A leading or doubled backslash contributes
// an empty component; a single trailing backslash does not add one.
static size_t count_path_parts(u16string_view v) {
    size_t parts = 0;
    size_t start = 0;

    while (start < v.size()) {
        parts++;

        const size_t sep = v.find(u'\\', start);
        if (sep == u16string_view::npos)
            break;

        start = sep + 1;
    }

    return parts;
}
350 |
// Splits a path on backslashes and writes one view per component to *p,
// advancing p past the last view written.
//
// The views alias v's storage. The number written equals what
// count_path_parts() reports for the same input, so the caller sizes the
// output array with that function.
static void extract_parts(u16string_view v, u16string_view*& p) {
    size_t start = 0;

    while (start < v.size()) {
        const size_t sep = v.find(u'\\', start);

        if (sep == u16string_view::npos) {
            // final component: everything that is left
            *p++ = v.substr(start);
            return;
        }

        *p++ = v.substr(start, sep - start);
        start = sep + 1;
    }
}
364 |
365 | static EFI_STATUS normalize_path(u16string_view fn, u16string_view parent, char16_t*& name, size_t& name_len) {
366 | EFI_STATUS Status;
367 | bool from_root = false;
368 | size_t num_parts = 0;
369 | u16string_view* parts;
370 | bool first;
371 |
372 | if (fn.front() == '\\') {
373 | from_root = true;
374 | fn = u16string_view(fn.data() + 1, fn.size() - 1);
375 | }
376 |
377 | if (parent.empty())
378 | from_root = true;
379 |
380 | if (!from_root)
381 | num_parts = count_path_parts(parent);
382 |
383 | num_parts += count_path_parts(fn);
384 |
385 | if (num_parts == 0) {
386 | name = nullptr;
387 | name_len = 0;
388 | return EFI_SUCCESS;
389 | }
390 |
391 | Status = bs->AllocatePool(EfiBootServicesData, num_parts * sizeof(u16string_view), (void**)&parts);
392 | if (EFI_ERROR(Status)) {
393 | do_print_error("AllocatePool", Status);
394 | return Status;
395 | }
396 |
397 | {
398 | u16string_view* p = parts;
399 |
400 | if (!from_root)
401 | extract_parts(parent, p);
402 |
403 | extract_parts(fn, p);
404 | }
405 |
406 | for (size_t i = 0; i < num_parts; i++) {
407 | if (parts[i] == u".")
408 | parts[i] = u"";
409 | else if (parts[i] == u"..") {
410 | parts[i] = u"";
411 |
412 | if (i == 0) {
413 | bs->FreePool(parts);
414 | return EFI_INVALID_PARAMETER;
415 | }
416 |
417 | auto j = i - 1;
418 | while (true) {
419 | if (!parts[j].empty()) {
420 | parts[j] = u"";
421 | break;
422 | }
423 |
424 | if (j == 0) {
425 | bs->FreePool(parts);
426 | return EFI_INVALID_PARAMETER;
427 | }
428 |
429 | j--;
430 | }
431 | }
432 | }
433 |
434 | name_len = 0;
435 | first = true;
436 | for (size_t i = 0; i < num_parts; i++) {
437 | if (parts[i].empty())
438 | continue;
439 |
440 | if (!first)
441 | name_len++;
442 |
443 | name_len += parts[i].size();
444 | first = false;
445 | }
446 |
447 | if (name_len == 0) {
448 | bs->FreePool(parts);
449 | name = nullptr;
450 | return EFI_SUCCESS;
451 | }
452 |
453 | Status = bs->AllocatePool(EfiBootServicesData, name_len * sizeof(char16_t), (void**)&name);
454 | if (EFI_ERROR(Status)) {
455 | do_print_error("AllocatePool", Status);
456 | bs->FreePool(parts);
457 | return Status;
458 | }
459 |
460 | {
461 | char16_t* n = name;
462 |
463 | first = true;
464 | for (size_t i = 0; i < num_parts; i++) {
465 | if (parts[i].empty())
466 | continue;
467 |
468 | if (!first) {
469 | *n = u'\\';
470 | n++;
471 | }
472 |
473 | memcpy(n, parts[i].data(), parts[i].size() * sizeof(char16_t));
474 | n += parts[i].size();
475 | first = false;
476 | }
477 | }
478 |
479 | bs->FreePool(parts);
480 |
481 | return EFI_SUCCESS;
482 | }
483 |
484 | static EFI_STATUS EFIAPI file_open(struct _EFI_FILE_HANDLE* File, struct _EFI_FILE_HANDLE** NewHandle, CHAR16* FileName,
485 | UINT64 OpenMode, UINT64 Attributes) {
486 | EFI_STATUS Status;
487 | inode* file = _CR(File, inode, proto);
488 | uint64_t inode_num;
489 | inode* ino;
490 | char16_t* name;
491 | size_t name_len;
492 |
493 | UNUSED(Attributes);
494 |
495 | if (OpenMode & EFI_FILE_MODE_CREATE)
496 | return EFI_UNSUPPORTED;
497 |
498 | if (FileName[0] == L'\\' && FileName[1] == 0) {
499 | inode_num = NTFS_ROOT_DIR_INODE;
500 | name = nullptr;
501 | name_len = 0;
502 | } else if (FileName[0] == L'.' && FileName[1] == 0) {
503 | inode_num = file->ino;
504 |
505 | if (file->name) {
506 | Status = bs->AllocatePool(EfiBootServicesData, file->name_len * sizeof(char16_t), (void**)&name);
507 | if (EFI_ERROR(Status)) {
508 | do_print_error("AllocatePool", Status);
509 | return Status;
510 | }
511 |
512 | memcpy(name, file->name, file->name_len * sizeof(char16_t));
513 | name_len = file->name_len;
514 | } else {
515 | name = nullptr;
516 | name_len = 0;
517 | }
518 | } else {
519 | u16string_view fn((char16_t*)FileName);
520 |
521 | if (fn.empty())
522 | return EFI_NOT_FOUND;
523 |
524 | if (file->ino == NTFS_ROOT_DIR_INODE && fn == u"..")
525 | return EFI_INVALID_PARAMETER;
526 |
527 | Status = normalize_path(fn, u16string_view(file->name, file->name_len), name, name_len);
528 | if (EFI_ERROR(Status)) {
529 | do_print_error("normalize_path", Status);
530 | return Status;
531 | }
532 |
533 | fn = u16string_view(name, name_len);
534 | inode_num = NTFS_ROOT_DIR_INODE;
535 |
536 | if (!fn.empty()) {
537 | while (true) {
538 | u16string_view part;
539 |
540 | auto backslash = fn.find(u'\\');
541 |
542 | if (backslash != u16string_view::npos)
543 | part = u16string_view(fn.data(), backslash);
544 | else
545 | part = fn;
546 |
547 | Status = find_file_in_dir(file->vol, inode_num, part, &inode_num);
548 |
549 | if (Status == EFI_NOT_FOUND) {
550 | if (name)
551 | bs->FreePool(name);
552 |
553 | return Status;
554 | }
555 |
556 | if (EFI_ERROR(Status)) {
557 | if (name)
558 | bs->FreePool(name);
559 |
560 | do_print_error("find_file_in_dir", Status);
561 | return Status;
562 | }
563 |
564 | if (backslash == u16string_view::npos)
565 | break;
566 |
567 | fn = u16string_view(fn.data() + backslash + 1, fn.size() - backslash - 1);
568 | }
569 | }
570 | }
571 |
572 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino);
573 | if (EFI_ERROR(Status)) {
574 | if (name)
575 | bs->FreePool(name);
576 |
577 | do_print_error("AllocatePool", Status);
578 | return Status;
579 | }
580 |
581 | memset(ino, 0, sizeof(inode));
582 |
583 | new (ino) inode(file->vol);
584 |
585 | populate_file_handle(&ino->proto);
586 |
587 | ino->ino = inode_num;
588 | ino->name = name;
589 | ino->name_len = name_len;
590 |
591 | *NewHandle = &ino->proto;
592 |
593 | return EFI_SUCCESS;
594 | }
595 |
596 | inode::~inode() {
597 | if (name)
598 | bs->FreePool(name);
599 |
600 | if (data)
601 | bs->FreePool(data);
602 |
603 | if (!inode_loaded)
604 | return;
605 |
606 | if (index_root)
607 | bs->FreePool(index_root);
608 |
609 | while (!IsListEmpty(&index_mappings)) {
610 | mapping* m = _CR(index_mappings.Flink, mapping, list_entry);
611 | RemoveEntryList(&m->list_entry);
612 | bs->FreePool(m);
613 | }
614 |
615 | while (!IsListEmpty(&levels)) {
616 | auto l = _CR(levels.Flink, btree_level, list_entry);
617 | RemoveEntryList(&l->list_entry);
618 | bs->FreePool(l);
619 | }
620 |
621 | while (!IsListEmpty(&data_mappings)) {
622 | mapping* m = _CR(data_mappings.Flink, mapping, list_entry);
623 | RemoveEntryList(&m->list_entry);
624 | bs->FreePool(m);
625 | }
626 | }
627 |
628 | static EFI_STATUS EFIAPI file_close(struct _EFI_FILE_HANDLE* File) {
629 | inode* ino = _CR(File, inode, proto);
630 |
631 | ino->inode::~inode();
632 | bs->FreePool(ino);
633 |
634 | return EFI_SUCCESS;
635 | }
636 |
637 | static EFI_STATUS EFIAPI file_delete(struct _EFI_FILE_HANDLE* File) {
638 | UNUSED(File);
639 |
640 | return EFI_UNSUPPORTED;
641 | }
642 |
643 | static EFI_STATUS read_from_mappings(const volume& vol, const LIST_ENTRY* mappings, uint64_t offset, uint8_t* buf,
644 | uint64_t size) {
645 | EFI_STATUS Status;
646 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
647 | uint64_t vcn = offset / cluster_size;
648 | uint64_t last_vcn = sector_align(offset + size, cluster_size) / cluster_size;
649 | LIST_ENTRY* le;
650 |
651 | le = mappings->Flink;
652 | while (le != mappings) {
653 | mapping* m = _CR(le, mapping, list_entry);
654 |
655 | if (m->vcn < last_vcn && m->vcn + m->length > vcn) {
656 | uint64_t to_read, mapping_offset;
657 |
658 | mapping_offset = offset - (m->vcn * cluster_size);
659 | to_read = ((m->vcn + m->length) * cluster_size) - offset;
660 |
661 | if (to_read > size)
662 | to_read = size;
663 |
664 | if (m->lcn == 0) // sparse
665 | memset(buf, 0, to_read);
666 | else {
667 | Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId,
668 | ((m->lcn * cluster_size) + mapping_offset) / vol.block->Media->BlockSize,
669 | to_read, buf);
670 | if (EFI_ERROR(Status)) {
671 | do_print_error("ReadBlocks", Status);
672 | return Status;
673 | }
674 | }
675 |
676 | if (to_read == size)
677 | break;
678 |
679 | offset += to_read;
680 | buf += to_read;
681 | size -= to_read;
682 | vcn = offset / cluster_size;
683 | }
684 |
685 | le = le->Flink;
686 | }
687 |
688 | return EFI_SUCCESS;
689 | }
690 |
691 | static EFI_STATUS process_fixups(MULTI_SECTOR_HEADER* header, uint64_t length, unsigned int sector_size) {
692 | uint64_t sectors;
693 | uint16_t* seq;
694 | uint8_t* ptr;
695 |
696 | sectors = length / sector_size;
697 |
698 | if (header->UpdateSequenceArraySize < sectors + 1)
699 | return EFI_INVALID_PARAMETER;
700 |
701 | seq = (uint16_t*)((uint8_t*)header + header->UpdateSequenceArrayOffset);
702 |
703 | ptr = (uint8_t*)header + sector_size - sizeof(uint16_t);
704 |
705 | for (unsigned int i = 0; i < sectors; i++) {
706 | if (*(uint16_t*)ptr != seq[0])
707 | return EFI_INVALID_PARAMETER;
708 |
709 | *(uint16_t*)ptr = seq[i + 1];
710 |
711 | ptr += sector_size;
712 | }
713 |
714 | return EFI_SUCCESS;
715 | }
716 |
717 | static EFI_STATUS next_index_item(inode& ino, const invocable auto& func) {
718 | EFI_STATUS Status;
719 | const index_root& ir = *ino.index_root;
720 |
721 | if (IsListEmpty(&ino.levels))
722 | return EFI_NOT_FOUND;
723 |
724 | auto l = _CR(ino.levels.Blink, btree_level, list_entry);
725 |
726 | do {
727 | if (l->ent->flags & INDEX_ENTRY_SUBNODE) {
728 | btree_level* l2;
729 | uint64_t vcn = ((MFT_SEGMENT_REFERENCE*)((uint8_t*)l->ent + l->ent->entry_length - sizeof(uint64_t)))->SegmentNumber;
730 |
731 | if (ir.bytes_per_index_record < ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster)
732 | vcn *= ino.vol.boot_sector->BytesPerSector;
733 | else
734 | vcn *= (uint64_t)ino.vol.boot_sector->BytesPerSector * (uint64_t)ino.vol.boot_sector->SectorsPerCluster;
735 |
736 | Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data) + ir.bytes_per_index_record,
737 | (void**)&l2);
738 | if (EFI_ERROR(Status)) {
739 | do_print_error("AllocatePool", Status);
740 | return Status;
741 | }
742 |
743 | Status = read_from_mappings(ino.vol, &ino.index_mappings, vcn, l2->data, ir.bytes_per_index_record);
744 | if (EFI_ERROR(Status)) {
745 | bs->FreePool(l2);
746 | do_print_error("read_from_mappings", Status);
747 | return Status;
748 | }
749 |
750 | auto rec = reinterpret_cast(l2->data);
751 |
752 | if (rec->MultiSectorHeader.Signature != INDEX_RECORD_MAGIC) {
753 | do_print("Signature was not INDX\n");
754 | bs->FreePool(l2);
755 | return EFI_INVALID_PARAMETER;
756 | }
757 |
758 | Status = process_fixups(&rec->MultiSectorHeader, ir.bytes_per_index_record,
759 | ino.vol.boot_sector->BytesPerSector);
760 | if (EFI_ERROR(Status)) {
761 | bs->FreePool(l2);
762 | do_print_error("process_fixups", Status);
763 | return EFI_INVALID_PARAMETER;
764 | }
765 |
766 | InsertTailList(&ino.levels, &l2->list_entry);
767 | l = l2;
768 | l->ent = reinterpret_cast((uint8_t*)&rec->header + rec->header.first_entry);
769 |
770 | continue;
771 | }
772 |
773 | while (l->ent->flags & INDEX_ENTRY_LAST) {
774 | RemoveEntryList(&l->list_entry);
775 | bs->FreePool(l);
776 |
777 | if (IsListEmpty(&ino.levels))
778 | break;
779 |
780 | l = _CR(ino.levels.Blink, btree_level, list_entry);
781 | }
782 |
783 | if (IsListEmpty(&ino.levels))
784 | break;
785 |
786 | if (!(l->ent->flags & INDEX_ENTRY_LAST)) {
787 | if (func(string_view((const char*)l->ent + sizeof(index_entry), l->ent->stream_length)))
788 | l->ent = reinterpret_cast((uint8_t*)l->ent + l->ent->entry_length);
789 |
790 | return EFI_SUCCESS;
791 | }
792 | } while (!IsListEmpty(&ino.levels));
793 |
794 | return EFI_SUCCESS;
795 | }
796 |
797 | static void win_time_to_efi(int64_t win, EFI_TIME* efi) {
798 | int64_t secs, time, days;
799 |
800 | secs = win / 10000000;
801 | time = secs % 86400;
802 | days = secs / 86400;
803 |
804 | unsigned int jd = 2305814 + days; // Julian date
805 |
806 | unsigned int f = jd + 1401 + (((((4 * jd) + 274277) / 146097) * 3) / 4) - 38;
807 | unsigned int e = (4 * f) + 3;
808 | unsigned int g = (e % 1461) / 4;
809 | unsigned int h = (5 * g) + 2;
810 |
811 | efi->Month = (((h / 153) + 2) % 12) + 1;
812 | efi->Year = (e / 1461) - 4716 + ((14 - efi->Month) / 12);
813 | efi->Day = ((h % 153) / 5) + 1;
814 | efi->Hour = time / 3600;
815 | efi->Minute = (time % 3600) / 60;
816 | efi->Second = time % 60;
817 | efi->Pad1 = 0;
818 | efi->Nanosecond = (win % 10000000) * 100;
819 | efi->TimeZone = 0;
820 | efi->Daylight = 0;
821 | efi->Pad2 = 0;
822 | }
823 |
824 | static uint64_t win_attributes_to_efi(uint32_t attr, bool is_dir) {
825 | uint64_t ret = 0;
826 |
827 | if (is_dir)
828 | ret |= EFI_FILE_DIRECTORY;
829 |
830 | if (attr & FILE_ATTRIBUTE_READONLY)
831 | ret |= EFI_FILE_READ_ONLY;
832 |
833 | if (attr & FILE_ATTRIBUTE_HIDDEN)
834 | ret |= EFI_FILE_HIDDEN;
835 |
836 | if (attr & FILE_ATTRIBUTE_SYSTEM)
837 | ret |= EFI_FILE_SYSTEM;
838 |
839 | if (attr & EFI_FILE_ARCHIVE)
840 | ret |= EFI_FILE_ARCHIVE;
841 |
842 | return ret;
843 | }
844 |
845 | static EFI_STATUS read_dir(inode& ino, UINTN* BufferSize, VOID* Buffer) {
846 | EFI_STATUS Status;
847 | bool overflow = false, again;
848 |
849 | if (!ino.inode_loaded) {
850 | Status = load_inode(ino);
851 | if (EFI_ERROR(Status)) {
852 | do_print_error("load_inode", Status);
853 | return Status;
854 | }
855 | }
856 |
857 | if (ino.position == 0 && IsListEmpty(&ino.levels)) {
858 | btree_level* l;
859 |
860 | Status = bs->AllocatePool(EfiBootServicesData, offsetof(btree_level, data), (void**)&l);
861 | if (EFI_ERROR(Status)) {
862 | do_print_error("AllocatePool", Status);
863 | return Status;
864 | }
865 |
866 | l->ent = reinterpret_cast((uint8_t*)&ino.index_root->node_header + ino.index_root->node_header.first_entry);
867 | InsertTailList(&ino.levels, &l->list_entry);
868 | }
869 |
870 | // FIXME - ignore special files in root
871 |
872 | do {
873 | again = false;
874 |
875 | Status = next_index_item(ino, [&](string_view data) -> bool {
876 | size_t size;
877 |
878 | const auto& fn = *reinterpret_cast(data.data());
879 |
880 | if (fn.Namespace == file_name_type::DOS) { // ignore DOS filenames
881 | again = true;
882 | return true;
883 | }
884 |
885 | size = offsetof(EFI_FILE_INFO, FileName[0]) + ((fn.FileNameLength + 1) * sizeof(char16_t));
886 |
887 | if (*BufferSize < size) {
888 | *BufferSize = size;
889 | overflow = true;
890 | return false;
891 | }
892 |
893 | auto& info = *(EFI_FILE_INFO*)Buffer;
894 |
895 | info.Size = size;
896 | info.FileSize = fn.EndOfFile;
897 | info.PhysicalSize = fn.AllocationSize;
898 | win_time_to_efi(fn.CreationTime, &info.CreateTime);
899 | win_time_to_efi(fn.LastAccessTime, &info.LastAccessTime);
900 | win_time_to_efi(fn.LastWriteTime, &info.ModificationTime);
901 | info.Attribute = win_attributes_to_efi(fn.FileAttributes, fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT);
902 |
903 | memcpy(info.FileName, fn.FileName, fn.FileNameLength * sizeof(char16_t));
904 | info.FileName[fn.FileNameLength] = 0;
905 |
906 | *BufferSize = size;
907 |
908 | ino.position++;
909 |
910 | return true;
911 | });
912 | } while (again);
913 |
914 | if (overflow)
915 | return EFI_BUFFER_TOO_SMALL;
916 |
917 | if (Status == EFI_NOT_FOUND) { // last one
918 | *BufferSize = 0;
919 | return EFI_SUCCESS;
920 | }
921 |
922 | if (EFI_ERROR(Status)) {
923 | do_print_error("next_index_item", Status);
924 | return Status;
925 | }
926 |
927 | return EFI_SUCCESS;
928 | }
929 |
930 | static EFI_STATUS read_nonresident_attribute(volume& vol, const ATTRIBUTE_RECORD_HEADER& att, span data) {
931 | EFI_STATUS Status;
932 | LIST_ENTRY mappings;
933 |
934 | InitializeListHead(&mappings);
935 |
936 | Status = read_mappings(vol, att, &mappings);
937 | if (EFI_ERROR(Status)) {
938 | do_print_error("read_mappings", Status);
939 | return Status;
940 | }
941 |
942 | Status = read_from_mappings(vol, &mappings, 0, data.data(), data.size());
943 |
944 | while (!IsListEmpty(&mappings)) {
945 | mapping* m = _CR(mappings.Flink, mapping, list_entry);
946 | RemoveEntryList(&m->list_entry);
947 | bs->FreePool(m);
948 | }
949 |
950 | if (EFI_ERROR(Status))
951 | do_print_error("read_from_mappings", Status);
952 |
953 | return Status;
954 | }
955 |
956 | static EFI_STATUS do_xpress_decompress(inode& ino, span compdata, uint32_t chunk_size) {
957 | EFI_STATUS Status;
958 | xpress_decompressor ctx;
959 | uint64_t size = ino.size;
960 | uint64_t num_chunks = (size + chunk_size - 1) / chunk_size;
961 | auto offsets = (uint32_t*)compdata.data();
962 |
963 | if (ino.data) {
964 | bs->FreePool(ino.data);
965 | ino.data = nullptr;
966 | }
967 |
968 | Status = bs->AllocatePool(EfiBootServicesData, ino.size, (void**)&ino.data);
969 | if (EFI_ERROR(Status)) {
970 | do_print_error("AllocatePool", Status);
971 | return Status;
972 | }
973 |
974 | auto ret = span(ino.data, ino.size);
975 |
976 | auto data = span(compdata.data() + ((num_chunks - 1) * sizeof(uint32_t)),
977 | (uint32_t)(compdata.size() - ((num_chunks - 1) * sizeof(uint32_t))));
978 |
979 | for (uint64_t i = 0; i < num_chunks; i++) {
980 | uint64_t off = i == 0 ? 0 : offsets[i - 1];
981 | uint32_t complen;
982 |
983 | if (i == 0)
984 | complen = num_chunks > 1 ? offsets[0] : (uint32_t)data.size();
985 | else if (i == num_chunks - 1)
986 | complen = (uint32_t)data.size() - offsets[i - 1];
987 | else
988 | complen = offsets[i] - offsets[i - 1];
989 |
990 | if (complen == (i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size)) {
991 | // stored uncompressed
992 | memcpy(ret.data() + (i * chunk_size), data.data() + off, complen);
993 | } else {
994 | auto err = xpress_decompress(&ctx, data.data() + off, complen, ret.data() + (i * chunk_size),
995 | (size_t)(i == num_chunks - 1 ? (ret.size() - (i * chunk_size)) : chunk_size));
996 |
997 | if (err != 0) {
998 | do_print("xpress_decompress failed\n");
999 | bs->FreePool(ino.data);
1000 | ino.data = nullptr;
1001 | return EFI_INVALID_PARAMETER;
1002 | }
1003 | }
1004 | }
1005 |
1006 | return EFI_SUCCESS;
1007 | }
1008 |
// Validates an IO_REPARSE_TAG_WOF reparse point and decompresses the file it
// describes.
//
// ino - file being read; passed on to do_xpress_decompress()
// rp  - raw reparse point buffer: a reparse_point_header followed by a
//       wof_external_info and a file_provider_external_info_v0
// wof - compressed stream, passed on to do_xpress_decompress() unchanged
//
// Returns EFI_INVALID_PARAMETER for a truncated buffer, an unsupported
// version, provider or algorithm (including LZX, which is not implemented
// here), and otherwise the result of do_xpress_decompress().
static EFI_STATUS handle_wof(inode& ino, span rp, span wof) {
    // the fixed part of the header must be present before any field is read
    if (rp.size() < offsetof(reparse_point_header, DataBuffer)) {
        do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n");
        return EFI_INVALID_PARAMETER;
    }

    const auto& rph = *(reparse_point_header*)rp.data();

    // the payload length claimed by the header must fit in the buffer
    if (rp.size() < offsetof(reparse_point_header, DataBuffer) + rph.ReparseDataLength) {
        do_print("truncated IO_REPARSE_TAG_WOF reparse point buffer\n");
        return EFI_INVALID_PARAMETER;
    }

    if (rph.ReparseDataLength < sizeof(wof_external_info)) {
        do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n");
        return EFI_INVALID_PARAMETER;
    }

    const auto& wofei = *(wof_external_info*)rph.DataBuffer;

    if (wofei.Version != WOF_CURRENT_VERSION) {
        do_print("Unsupported WOF version\n");
        return EFI_INVALID_PARAMETER;
    }

    // only the file provider is handled; WIM gets its own message
    if (wofei.Provider == WOF_PROVIDER_WIM) {
        do_print("Unsupported WOF provider WOF_PROVIDER_WIM\n");
        return EFI_INVALID_PARAMETER;
    } else if (wofei.Provider != WOF_PROVIDER_FILE) {
        do_print("Unsupported WOF provider\n");
        return EFI_INVALID_PARAMETER;
    }

    if (rph.ReparseDataLength < sizeof(wof_external_info) + sizeof(file_provider_external_info_v0)) {
        do_print("IO_REPARSE_TAG_WOF ReparseDataLength shorter than expected\n");
        return EFI_INVALID_PARAMETER;
    }

    // the provider-specific block follows the WOF block directly
    const auto& fpei = *(file_provider_external_info_v0*)((uint8_t*)&wofei + sizeof(wofei));

    if (fpei.Version != FILE_PROVIDER_CURRENT_VERSION) {
        do_print("Unsupported FILE_PROVIDER_EXTERNAL_INFO version\n");
        return EFI_INVALID_PARAMETER;
    }

    // the algorithm selects the chunk size handed to the XPRESS decompressor
    switch (fpei.Algorithm) {
        case FILE_PROVIDER_COMPRESSION_XPRESS4K:
            return do_xpress_decompress(ino, wof, 4096);

        case FILE_PROVIDER_COMPRESSION_LZX:
            do_print("FIXME - FILE_PROVIDER_COMPRESSION_LZX\n");
            return EFI_INVALID_PARAMETER;

        case FILE_PROVIDER_COMPRESSION_XPRESS8K:
            return do_xpress_decompress(ino, wof, 8192);

        case FILE_PROVIDER_COMPRESSION_XPRESS16K:
            return do_xpress_decompress(ino, wof, 16384);

        default:
            do_print("Unrecognized WIM compression algorithm\n");
            return EFI_INVALID_PARAMETER;
    }
}
1073 |
1074 | static EFI_STATUS read_file(inode& ino, UINTN* BufferSize, VOID* Buffer) {
1075 | EFI_STATUS Status, Status2;
1076 | uint64_t start, end;
1077 |
1078 | if (ino.position >= ino.size || *BufferSize == 0) {
1079 | *BufferSize = 0;
1080 | return EFI_SUCCESS;
1081 | }
1082 |
1083 | if (ino.position >= ino.vdl) {
1084 | UINTN to_read = *BufferSize;
1085 |
1086 | if (to_read > ino.size - ino.position)
1087 | to_read = ino.size - ino.position;
1088 |
1089 | memset(Buffer, 0, to_read);
1090 |
1091 | *BufferSize = to_read;
1092 |
1093 | return EFI_SUCCESS;
1094 | }
1095 |
1096 | if (!ino.data_loaded) {
1097 | FILE_RECORD_SEGMENT_HEADER* file;
1098 | uint8_t* wof_data = nullptr;
1099 | size_t wof_len = 0;
1100 | uint8_t* rp_data = nullptr;
1101 | size_t rp_len = 0;
1102 |
1103 | Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file);
1104 | if (EFI_ERROR(Status)) {
1105 | do_print_error("AllocatePool", Status);
1106 | return Status;
1107 | }
1108 |
1109 | Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size,
1110 | (uint8_t*)file, ino.vol.file_record_size);
1111 | if (EFI_ERROR(Status)) {
1112 | do_print_error("read_from_mappings", Status);
1113 | bs->FreePool(file);
1114 | return Status;
1115 | }
1116 |
1117 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1118 | do_print("Signature was not FILE\n");
1119 | bs->FreePool(file);
1120 | return EFI_INVALID_PARAMETER;
1121 | }
1122 |
1123 | Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size,
1124 | ino.vol.boot_sector->BytesPerSector);
1125 |
1126 | if (EFI_ERROR(Status)) {
1127 | do_print_error("process_fixups", Status);
1128 | bs->FreePool(file);
1129 | return Status;
1130 | }
1131 |
1132 | Status = EFI_SUCCESS;
1133 |
1134 | Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view data, u16string_view att_name) -> bool {
1135 | switch (att.TypeCode) {
1136 | case ntfs_attribute::DATA:
1137 | if (att_name.empty()) {
1138 | switch (att.FormCode) {
1139 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM:
1140 | Status = read_mappings(ino.vol, att, &ino.data_mappings);
1141 | if (EFI_ERROR(Status))
1142 | do_print_error("read_mappings", Status);
1143 | break;
1144 |
1145 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1146 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&ino.data);
1147 | if (EFI_ERROR(Status)) {
1148 | do_print_error("AllocatePool", Status);
1149 | break;
1150 | }
1151 |
1152 | memcpy(ino.data, data.data(), data.size());
1153 | break;
1154 | }
1155 | } else if (att_name == u"WofCompressedData") {
1156 | switch (att.FormCode) {
1157 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1158 | uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster;
1159 |
1160 | wof_len = att.Form.Nonresident.FileSize;
1161 |
1162 | if (wof_len == 0)
1163 | break;
1164 |
1165 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(wof_len, cluster_size), (void**)&wof_data);
1166 | if (EFI_ERROR(Status)) {
1167 | do_print_error("AllocatePool", Status);
1168 | break;
1169 | }
1170 |
1171 | Status = read_nonresident_attribute(ino.vol, att, span(wof_data, sector_align(wof_len, cluster_size)));
1172 | if (EFI_ERROR(Status)) {
1173 | do_print_error("read_nonresident_attribute", Status);
1174 | bs->FreePool(wof_data);
1175 | wof_data = nullptr;
1176 | break;
1177 | }
1178 |
1179 | break;
1180 | }
1181 |
1182 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1183 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&wof_data);
1184 | if (EFI_ERROR(Status)) {
1185 | do_print_error("AllocatePool", Status);
1186 | break;
1187 | }
1188 |
1189 | memcpy(wof_data, data.data(), data.size());
1190 | wof_len = data.size();
1191 |
1192 | break;
1193 | }
1194 | }
1195 |
1196 | if (EFI_ERROR(Status))
1197 | return false;
1198 | break;
1199 |
1200 | case ntfs_attribute::REPARSE_POINT:
1201 | if (att_name.empty()) {
1202 | switch (att.FormCode) {
1203 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1204 | uint32_t cluster_size = ino.vol.boot_sector->BytesPerSector * ino.vol.boot_sector->SectorsPerCluster;
1205 |
1206 | rp_len = att.Form.Nonresident.FileSize;
1207 |
1208 | if (rp_len == 0)
1209 | break;
1210 |
1211 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(rp_len, cluster_size), (void**)&rp_data);
1212 | if (EFI_ERROR(Status)) {
1213 | do_print_error("AllocatePool", Status);
1214 | break;
1215 | }
1216 |
1217 | Status = read_nonresident_attribute(ino.vol, att, span(rp_data, sector_align(rp_len, cluster_size)));
1218 | if (EFI_ERROR(Status)) {
1219 | do_print_error("read_nonresident_attribute", Status);
1220 | bs->FreePool(rp_data);
1221 | rp_data = nullptr;
1222 | break;
1223 | }
1224 |
1225 | break;
1226 | }
1227 |
1228 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1229 | Status = bs->AllocatePool(EfiBootServicesData, data.size(), (void**)&rp_data);
1230 | if (EFI_ERROR(Status)) {
1231 | do_print_error("AllocatePool", Status);
1232 | break;
1233 | }
1234 |
1235 | memcpy(rp_data, data.data(), data.size());
1236 | rp_len = data.size();
1237 |
1238 | break;
1239 | }
1240 | }
1241 | break;
1242 |
1243 | default:
1244 | break;
1245 | }
1246 |
1247 | return true;
1248 | });
1249 |
1250 | bs->FreePool(file);
1251 |
1252 | if (rp_data) {
1253 | if (rp_len > sizeof(uint32_t) && *(uint32_t*)rp_data == IO_REPARSE_TAG_WOF) {
1254 | Status = handle_wof(ino, span(rp_data, rp_len), span(wof_data, wof_len));
1255 | if (EFI_ERROR(Status)) {
1256 | do_print_error("handle_wof", Status);
1257 | bs->FreePool(rp_data);
1258 |
1259 | if (wof_data)
1260 | bs->FreePool(wof_data);
1261 |
1262 | return Status;
1263 | }
1264 | }
1265 |
1266 | bs->FreePool(rp_data);
1267 | }
1268 |
1269 | if (wof_data)
1270 | bs->FreePool(wof_data);
1271 |
1272 | if (EFI_ERROR(Status2)) {
1273 | do_print_error("loop_through_atts", Status2);
1274 | return Status2;
1275 | }
1276 |
1277 | if (EFI_ERROR(Status))
1278 | return Status;
1279 |
1280 | ino.data_loaded = true;
1281 | }
1282 |
1283 | start = ino.position;
1284 | end = ino.position + *BufferSize;
1285 |
1286 | if (end > ino.size)
1287 | end = ino.size;
1288 |
1289 | if (ino.data)
1290 | memcpy(Buffer, ino.data + start, end - start);
1291 | else {
1292 | uint64_t start_aligned, valid_end, end_aligned;
1293 | uint8_t* tmp = nullptr;
1294 |
1295 | valid_end = end;
1296 |
1297 | if (valid_end > ino.vdl)
1298 | valid_end = ino.vdl;
1299 |
1300 | start_aligned = start & ~(ino.vol.boot_sector->BytesPerSector - 1);
1301 | end_aligned = sector_align(valid_end, ino.vol.boot_sector->BytesPerSector);
1302 |
1303 | if (start_aligned != start || end_aligned != valid_end) {
1304 | Status = bs->AllocatePool(EfiBootServicesData, end_aligned - start_aligned, (void**)&tmp);
1305 | if (EFI_ERROR(Status)) {
1306 | do_print_error("AllocatePool", Status);
1307 | return Status;
1308 | }
1309 | }
1310 |
1311 | // FIXME - LZNT1 compressed data
1312 |
1313 | Status = read_from_mappings(ino.vol, &ino.data_mappings, start_aligned,
1314 | tmp ? tmp : (uint8_t*)Buffer, end_aligned - start_aligned);
1315 | if (EFI_ERROR(Status)) {
1316 | do_print_error("read_from_mappings", Status);
1317 |
1318 | if (tmp)
1319 | bs->FreePool(tmp);
1320 |
1321 | return Status;
1322 | }
1323 |
1324 | if (tmp) {
1325 | memcpy(Buffer, tmp + start - start_aligned, valid_end - start);
1326 | bs->FreePool(tmp);
1327 | }
1328 |
1329 | if (valid_end < end)
1330 | memset((uint8_t*)Buffer + valid_end - start, 0, end - valid_end);
1331 | }
1332 |
1333 | ino.position = end;
1334 | *BufferSize = end - start;
1335 |
1336 | return EFI_SUCCESS;
1337 | }
1338 |
1339 | static EFI_STATUS EFIAPI file_read(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) {
1340 | EFI_STATUS Status;
1341 | inode* ino = _CR(File, inode, proto);
1342 |
1343 | if (!ino->inode_loaded) {
1344 | Status = load_inode(*ino);
1345 | if (EFI_ERROR(Status)) {
1346 | do_print_error("load_inode", Status);
1347 | return Status;
1348 | }
1349 | }
1350 |
1351 | if (ino->is_dir)
1352 | return read_dir(*ino, BufferSize, Buffer);
1353 | else
1354 | return read_file(*ino, BufferSize, Buffer);
1355 | }
1356 |
1357 | static EFI_STATUS EFIAPI file_write(struct _EFI_FILE_HANDLE* File, UINTN* BufferSize, VOID* Buffer) {
1358 | UNUSED(File);
1359 | UNUSED(BufferSize);
1360 | UNUSED(Buffer);
1361 |
1362 | return EFI_UNSUPPORTED;
1363 | }
1364 |
1365 | static EFI_STATUS EFIAPI file_set_position(struct _EFI_FILE_HANDLE* File, UINT64 Position) {
1366 | EFI_STATUS Status;
1367 | inode* ino = _CR(File, inode, proto);
1368 |
1369 | if (!ino->inode_loaded) {
1370 | Status = load_inode(*ino);
1371 | if (EFI_ERROR(Status)) {
1372 | do_print_error("load_inode", Status);
1373 | return Status;
1374 | }
1375 | }
1376 |
1377 | if (ino->is_dir) {
1378 | if (Position != 0)
1379 | return EFI_UNSUPPORTED;
1380 |
1381 | ino->position = 0;
1382 |
1383 | while (!IsListEmpty(&ino->levels)) {
1384 | auto l = _CR(ino->levels.Flink, btree_level, list_entry);
1385 | RemoveEntryList(&l->list_entry);
1386 | bs->FreePool(l);
1387 | }
1388 | } else {
1389 | if (Position == 0xffffffffffffffff)
1390 | ino->position = ino->size;
1391 | else
1392 | ino->position = Position;
1393 | }
1394 |
1395 | return EFI_SUCCESS;
1396 | }
1397 |
1398 | static EFI_STATUS EFIAPI file_get_position(struct _EFI_FILE_HANDLE* File, UINT64* Position) {
1399 | inode* ino = _CR(File, inode, proto);
1400 |
1401 | if (ino->is_dir)
1402 | return EFI_UNSUPPORTED;
1403 |
1404 | *Position = ino->position;
1405 |
1406 | return EFI_SUCCESS;
1407 | }
1408 |
1409 | static EFI_STATUS loop_through_atts(const volume& vol, uint64_t inode, const FILE_RECORD_SEGMENT_HEADER* file_record,
1410 | invocable auto func) {
1411 | EFI_STATUS Status;
1412 | auto att = reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset);
1413 | size_t offset = file_record->FirstAttributeOffset;
1414 | uint8_t* attlist = nullptr;
1415 | size_t attlist_size;
1416 |
1417 | while (true) {
1418 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1419 | break;
1420 |
1421 | if (att->TypeCode == ntfs_attribute::ATTRIBUTE_LIST) {
1422 | switch (att->FormCode) {
1423 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM: {
1424 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
1425 | LIST_ENTRY mappings;
1426 |
1427 | if (att->Form.Nonresident.FileSize == 0)
1428 | break;
1429 |
1430 | attlist_size = att->Form.Nonresident.FileSize;
1431 |
1432 | Status = bs->AllocatePool(EfiBootServicesData, sector_align(attlist_size, cluster_size), (void**)&attlist);
1433 | if (EFI_ERROR(Status)) {
1434 | do_print_error("AllocatePool", Status);
1435 | return Status;
1436 | }
1437 |
1438 | InitializeListHead(&mappings);
1439 |
1440 | Status = read_mappings(vol, *att, &mappings);
1441 | if (EFI_ERROR(Status)) {
1442 | bs->FreePool(attlist);
1443 | do_print_error("read_mappings", Status);
1444 | return Status;
1445 | }
1446 |
1447 | Status = read_from_mappings(vol, &mappings, 0, attlist, sector_align(attlist_size, cluster_size));
1448 | if (EFI_ERROR(Status)) {
1449 | while (!IsListEmpty(&mappings)) {
1450 | mapping* m = _CR(mappings.Flink, mapping, list_entry);
1451 | RemoveEntryList(&m->list_entry);
1452 | bs->FreePool(m);
1453 | }
1454 |
1455 | bs->FreePool(attlist);
1456 | do_print_error("read_from_mappings", Status);
1457 | return Status;
1458 | }
1459 |
1460 | while (!IsListEmpty(&mappings)) {
1461 | mapping* m = _CR(mappings.Flink, mapping, list_entry);
1462 | RemoveEntryList(&m->list_entry);
1463 | bs->FreePool(m);
1464 | }
1465 |
1466 | break;
1467 | }
1468 |
1469 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1470 | if (att->Form.Resident.ValueLength == 0)
1471 | break;
1472 |
1473 | attlist_size = att->Form.Resident.ValueLength;
1474 |
1475 | Status = bs->AllocatePool(EfiBootServicesData, attlist_size, (void**)&attlist);
1476 | if (EFI_ERROR(Status)) {
1477 | do_print_error("AllocatePool", Status);
1478 | return Status;
1479 | }
1480 |
1481 | memcpy(attlist, (uint8_t*)att + att->Form.Resident.ValueOffset, attlist_size);
1482 | break;
1483 | }
1484 |
1485 | break;
1486 | }
1487 |
1488 | offset += att->RecordLength;
1489 | att = reinterpret_cast((uint8_t*)att + att->RecordLength);
1490 | }
1491 |
1492 | if (attlist) {
1493 | {
1494 | auto ent = (const attribute_list_entry*)attlist;
1495 | size_t left = attlist_size;
1496 |
1497 | while (true) {
1498 | uint64_t file_reference = ent->file_reference.SegmentNumber;
1499 |
1500 | if (file_reference == inode) { // contained elsewhere in this inode
1501 | att = reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset);
1502 | offset = file_record->FirstAttributeOffset;
1503 |
1504 | while (true) {
1505 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1506 | break;
1507 |
1508 | if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
1509 | if (att->NameLength == 0 || !memcmp((uint8_t*)file_record + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
1510 | string_view data;
1511 | u16string_view name;
1512 |
1513 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1514 | data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1515 |
1516 | if (att->NameLength != 0)
1517 | name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
1518 |
1519 | if (!func(*att, data, name)) {
1520 | bs->FreePool(attlist);
1521 | return EFI_SUCCESS;
1522 | }
1523 |
1524 | break;
1525 | }
1526 | }
1527 |
1528 | offset += att->RecordLength;
1529 | att = reinterpret_cast((uint8_t*)att + att->RecordLength);
1530 | }
1531 | }
1532 |
1533 | if (left <= ent->record_length)
1534 | break;
1535 |
1536 | left -= ent->record_length;
1537 | ent = (const attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1538 | }
1539 | }
1540 |
1541 | while (true) {
1542 | auto ent = (attribute_list_entry*)attlist;
1543 | size_t left = attlist_size;
1544 | optional ref;
1545 | FILE_RECORD_SEGMENT_HEADER* file2 = nullptr;
1546 |
1547 | while (true) {
1548 | uint64_t file_reference = ent->file_reference.SegmentNumber;
1549 |
1550 | // skip entries already handled
1551 | if (file_reference == inode) {
1552 | if (left <= ent->record_length)
1553 | break;
1554 |
1555 | left -= ent->record_length;
1556 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1557 | continue;
1558 | }
1559 |
1560 | if (ref.has_value() && *ref != file_reference) {
1561 | if (left <= ent->record_length)
1562 | break;
1563 |
1564 | left -= ent->record_length;
1565 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1566 | continue;
1567 | }
1568 |
1569 | if (!ref.has_value()) {
1570 | ref = file_reference;
1571 |
1572 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file2);
1573 | if (EFI_ERROR(Status)) {
1574 | do_print_error("AllocatePool", Status);
1575 | bs->FreePool(attlist);
1576 | return Status;
1577 | }
1578 |
1579 | Status = read_from_mappings(vol, &vol.mft_mappings, file_reference * vol.file_record_size,
1580 | (uint8_t*)file2, vol.file_record_size);
1581 | if (EFI_ERROR(Status)) {
1582 | do_print_error("read_from_mappings", Status);
1583 | bs->FreePool(file2);
1584 | bs->FreePool(attlist);
1585 | return Status;
1586 | }
1587 |
1588 | if (file2->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1589 | do_print("Signature was not FILE\n");
1590 | bs->FreePool(file2);
1591 | bs->FreePool(attlist);
1592 | return EFI_INVALID_PARAMETER;
1593 | }
1594 |
1595 | Status = process_fixups(&file2->MultiSectorHeader, vol.file_record_size,
1596 | vol.boot_sector->BytesPerSector);
1597 |
1598 | if (EFI_ERROR(Status)) {
1599 | do_print_error("process_fixups", Status);
1600 | bs->FreePool(file2);
1601 | bs->FreePool(attlist);
1602 | return Status;
1603 | }
1604 | }
1605 |
1606 | att = reinterpret_cast((uint8_t*)file2 + file2->FirstAttributeOffset);
1607 | offset = file2->FirstAttributeOffset;
1608 |
1609 | while (true) {
1610 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1611 | break;
1612 |
1613 | if (att->TypeCode == ent->type && att->NameLength == ent->name_length && att->Instance == ent->instance) {
1614 | if (att->NameLength == 0 || !memcmp((uint8_t*)file2 + offset + att->NameOffset, (uint8_t*)ent + ent->name_offset, att->NameLength * sizeof(char16_t))) {
1615 | string_view data;
1616 | u16string_view name;
1617 |
1618 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1619 | data = string_view((const char*)file2 + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1620 |
1621 | if (att->NameLength != 0)
1622 | name = u16string_view((char16_t*)((uint8_t*)file2 + offset + att->NameOffset), att->NameLength);
1623 |
1624 | if (!func(*att, data, name)) {
1625 | bs->FreePool(file2);
1626 | bs->FreePool(attlist);
1627 | return EFI_SUCCESS;
1628 | }
1629 |
1630 | break;
1631 | }
1632 | }
1633 |
1634 | offset += att->RecordLength;
1635 | att = reinterpret_cast((uint8_t*)att + att->RecordLength);
1636 | }
1637 |
1638 | // don't process this again
1639 | ent->file_reference.SegmentNumber = inode;
1640 |
1641 | if (left <= ent->record_length)
1642 | break;
1643 |
1644 | left -= ent->record_length;
1645 | ent = (attribute_list_entry*)((uint8_t*)ent + ent->record_length);
1646 | }
1647 |
1648 | if (file2)
1649 | bs->FreePool(file2);
1650 |
1651 | if (!ref.has_value()) {
1652 | bs->FreePool(attlist);
1653 | return EFI_SUCCESS;
1654 | }
1655 | }
1656 |
1657 | bs->FreePool(attlist);
1658 |
1659 | return EFI_SUCCESS;
1660 | }
1661 |
1662 | att = reinterpret_cast((uint8_t*)file_record + file_record->FirstAttributeOffset);
1663 | offset = file_record->FirstAttributeOffset;
1664 |
1665 | while (true) {
1666 | if (att->TypeCode == (enum ntfs_attribute)0xffffffff || att->RecordLength == 0)
1667 | break;
1668 |
1669 | string_view data;
1670 | u16string_view name;
1671 |
1672 | if (att->FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM)
1673 | data = string_view((const char*)file_record + offset + att->Form.Resident.ValueOffset, att->Form.Resident.ValueLength);
1674 |
1675 | if (att->NameLength != 0)
1676 | name = u16string_view((char16_t*)((uint8_t*)file_record + offset + att->NameOffset), att->NameLength);
1677 |
1678 | if (!func(*att, data, name))
1679 | return EFI_SUCCESS;
1680 |
1681 | offset += att->RecordLength;
1682 | att = reinterpret_cast((uint8_t*)att + att->RecordLength);
1683 | }
1684 |
1685 | return EFI_SUCCESS;
1686 | }
1687 |
1688 | static EFI_STATUS read_mappings(const volume& vol, const ATTRIBUTE_RECORD_HEADER& att, LIST_ENTRY* mappings) {
1689 | EFI_STATUS Status;
1690 | uint64_t next_vcn, current_lcn = 0, current_vcn;
1691 | uint32_t cluster_size = vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster;
1692 | uint8_t* stream;
1693 | uint64_t max_cluster;
1694 |
1695 | if (att.FormCode != NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
1696 | do_print("Cannot read mappings for attribute that is not non-resident\n");
1697 | return EFI_INVALID_PARAMETER;
1698 | }
1699 |
1700 | if (att.Flags & ATTRIBUTE_FLAG_ENCRYPTED) {
1701 | do_print("Cannot read encrypted data\n");
1702 | return EFI_INVALID_PARAMETER;
1703 | }
1704 |
1705 | if (att.Flags & ATTRIBUTE_FLAG_COMPRESSION_MASK) {
1706 | do_print("Compression not yet supported\n");
1707 | return EFI_INVALID_PARAMETER;
1708 | }
1709 |
1710 | next_vcn = att.Form.Nonresident.LowestVcn;
1711 | stream = (uint8_t*)&att + att.Form.Nonresident.MappingPairsOffset;
1712 |
1713 | max_cluster = att.Form.Nonresident.ValidDataLength / cluster_size;
1714 |
1715 | if (att.Form.Nonresident.ValidDataLength & (cluster_size - 1))
1716 | max_cluster++;
1717 |
1718 | if (max_cluster == 0)
1719 | return EFI_SUCCESS;
1720 |
1721 | while (true) {
1722 | uint64_t v, l;
1723 | int64_t v_val, l_val;
1724 | mapping* m;
1725 |
1726 | current_vcn = next_vcn;
1727 |
1728 | if (*stream == 0)
1729 | break;
1730 |
1731 | v = *stream & 0xf;
1732 | l = *stream >> 4;
1733 |
1734 | stream++;
1735 |
1736 | if (v > 8)
1737 | return EFI_INVALID_PARAMETER;
1738 |
1739 | if (l > 8)
1740 | return EFI_INVALID_PARAMETER;
1741 |
1742 | // FIXME - do we need to make sure that int64_t pointers don't go past end of buffer?
1743 |
1744 | v_val = *(int64_t*)stream;
1745 | v_val &= (1ull << (v * 8)) - 1;
1746 |
1747 | if ((uint64_t)v_val & (1ull << ((v * 8) - 1))) // sign-extend if negative
1748 | v_val |= 0xffffffffffffffff & ~((1ull << (v * 8)) - 1);
1749 |
1750 | stream += v;
1751 |
1752 | next_vcn += v_val;
1753 |
1754 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(mapping), (void**)&m);
1755 | if (EFI_ERROR(Status)) {
1756 | do_print_error("AllocatePool", Status);
1757 | return Status;
1758 | }
1759 |
1760 | if (l != 0) {
1761 | l_val = *(int64_t*)stream;
1762 | l_val &= (1ull << (l * 8)) - 1;
1763 |
1764 | if ((uint64_t)l_val & (1ull << ((l * 8) - 1))) // sign-extend if negative
1765 | l_val |= 0xffffffffffffffff & ~((1ull << (l * 8)) - 1);
1766 |
1767 | stream += l;
1768 |
1769 | current_lcn += l_val;
1770 |
1771 | if (next_vcn > max_cluster)
1772 | next_vcn = max_cluster;
1773 |
1774 | m->lcn = current_lcn;
1775 | } else
1776 | m->lcn = 0;
1777 |
1778 | m->vcn = current_vcn;
1779 | m->length = next_vcn - current_vcn;
1780 |
1781 | InsertTailList(mappings, &m->list_entry);
1782 |
1783 | if (next_vcn == max_cluster)
1784 | break;
1785 | }
1786 |
1787 | return EFI_SUCCESS;
1788 | }
1789 |
1790 | static EFI_STATUS load_inode(inode& ino) {
1791 | EFI_STATUS Status, Status2;
1792 | FILE_RECORD_SEGMENT_HEADER* file;
1793 |
1794 | InitializeListHead(&ino.index_mappings);
1795 | InitializeListHead(&ino.levels);
1796 | InitializeListHead(&ino.data_mappings);
1797 |
1798 | Status = bs->AllocatePool(EfiBootServicesData, ino.vol.file_record_size, (void**)&file);
1799 | if (EFI_ERROR(Status)) {
1800 | do_print_error("AllocatePool", Status);
1801 | return Status;
1802 | }
1803 |
1804 | Status = read_from_mappings(ino.vol, &ino.vol.mft_mappings, ino.ino * ino.vol.file_record_size,
1805 | (uint8_t*)file, ino.vol.file_record_size);
1806 | if (EFI_ERROR(Status)) {
1807 | bs->FreePool(file);
1808 | do_print_error("read_from_mappings", Status);
1809 | return Status;
1810 | }
1811 |
1812 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
1813 | do_print("Signature was not FILE\n");
1814 | bs->FreePool(file);
1815 | return EFI_INVALID_PARAMETER;
1816 | }
1817 |
1818 | Status = process_fixups(&file->MultiSectorHeader, ino.vol.file_record_size,
1819 | ino.vol.boot_sector->BytesPerSector);
1820 |
1821 | if (EFI_ERROR(Status)) {
1822 | bs->FreePool(file);
1823 | do_print_error("process_fixups", Status);
1824 | return Status;
1825 | }
1826 |
1827 | memset(&ino.standard_info, 0, sizeof(STANDARD_INFORMATION));
1828 |
1829 | Status = EFI_SUCCESS;
1830 |
1831 | Status2 = loop_through_atts(ino.vol, ino.ino, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view res_data, u16string_view att_name) -> bool {
1832 | switch (att.TypeCode) {
1833 | case ntfs_attribute::STANDARD_INFORMATION:
1834 | if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
1835 | size_t to_copy = res_data.size();
1836 |
1837 | if (to_copy > sizeof(STANDARD_INFORMATION))
1838 | to_copy = sizeof(STANDARD_INFORMATION);
1839 |
1840 | memcpy(&ino.standard_info, res_data.data(), to_copy);
1841 | }
1842 | break;
1843 |
1844 | case ntfs_attribute::FILE_NAME:
1845 | if (att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM) {
1846 | const auto& fn = *(FILE_NAME*)res_data.data();
1847 |
1848 | if (res_data.size() >= offsetof(FILE_NAME, EaSize))
1849 | ino.is_dir = fn.FileAttributes & FILE_ATTRIBUTE_DIRECTORY_MFT;
1850 | }
1851 |
1852 | break;
1853 |
1854 | case ntfs_attribute::INDEX_ALLOCATION:
1855 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
1856 | ino.size = att.Form.Nonresident.FileSize;
1857 | ino.phys_size = att.Form.Nonresident.AllocatedLength;
1858 |
1859 | Status = read_mappings(ino.vol, att, &ino.index_mappings);
1860 | if (EFI_ERROR(Status)) {
1861 | do_print_error("read_mappings", Status);
1862 | return false;
1863 | }
1864 | }
1865 | break;
1866 |
1867 | case ntfs_attribute::INDEX_ROOT:
1868 | if (att_name == u"$I30" && att.FormCode == NTFS_ATTRIBUTE_FORM::RESIDENT_FORM && !res_data.empty() && !ino.index_root) {
1869 | Status = bs->AllocatePool(EfiBootServicesData, res_data.size(), (void**)&ino.index_root);
1870 | if (EFI_ERROR(Status)) {
1871 | do_print_error("AllocatePool", Status);
1872 | return false;
1873 | }
1874 |
1875 | memcpy(ino.index_root, res_data.data(), res_data.size());
1876 | }
1877 | break;
1878 |
1879 | case ntfs_attribute::DATA:
1880 | if (att_name.empty()) {
1881 | switch (att.FormCode) {
1882 | case NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM:
1883 | ino.size = att.Form.Nonresident.FileSize;
1884 | ino.phys_size = att.Form.Nonresident.AllocatedLength;
1885 | ino.vdl = att.Form.Nonresident.ValidDataLength;
1886 | break;
1887 |
1888 | case NTFS_ATTRIBUTE_FORM::RESIDENT_FORM:
1889 | ino.size = ino.phys_size = ino.vdl = att.Form.Resident.ValueLength;
1890 | break;
1891 | }
1892 | }
1893 | break;
1894 |
1895 | default:
1896 | break;
1897 | }
1898 |
1899 | return true;
1900 | });
1901 |
1902 | if (EFI_ERROR(Status2)) {
1903 | do_print_error("loop_through_atts", Status2);
1904 | Status = Status2;
1905 | }
1906 |
1907 | if (EFI_ERROR(Status)) {
1908 | if (ino.index_root) {
1909 | bs->FreePool(ino.index_root);
1910 | ino.index_root = nullptr;
1911 | }
1912 |
1913 | bs->FreePool(file);
1914 | return Status;
1915 | }
1916 |
1917 | ino.inode_loaded = true;
1918 |
1919 | bs->FreePool(file);
1920 |
1921 | return EFI_SUCCESS;
1922 | }
1923 |
1924 | static EFI_STATUS get_inode_file_info(inode& ino, UINTN* BufferSize, VOID* Buffer) {
1925 | EFI_STATUS Status;
1926 | unsigned int size = offsetof(EFI_FILE_INFO, FileName[0]) + sizeof(CHAR16);
1927 | EFI_FILE_INFO* info = (EFI_FILE_INFO*)Buffer;
1928 | u16string_view name;
1929 |
1930 | if (ino.name) {
1931 | name = u16string_view(ino.name, ino.name_len);
1932 |
1933 | if (auto bs = name.rfind(u'\\'); bs != u16string_view::npos)
1934 | name = u16string_view(name.data() + bs + 1, name.size() - bs - 1);
1935 |
1936 | size += name.size() * sizeof(char16_t);
1937 | }
1938 |
1939 | if (*BufferSize < size) {
1940 | *BufferSize = size;
1941 | return EFI_BUFFER_TOO_SMALL;
1942 | }
1943 |
1944 | if (!ino.inode_loaded) {
1945 | Status = load_inode(ino);
1946 | if (EFI_ERROR(Status)) {
1947 | do_print_error("load_inode", Status);
1948 | return Status;
1949 | }
1950 | }
1951 |
1952 | info->Size = size;
1953 | info->FileSize = ino.size;
1954 | info->PhysicalSize = ino.phys_size;
1955 | win_time_to_efi(ino.standard_info.CreationTime, &info->CreateTime);
1956 | win_time_to_efi(ino.standard_info.LastAccessTime, &info->LastAccessTime);
1957 | win_time_to_efi(ino.standard_info.LastWriteTime, &info->ModificationTime);
1958 | info->Attribute = win_attributes_to_efi(ino.standard_info.FileAttributes, ino.is_dir);
1959 |
1960 | if (!name.empty()) {
1961 | memcpy(info->FileName, name.data(), name.size() * sizeof(char16_t));
1962 | info->FileName[name.size()] = 0;
1963 | } else
1964 | info->FileName[0] = 0;
1965 |
1966 | return EFI_SUCCESS;
1967 | }
1968 |
1969 | static EFI_STATUS EFIAPI file_get_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN* BufferSize, VOID* Buffer) {
1970 | inode* ino = _CR(File, inode, proto);
1971 | EFI_GUID guid = EFI_FILE_INFO_ID;
1972 |
1973 | // FIXME - EFI_FILE_SYSTEM_INFO
1974 |
1975 | if (memcmp(InformationType, &guid, sizeof(EFI_GUID)))
1976 | return EFI_UNSUPPORTED;
1977 |
1978 | return get_inode_file_info(*ino, BufferSize, Buffer);
1979 | }
1980 |
1981 | static EFI_STATUS EFIAPI file_set_info(struct _EFI_FILE_HANDLE* File, EFI_GUID* InformationType, UINTN BufferSize, VOID* Buffer) {
1982 | UNUSED(File);
1983 | UNUSED(InformationType);
1984 | UNUSED(BufferSize);
1985 | UNUSED(Buffer);
1986 |
1987 | return EFI_UNSUPPORTED;
1988 | }
1989 |
1990 | static EFI_STATUS file_flush(struct _EFI_FILE_HANDLE* File) {
1991 | UNUSED(File);
1992 |
1993 | // nop
1994 |
1995 | return EFI_SUCCESS;
1996 | }
1997 |
1998 | static void populate_file_handle(EFI_FILE_PROTOCOL* h) {
1999 | h->Revision = EFI_FILE_PROTOCOL_REVISION;
2000 | h->Open = file_open;
2001 | h->Close = file_close;
2002 | h->Delete = file_delete;
2003 | h->Read = file_read;
2004 | h->Write = file_write;
2005 | h->GetPosition = file_get_position;
2006 | h->SetPosition = file_set_position;
2007 | h->GetInfo = file_get_info;
2008 | h->SetInfo = file_set_info;
2009 | h->Flush = file_flush;
2010 | }
2011 |
2012 | static EFI_STATUS EFIAPI open_volume(EFI_SIMPLE_FILE_SYSTEM_PROTOCOL* This, EFI_FILE_PROTOCOL** Root) {
2013 | EFI_STATUS Status;
2014 | volume* vol = _CR(This, volume, proto);
2015 | inode* ino;
2016 |
2017 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(inode), (void**)&ino);
2018 | if (EFI_ERROR(Status)) {
2019 | do_print_error("AllocatePool", Status);
2020 | return Status;
2021 | }
2022 |
2023 | memset(ino, 0, sizeof(inode));
2024 |
2025 | new (ino) inode(*vol);
2026 |
2027 | populate_file_handle(&ino->proto);
2028 |
2029 | ino->ino = NTFS_ROOT_DIR_INODE;
2030 |
2031 | *Root = &ino->proto;
2032 |
2033 | return EFI_SUCCESS;
2034 | }
2035 |
2036 | static EFI_STATUS read_mft(volume& vol) {
2037 | EFI_STATUS Status, Status2;
2038 | FILE_RECORD_SEGMENT_HEADER* mft;
2039 |
2040 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&mft);
2041 | if (EFI_ERROR(Status)) {
2042 | do_print_error("AllocatePool", Status);
2043 | return Status;
2044 | }
2045 |
2046 | Status = vol.block->ReadBlocks(vol.block, vol.block->Media->MediaId,
2047 | (vol.boot_sector->MFT * vol.boot_sector->BytesPerSector * vol.boot_sector->SectorsPerCluster) / vol.block->Media->BlockSize,
2048 | vol.file_record_size, mft);
2049 | if (EFI_ERROR(Status)) {
2050 | bs->FreePool(mft);
2051 | do_print_error("ReadBlocks", Status);
2052 | return Status;
2053 | }
2054 |
2055 | if (mft->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
2056 | do_print("Signature was not FILE\n");
2057 | bs->FreePool(mft);
2058 | return EFI_INVALID_PARAMETER;
2059 | }
2060 |
2061 | Status = process_fixups(&mft->MultiSectorHeader, vol.file_record_size,
2062 | vol.boot_sector->BytesPerSector);
2063 | if (EFI_ERROR(Status)) {
2064 | bs->FreePool(mft);
2065 | do_print_error("process_fixups", Status);
2066 | return Status;
2067 | }
2068 |
2069 | // read DATA mappings
2070 |
2071 | Status = EFI_INVALID_PARAMETER;
2072 |
2073 | Status2 = loop_through_atts(vol, NTFS_MFT_INODE, mft, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
2074 | if (att.TypeCode == ntfs_attribute::DATA && att_name.empty()) {
2075 | Status = read_mappings(vol, att, &vol.mft_mappings);
2076 | if (EFI_ERROR(Status))
2077 | do_print_error("read_mappings", Status);
2078 |
2079 | return false;
2080 | }
2081 |
2082 | return true;
2083 | });
2084 |
2085 | bs->FreePool(mft);
2086 |
2087 | if (EFI_ERROR(Status2)) {
2088 | do_print_error("loop_through_atts", Status2);
2089 | return Status2;
2090 | }
2091 |
2092 | return Status;
2093 | }
2094 |
2095 | volume::~volume() {
2096 | while (!IsListEmpty(&mft_mappings)) {
2097 | mapping* m = _CR(mft_mappings.Flink, mapping, list_entry);
2098 | RemoveEntryList(&m->list_entry);
2099 | bs->FreePool(m);
2100 | }
2101 |
2102 | bs->FreePool(boot_sector);
2103 | }
2104 |
2105 | static EFI_STATUS read_upcase(volume& vol) {
2106 | EFI_STATUS Status, Status2;
2107 | FILE_RECORD_SEGMENT_HEADER* file;
2108 | LIST_ENTRY mappings;
2109 | uint64_t size;
2110 |
2111 | InitializeListHead(&mappings);
2112 |
2113 | Status = bs->AllocatePool(EfiBootServicesData, vol.file_record_size, (void**)&file);
2114 | if (EFI_ERROR(Status)) {
2115 | do_print_error("AllocatePool", Status);
2116 | return Status;
2117 | }
2118 |
2119 | Status = read_from_mappings(vol, &vol.mft_mappings, NTFS_UPCASE_INODE * vol.file_record_size,
2120 | (uint8_t*)file, vol.file_record_size);
2121 | if (EFI_ERROR(Status)) {
2122 | bs->FreePool(file);
2123 | do_print_error("read_from_mappings", Status);
2124 | return Status;
2125 | }
2126 |
2127 | if (file->MultiSectorHeader.Signature != NTFS_FILE_SIGNATURE) {
2128 | do_print("Signature was not FILE\n");
2129 | bs->FreePool(file);
2130 | return EFI_INVALID_PARAMETER;
2131 | }
2132 |
2133 | Status = process_fixups(&file->MultiSectorHeader, vol.file_record_size,
2134 | vol.boot_sector->BytesPerSector);
2135 |
2136 | if (EFI_ERROR(Status)) {
2137 | bs->FreePool(file);
2138 | do_print_error("process_fixups", Status);
2139 | return Status;
2140 | }
2141 |
2142 | Status2 = loop_through_atts(vol, NTFS_UPCASE_INODE, file, [&](const ATTRIBUTE_RECORD_HEADER& att, string_view, u16string_view att_name) -> bool {
2143 | switch (att.TypeCode) {
2144 | case ntfs_attribute::DATA:
2145 | // assuming that $UpCase DATA can never be resident
2146 | if (att_name.empty() && att.FormCode == NTFS_ATTRIBUTE_FORM::NONRESIDENT_FORM) {
2147 | size = att.Form.Nonresident.AllocatedLength;
2148 | Status = read_mappings(vol, att, &mappings);
2149 |
2150 | if (EFI_ERROR(Status))
2151 | do_print_error("read_mappings", Status);
2152 |
2153 | return false;
2154 | }
2155 | break;
2156 |
2157 | default:
2158 | break;
2159 | }
2160 |
2161 | return true;
2162 | });
2163 |
2164 | if (EFI_ERROR(Status2)) {
2165 | do_print_error("loop_through_atts", Status2);
2166 | return Status2;
2167 | }
2168 |
2169 | if (EFI_ERROR(Status))
2170 | return Status;
2171 |
2172 | Status = read_from_mappings(vol, &mappings, 0, (uint8_t*)vol.upcase, min(size, (uint64_t)sizeof(vol.upcase)));
2173 |
2174 | if (EFI_ERROR(Status))
2175 | do_print_error("read_from_mappings", Status);
2176 |
2177 | while (!IsListEmpty(&mappings)) {
2178 | mapping* m = _CR(mappings.Flink, mapping, list_entry);
2179 | RemoveEntryList(&m->list_entry);
2180 | bs->FreePool(m);
2181 | }
2182 |
2183 | return Status;
2184 | }
2185 |
2186 | static EFI_STATUS EFIAPI get_arc_name(EFI_QUIBBLE_PROTOCOL* This, char* ArcName, UINTN* ArcNameLen) {
2187 | UNUSED(This);
2188 | UNUSED(ArcName);
2189 | UNUSED(ArcNameLen);
2190 |
2191 | return EFI_UNSUPPORTED;
2192 | }
2193 |
2194 | static EFI_STATUS get_driver_name(EFI_QUIBBLE_PROTOCOL* This, CHAR16* DriverName, UINTN* DriverNameLen) {
2195 | static const char16_t name[] = u"ntfs";
2196 |
2197 | UNUSED(This);
2198 |
2199 | if (*DriverNameLen < sizeof(name)) {
2200 | *DriverNameLen = sizeof(name);
2201 | return EFI_BUFFER_TOO_SMALL;
2202 | }
2203 |
2204 | *DriverNameLen = sizeof(name);
2205 |
2206 | memcpy(DriverName, name, sizeof(name));
2207 |
2208 | return EFI_SUCCESS;
2209 | }
2210 |
2211 | static EFI_STATUS EFIAPI drv_start(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
2212 | EFI_DEVICE_PATH_PROTOCOL* RemainingDevicePath) {
2213 | EFI_STATUS Status;
2214 | EFI_GUID disk_guid = EFI_DISK_IO_PROTOCOL_GUID;
2215 | EFI_GUID block_guid = EFI_BLOCK_IO_PROTOCOL_GUID;
2216 | EFI_GUID fs_guid = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_GUID;
2217 | EFI_GUID quibble_guid = EFI_QUIBBLE_PROTOCOL_GUID;
2218 | EFI_BLOCK_IO_PROTOCOL* block;
2219 | uint32_t sblen;
2220 | NTFS_BOOT_SECTOR* sb;
2221 | EFI_DISK_IO_PROTOCOL* disk_io;
2222 | volume* vol;
2223 |
2224 | UNUSED(RemainingDevicePath);
2225 |
2226 | Status = bs->OpenProtocol(ControllerHandle, &block_guid, (void**)&block, This->DriverBindingHandle,
2227 | ControllerHandle, EFI_OPEN_PROTOCOL_GET_PROTOCOL);
2228 | if (EFI_ERROR(Status))
2229 | return Status;
2230 |
2231 | if (block->Media->BlockSize == 0) {
2232 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2233 | return EFI_UNSUPPORTED;
2234 | }
2235 |
2236 | Status = bs->OpenProtocol(ControllerHandle, &disk_guid, (void**)&disk_io, This->DriverBindingHandle,
2237 | ControllerHandle, EFI_OPEN_PROTOCOL_BY_DRIVER);
2238 | if (EFI_ERROR(Status)) {
2239 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2240 | return Status;
2241 | }
2242 |
2243 | // FIXME - FAT driver also claims DISK_IO 2 protocol - do we need to?
2244 |
2245 | sblen = sector_align(sizeof(NTFS_BOOT_SECTOR), block->Media->BlockSize);
2246 |
2247 | Status = bs->AllocatePool(EfiBootServicesData, sblen, (void**)&sb);
2248 | if (EFI_ERROR(Status)) {
2249 | do_print_error("AllocatePool", Status);
2250 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2251 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2252 | return Status;
2253 | }
2254 |
2255 | // read superblock
2256 |
2257 | Status = block->ReadBlocks(block, block->Media->MediaId, 0, sblen, sb);
2258 | if (EFI_ERROR(Status)) {
2259 | bs->FreePool(sb);
2260 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2261 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2262 | return Status;
2263 | }
2264 |
2265 | if (memcmp(sb->FsName, NTFS_FS_NAME, sizeof(NTFS_FS_NAME) - 1)) { // not NTFS
2266 | bs->FreePool(sb);
2267 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2268 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2269 | return EFI_UNSUPPORTED;
2270 | }
2271 |
2272 | Status = bs->AllocatePool(EfiBootServicesData, sizeof(volume), (void**)&vol);
2273 | if (EFI_ERROR(Status)) {
2274 | do_print_error("AllocatePool", Status);
2275 | bs->FreePool(sb);
2276 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2277 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2278 | return Status;
2279 | }
2280 |
2281 | memset(vol, 0, sizeof(volume));
2282 |
2283 | vol->proto.Revision = EFI_SIMPLE_FILE_SYSTEM_PROTOCOL_REVISION;
2284 | vol->proto.OpenVolume = open_volume;
2285 | vol->boot_sector = sb;
2286 | vol->controller = ControllerHandle;
2287 | vol->block = block;
2288 | vol->disk_io = disk_io;
2289 |
2290 | if (sb->ClustersPerMFTRecord < 0)
2291 | vol->file_record_size = 1ull << -sb->ClustersPerMFTRecord;
2292 | else
2293 | vol->file_record_size = (uint64_t)sb->BytesPerSector * (uint64_t)sb->SectorsPerCluster * (uint64_t)sb->ClustersPerMFTRecord;
2294 |
2295 | InitializeListHead(&vol->mft_mappings);
2296 |
2297 | Status = read_mft(*vol);
2298 | if (EFI_ERROR(Status)) {
2299 | do_print_error("read_mft", Status);
2300 | vol->volume::~volume();
2301 | bs->FreePool(vol);
2302 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2303 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2304 | return Status;
2305 | }
2306 |
2307 | Status = read_upcase(*vol);
2308 | if (EFI_ERROR(Status)) {
2309 | do_print_error("read_upcase", Status);
2310 | vol->volume::~volume();
2311 | bs->FreePool(vol);
2312 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2313 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2314 | return Status;
2315 | }
2316 |
2317 | vol->quibble_proto.GetArcName = get_arc_name;
2318 | vol->quibble_proto.GetWindowsDriverName = get_driver_name;
2319 |
2320 | Status = bs->InstallMultipleProtocolInterfaces(&ControllerHandle, &fs_guid, &vol->proto,
2321 | &quibble_guid, &vol->quibble_proto, nullptr);
2322 | if (EFI_ERROR(Status)) {
2323 | do_print_error("InstallMultipleProtocolInterfaces", Status);
2324 | vol->volume::~volume();
2325 | bs->FreePool(vol);
2326 | bs->CloseProtocol(ControllerHandle, &block_guid, This->DriverBindingHandle, ControllerHandle);
2327 | bs->CloseProtocol(ControllerHandle, &disk_guid, This->DriverBindingHandle, ControllerHandle);
2328 | return Status;
2329 | }
2330 |
2331 | return EFI_SUCCESS;
2332 | }
2333 |
2334 | static EFI_STATUS EFIAPI drv_stop(EFI_DRIVER_BINDING_PROTOCOL* This, EFI_HANDLE ControllerHandle,
2335 | UINTN NumberOfChildren, EFI_HANDLE* ChildHandleBuffer) {
2336 | UNUSED(This);
2337 | UNUSED(ControllerHandle);
2338 | UNUSED(NumberOfChildren);
2339 | UNUSED(ChildHandleBuffer);
2340 |
2341 | // FIXME - make this work(?)
2342 |
2343 | return EFI_INVALID_PARAMETER;
2344 | }
2345 |
2346 | static void get_info_protocol(EFI_HANDLE image_handle) {
2347 | EFI_GUID guid = EFI_QUIBBLE_INFO_PROTOCOL_GUID;
2348 | EFI_HANDLE* handles = NULL;
2349 | UINTN count;
2350 | EFI_STATUS Status;
2351 |
2352 | Status = bs->LocateHandleBuffer(ByProtocol, &guid, NULL, &count, &handles);
2353 | if (EFI_ERROR(Status))
2354 | return;
2355 |
2356 | if (count == 0) {
2357 | bs->FreePool(handles);
2358 | return;
2359 | }
2360 |
2361 | for (unsigned int i = 0; i < count; i++) {
2362 | Status = bs->OpenProtocol(handles[i], &guid, (void**)&info_proto, image_handle, NULL,
2363 | EFI_OPEN_PROTOCOL_BY_HANDLE_PROTOCOL);
2364 | if (EFI_ERROR(Status))
2365 | continue;
2366 |
2367 | break;
2368 | }
2369 |
2370 | bs->FreePool(handles);
2371 | }
2372 |
2373 | extern "C"
2374 | EFI_STATUS EFIAPI efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE* SystemTable) {
2375 | EFI_STATUS Status;
2376 | EFI_GUID guid = EFI_DRIVER_BINDING_PROTOCOL_GUID;
2377 |
2378 | systable = SystemTable;
2379 | bs = SystemTable->BootServices;
2380 |
2381 | get_info_protocol(ImageHandle);
2382 |
2383 | drvbind.Supported = drv_supported;
2384 | drvbind.Start = drv_start;
2385 | drvbind.Stop = drv_stop;
2386 | drvbind.Version = 0x10;
2387 | drvbind.ImageHandle = ImageHandle;
2388 | drvbind.DriverBindingHandle = ImageHandle;
2389 |
2390 | Status = bs->InstallProtocolInterface(&drvbind.DriverBindingHandle, &guid,
2391 | EFI_NATIVE_INTERFACE, &drvbind);
2392 | if (EFI_ERROR(Status)) {
2393 | do_print_error("InstallProtocolInterface", Status);
2394 | return Status;
2395 | }
2396 |
2397 | return EFI_SUCCESS;
2398 | }
2399 |
--------------------------------------------------------------------------------