├── .clang-format ├── .gitattributes ├── .github └── workflows │ ├── build.yml │ └── release.yml ├── .gitignore ├── 3rdparty └── libsais-LICENSE ├── CMakeLists.txt ├── LICENSE ├── Makefile.am ├── NEWS ├── PORTING.md ├── README.md ├── bootstrap.sh ├── build-aux ├── ax_build_date_epoch.m4 ├── ax_check_compile_flag.m4 ├── ax_progvar.m4 ├── ax_pthread.m4 ├── ax_subst_man_date.m4 ├── ax_subst_transformed_package_name.m4 └── git-version-gen ├── bunzip3 ├── bunzip3.1 ├── bz3cat ├── bz3cat.1 ├── bz3grep ├── bz3grep.1.in ├── bz3less ├── bz3less.1.in ├── bz3more ├── bz3more.1.in ├── bz3most ├── bz3most.1.in ├── bzip3.1.in ├── bzip3.pc.in ├── configure.ac ├── doc ├── bzip3_format.md └── overview.md ├── etc ├── BENCHMARKS.md ├── benchmark.png └── benchmark.svg ├── examples ├── fuzz-decode-block.c ├── fuzz-decompress.c ├── fuzz-round-trip.c ├── hl-api.c ├── shakespeare.txt ├── shakespeare.txt.bz3 └── standard_test_files │ ├── 63_byte_file.bin │ ├── 65_byte_file.bin │ └── readme.txt ├── include ├── common.h ├── libbz3.h ├── libsais.h └── yarg.h └── src ├── libbz3.c └── main.c /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | IndentPPDirectives: BeforeHash 4 | IndentWidth: '4' 5 | DerivePointerAlignment: false 6 | PointerAlignment: Middle 7 | TabWidth: '4' 8 | UseTab: Never 9 | Cpp11BracedListStyle: false 10 | ColumnLimit: 120 11 | 12 | ... 
13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | build-aux/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | 7 | dist: 8 | name: Package 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Install libtool v2.4.7 13 | run: | 14 | wget -q https://ftpmirror.gnu.org/libtool/libtool-2.4.7.tar.gz 15 | tar xvfz libtool-2.4.7.tar.gz 16 | cd libtool-2.4.7 17 | ./configure 18 | make 19 | sudo make install 20 | - name: Configure 21 | run: | 22 | ./bootstrap.sh 23 | ./configure 24 | - name: Make source package 25 | run: make VERSION=${{ github.sha }} dist 26 | - name: Upload source package artifact 27 | uses: actions/upload-artifact@v4 28 | with: 29 | name: bzip3-${{ github.sha }} 30 | path: bzip3-${{ github.sha }}.tar.gz 31 | - name: Run distcheck 32 | run: make distcheck 33 | 34 | build: 35 | name: Build Matrix 36 | needs: [ dist ] 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | platform: [ ubuntu-latest, macos-latest ] 41 | compiler: [ clang, gcc ] 42 | feature: [ with-pthread, without-pthread ] 43 | runs-on: ${{ matrix.platform }} 44 | steps: 45 | - name: Download source package artifact 46 | uses: actions/download-artifact@v4 47 | with: 48 | name: bzip3-${{ github.sha }} 49 | - name: Extract source package 50 | run: tar --strip-components=1 -xf bzip3-${{ github.sha}}.tar.gz 51 | - name: Fetch examples 52 | run: | 53 | mkdir examples 54 | cd examples 55 | wget https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt \ 56 | https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt.bz3 57 | - name: Configure 58 | 
run: ./configure CC=${{ matrix.compiler }} --${{ matrix.feature }} 59 | - name: Make 60 | run: make 61 | - name: Check 62 | run: make roundtrip test 63 | 64 | build-archs: 65 | name: Build Matrix for non-x86 architectures (Debian Bookworm) 66 | needs: [ dist ] 67 | strategy: 68 | fail-fast: false 69 | matrix: 70 | compiler: [ clang, gcc ] 71 | feature: [ with-pthread, without-pthread ] 72 | arch: [ armv6, armv7, aarch64, s390x, ppc64le ] 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: Download source package artifact 76 | uses: actions/download-artifact@v4 77 | with: 78 | name: bzip3-${{ github.sha }} 79 | - name: Extract source package 80 | run: tar --strip-components=1 -xf bzip3-${{ github.sha}}.tar.gz 81 | - name: Fetch examples 82 | run: | 83 | mkdir examples 84 | cd examples 85 | wget https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt \ 86 | https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt.bz3 87 | - uses: uraimo/run-on-arch-action@v2 88 | name: Run in the container 89 | with: 90 | arch: ${{ matrix.arch }} 91 | distro: bookworm 92 | shell: /bin/sh 93 | dockerRunArgs: | 94 | --volume "${PWD}:/bzip3" 95 | install: | 96 | apt update -q -y 97 | apt install -q -y clang gcc make 98 | run: | 99 | cd /bzip3 100 | ./configure CC=${{ matrix.compiler }} --${{ matrix.feature }} --disable-arch-native --disable-link-time-optimization 101 | make && make roundtrip test 102 | build-archs-ubuntu: 103 | name: Build Matrix for non-x86 architectures (Ubuntu Latest) 104 | needs: [ dist ] 105 | strategy: 106 | fail-fast: false 107 | matrix: 108 | compiler: [ clang, gcc ] 109 | feature: [ with-pthread, without-pthread ] 110 | arch: [ riscv64 ] 111 | runs-on: ubuntu-latest 112 | steps: 113 | - name: Download source package artifact 114 | uses: actions/download-artifact@v4 115 | with: 116 | name: bzip3-${{ github.sha }} 117 | - name: Extract source package 118 | run: tar --strip-components=1 -xf bzip3-${{ 
github.sha}}.tar.gz 119 | - name: Fetch examples 120 | run: | 121 | mkdir examples 122 | cd examples 123 | wget https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt \ 124 | https://github.com/kspalaiologos/bzip3/raw/${{ github.sha }}/examples/shakespeare.txt.bz3 125 | - uses: uraimo/run-on-arch-action@v2 126 | name: Run in the container 127 | with: 128 | arch: ${{ matrix.arch }} 129 | distro: ubuntu_latest 130 | shell: /bin/sh 131 | dockerRunArgs: | 132 | --volume "${PWD}:/bzip3" 133 | install: | 134 | apt update -q -y 135 | apt install -q -y clang gcc make 136 | run: | 137 | cd /bzip3 138 | ./configure CC=${{ matrix.compiler }} --${{ matrix.feature }} --disable-arch-native --disable-link-time-optimization 139 | make && make roundtrip test 140 | 141 | cmake: 142 | name: Build with CMake 143 | runs-on: ubuntu-latest 144 | steps: 145 | - uses: actions/checkout@v3 146 | - name: CMake 147 | run: cmake -B build 148 | - name: Make 149 | run: make -C build 150 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*.*.*' 7 | 8 | jobs: 9 | 10 | ghrelease: 11 | name: Publish sources on GitHub Release 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: Configure 17 | run: | 18 | ./bootstrap.sh 19 | ./configure 20 | - name: Build source packages 21 | run: | 22 | make dist 23 | bzip2 -d -k bzip3-${{ github.ref_name }}.tar.bz2 24 | zstd -19 bzip3-${{ github.ref_name }}.tar 25 | 7z a bzip3-${{ github.ref_name}}.tar{.7z,} 26 | - name: Build a binary (for dogfooding) 27 | run: make 28 | - name: Create a dogfood package 29 | run: | 30 | ./bzip3 -e bzip3-${{ github.ref_name }}.tar 31 | - name: Publish Release 32 | uses: softprops/action-gh-release@v1 33 | with: 34 | files: | 35 | bzip3-${{ 
github.ref_name }}.tar 36 | bzip3-${{ github.ref_name }}.tar.7z 37 | bzip3-${{ github.ref_name }}.tar.bz2 38 | bzip3-${{ github.ref_name }}.tar.bz3 39 | bzip3-${{ github.ref_name }}.tar.gz 40 | bzip3-${{ github.ref_name }}.tar.xz 41 | bzip3-${{ github.ref_name }}.tar.zst 42 | bzip3-${{ github.ref_name }}.zip 43 | - name: Upload source package artifact 44 | uses: actions/upload-artifact@v4 45 | with: 46 | name: bzip3-${{ github.ref_name }} 47 | path: bzip3-${{ github.ref_name }}.tar.gz 48 | 49 | binaries: 50 | name: Publish Binaries on GitHub Release 51 | needs: [ ghrelease ] 52 | runs-on: ubuntu-latest 53 | strategy: 54 | fail-fast: false 55 | matrix: 56 | target: 57 | - [ "x86_64-linux", "--enable-static-exe --disable-arch-native", "" ] 58 | - [ "x86_64", "CC=x86_64-w64-mingw32-gcc --host x86_64-w64-mingw32 --enable-static-exe --disable-arch-native", "gcc-mingw-w64-x86-64" ] 59 | - [ "i686", "CC=i686-w64-mingw32-gcc --host i686-w64-mingw32 --enable-static-exe --disable-arch-native", "gcc-mingw-w64-i686" ] 60 | steps: 61 | - name: Download source package artifact 62 | uses: actions/download-artifact@v4 63 | with: 64 | name: bzip3-${{ github.ref_name }} 65 | - name: Extract source package 66 | run: tar --strip-components=1 -xf bzip3-${{ github.ref_name }}.tar.gz 67 | - name: Install cross-compile dependencies 68 | if: ${{ matrix.target[2] }} 69 | run: | 70 | sudo apt-get update 71 | sudo apt-get install -y ${{ matrix.target[2] }} 72 | - name: Configure 73 | run: ./configure --bindir=/ --program-suffix=-${{ matrix.target[0] }} ${{ matrix.target[1] }} 74 | - name: Make 75 | run: | 76 | make 77 | make DESTDIR=$(pwd)/output install-exec 78 | - name: Publish binary 79 | uses: softprops/action-gh-release@v1 80 | with: 81 | files: | 82 | output/bzip3-${{ matrix.target[0] }}* 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Developer stuff 
2 | corpus/ 3 | *.bz3 4 | etc/bitflip 5 | 6 | # Editor stuff 7 | .vscode/ 8 | *~ 9 | tags 10 | 11 | # Generated 12 | *.o 13 | *.lo 14 | *.la 15 | *.so 16 | bzip3 17 | bzip3-* 18 | .version 19 | LICENSE2 20 | .version-prev 21 | LICENSE2 22 | 23 | # Autotools 24 | .deps/ 25 | .libs/ 26 | .dirstamp 27 | /aclocal.m4 28 | /config.log 29 | /config.status 30 | /configure 31 | /libtool 32 | Makefile 33 | Makefile.in 34 | autom4te.cache/ 35 | /build-aux/* 36 | !/build-aux/ax_build_date_epoch.m4 37 | !/build-aux/ax_check_compile_flag.m4 38 | !/build-aux/ax_pthread.m4 39 | !/build-aux/ax_progvar.m4 40 | !/build-aux/ax_subst_man_date.m4 41 | !/build-aux/ax_subst_transformed_package_name.m4 42 | !/build-aux/git-version-gen 43 | bzip3.pc 44 | 45 | examples/hl-api 46 | 47 | examples/compress-file 48 | 49 | examples/decompress-file 50 | 51 | examples/fuzz 52 | examples/afl_in 53 | examples/afl_out 54 | 55 | bz3grep.1 56 | 57 | bz3less.1 58 | 59 | bz3more.1 60 | 61 | bz3most.1 62 | 63 | bzip3.1 64 | -------------------------------------------------------------------------------- /3rdparty/libsais-LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 2 | 3 | project( 4 | bzip3 5 | VERSION 1.5.2 6 | DESCRIPTION "A better and stronger spiritual successor to BZip2" 7 | HOMEPAGE_URL "https://github.com/kspalaiologos/bzip3" 8 | LANGUAGES C) 9 | 10 | set(CMAKE_C_STANDARD 99) 11 | 12 | option(BUILD_SHARED_LIBS "Build libbz3 as a shared library" ON) 13 | option(BZIP3_BUILD_APPS "Build bzip3 applications" ON) 14 | option(BZIP3_ENABLE_PTHREAD "Enable use of pthread library" ON) 15 | option(BZIP3_ENABLE_ARCH_NATIVE "Enable CPU-specific optimizations" OFF) 16 | option(BZIP3_ENABLE_STATIC_EXE "Enable static builds of the executable" OFF) 17 | 18 | include(CheckCCompilerFlag) 19 | include(CheckSymbolExists) 20 | include(GNUInstallDirs) 21 | 22 | set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR}) 23 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 24 | 25 | set(prefix ${CMAKE_INSTALL_PREFIX}) 26 | set(exec_prefix ${CMAKE_INSTALL_PREFIX}) 27 | set(bindir 
${CMAKE_INSTALL_FULL_BINDIR}) 28 | set(libdir ${CMAKE_INSTALL_FULL_LIBDIR}) 29 | set(includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR}) 30 | set(PACKAGE ${CMAKE_PROJECT_NAME}) 31 | set(PACKAGE_VERSION ${PROJECT_VERSION}) 32 | configure_file(bzip3.pc.in ${CMAKE_CURRENT_BINARY_DIR}/bzip3.pc @ONLY) 33 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/bzip3.pc 34 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) 35 | 36 | if(BZIP3_ENABLE_PTHREAD) 37 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 38 | find_package(Threads REQUIRED) 39 | endif() 40 | 41 | if(BUILD_SHARED_LIBS) 42 | add_library(bz3 SHARED) 43 | else() 44 | add_library(bz3 STATIC) 45 | endif() 46 | target_sources(bz3 PRIVATE src/libbz3.c) 47 | target_compile_definitions(bz3 PUBLIC VERSION="${PROJECT_VERSION}") 48 | target_include_directories( # NOTE(review): generator expressions restored after extraction stripped them; confirm paths 49 | bz3 50 | PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> 51 | INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) 52 | if(BZIP3_ENABLE_PTHREAD) 53 | target_compile_definitions(bz3 PUBLIC PTHREAD) 54 | target_link_libraries(bz3 Threads::Threads) 55 | endif() 56 | if(BZIP3_ENABLE_ARCH_NATIVE) 57 | check_c_compiler_flag(-march=native CC_SUPPORT_MARCH_NATIVE_FLAG) 58 | check_c_compiler_flag(-mtune=native CC_SUPPORT_MTUNE_NATIVE_FLAG) 59 | if(CC_SUPPORT_MARCH_NATIVE_FLAG AND CC_SUPPORT_MTUNE_NATIVE_FLAG) 60 | target_compile_options(bz3 PUBLIC -march=native -mtune=native) # code-generation flags must reach the compiler, not only the linker 61 | else() 62 | message( 63 | FATAL_ERROR 64 | "Compiler does not support native optimizations, disable `BZIP3_ENABLE_ARCH_NATIVE`" 65 | ) 66 | endif() 67 | endif() 68 | set_target_properties( 69 | bz3 70 | PROPERTIES OUTPUT_NAME bzip3 71 | SOVERSION "0.0.0" 72 | PUBLIC_HEADER include/libbz3.h 73 | VERSION "0") 74 | if(BUILD_SHARED_LIBS) 75 | set_target_properties(bz3 PROPERTIES POSITION_INDEPENDENT_CODE ON) 76 | endif() 77 | install( 78 | TARGETS bz3 79 | EXPORT ${CMAKE_PROJECT_NAME}-config 80 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 81 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 82 | PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 83 | install( 84 | EXPORT 
${CMAKE_PROJECT_NAME}-config 85 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${CMAKE_PROJECT_NAME} 86 | NAMESPACE ${CMAKE_PROJECT_NAME}::) 87 | 88 | if(BZIP3_BUILD_APPS) 89 | add_executable(bzip3) 90 | target_sources(bzip3 PRIVATE src/main.c) 91 | if(BZIP3_ENABLE_STATIC_EXE) 92 | if(BUILD_SHARED_LIBS) 93 | message( 94 | FATAL_ERROR 95 | "libbz3 is not built as a static library, disable `BUILD_SHARED_LIBS`" 96 | ) 97 | endif() 98 | check_c_compiler_flag(-static CC_SUPPORT_STATIC_FLAG) 99 | if(CC_SUPPORT_STATIC_FLAG) 100 | target_link_options(bzip3 PRIVATE -static) 101 | else() 102 | message( 103 | FATAL_ERROR 104 | "Compiler does not support static linking, disable `BZIP3_ENABLE_STATIC_EXE`" 105 | ) 106 | endif() 107 | endif() 108 | target_link_libraries(bzip3 PRIVATE bz3) 109 | install(TARGETS bzip3 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) 110 | 111 | set(BZIP3_APP_SCRIPTS bunzip3 bz3cat bz3grep bz3less bz3more bz3most) 112 | install(PROGRAMS ${BZIP3_APP_SCRIPTS} DESTINATION ${CMAKE_INSTALL_BINDIR}) 113 | 114 | if(UNIX) 115 | set(BZIP3_MANS 116 | bunzip3.1 117 | bz3cat.1 118 | bz3grep.1 119 | bz3less.1 120 | bz3more.1 121 | bz3most.1 122 | bzip3.1) 123 | foreach(BZIP3_MAN ${BZIP3_MANS}) 124 | if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${BZIP3_MAN}.in) # EXISTS is only well-defined for full paths 125 | string(TIMESTAMP MAN_DATE "%d %B %Y" UTC) 126 | set(TRANSFORMED_PACKAGE_NAME ${CMAKE_PROJECT_NAME}) 127 | set(MAN_DATE ${MAN_DATE}) 128 | set(VERSION ${PROJECT_VERSION}) 129 | configure_file(${BZIP3_MAN}.in ${CMAKE_CURRENT_BINARY_DIR}/${BZIP3_MAN} 130 | @ONLY) 131 | else() 132 | configure_file(${BZIP3_MAN} ${CMAKE_CURRENT_BINARY_DIR}/${BZIP3_MAN} 133 | COPYONLY) 134 | endif() 135 | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${BZIP3_MAN} 136 | DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) 137 | endforeach() 138 | endif() 139 | endif() 140 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 
2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 
43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. 
If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I build-aux 2 | 3 | AM_CFLAGS = -I$(top_srcdir)/include 4 | 5 | EXTRA_DIST = LICENSE 3rdparty/libsais-LICENSE PORTING.md README.md build-aux/git-version-gen CMakeLists.txt 6 | 7 | pkgconfig_DATA = bzip3.pc 8 | 9 | include_HEADERS = include/libbz3.h 10 | noinst_HEADERS = include/common.h \ 11 | include/libsais.h \ 12 | include/yarg.h 13 | 14 | lib_LTLIBRARIES = libbzip3.la 15 | libbzip3_la_SOURCES = src/libbz3.c 16 | libbzip3_la_LDFLAGS = -no-undefined -version-info 1:0:0 17 | 18 | bin_PROGRAMS = bzip3 19 | bzip3_CFLAGS = $(AM_CFLAGS) 20 | bzip3_SOURCES = src/main.c 21 | if ENABLE_STATIC 22 | bzip3_SOURCES += $(libbzip3_la_SOURCES) 23 | else 24 | bzip3_LDADD = libbzip3.la 25 | endif 26 | 27 | dist_man_MANS = bzip3.1 bz3cat.1 bz3more.1 bz3less.1 bz3most.1 bz3grep.1 bunzip3.1 28 | 29 | dist_bin_SCRIPTS = bz3cat bz3more bz3less bz3most bz3grep bunzip3 30 | 31 | CLEANFILES = 
$(bin_PROGRAMS) 32 | 33 | # End standard generic autotools stuff 34 | 35 | # Begin special handling for autoconf VERSION being updated on commit 36 | 37 | BUILT_SOURCES = .version 38 | CLEANFILES += $(BUILT_SOURCES) .version-prev 39 | 40 | src/bzip3-main.$(OBJEXT): .version 41 | 42 | _BRANCH_REF != $(AWK) '{print ".git/" $$2}' .git/HEAD 2>/dev/null ||: 43 | 44 | .version: $(_BRANCH_REF) 45 | @if [ -e "$(srcdir)/.tarball-version" ]; then \ 46 | printf "$(VERSION)" > $@; \ 47 | else \ 48 | touch "$@-prev"; \ 49 | if [ -e "$@" ]; then \ 50 | cp "$@" "$@-prev"; \ 51 | fi; \ 52 | ./build-aux/git-version-gen "$(srcdir)/.tarball-version" > $@; \ 53 | cmp -s "$@" "$@-prev" || autoreconf configure.ac --force; \ 54 | fi 55 | 56 | dist-hook: 57 | printf "$(VERSION)" > "$(distdir)/.tarball-version" 58 | 59 | # Begin developer convenience targets 60 | 61 | .PHONY: format 62 | format: $(bzip3_SOURCES) $(libbzip3_la_SOURCES) $(include_HEADERS) $(noinst_HEADERS) 63 | clang-format -i $^ examples/*.c 64 | 65 | .PHONY: cloc 66 | cloc: $(bzip3_SOURCES) $(libbzip3_la_SOURCES) $(include_HEADERS) $(noinst_HEADERS) 67 | cloc $^ 68 | 69 | CLEANFILES += LICENSE2 70 | .PHONY: roundtrip 71 | 72 | BZIP3 := bzip3$(EXEEXT) 73 | 74 | roundtrip: $(BZIP3) 75 | rm -f $(builddir)/LICENSE2 76 | ./$(BZIP3) -v -feb 6 $(srcdir)/LICENSE $(builddir)/LICENSE.bz3 77 | ./$(BZIP3) -v -d $(builddir)/LICENSE.bz3 $(builddir)/LICENSE2 78 | cmp $(srcdir)/LICENSE $(builddir)/LICENSE2 79 | -command -v md5sum >/dev/null 2>&1 && md5sum $(builddir)/LICENSE.bz3 80 | 81 | .PHONY: test 82 | test: $(BZIP3) 83 | ./$(BZIP3) -d < $(srcdir)/examples/shakespeare.txt.bz3 | cmp - $(srcdir)/examples/shakespeare.txt 84 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 2 | v1.1.2: 3 | * fix memory UB in libsais 4 | * restructure src/cm.c 5 | * block size check in src/libbz3.c 6 | * fix shift UB in lzp 7 | * 
-h/-v CLI switches 8 | * change maximum block size to ~512M 9 | 10 | v1.1.3: 11 | * fix a serious stdin/stdout CRLF bug on Windows that corrupted the data. 12 | * imply `-c` when a stream isn't opened thus preventing potential UB. 13 | * bzip3 file format documentation. 14 | * increase the maximum amount of parallel workers to 24. 15 | * prevent accidentally overwriting output; add the `-f` command-line flag. 16 | 17 | v1.1.4: 18 | * increase the maximum allowed amount of parallel workers to 64. 19 | * clean up the cm code 20 | * set up pkg-config support 21 | * CLI robustness improvements 22 | 23 | v1.1.5: 24 | * rework the argument parsing schema to resemble UNIX utilities more. 25 | * make compression the default action 26 | * replace -v with -V for version information 27 | * manual pages 28 | * bz3cat, bz3more, bz3less, bz3grep, unbzip3 utilities 29 | * disable architecture-specific optimisations in github release builds 30 | 31 | v1.1.6: 32 | * fixed tickets: #53, #50, #45, #46 (portability issues & a verbatim block 33 | decompression diagnostic issue) 34 | * package unbzip3 manpage 35 | * flockfile/funlockfile calls for WIN32 36 | 37 | v1.1.7: 38 | * rename unbzip3 as bunzip3 for bzip2/gzip/lzip/... compatibility. 39 | * high level api for libbzip3: `bz3_bound`, `bz3_compress` and `bz3_decompress`. 40 | * more robust decompression; safety checks for the RLE and LZP steps. 41 | * documentation for the frame format. 42 | * examples of bzip3 API usage, AFL fuzzing instructions. 43 | * `bz3_version` API function 44 | * more robust I/O handling and fsync (linux only) calls to ensure a correct I/O transaction. 45 | 46 | v1.1.8: 47 | * add the Apache-2.0 license of `libsais`. 48 | 49 | v1.2.0: 50 | * alias `-z` to `-e` (compatibility with bzip2). 51 | * version bzip3 library. 52 | * dynamically link library to the tool (eliminating the libbz3.c duplication in tool and library). 53 | * add verbose output (via `-v`). 54 | * add version information to the manual pages. 
55 | * set `rpath` in the Makefile to solve an issue with /usr/local/lib not being present in the dynamic linker search path. 56 | 57 | v1.2.1: 58 | * fix an LZP decompression bug when a match occurred before block boundary. 59 | * don't set rpath in the Makefile 60 | * fix build warnings from -v 61 | * add `most` support 62 | * windows binary mode fix 63 | 64 | v1.2.2: 65 | * safety fixes for the LZP pass. 66 | * add the `-k` compatibility flag. 67 | * use `env` to detect the shell in bzip3 utility scripts 68 | * update libtool to v2.4.7 69 | 70 | v1.2.3: 71 | * fix an important regression introduced in pull request #55 regarding I/O in main.c 72 | * slightly enlarge the SAIS buffer beyond the documented recommended size to avoid some memory errors in libsais 73 | * properly handle a scenario where the individual block original size is larger than the block size declared in the file header 74 | * further security fixes: strict check for size_src overflow of badly bounded b1, store mode: checking for truncation 75 | mRLE: decoding bounds, bz3_bound in bz3_decompress 76 | 77 | v1.3.0: 78 | * resolve alignment issues on SPARC/s390x. 79 | * fix the security issues arising from libsais. 80 | * due to these changes, updating is strongly encouraged. 81 | 82 | v1.3.1: 83 | * Verbose mode in the tool now prints the extra statistics also during testing, not just encoding or decoding. 84 | * Update the CI pipeline to Debian Bullseye. 85 | * Fix a minor issue with side effects in RLE decoding. 86 | * Explicitly disable `-march=native` for releases. 87 | * Fix a bug in the tool reported by Adam Borowski regarding -t/-d input buffer checks. 88 | * Fix an issue with the current input offset not being taken into account in bz3_compress. 89 | 90 | v1.3.2: 91 | * Add the `-r` option ("recovery"). If a file fails to decompress fully (e.g. due to a CRC error), ignore it and write the 92 | file regardless. 93 | * Add preliminary CMake support. 
94 | * Fix the include guard macro name to work with pedantic compilers. 95 | * Fix the shift direction in the crc32 check function. Because of a programming mistake, v1.3.0 and v1.3.1 96 | introduced a breaking change to the CRC calculation code. The change has been reverted in this release. 97 | While the archives created with these versions of bzip3 will fail to regularly decompress due to a checksum 98 | error, using the recovery option as `bzip3 -rd` to decompress will 99 | restore their original contents. 100 | 101 | v1.4.0: 102 | * Wrap up all the changes from v1.3; bump up the minor version release. 103 | * Various changes for CMake; bundle CMake files with the autotools dist tarball. 104 | * Support for linking with C++. 105 | 106 | v1.5.0: 107 | * Add `--rm` option that removes the original files after successful operation. 108 | * `bz3grep`: display the correct program name on invalid invocation. 109 | * Improve the docstrings regarding the use of `bz3_bound` with block decompression. 110 | * Tighter LZP/RLE bounds in the decoder => slightly improved compression with no 111 | observable format change. 112 | * Improve the documentation and available tooling for fuzzing. 113 | * Rewritten the file format documentation. 114 | * Add the `bz3_min_memory_needed` API. 115 | * BREAKING: Change the signature of `bz3_decode_block` and `bz3_encode_block`. 116 | Refer to the documentation for the new usage. This version is not ABI-compatible 117 | with the previous versions of the package. 118 | * Fix: the file `$dir.bz3` being created when invoked as `bzip3 -e $dir` where 119 | `$dir` is a directory. 120 | 121 | v1.5.1: 122 | * Replace getopt-based parsing with `yarg`. 123 | * Change the soname to indicate a difference in the ABI. 124 | 125 | v1.5.2: 126 | * batch mode: fall back to stdin input with no auxiliary arguments. 127 | * bz3_compress (API): better bound estimation 128 | * yarg: oom handling; stop relying on (GNU) asprintf, use the baked in variant. 
129 | * pkg-config: Add License variable 130 | * bz3_decompress (API): fix a memory leak 131 | -------------------------------------------------------------------------------- /PORTING.md: -------------------------------------------------------------------------------- 1 | 2 | ## General problems 3 | 4 | 1. `Makefile.am:8: error: 'pkgconfig_DATA' is used but 'pkgconfigdir' is undefined` => please install pkgconfig. 5 | 6 | ## Windows 7 | 8 | Cross-compiling Windows binaries is supported: 9 | 10 | ```console 11 | # For x86_64 (64bit) 12 | $ ./configure CC=x86_64-w64-mingw32-gcc --host x86_64-w64-mingw32 --enable-static-exe 13 | $ make 14 | 15 | # For i686 (32bit) 16 | $ ./configure CC=i686-w64-mingw32-gcc --host i686-w64-mingw32 --enable-static-exe 17 | $ make 18 | ``` 19 | 20 | Static builds are recommended to avoid the pthread dynamic linking issue. If a dynamic library is desired, consider defining `BZIP3_DLL_EXPORT` or `BZIP3_DLL_IMPORT`. 21 | 22 | ## M1 MacOS 23 | 24 | Make sure that you run `./configure` with `--disable-arch-native`. 25 | 26 | ## Emscripten 27 | 28 | Assuming that asm.js code is desired: 29 | 30 | ``` 31 | emconfigure ./configure --without-pthread --host none-none-none CC=emcc "CFLAGS=-O2 -DBZIP3_VISIBLE=\"__attribute__((used))\"" 32 | make src/bzip3-libbz3.o 33 | emcc -O2 src/bzip3-libbz3.o -o libbz3.js -sWASM=0 --memory-init-file 0 -sFILESYSTEM=0 -sALLOW_MEMORY_GROWTH -s 'EXPORTED_RUNTIME_METHODS=["UTF8ToString"]' 34 | ``` 35 | 36 | asm.js code size: 118KB (v1.1.7), 34K gzipped. 37 | wasm+js stub code size: 76KB (v1.1.7), 26K gzipped. 
38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BZip3 2 | 3 | [![Build](https://github.com/kspalaiologos/bzip3/actions/workflows/build.yml/badge.svg)](https://github.com/kspalaiologos/bzip3/actions/workflows/build.yml) 4 | 5 | A better, faster and stronger spiritual successor to BZip2. Features higher compression ratios and better performance thanks to an order-0 context mixing entropy coder, a fast Burrows-Wheeler transform code making use of suffix arrays and an RLE with Lempel Ziv+Prediction pass based on LZ77-style string matching and PPM-style context modeling. 6 | 7 | Like its ancestor, **BZip3 excels at compressing text or code**. 8 | 9 | ## Installation 10 | 11 | ```console 12 | # If using a git clone (not needed for source packages), first... 13 | $ ./bootstrap.sh 14 | 15 | # All... 16 | $ ./configure 17 | $ make 18 | $ sudo make install 19 | ``` 20 | 21 | Alternatively, you might be able to install bzip3 using your system's package manager: 22 | 23 | [![Packaging status](https://repology.org/badge/vertical-allrepos/bzip3.svg?columns=3)](https://repology.org/project/bzip3/versions) 24 | 25 | On macOS, you can use [Homebrew](https://brew.sh) to easily install: 26 | 27 | ```console 28 | $ brew install bzip3 29 | ``` 30 | 31 | ## Perl source code benchmark 32 | 33 | First, I have downloaded every version of Perl5 ever released and decompressed them. 
34 | 35 | ```bash 36 | % wget -r -l1 -nH --cut-dirs=2 --no-parent -A.tar.gz --no-directories https://www.cpan.org/src/5.0/ 37 | % for g in *.gz; do gunzip $g; done 38 | % ls -la | wc -l 39 | 262 40 | ``` 41 | 42 | Then, I put all the resulting `.tar` files in a single `.tar` file and tried to compress it using various compressors: 43 | 44 | ``` 45 | xz -T16 -9 -k all.tar 10829.91s user 26.91s system 1488% cpu 14658M memory 12:09.24 total 46 | bzip2 -9 -k all.tar 981.78s user 9.77s system 95% cpu 8M memory 17:16.64 total 47 | bzip3 -e -b 256 -j 12 all.tar 2713.81s user 16.28s system 634% cpu 18301M memory 7:10.10 total 48 | bzip3 -e -b 511 -j 4 all.tar 717.65s user 12.19s system 170% cpu 12178M memory 7:08.65 total 49 | zstd -T12 -16 all.tar 4162.94s user 16.40s system 1056% cpu 687M memory 6:35.62 total 50 | ``` 51 | 52 | The results follow: 53 | 54 | | Method | Compressed size (bytes) | 55 | | ---------------- | -----------------------:| 56 | | LZMA (xz) | 2'056'645'240 | 57 | | bzip2 | 3'441'163'911 | 58 | | bzip3 -b 256 | 1'001'957'587 | 59 | | bzip3 -b 511 | 546'456'978 | 60 | | Zstandard | 3'076'143'660 | 61 | 62 | Finally, wall-clock decompression times (WD Blue HDD): 63 | 64 | | Method | Decompression time | 65 | | ---------------- | ------------------:| 66 | | LZMA (xz) | 4min 40s | 67 | | bzip2 | 9min 22s | 68 | | bzip3 (parallel) | 4min 06s | 69 | | Zstandard | 3min 51s | 70 | 71 | Then, I used `lrzip` to perform long-range deduplication on the original `.tar` file: 72 | 73 | ``` 74 | % time lrzip -n -o all_none.tar.lrz all.tar 75 | 546.17s user 160.87s system 102% cpu 10970M memory 11:28.00 total 76 | 77 | % time lrzip --lzma -o all_lzma.tar.lrz all.tar 78 | 702.16s user 161.87s system 122% cpu 10792M memory 11:44.83 total 79 | 80 | % time lrzip -b -o all_bzip2.tar.lrz all.tar 81 | 563.93s user 147.38s system 112% cpu 10970M memory 10:34.10 total 82 | ``` 83 | 84 | Finally, I compressed the resulting `all_none.tar.lrz` file using bzip3: 85 | 86 | ``` 87 
| % time bzip3 -e -b 256 -j 2 all_none.tar.lrz 88 | 32.05s user 0.76s system 146% cpu 2751M memory 22.411 total 89 | ``` 90 | 91 | The results follow: 92 | 93 | | Method | Compressed size (bytes) | 94 | | ---------------- | -----------------------:| 95 | | lrzip + bzip3 | 60'672'608 | 96 | | lrzip + lzma | 64'774'202 | 97 | | lrzip + bzip2 | 75'685'065 | 98 | 99 | For further benchmarks against Turbo-Range-Coder and BSC, check [powturbo's benchmark](https://github.com/powturbo/Turbo-Range-Coder) of bzip3, bzip2, bsc and others. 100 | 101 | ## Disclaimers 102 | 103 | **I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED.** 104 | 105 | Every compression of a file implies an assumption that the compressed file can be decompressed to reproduce the original. Great efforts in design, coding and testing have been made to ensure that this program works correctly. 106 | 107 | However, the complexity of the algorithms, and, in particular, the presence of various special cases in the code which occur with very low but non-zero probability make it impossible to rule out the possibility of bugs remaining in the program. 108 | 109 | DO NOT COMPRESS ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. 110 | 111 | That is not to say this program is inherently unreliable. Indeed, I very much hope the opposite is true. Bzip3/libbz3 has been carefully constructed and extensively tested. 112 | 113 | **Bzip3's performance is _heavily_ dependent on the compiler. x64 Linux clang13 builds usually can go as high as 17MiB/s compression and 23MiB/s decompression _per thread_. 
Windows and 32-bit builds might be considerably slower.** 114 | 115 | Bzip3 has been tested on the following architectures: 116 | - x86 117 | - x86_64 118 | - armv6 119 | - armv7 120 | - aarch64 121 | - ppc64le 122 | - mips 123 | - mips64 124 | - sparc 125 | - s390x 126 | 127 | ## Corpus benchmarks 128 | 129 | ![visualisation of the benchmarks](etc/benchmark.png) 130 | 131 | Check etc/BENCHMARKS.md for more results. 132 | 133 | ## Licensing 134 | 135 | A breakdown of components and their licenses follows: 136 | 137 | - (runtime) The codebase as a whole: Copyright 2022-2023, Kamila Szewczyk (kspalaiologos@gmail.com); LGPL (LICENSE) 138 | - (runtime) The Burrows-Wheeler transform (libsais) and LZP code: 2021-2022, Ilya Grebnov (ilya.grebnov@gmail.com); Apache 2.0 (3rdparty/libsais-LICENSE) 139 | - (compile-time) `build-aux`: Copyright 2011, Daniel Richard G (skunk@iSKUNK.ORG), 2019, Marc Stevens (marc.stevens@cwi.nl), 2008, Steven G. Johnson (stevenj@alum.mit.edu); GPL-3+ with AutoConf exception 140 | - (compile-time) `build-aux/ax_check_compile_flag.m4`: Copyright 2008, Guido U. Draheim (guidod@gmx.de), 2011, Maarten Bosmans (mkbosmans@gmail.com); FSFAP 141 | - (compile-time) `build-aux/git-version-gen`: Copyright 2007-2012, Free Software Foundation, Inc; GPLv3 142 | - (runtime) `bz3grep`: Copyright 2003, Thomas Klausner; BSD-2-clause 143 | 144 | `bzip3` as a whole is licensed under LGPLv3 only. It is not dual-licensed under LGPLv3 and Apache 2.0. 145 | 146 | ## Thanks 147 | 148 | - Ilya Grebnov for his `libsais` library used for BWT construction in BZip3 and the LZP encoder which I had used as a reference implementation to improve myself. 149 | - Caleb Maclennan for configuring autotools as a packaging-friendly build system for BZip3. 150 | - Ilya Muravyov for his public domain BWT post-coder, a derivative of which is used in this project. 
151 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | incomplete_source () { 5 | printf '%s\n' \ 6 | "$1. Please either:" \ 7 | "* $2," \ 8 | "* or use the source packages instead of a repo archive" \ 9 | "* or use a full Git clone." >&2 10 | exit 1 11 | } 12 | 13 | # This enables easy building from Github's snapshot archives 14 | if [ ! -e ".git" ]; then 15 | if [ ! -f ".tarball-version" ]; then 16 | incomplete_source "No version information found" \ 17 | "identify the correct version with \`echo \$version > .tarball-version\`" 18 | fi 19 | else 20 | # Just a head start to save a ./configure cycle 21 | ./build-aux/git-version-gen .tarball-version > .version 22 | fi 23 | 24 | autoreconf --install 25 | -------------------------------------------------------------------------------- /build-aux/ax_build_date_epoch.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # https://www.gnu.org/software/autoconf-archive/ax_build_date_epoch.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_BUILD_DATE_EPOCH(VARIABLE[, FORMAT[, ACTION-IF-FAIL]]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Sets VARIABLE to a string representing the current time. It is 12 | # formatted according to FORMAT if specified, otherwise it is formatted as 13 | # the number of seconds (excluding leap seconds) since the UNIX epoch (01 14 | # Jan 1970 00:00:00 UTC). 15 | # 16 | # If the SOURCE_DATE_EPOCH environment variable is set, it uses the value 17 | # of that variable instead of the current time. See 18 | # https://reproducible-builds.org/specs/source-date-epoch). 
If 19 | # SOURCE_DATE_EPOCH is set but cannot be properly interpreted as a UNIX 20 | # timestamp, then execute ACTION-IF-FAIL if specified, otherwise error. 21 | # 22 | # VARIABLE is AC_SUBST-ed. 23 | # 24 | # LICENSE 25 | # 26 | # Copyright (c) 2016 Eric Bavier 27 | # 28 | # This program is free software: you can redistribute it and/or modify it 29 | # under the terms of the GNU General Public License as published by the 30 | # Free Software Foundation, either version 3 of the License, or (at your 31 | # option) any later version. 32 | # 33 | # This program is distributed in the hope that it will be useful, but 34 | # WITHOUT ANY WARRANTY; without even the implied warranty of 35 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 36 | # Public License for more details. 37 | # 38 | # You should have received a copy of the GNU General Public License along 39 | # with this program. If not, see <https://www.gnu.org/licenses/>. 40 | # 41 | # As a special exception, the respective Autoconf Macro's copyright owner 42 | # gives unlimited permission to copy, distribute and modify the configure 43 | # scripts that are the output of Autoconf when processing the Macro. You 44 | # need not follow the terms of the GNU General Public License when using 45 | # or distributing such scripts, even though portions of the text of the 46 | # Macro appear in them. The GNU General Public License (GPL) does govern 47 | # all other use of the material that constitutes the Autoconf Macro. 48 | # 49 | # This special exception to the GPL applies to versions of the Autoconf 50 | # Macro released by the Autoconf Archive. When you make and distribute a 51 | # modified version of the Autoconf Macro, you may extend this special 52 | # exception to the GPL to apply to your modified version as well. 
53 | 54 | #serial 2 55 | 56 | AC_DEFUN([AX_BUILD_DATE_EPOCH], 57 | [dnl 58 | AC_MSG_CHECKING([for build time]) 59 | ax_date_fmt="m4_default($2,%s)" 60 | AS_IF([test x"$SOURCE_DATE_EPOCH" = x], 61 | [$1=`date "+$ax_date_fmt"`], 62 | [ax_build_date=`date -u -d "@$SOURCE_DATE_EPOCH" "+$ax_date_fmt" 2>/dev/null \ 63 | || date -u -r "$SOURCE_DATE_EPOCH" "+$ax_date_fmt" 2>/dev/null` 64 | AS_IF([test x"$ax_build_date" = x], 65 | [m4_ifval([$3], 66 | [$3], 67 | [AC_MSG_ERROR([malformed SOURCE_DATE_EPOCH])])], 68 | [$1=$ax_build_date])]) 69 | AC_MSG_RESULT([$$1]) 70 | ])dnl AX_BUILD_DATE_EPOCH 71 | -------------------------------------------------------------------------------- /build-aux/ax_check_compile_flag.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check whether the given FLAG works with the current language's compiler 12 | # or gives an error. (Warnings, however, are ignored) 13 | # 14 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 15 | # success/failure. 16 | # 17 | # If EXTRA-FLAGS is defined, it is added to the current language's default 18 | # flags (e.g. CFLAGS) when the check is done. The check is thus made with 19 | # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to 20 | # force the compiler to issue an error when a bad flag is given. 21 | # 22 | # INPUT gives an alternative input source to AC_COMPILE_IFELSE. 23 | # 24 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this 25 | # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. 
26 | # 27 | # LICENSE 28 | # 29 | # Copyright (c) 2008 Guido U. Draheim 30 | # Copyright (c) 2011 Maarten Bosmans 31 | # 32 | # Copying and distribution of this file, with or without modification, are 33 | # permitted in any medium without royalty provided the copyright notice 34 | # and this notice are preserved. This file is offered as-is, without any 35 | # warranty. 36 | 37 | #serial 6 38 | 39 | AC_DEFUN([AX_CHECK_COMPILE_FLAG], 40 | [AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF 41 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl 42 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ 43 | ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS 44 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" 45 | AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])], 46 | [AS_VAR_SET(CACHEVAR,[yes])], 47 | [AS_VAR_SET(CACHEVAR,[no])]) 48 | _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) 49 | AS_VAR_IF(CACHEVAR,yes, 50 | [m4_default([$2], :)], 51 | [m4_default([$3], :)]) 52 | AS_VAR_POPDEF([CACHEVAR])dnl 53 | ])dnl AX_CHECK_COMPILE_FLAGS 54 | -------------------------------------------------------------------------------- /build-aux/ax_progvar.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_PROGVAR], [ 2 | test -n "$m4_toupper($1)" || { AC_PATH_PROG(m4_toupper($1), m4_default($2,$1)) } 3 | test -n "$m4_toupper($1)" || AC_MSG_ERROR([m4_default($2,$1) is required]) 4 | ]) 5 | 6 | -------------------------------------------------------------------------------- /build-aux/ax_pthread.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # https://www.gnu.org/software/autoconf-archive/ax_pthread.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_PTHREAD([ACTION-IF-FOUND[, 
ACTION-IF-NOT-FOUND]]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # This macro figures out how to build C programs using POSIX threads. It 12 | # sets the PTHREAD_LIBS output variable to the threads library and linker 13 | # flags, and the PTHREAD_CFLAGS output variable to any special C compiler 14 | # flags that are needed. (The user can also force certain compiler 15 | # flags/libs to be tested by setting these environment variables.) 16 | # 17 | # Also sets PTHREAD_CC and PTHREAD_CXX to any special C compiler that is 18 | # needed for multi-threaded programs (defaults to the value of CC 19 | # respectively CXX otherwise). (This is necessary on e.g. AIX to use the 20 | # special cc_r/CC_r compiler alias.) 21 | # 22 | # NOTE: You are assumed to not only compile your program with these flags, 23 | # but also to link with them as well. For example, you might link with 24 | # $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS 25 | # $PTHREAD_CXX $CXXFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS 26 | # 27 | # If you are only building threaded programs, you may wish to use these 28 | # variables in your default LIBS, CFLAGS, and CC: 29 | # 30 | # LIBS="$PTHREAD_LIBS $LIBS" 31 | # CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 32 | # CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS" 33 | # CC="$PTHREAD_CC" 34 | # CXX="$PTHREAD_CXX" 35 | # 36 | # In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant 37 | # has a nonstandard name, this macro defines PTHREAD_CREATE_JOINABLE to 38 | # that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). 39 | # 40 | # Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the 41 | # PTHREAD_PRIO_INHERIT symbol is defined when compiling with 42 | # PTHREAD_CFLAGS. 43 | # 44 | # ACTION-IF-FOUND is a list of shell commands to run if a threads library 45 | # is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it 46 | # is not found. 
If ACTION-IF-FOUND is not specified, the default action 47 | # will define HAVE_PTHREAD. 48 | # 49 | # Please let the authors know if this macro fails on any platform, or if 50 | # you have any other suggestions or comments. This macro was based on work 51 | # by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help 52 | # from M. Frigo), as well as ac_pthread and hb_pthread macros posted by 53 | # Alejandro Forero Cuervo to the autoconf macro repository. We are also 54 | # grateful for the helpful feedback of numerous users. 55 | # 56 | # Updated for Autoconf 2.68 by Daniel Richard G. 57 | # 58 | # LICENSE 59 | # 60 | # Copyright (c) 2008 Steven G. Johnson 61 | # Copyright (c) 2011 Daniel Richard G. 62 | # Copyright (c) 2019 Marc Stevens 63 | # 64 | # This program is free software: you can redistribute it and/or modify it 65 | # under the terms of the GNU General Public License as published by the 66 | # Free Software Foundation, either version 3 of the License, or (at your 67 | # option) any later version. 68 | # 69 | # This program is distributed in the hope that it will be useful, but 70 | # WITHOUT ANY WARRANTY; without even the implied warranty of 71 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 72 | # Public License for more details. 73 | # 74 | # You should have received a copy of the GNU General Public License along 75 | # with this program. If not, see <https://www.gnu.org/licenses/>. 76 | # 77 | # As a special exception, the respective Autoconf Macro's copyright owner 78 | # gives unlimited permission to copy, distribute and modify the configure 79 | # scripts that are the output of Autoconf when processing the Macro. You 80 | # need not follow the terms of the GNU General Public License when using 81 | # or distributing such scripts, even though portions of the text of the 82 | # Macro appear in them. The GNU General Public License (GPL) does govern 83 | # all other use of the material that constitutes the Autoconf Macro. 
84 | # 85 | # This special exception to the GPL applies to versions of the Autoconf 86 | # Macro released by the Autoconf Archive. When you make and distribute a 87 | # modified version of the Autoconf Macro, you may extend this special 88 | # exception to the GPL to apply to your modified version as well. 89 | 90 | #serial 31 91 | 92 | AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD]) 93 | AC_DEFUN([AX_PTHREAD], [ 94 | AC_REQUIRE([AC_CANONICAL_HOST]) 95 | AC_REQUIRE([AC_PROG_CC]) 96 | AC_REQUIRE([AC_PROG_SED]) 97 | AC_LANG_PUSH([C]) 98 | ax_pthread_ok=no 99 | 100 | # We used to check for pthread.h first, but this fails if pthread.h 101 | # requires special compiler flags (e.g. on Tru64 or Sequent). 102 | # It gets checked for in the link test anyway. 103 | 104 | # First of all, check if the user has set any of the PTHREAD_LIBS, 105 | # etcetera environment variables, and if threads linking works using 106 | # them: 107 | if test "x$PTHREAD_CFLAGS$PTHREAD_LIBS" != "x"; then 108 | ax_pthread_save_CC="$CC" 109 | ax_pthread_save_CFLAGS="$CFLAGS" 110 | ax_pthread_save_LIBS="$LIBS" 111 | AS_IF([test "x$PTHREAD_CC" != "x"], [CC="$PTHREAD_CC"]) 112 | AS_IF([test "x$PTHREAD_CXX" != "x"], [CXX="$PTHREAD_CXX"]) 113 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 114 | LIBS="$PTHREAD_LIBS $LIBS" 115 | AC_MSG_CHECKING([for pthread_join using $CC $PTHREAD_CFLAGS $PTHREAD_LIBS]) 116 | AC_LINK_IFELSE([AC_LANG_CALL([], [pthread_join])], [ax_pthread_ok=yes]) 117 | AC_MSG_RESULT([$ax_pthread_ok]) 118 | if test "x$ax_pthread_ok" = "xno"; then 119 | PTHREAD_LIBS="" 120 | PTHREAD_CFLAGS="" 121 | fi 122 | CC="$ax_pthread_save_CC" 123 | CFLAGS="$ax_pthread_save_CFLAGS" 124 | LIBS="$ax_pthread_save_LIBS" 125 | fi 126 | 127 | # We must check for the threads library under a number of different 128 | # names; the ordering is very important because some systems 129 | # (e.g. DEC) have both -lpthread and -lpthreads, where one of the 130 | # libraries is broken (non-POSIX). 
131 | 132 | # Create a list of thread flags to try. Items with a "," contain both 133 | # C compiler flags (before ",") and linker flags (after ","). Other items 134 | # starting with a "-" are C compiler flags, and remaining items are 135 | # library names, except for "none" which indicates that we try without 136 | # any flags at all, and "pthread-config" which is a program returning 137 | # the flags for the Pth emulation library. 138 | 139 | ax_pthread_flags="pthreads none -Kthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" 140 | 141 | # The ordering *is* (sometimes) important. Some notes on the 142 | # individual items follow: 143 | 144 | # pthreads: AIX (must check this before -lpthread) 145 | # none: in case threads are in libc; should be tried before -Kthread and 146 | # other compiler flags to prevent continual compiler warnings 147 | # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) 148 | # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads), Tru64 149 | # (Note: HP C rejects this with "bad form for `-t' option") 150 | # -pthreads: Solaris/gcc (Note: HP C also rejects) 151 | # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it 152 | # doesn't hurt to check since this sometimes defines pthreads and 153 | # -D_REENTRANT too), HP C (must be checked before -lpthread, which 154 | # is present but should not be used directly; and before -mthreads, 155 | # because the compiler interprets this as "-mt" + "-hreads") 156 | # -mthreads: Mingw32/gcc, Lynx/gcc 157 | # pthread: Linux, etcetera 158 | # --thread-safe: KAI C++ 159 | # pthread-config: use pthread-config program (for GNU Pth library) 160 | 161 | case $host_os in 162 | 163 | freebsd*) 164 | 165 | # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) 166 | # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) 167 | 168 | ax_pthread_flags="-kthread lthread $ax_pthread_flags" 169 | ;; 170 | 171 | 
hpux*) 172 | 173 | # From the cc(1) man page: "[-mt] Sets various -D flags to enable 174 | # multi-threading and also sets -lpthread." 175 | 176 | ax_pthread_flags="-mt -pthread pthread $ax_pthread_flags" 177 | ;; 178 | 179 | openedition*) 180 | 181 | # IBM z/OS requires a feature-test macro to be defined in order to 182 | # enable POSIX threads at all, so give the user a hint if this is 183 | # not set. (We don't define these ourselves, as they can affect 184 | # other portions of the system API in unpredictable ways.) 185 | 186 | AC_EGREP_CPP([AX_PTHREAD_ZOS_MISSING], 187 | [ 188 | # if !defined(_OPEN_THREADS) && !defined(_UNIX03_THREADS) 189 | AX_PTHREAD_ZOS_MISSING 190 | # endif 191 | ], 192 | [AC_MSG_WARN([IBM z/OS requires -D_OPEN_THREADS or -D_UNIX03_THREADS to enable pthreads support.])]) 193 | ;; 194 | 195 | solaris*) 196 | 197 | # On Solaris (at least, for some versions), libc contains stubbed 198 | # (non-functional) versions of the pthreads routines, so link-based 199 | # tests will erroneously succeed. (N.B.: The stubs are missing 200 | # pthread_cleanup_push, or rather a function called by this macro, 201 | # so we could check for that, but who knows whether they'll stub 202 | # that too in a future libc.) So we'll check first for the 203 | # standard Solaris way of linking pthreads (-mt -lpthread). 204 | 205 | ax_pthread_flags="-mt,-lpthread pthread $ax_pthread_flags" 206 | ;; 207 | esac 208 | 209 | # Are we compiling with Clang? 
210 | 211 | AC_CACHE_CHECK([whether $CC is Clang], 212 | [ax_cv_PTHREAD_CLANG], 213 | [ax_cv_PTHREAD_CLANG=no 214 | # Note that Autoconf sets GCC=yes for Clang as well as GCC 215 | if test "x$GCC" = "xyes"; then 216 | AC_EGREP_CPP([AX_PTHREAD_CC_IS_CLANG], 217 | [/* Note: Clang 2.7 lacks __clang_[a-z]+__ */ 218 | # if defined(__clang__) && defined(__llvm__) 219 | AX_PTHREAD_CC_IS_CLANG 220 | # endif 221 | ], 222 | [ax_cv_PTHREAD_CLANG=yes]) 223 | fi 224 | ]) 225 | ax_pthread_clang="$ax_cv_PTHREAD_CLANG" 226 | 227 | 228 | # GCC generally uses -pthread, or -pthreads on some platforms (e.g. SPARC) 229 | 230 | # Note that for GCC and Clang -pthread generally implies -lpthread, 231 | # except when -nostdlib is passed. 232 | # This is problematic using libtool to build C++ shared libraries with pthread: 233 | # [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=25460 234 | # [2] https://bugzilla.redhat.com/show_bug.cgi?id=661333 235 | # [3] https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=468555 236 | # To solve this, first try -pthread together with -lpthread for GCC 237 | 238 | AS_IF([test "x$GCC" = "xyes"], 239 | [ax_pthread_flags="-pthread,-lpthread -pthread -pthreads $ax_pthread_flags"]) 240 | 241 | # Clang takes -pthread (never supported any other flag), but we'll try with -lpthread first 242 | 243 | AS_IF([test "x$ax_pthread_clang" = "xyes"], 244 | [ax_pthread_flags="-pthread,-lpthread -pthread"]) 245 | 246 | 247 | # The presence of a feature test macro requesting re-entrant function 248 | # definitions is, on some systems, a strong hint that pthreads support is 249 | # correctly enabled 250 | 251 | case $host_os in 252 | darwin* | hpux* | linux* | osf* | solaris*) 253 | ax_pthread_check_macro="_REENTRANT" 254 | ;; 255 | 256 | aix*) 257 | ax_pthread_check_macro="_THREAD_SAFE" 258 | ;; 259 | 260 | *) 261 | ax_pthread_check_macro="--" 262 | ;; 263 | esac 264 | AS_IF([test "x$ax_pthread_check_macro" = "x--"], 265 | [ax_pthread_check_cond=0], 266 | 
[ax_pthread_check_cond="!defined($ax_pthread_check_macro)"]) 267 | 268 | 269 | if test "x$ax_pthread_ok" = "xno"; then 270 | for ax_pthread_try_flag in $ax_pthread_flags; do 271 | 272 | case $ax_pthread_try_flag in 273 | none) 274 | AC_MSG_CHECKING([whether pthreads work without any flags]) 275 | ;; 276 | 277 | *,*) 278 | PTHREAD_CFLAGS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\1/"` 279 | PTHREAD_LIBS=`echo $ax_pthread_try_flag | sed "s/^\(.*\),\(.*\)$/\2/"` 280 | AC_MSG_CHECKING([whether pthreads work with "$PTHREAD_CFLAGS" and "$PTHREAD_LIBS"]) 281 | ;; 282 | 283 | -*) 284 | AC_MSG_CHECKING([whether pthreads work with $ax_pthread_try_flag]) 285 | PTHREAD_CFLAGS="$ax_pthread_try_flag" 286 | ;; 287 | 288 | pthread-config) 289 | AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no]) 290 | AS_IF([test "x$ax_pthread_config" = "xno"], [continue]) 291 | PTHREAD_CFLAGS="`pthread-config --cflags`" 292 | PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" 293 | ;; 294 | 295 | *) 296 | AC_MSG_CHECKING([for the pthreads library -l$ax_pthread_try_flag]) 297 | PTHREAD_LIBS="-l$ax_pthread_try_flag" 298 | ;; 299 | esac 300 | 301 | ax_pthread_save_CFLAGS="$CFLAGS" 302 | ax_pthread_save_LIBS="$LIBS" 303 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 304 | LIBS="$PTHREAD_LIBS $LIBS" 305 | 306 | # Check for various functions. We must include pthread.h, 307 | # since some functions may be macros. (On the Sequent, we 308 | # need a special flag -Kthread to make this header compile.) 309 | # We check for pthread_join because it is in -lpthread on IRIX 310 | # while pthread_create is in libc. We check for pthread_attr_init 311 | # due to DEC craziness with -lpthreads. We check for 312 | # pthread_cleanup_push because it is one of the few pthread 313 | # functions on Solaris that doesn't have a non-functional libc stub. 314 | # We try pthread_create on general principles. 
315 | 316 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h> 317 | # if $ax_pthread_check_cond 318 | # error "$ax_pthread_check_macro must be defined" 319 | # endif 320 | static void *some_global = NULL; 321 | static void routine(void *a) 322 | { 323 | /* To avoid any unused-parameter or 324 | unused-but-set-parameter warning. */ 325 | some_global = a; 326 | } 327 | static void *start_routine(void *a) { return a; }], 328 | [pthread_t th; pthread_attr_t attr; 329 | pthread_create(&th, 0, start_routine, 0); 330 | pthread_join(th, 0); 331 | pthread_attr_init(&attr); 332 | pthread_cleanup_push(routine, 0); 333 | pthread_cleanup_pop(0) /* ; */])], 334 | [ax_pthread_ok=yes], 335 | []) 336 | 337 | CFLAGS="$ax_pthread_save_CFLAGS" 338 | LIBS="$ax_pthread_save_LIBS" 339 | 340 | AC_MSG_RESULT([$ax_pthread_ok]) 341 | AS_IF([test "x$ax_pthread_ok" = "xyes"], [break]) 342 | 343 | PTHREAD_LIBS="" 344 | PTHREAD_CFLAGS="" 345 | done 346 | fi 347 | 348 | 349 | # Clang needs special handling, because older versions handle the -pthread 350 | # option in a rather... idiosyncratic way 351 | 352 | if test "x$ax_pthread_clang" = "xyes"; then 353 | 354 | # Clang takes -pthread; it has never supported any other flag 355 | 356 | # (Note 1: This will need to be revisited if a system that Clang 357 | # supports has POSIX threads in a separate library. This tends not 358 | # to be the way of modern systems, but it's conceivable.) 359 | 360 | # (Note 2: On some systems, notably Darwin, -pthread is not needed 361 | # to get POSIX threads support; the API is always present and 362 | # active. We could reasonably leave PTHREAD_CFLAGS empty. But 363 | # -pthread does define _REENTRANT, and while the Darwin headers 364 | # ignore this macro, third-party headers might not.)
365 | 366 | # However, older versions of Clang make a point of warning the user 367 | # that, in an invocation where only linking and no compilation is 368 | # taking place, the -pthread option has no effect ("argument unused 369 | # during compilation"). They expect -pthread to be passed in only 370 | # when source code is being compiled. 371 | # 372 | # Problem is, this is at odds with the way Automake and most other 373 | # C build frameworks function, which is that the same flags used in 374 | # compilation (CFLAGS) are also used in linking. Many systems 375 | # supported by AX_PTHREAD require exactly this for POSIX threads 376 | # support, and in fact it is often not straightforward to specify a 377 | # flag that is used only in the compilation phase and not in 378 | # linking. Such a scenario is extremely rare in practice. 379 | # 380 | # Even though use of the -pthread flag in linking would only print 381 | # a warning, this can be a nuisance for well-run software projects 382 | # that build with -Werror. So if the active version of Clang has 383 | # this misfeature, we search for an option to squash it. 
384 | 385 | AC_CACHE_CHECK([whether Clang needs flag to prevent "argument unused" warning when linking with -pthread], 386 | [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG], 387 | [ax_cv_PTHREAD_CLANG_NO_WARN_FLAG=unknown 388 | # Create an alternate version of $ac_link that compiles and 389 | # links in two steps (.c -> .o, .o -> exe) instead of one 390 | # (.c -> exe), because the warning occurs only in the second 391 | # step 392 | ax_pthread_save_ac_link="$ac_link" 393 | ax_pthread_sed='s/conftest\.\$ac_ext/conftest.$ac_objext/g' 394 | ax_pthread_link_step=`AS_ECHO(["$ac_link"]) | sed "$ax_pthread_sed"` 395 | ax_pthread_2step_ac_link="($ac_compile) && (echo ==== >&5) && ($ax_pthread_link_step)" 396 | ax_pthread_save_CFLAGS="$CFLAGS" 397 | for ax_pthread_try in '' -Qunused-arguments -Wno-unused-command-line-argument unknown; do 398 | AS_IF([test "x$ax_pthread_try" = "xunknown"], [break]) 399 | CFLAGS="-Werror -Wunknown-warning-option $ax_pthread_try -pthread $ax_pthread_save_CFLAGS" 400 | ac_link="$ax_pthread_save_ac_link" 401 | AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], 402 | [ac_link="$ax_pthread_2step_ac_link" 403 | AC_LINK_IFELSE([AC_LANG_SOURCE([[int main(void){return 0;}]])], 404 | [break]) 405 | ]) 406 | done 407 | ac_link="$ax_pthread_save_ac_link" 408 | CFLAGS="$ax_pthread_save_CFLAGS" 409 | AS_IF([test "x$ax_pthread_try" = "x"], [ax_pthread_try=no]) 410 | ax_cv_PTHREAD_CLANG_NO_WARN_FLAG="$ax_pthread_try" 411 | ]) 412 | 413 | case "$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG" in 414 | no | unknown) ;; 415 | *) PTHREAD_CFLAGS="$ax_cv_PTHREAD_CLANG_NO_WARN_FLAG $PTHREAD_CFLAGS" ;; 416 | esac 417 | 418 | fi # $ax_pthread_clang = yes 419 | 420 | 421 | 422 | # Various other checks: 423 | if test "x$ax_pthread_ok" = "xyes"; then 424 | ax_pthread_save_CFLAGS="$CFLAGS" 425 | ax_pthread_save_LIBS="$LIBS" 426 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 427 | LIBS="$PTHREAD_LIBS $LIBS" 428 | 429 | # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 
430 | AC_CACHE_CHECK([for joinable pthread attribute], 431 | [ax_cv_PTHREAD_JOINABLE_ATTR], 432 | [ax_cv_PTHREAD_JOINABLE_ATTR=unknown 433 | for ax_pthread_attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do 434 | AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>], 435 | [int attr = $ax_pthread_attr; return attr /* ; */])], 436 | [ax_cv_PTHREAD_JOINABLE_ATTR=$ax_pthread_attr; break], 437 | []) 438 | done 439 | ]) 440 | AS_IF([test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xunknown" && \ 441 | test "x$ax_cv_PTHREAD_JOINABLE_ATTR" != "xPTHREAD_CREATE_JOINABLE" && \ 442 | test "x$ax_pthread_joinable_attr_defined" != "xyes"], 443 | [AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], 444 | [$ax_cv_PTHREAD_JOINABLE_ATTR], 445 | [Define to necessary symbol if this constant 446 | uses a non-standard name on your system.]) 447 | ax_pthread_joinable_attr_defined=yes 448 | ]) 449 | 450 | AC_CACHE_CHECK([whether more special flags are required for pthreads], 451 | [ax_cv_PTHREAD_SPECIAL_FLAGS], 452 | [ax_cv_PTHREAD_SPECIAL_FLAGS=no 453 | case $host_os in 454 | solaris*) 455 | ax_cv_PTHREAD_SPECIAL_FLAGS="-D_POSIX_PTHREAD_SEMANTICS" 456 | ;; 457 | esac 458 | ]) 459 | AS_IF([test "x$ax_cv_PTHREAD_SPECIAL_FLAGS" != "xno" && \ 460 | test "x$ax_pthread_special_flags_added" != "xyes"], 461 | [PTHREAD_CFLAGS="$ax_cv_PTHREAD_SPECIAL_FLAGS $PTHREAD_CFLAGS" 462 | ax_pthread_special_flags_added=yes]) 463 | 464 | AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT], 465 | [ax_cv_PTHREAD_PRIO_INHERIT], 466 | [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]], 467 | [[int i = PTHREAD_PRIO_INHERIT; 468 | return i;]])], 469 | [ax_cv_PTHREAD_PRIO_INHERIT=yes], 470 | [ax_cv_PTHREAD_PRIO_INHERIT=no]) 471 | ]) 472 | AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes" && \ 473 | test "x$ax_pthread_prio_inherit_defined" != "xyes"], 474 | [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.]) 475 | ax_pthread_prio_inherit_defined=yes 476 | ]) 477 | 478 | CFLAGS="$ax_pthread_save_CFLAGS" 479 |
LIBS="$ax_pthread_save_LIBS" 480 | 481 | # More AIX lossage: compile with *_r variant 482 | if test "x$GCC" != "xyes"; then 483 | case $host_os in 484 | aix*) 485 | AS_CASE(["x/$CC"], 486 | [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6], 487 | [#handle absolute path differently from PATH based program lookup 488 | AS_CASE(["x$CC"], 489 | [x/*], 490 | [ 491 | AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"]) 492 | AS_IF([test "x${CXX}" != "x"], [AS_IF([AS_EXECUTABLE_P([${CXX}_r])],[PTHREAD_CXX="${CXX}_r"])]) 493 | ], 494 | [ 495 | AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC]) 496 | AS_IF([test "x${CXX}" != "x"], [AC_CHECK_PROGS([PTHREAD_CXX],[${CXX}_r],[$CXX])]) 497 | ] 498 | ) 499 | ]) 500 | ;; 501 | esac 502 | fi 503 | fi 504 | 505 | test -n "$PTHREAD_CC" || PTHREAD_CC="$CC" 506 | test -n "$PTHREAD_CXX" || PTHREAD_CXX="$CXX" 507 | 508 | AC_SUBST([PTHREAD_LIBS]) 509 | AC_SUBST([PTHREAD_CFLAGS]) 510 | AC_SUBST([PTHREAD_CC]) 511 | AC_SUBST([PTHREAD_CXX]) 512 | 513 | # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: 514 | if test "x$ax_pthread_ok" = "xyes"; then 515 | ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1]) 516 | : 517 | else 518 | ax_pthread_ok=no 519 | $2 520 | fi 521 | AC_LANG_POP 522 | ])dnl AX_PTHREAD 523 | -------------------------------------------------------------------------------- /build-aux/ax_subst_man_date.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_SUBST_MAN_DATE], [ 2 | ax_date_fmt="m4_default($1,%d %B %Y)" 3 | ax_src_file="m4_default($2,*.1.in)" 4 | AS_IF([test ! 
-e .gitignore], 5 | [ 6 | AX_PROGVAR([date]) 7 | AX_BUILD_DATE_EPOCH(MAN_DATE, "$ax_date_fmt") 8 | ], [ 9 | AX_PROGVAR([git]) 10 | MAN_DATE=$($GIT log -1 --format="%cd" --date=format:"$ax_date_fmt" -- $ax_src_file) 11 | ]) 12 | AC_SUBST([MAN_DATE]) 13 | ]) 14 | -------------------------------------------------------------------------------- /build-aux/ax_subst_transformed_package_name.m4: -------------------------------------------------------------------------------- 1 | AC_DEFUN([AX_SUBST_TRANSFORMED_PACKAGE_NAME], [ 2 | AC_PROG_SED 3 | TRANSFORMED_PACKAGE_NAME="$(printf "$PACKAGE_NAME" | $SED -e "$(printf "$program_transform_name" | $SED -e 's/\$\$/\$/')")" 4 | AC_SUBST([TRANSFORMED_PACKAGE_NAME]) 5 | ]) 6 | -------------------------------------------------------------------------------- /build-aux/git-version-gen: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # Print a version string. 4 | scriptversion=2012-03-18.17; # UTC 5 | 6 | # Copyright (C) 2007-2012 Free Software Foundation, Inc. 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU General Public License as published by 10 | # the Free Software Foundation; either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU General Public License 19 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 20 | 21 | # This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/.
22 | # It may be run two ways: 23 | # - from a git repository in which the "git describe" command below 24 | # produces useful output (thus requiring at least one signed tag) 25 | # - from a non-git-repo directory containing a .tarball-version file, which 26 | # presumes this script is invoked like "./git-version-gen .tarball-version". 27 | 28 | # In order to use intra-version strings in your project, you will need two 29 | # separate generated version string files: 30 | # 31 | # .tarball-version - present only in a distribution tarball, and not in 32 | # a checked-out repository. Created with contents that were learned at 33 | # the last time autoconf was run, and used by git-version-gen. Must not 34 | # be present in either $(srcdir) or $(builddir) for git-version-gen to 35 | # give accurate answers during normal development with a checked out tree, 36 | # but must be present in a tarball when there is no version control system. 37 | # Therefore, it cannot be used in any dependencies. GNUmakefile has 38 | # hooks to force a reconfigure at distribution time to get the value 39 | # correct, without penalizing normal development with extra reconfigures. 40 | # 41 | # .version - present in a checked-out repository and in a distribution 42 | # tarball. Usable in dependencies, particularly for files that don't 43 | # want to depend on config.h but do want to track version changes. 44 | # Delete this file prior to any autoconf run where you want to rebuild 45 | # files to pick up a version string change; and leave it stale to 46 | # minimize rebuild time after unrelated changes to configure sources. 47 | # 48 | # As with any generated file in a VC'd directory, you should add 49 | # /.version to .gitignore, so that you don't accidentally commit it. 50 | # .tarball-version is never generated in a VC'd directory, so needn't 51 | # be listed there. 
52 | # 53 | # Use the following line in your configure.ac, so that $(VERSION) will 54 | # automatically be up-to-date each time configure is run (and note that 55 | # since configure.ac no longer includes a version string, Makefile rules 56 | # should not depend on configure.ac for version updates). 57 | # 58 | # AC_INIT([GNU project], 59 | # m4_esyscmd([build-aux/git-version-gen .tarball-version]), 60 | # [bug-project@example]) 61 | # 62 | # Then use the following lines in your Makefile.am, so that .version 63 | # will be present for dependencies, and so that .version and 64 | # .tarball-version will exist in distribution tarballs. 65 | # 66 | # EXTRA_DIST = $(top_srcdir)/.version 67 | # BUILT_SOURCES = $(top_srcdir)/.version 68 | # $(top_srcdir)/.version: 69 | # echo $(VERSION) > $@-t && mv $@-t $@ 70 | # dist-hook: 71 | # echo $(VERSION) > $(distdir)/.tarball-version 72 | 73 | 74 | me=$0 75 | 76 | version="git-version-gen $scriptversion 77 | 78 | Copyright 2011 Free Software Foundation, Inc. 79 | There is NO warranty. You may redistribute this software 80 | under the terms of the GNU General Public License. 81 | For more information about these matters, see the files named COPYING." 82 | 83 | usage="\ 84 | Usage: $me [OPTION]... \$srcdir/.tarball-version [TAG-NORMALIZATION-SED-SCRIPT] 85 | Print a version string. 86 | 87 | Options: 88 | 89 | --prefix prefix of git tags 90 | 91 | --help display this help and exit 92 | --version output version information and exit 93 | 94 | Running without arguments will suffice in most cases." 95 | 96 | prefix= 97 | 98 | while test $# -gt 0; do 99 | case $1 in 100 | --help) echo "$usage"; exit 0;; 101 | --version) echo "$version"; exit 0;; 102 | --prefix) shift; prefix="$1";; 103 | -*) 104 | echo "$0: Unknown option '$1'." >&2 105 | echo "$0: Try '--help' for more information." 
>&2 106 | exit 1;; 107 | *) 108 | if test -z "$tarball_version_file"; then 109 | tarball_version_file="$1" 110 | elif test -z "$tag_sed_script"; then 111 | tag_sed_script="$1" 112 | else 113 | echo "$0: extra non-option argument '$1'." >&2 114 | exit 1 115 | fi;; 116 | esac 117 | shift 118 | done 119 | 120 | if test -z "$tarball_version_file"; then 121 | echo "$usage" 122 | exit 1 123 | fi 124 | 125 | tag_sed_script="${tag_sed_script:-s/x/x/}" 126 | 127 | nl=' 128 | ' 129 | 130 | # Avoid meddling by environment variable of the same name. 131 | v= 132 | v_from_git= 133 | 134 | # First see if there is a tarball-only version file. 135 | # then try "git describe", then default. 136 | if test -f $tarball_version_file 137 | then 138 | v=`cat $tarball_version_file` || v= 139 | case $v in 140 | *$nl*) v= ;; # reject multi-line output 141 | [0-9]*) ;; 142 | *) v= ;; 143 | esac 144 | test -z "$v" \ 145 | && echo "$0: WARNING: $tarball_version_file is missing or damaged" 1>&2 146 | fi 147 | 148 | if test -n "$v" 149 | then 150 | : # use $v 151 | # Otherwise, if there is at least one git commit involving the working 152 | # directory, and "git describe" output looks sensible, use that to 153 | # derive a version string. 154 | elif test "`git log -1 --pretty=format:x . 2>/dev/null`" = x \ 155 | && v=`git describe --tags --abbrev=7 --match="$prefix*" HEAD 2>/dev/null \ 156 | || git describe --tags --abbrev=7 HEAD 2>/dev/null \ 157 | || git log -1 --pretty=format:'v0-HEAD-%h' 2>/dev/null` \ 158 | && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ 159 | && case $v in 160 | $prefix[0-9]*) ;; 161 | *) (exit 1) ;; 162 | esac 163 | then 164 | # Is this a new git that lists number of commits since the last 165 | # tag or the previous older version that did not? 
166 | # Newer: v6.10-77-g0f8faeb 167 | # Older: v6.10-g0f8faeb 168 | case $v in 169 | *-*-*) : git describe is okay three part flavor ;; 170 | *-*) 171 | : git describe is older two part flavor 172 | # Recreate the number of commits and rewrite such that the 173 | # result is the same as if we were using the newer version 174 | # of git describe. 175 | vtag=`echo "$v" | sed 's/-.*//'` 176 | commit_list=`git rev-list "$vtag"..HEAD 2>/dev/null` \ 177 | || { commit_list=failed; 178 | echo "$0: WARNING: git rev-list failed" 1>&2; } 179 | numcommits=`echo "$commit_list" | wc -l` 180 | v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; 181 | test "$commit_list" = failed && v=UNKNOWN 182 | ;; 183 | esac 184 | 185 | v=`echo "$v" | sed 's/-/.r/'`; 186 | v_from_git=1 187 | else 188 | v=UNKNOWN 189 | fi 190 | 191 | v=`echo "$v" |sed "s/^$prefix//"` 192 | 193 | # Test whether to append the "-dirty" suffix only if the version 194 | # string we're using came from git. I.e., skip the test if it's "UNKNOWN" 195 | # or if it came from .tarball-version. 196 | if test -n "$v_from_git"; then 197 | # Don't declare a version "dirty" merely because a time stamp has changed. 198 | git update-index --refresh > /dev/null 2>&1 199 | 200 | dirty=`exec 2>/dev/null;git diff-index --name-only HEAD` || dirty= 201 | case "$dirty" in 202 | '') ;; 203 | *) # Append the suffix only if there isn't one already. 204 | case $v in 205 | *-dirty) ;; 206 | *) v="$v-dirty" ;; 207 | esac ;; 208 | esac 209 | fi 210 | 211 | # Omit the trailing newline, so that m4_esyscmd can use the result directly. 
212 | echo "$v" | tr -d "$nl" 213 | 214 | # Local variables: 215 | # eval: (add-hook 'write-file-hooks 'time-stamp) 216 | # time-stamp-start: "scriptversion=" 217 | # time-stamp-format: "%:y-%02m-%02d.%02H" 218 | # time-stamp-time-zone: "UTC" 219 | # time-stamp-end: "; # UTC" 220 | # End: 221 | -------------------------------------------------------------------------------- /bunzip3: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | exec bzip3 -d "$@" 4 | -------------------------------------------------------------------------------- /bunzip3.1: -------------------------------------------------------------------------------- 1 | .so man1/bzip3.1 -------------------------------------------------------------------------------- /bz3cat: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | exec bzip3 -Bcd "$@" 4 | -------------------------------------------------------------------------------- /bz3cat.1: -------------------------------------------------------------------------------- 1 | .so man1/bzip3.1 -------------------------------------------------------------------------------- /bz3grep: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | # 3 | # Copyright (c) 2003 Thomas Klausner. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions 7 | # are met: 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 2. Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 
13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 | # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 | # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 | # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 | # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | # 25 | # Adapted for bz3cat. 26 | 27 | grep=${GREP:-grep} 28 | zcat=${ZCAT:-bz3cat} 29 | 30 | endofopts=0 31 | pattern_found=0 32 | grep_args="" 33 | hyphen=0 34 | silent=0 35 | 36 | prog=bz3grep 37 | 38 | # skip all options and pass them on to grep taking care of options 39 | # with arguments, and if -e was supplied 40 | 41 | while [ "$#" -gt 0 ] && [ "${endofopts}" -eq 0 ]; do 42 | case "$1" in 43 | # from GNU grep-2.5.1 -- keep in sync! 
44 | -[ABCDXdefm]) 45 | if [ "$#" -lt 2 ]; then 46 | printf '%s: missing argument for %s flag\n' "${prog}" "$1" >&2 47 | exit 1 48 | fi 49 | case "$1" in 50 | -e) 51 | pattern="$2" 52 | pattern_found=1 53 | shift 2 54 | break 55 | ;; 56 | -f) 57 | pattern_found=2 58 | ;; 59 | *) 60 | ;; 61 | esac 62 | grep_args="${grep_args} $1 $2" 63 | shift 2 64 | ;; 65 | --) 66 | shift 67 | endofopts=1 68 | ;; 69 | -) 70 | hyphen=1 71 | shift 72 | ;; 73 | -h) 74 | silent=1 75 | shift 76 | ;; 77 | -*) 78 | grep_args="${grep_args} $1" 79 | shift 80 | ;; 81 | *) 82 | # pattern to grep for 83 | endofopts=1 84 | ;; 85 | esac 86 | done 87 | 88 | # if no -e option was found, take next argument as grep-pattern 89 | if [ "${pattern_found}" -lt 1 ]; then 90 | if [ "$#" -ge 1 ]; then 91 | pattern="$1" 92 | shift 93 | elif [ "${hyphen}" -gt 0 ]; then 94 | pattern="-" 95 | else 96 | printf '%s: missing pattern\n' "${prog}" >&2 97 | exit 1 98 | fi 99 | fi 100 | 101 | EXIT_CODE=0 102 | # call grep ... 103 | if [ "$#" -lt 1 ]; then 104 | # ... on stdin 105 | set -f # Disable file name generation (globbing). 106 | # shellcheck disable=SC2086 107 | "${zcat}" - | "${grep}" ${grep_args} -- "${pattern}" - 108 | EXIT_CODE=$? 109 | set +f 110 | else 111 | # ... on all files given on the command line 112 | if [ "${silent}" -lt 1 ] && [ "$#" -gt 1 ]; then 113 | grep_args="-H ${grep_args}" 114 | fi 115 | set -f 116 | while [ "$#" -gt 0 ]; do 117 | # shellcheck disable=SC2086 118 | if [ $pattern_found -eq 2 ]; then 119 | "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- - 120 | else 121 | "${zcat}" -- "$1" | "${grep}" --label="${1}" ${grep_args} -- "${pattern}" - 122 | fi 123 | [ "$?" 
-ne 0 ] && EXIT_CODE=1 124 | shift 125 | done 126 | set +f 127 | fi 128 | 129 | exit "${EXIT_CODE}" 130 | -------------------------------------------------------------------------------- /bz3grep.1.in: -------------------------------------------------------------------------------- 1 | .TH bz3grep 1 "@MAN_DATE@" "version v@VERSION@" 2 | . 3 | .SH "NAME" 4 | \fBbz3grep\fR \- print lines matching a pattern in bzip3\-compressed files 5 | . 6 | .SH "SYNOPSIS" 7 | \fBbz3grep\fR [\fIgrep\-flags\fR] [\-\-] \fIpattern\fR [\fIfiles\fR \.\.\.] 8 | . 9 | .SH "DESCRIPTION" 10 | \fBbz3grep\fR runs \fBgrep(1)\fR on files, or \fBstdin\fR if no files argument is given, after decompressing them with \fBbz3cat(1)\fR\. 11 | . 12 | .P 13 | The grep\-flags and pattern arguments are passed on to \fBgrep(1)\fR\. If an \fB\-e\fR flag is found in the \fBgrep\-flags\fR, \fBbz3grep\fR will not look for a pattern argument\. 14 | . 15 | .SH "EXIT STATUS" 16 | In case of missing arguments or a missing pattern, 1 will be returned\. Otherwise the exit status is 0 if \fBgrep(1)\fR succeeded on every input, and non\-zero if it reported no match or an error for any of them\. 17 | . 18 | .SH "SEE ALSO" 19 | \fBbzip3(1)\fR 20 | . 21 | .SH "AUTHORS" 22 | Thomas Klausner \fIwiz@NetBSD\.org\fR 23 | -------------------------------------------------------------------------------- /bz3less: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | bz3cat "$@" | less 3 | -------------------------------------------------------------------------------- /bz3less.1.in: -------------------------------------------------------------------------------- 1 | .TH bz3less 1 "@MAN_DATE@" "version v@VERSION@" 2 | . 3 | .SH "NAME" 4 | \fBbz3less\fR \- view bzip3\-compressed files 5 | 6 | .SH "SYNOPSIS" 7 | \fBbz3less\fR [\fIflags\fR] [\fIfile\fR \.\.\.] 8 | 9 | .SH "DESCRIPTION" 10 | \fBbz3less\fR runs \fBless(1)\fR on files, or on stdin 11 | if no files argument is given, after decompressing them 12 | with \fBbz3cat(1)\fR\.
13 | 14 | .SH "SEE ALSO" 15 | \fBbzip3(1)\fR, \fBless(1)\fR 16 | -------------------------------------------------------------------------------- /bz3more: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | for FILE 4 | do 5 | test $# -lt 2 || 6 | printf '::::::::::::::\n%s\n::::::::::::::\n' "$FILE" || break 7 | bz3cat -- "$FILE" 8 | done 2>&1 | eval ${PAGER-more} 9 | -------------------------------------------------------------------------------- /bz3more.1.in: -------------------------------------------------------------------------------- 1 | .TH bz3more 1 "@MAN_DATE@" "version v@VERSION@" 2 | . 3 | .SH "NAME" 4 | \fBbz3more\fR \- view bzip3\-compressed files 5 | 6 | .SH "SYNOPSIS" 7 | \fBbz3more\fR [\fIflags\fR] [\fIfile\fR \.\.\.] 8 | 9 | .SH "DESCRIPTION" 10 | \fBbz3more\fR runs \fBmore(1)\fR on files or stdin, 11 | if no files argument is given, after decompressing them 12 | with \fBbz3cat(1)\fR\. 13 | 14 | .SH "SEE ALSO" 15 | \fBbzip3(1)\fR, \fBmore(1)\fR 16 | -------------------------------------------------------------------------------- /bz3most: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | bz3cat "$@" | most 3 | -------------------------------------------------------------------------------- /bz3most.1.in: -------------------------------------------------------------------------------- 1 | .TH bz3most 1 "@MAN_DATE@" "version v@VERSION@" 2 | . 3 | .SH "NAME" 4 | \fBbz3most\fR \- view bzip3\-compressed files 5 | 6 | .SH "SYNOPSIS" 7 | \fBbz3most\fR [\fIflags\fR] [\fIfile\fR \.\.\.] 8 | 9 | .SH "DESCRIPTION" 10 | \fBbz3most\fR runs \fBmost(1)\fR on files or stdin, 11 | if no files argument is given, after decompressing them 12 | with \fBbz3cat(1)\fR\. 
13 | 14 | .SH "SEE ALSO" 15 | \fBbzip3(1)\fR, \fBmost(1)\fR 16 | -------------------------------------------------------------------------------- /bzip3.1.in: -------------------------------------------------------------------------------- 1 | .TH @TRANSFORMED_PACKAGE_NAME@ 1 "@MAN_DATE@" "version v@VERSION@" 2 | . 3 | .SH NAME 4 | @TRANSFORMED_PACKAGE_NAME@ \- an efficient statistical file compressor and spiritual successor 5 | to bzip2 6 | 7 | .SH SYNOPSIS 8 | .B @TRANSFORMED_PACKAGE_NAME@ 9 | .RB [ " \-BbcdehftV " ] 10 | [ 11 | .I "filenames \&..." 12 | ] 13 | . 14 | .P 15 | .B bz3cat 16 | is equivalent to 17 | .B @TRANSFORMED_PACKAGE_NAME@ \-dc 18 | .P 19 | .B bunzip3 20 | is equivalent to 21 | .B @TRANSFORMED_PACKAGE_NAME@ \-d 22 | .SH DESCRIPTION 23 | Compress or decompress a file using run length encoding and Lempel Ziv 24 | prediction, followed by the Burrows-Wheeler transform and arithmetic 25 | coding. 26 | .B @TRANSFORMED_PACKAGE_NAME@, 27 | like its ancestor 28 | .B bzip2, 29 | excels at compressing text or source code. 30 | 31 | The command-line options are deliberately very similar to 32 | those of 33 | .B bzip2, 34 | but they are not identical. 35 | .PP 36 | .B @TRANSFORMED_PACKAGE_NAME@ 37 | expects at most two filenames intertwined with flags. 38 | .B @TRANSFORMED_PACKAGE_NAME@ 39 | will by default not overwrite existing files. 40 | If this behaviour is intended, use the \-f flag. 41 | 42 | If no file names are specified, 43 | .B @TRANSFORMED_PACKAGE_NAME@ 44 | will compress from standard input to standard output, refusing 45 | to output binary data to a terminal. The \-e flag (encode) is implied. 46 | 47 | .B bunzip3 48 | (or, 49 | .B @TRANSFORMED_PACKAGE_NAME@ \-d 50 | equivalently) decompresses data from standard input to the standard 51 | output, refusing to read from a terminal. 52 | 53 | If two files are specified, the first one is used in place of 54 | standard input, and the second one is used in place of standard 55 | output. 
56 | 57 | If the \-c flag is present, @TRANSFORMED_PACKAGE_NAME@ will read from the specified 58 | file and output data to standard output instead. Otherwise, if decoding, 59 | .B @TRANSFORMED_PACKAGE_NAME@ 60 | will try to guess the decompressed filename by removing the 61 | .I .bz3 62 | extension. If not present, an error will be reported. If encoding, 63 | the output filename will be generated by appending the 64 | .I .bz3 65 | extension to the input filename. 66 | 67 | .SH OPTIONS 68 | .TP 69 | .B \-B --batch 70 | Enable batch mode. By default, 71 | .B @TRANSFORMED_PACKAGE_NAME@ 72 | will error if more than two files are passed, and the two files specified 73 | are always treated as input and output. The batch mode makes 74 | .B @TRANSFORMED_PACKAGE_NAME@ 75 | treat every file as input, so for example 76 | .I @TRANSFORMED_PACKAGE_NAME@ -Bd *.bz3 77 | will decompress all 78 | .I .bz3 79 | files in the current directory. 80 | .TP 81 | .B \-b --block N 82 | Set the block size to N mebibytes. The minimum is 1MiB, the maximum is 83 | 511MiB. 84 | .TP 85 | .B \-c --stdout 86 | Force writing output data to the standard output if one file is 87 | specified. 88 | .TP 89 | .B \-d --decode 90 | Force decompression. 91 | .TP 92 | .B \-e/-z --encode 93 | Force compression (default behaviour). 94 | .TP 95 | .B \-f --force 96 | Overwrite existing files. 97 | .TP 98 | .B \-h --help 99 | Display a help message and exit. 100 | .TP 101 | .B \-j --jobs N 102 | Set the amount of parallel worker threads that process one block each. 103 | .TP 104 | .B \--rm 105 | Remove the input files after successful compression or decompression. This is 106 | silently ignored if output is stdout. 107 | .TP 108 | .B \-k --keep 109 | Keep (don't delete) the input files. Set by default, provided only 110 | for compatibility with other compressors. 111 | .TP 112 | .B \-v --verbose 113 | Set verbose output mode to see compression statistics. 
114 | .TP 115 | .B \-V --version 116 | Display version information and exit. 117 | .TP 118 | .B \-t --test 119 | Verify the validity of compressed blocks. 120 | .TP 121 | .B \-- 122 | Treat all subsequent arguments as file names, even if they start with 123 | a dash. This is so you can handle files with names beginning with a dash. 124 | .SH FILE FORMAT 125 | 126 | Compression is performed as long as the input block is at least 127 | 64 bytes long. Otherwise, it's coded as a literal block. In all 128 | other cases, the compressed data is written to the file. The 129 | file format has constant overhead of 9 bytes per file and from 130 | 9 to 17 bytes per block. Random data is coded so that expansion 131 | is generally under 0.8%. 132 | 133 | .B @TRANSFORMED_PACKAGE_NAME@ 134 | uses 32-bit CRC to ensure that the decompressed version of a file is 135 | identical to the original. This guards against corruption of the 136 | compressed data. 137 | 138 | .SH MEMORY MANAGEMENT 139 | 140 | The \-b flag sets the block size in mebibytes (MiB). The default is 16 141 | MiB. Compression and decompression memory usage can be estimated as: 142 | 143 | 6 x block size 144 | 145 | Larger block sizes usually give rapidly diminishing returns. 146 | It is also important to appreciate that the decompression memory 147 | requirement is set at compression time by the choice of block size. 148 | In general, try and use the largest block size memory constraints allow, 149 | since that maximises the compression achieved. Compression and 150 | decompression speed are virtually unaffected by block size. 151 | 152 | .SH AUTHOR 153 | Kamila Szewczyk, kspalaiologos@gmail.com. 154 | 155 | https://github.com/kspalaiologos/bzip3 156 | 157 | Thanks to: Ilya Grebnov, Benjamin Strachan, Caleb Maclennan, Ilya Muravyov, 158 | package maintainers - Leah Neukirchen, Grigory Kirillov, Maciej Barc, 159 | Robert Schutz, Petr Pisar, Przemyslaw Skibinski, Shun Sakai and others.
160 | Also everyone who sent patches, helped with portability problems, encouraged 161 | me to work on bzip3 and lent me machines for performance tests. 162 | 163 | .SH "SEE ALSO" 164 | \fBbzip2(1)\fR, \fBbz3less(1)\fR, \fBbz3more(1)\fR, \fBbz3grep(1)\fR, \fBbunzip3(1)\fR 165 | -------------------------------------------------------------------------------- /bzip3.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | bindir=@bindir@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: @PACKAGE@ 8 | Description: A better and stronger spiritual successor to BZip2 9 | Version: @PACKAGE_VERSION@ 10 | License: LGPL-3.0-or-later 11 | Libs: -L${libdir} -lbzip3 12 | Cflags: -I${includedir} 13 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_PREREQ([2.68]) 2 | AC_INIT([bzip3], [m4_esyscmd(build-aux/git-version-gen .tarball-version)], [https://github.com/kspalaiologos/bzip3]) 3 | AC_CONFIG_AUX_DIR([build-aux]) 4 | AM_INIT_AUTOMAKE([foreign subdir-objects tar-ustar dist-bzip2 dist-xz dist-zip color-tests]) 5 | AM_SILENT_RULES([yes]) 6 | AC_CONFIG_MACRO_DIR([build-aux]) 7 | 8 | AC_MSG_CHECKING([whether system or user specified compiler flags are set]) 9 | AM_CONDITIONAL([PASSED_CFLAGS], [test -n "$CFLAGS"]) 10 | AM_COND_IF([PASSED_CFLAGS], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) 11 | 12 | AC_PROG_CC([clang gcc icc]) 13 | AC_PROG_AWK 14 | LT_INIT 15 | 16 | PKG_PROG_PKG_CONFIG 17 | PKG_INSTALLDIR 18 | 19 | AC_C_RESTRICT 20 | 21 | AC_ARG_WITH([pthread], 22 | AS_HELP_STRING([--without-pthread], [Disable use of pthread library])) 23 | AM_CONDITIONAL([WITH_PTHREAD], [test x"$with_pthread" != xno]) 24 | AM_COND_IF([WITH_PTHREAD], [ 25 | AC_CHECK_HEADER(pthread.h, 26 | [AX_PTHREAD([CFLAGS="$CFLAGS $PTHREAD_CFLAGS -DPTHREAD" LIBS="$LIBS 
$PTHREAD_LIBS"])], 27 | [AC_MSG_ERROR([pthread.h not found, use --without-pthread to skip])]) 28 | ]) 29 | 30 | AC_ARG_ENABLE([arch-native], 31 | AS_HELP_STRING([--disable-arch-native], [Disable CPU-specific optimizations])) 32 | AM_CONDITIONAL([ENABLE_ARCH_NATIVE], [test x"$enable_arch_native" != xno]) 33 | 34 | AC_ARG_ENABLE([static-exe], 35 | AS_HELP_STRING([--enable-static-exe], [Enable static builds of the executable.])) 36 | AM_CONDITIONAL([ENABLE_STATIC], [test x"$enable_static_exe" = xyes]) 37 | 38 | AM_COND_IF([PASSED_CFLAGS], [ 39 | AC_MSG_NOTICE([skipping compiler feature detection, using '$CFLAGS']) 40 | ], [ 41 | AX_CHECK_COMPILE_FLAG([-O2], [CFLAGS="$CFLAGS -O2"], []) 42 | AX_CHECK_COMPILE_FLAG([-g3], [CFLAGS="$CFLAGS -g3"], []) 43 | AX_CHECK_COMPILE_FLAG([-fPIC], [CFLAGS="$CFLAGS -fPIC"], []) 44 | AC_MSG_NOTICE([using '$CFLAGS' plus compiler feature detection]) 45 | 46 | AM_COND_IF([ENABLE_ARCH_NATIVE], [ 47 | AX_CHECK_COMPILE_FLAG([-march=native], [CFLAGS="$CFLAGS -march=native"], 48 | [AC_MSG_ERROR([Compiler does not support native optimizations, use --disable-arch-native])]) 49 | AX_CHECK_COMPILE_FLAG([-mtune=native], [CFLAGS="$CFLAGS -mtune=native"], 50 | [AC_MSG_ERROR([Compiler does not support native optimizations, use --disable-arch-native])]) 51 | ]) 52 | 53 | AM_COND_IF([ENABLE_STATIC], [ 54 | AX_CHECK_COMPILE_FLAG([-static], [LIBS="$LIBS -all-static"], 55 | [AC_MSG_ERROR([Compiler does not support static linking.])]) 56 | ]) 57 | ]) 58 | 59 | AX_SUBST_TRANSFORMED_PACKAGE_NAME 60 | AX_SUBST_MAN_DATE 61 | 62 | AC_CONFIG_FILES([Makefile bzip3.pc]) 63 | AC_CONFIG_FILES([bzip3.1 bz3grep.1 bz3less.1 bz3more.1 bz3most.1]) 64 | AC_ARG_PROGRAM 65 | AC_OUTPUT 66 | -------------------------------------------------------------------------------- /doc/bzip3_format.md: -------------------------------------------------------------------------------- 1 | # BZip3 Format Specification 2 | 3 | Version 1 4 | 5 | ## Headers 6 | 7 | The File and Frame formats 
share a similar structure, differing only in whether they include a 8 | block count field. 9 | 10 | ### File Header 11 | 12 | ``` 13 | +----------------+------------------+--------------------+ 14 | | Header | Chunk 1 | Chunk 2 | 15 | | (9 bytes) | (variable size) | (variable size) | 16 | +----------------+------------------+--------------------+ 17 | ``` 18 | 19 | This is created by the CLI tool. 20 | 21 | ### Frame Header 22 | 23 | ``` 24 | +----------------+------------------+--------------------+ 25 | | Header | Chunk 1 | Chunk 2 | 26 | | (13 bytes) | (variable size) | (variable size) | 27 | +----------------+------------------+--------------------+ 28 | ``` 29 | 30 | This is created/read by `bz3_compress` and `bz3_decompress`. 31 | 32 | ### Header Structure 33 | 34 | | Field | Type | Description | File Header | Frame Header | 35 | | -------------- | ------ | ------------------------------- | ----------- | ------------ | 36 | | Signature | u8[5] | Fixed "BZ3v1" ASCII string | ✓ | ✓ | 37 | | Max Block Size | u32_le | Maximum decompressed block size | ✓ | ✓ | 38 | | Block Count | u32_le | Number of blocks in the stream | ✗ | ✓ | 39 | 40 | ### Validation Rules 41 | 42 | 1. **Signature**: Must exactly match "BZ3v1" 43 | 2. **Max Block Size**: 44 | - Minimum: 65KiB (66,560 bytes) 45 | - Maximum: 511MiB (535,822,336 bytes) 46 | 3. 
**Block Count** (Frame Format only): 47 | - Must match the actual number of blocks in the stream 48 | - Should be greater than 0 49 | 50 | ### Example Parser 51 | 52 | ```c 53 | typedef struct { 54 | uint32_t max_block_size; 55 | uint32_t block_count; // Frame Format only 56 | } bzip3_header_t; 57 | 58 | bool read_bzip3_header(FILE* fp, bzip3_header_t* header, bool is_frame_format) { 59 | char signature[6] = {0}; 60 | 61 | // Read signature 62 | if (fread(signature, 1, 5, fp) != 5) 63 | return false; 64 | 65 | if (strcmp(signature, "BZ3v1") != 0) 66 | return false; 67 | 68 | // Read max block size 69 | uint8_t size_bytes[4]; 70 | if (fread(size_bytes, 1, 4, fp) != 4) 71 | return false; 72 | 73 | header->max_block_size = read_neutral_s32(size_bytes); 74 | 75 | if (header->max_block_size < 66560 || 76 | header->max_block_size > 535822336) 77 | return false; 78 | 79 | // Read block count if Frame Format 80 | if (is_frame_format) { 81 | uint8_t count_bytes[4]; 82 | if (fread(count_bytes, 1, 4, fp) != 4) 83 | return false; 84 | 85 | header->block_count = read_neutral_s32(count_bytes); 86 | 87 | if (header->block_count == 0) 88 | return false; 89 | } 90 | 91 | return true; 92 | } 93 | ``` 94 | 95 | The integers in BZip3 are written unaligned, in little endian format. 96 | A portable implementation is below. 97 | 98 | ```c 99 | // Reading a 32-bit integer 100 | static s32 read_neutral_s32(u8 * data) { 101 | return ((u32)data[0]) | 102 | (((u32)data[1]) << 8) | 103 | (((u32)data[2]) << 16) | 104 | (((u32)data[3]) << 24); 105 | } 106 | 107 | // Writing a 32-bit integer 108 | static void write_neutral_s32(u8 * data, s32 value) { 109 | data[0] = value & 0xFF; 110 | data[1] = (value >> 8) & 0xFF; 111 | data[2] = (value >> 16) & 0xFF; 112 | data[3] = (value >> 24) & 0xFF; 113 | } 114 | ``` 115 | 116 | ## Block Format 117 | 118 | After the header, both File and Frame formats contain a sequence of blocks that follow the Block 119 | Format specification.
Each block is encapsulated in a chunk structure that defines its size. 120 | 121 | The blocks (***without chunk header***) can be encoded/decoded using the `bz3_encode_block` 122 | and `bz3_decode_block` APIs. 123 | 124 | ### Chunk Structure 125 | 126 | ```c 127 | // Main block structure 128 | struct Chunk { 129 | u32_le compressedSize; // Size of compressed block 130 | u32_le origSize; // Original uncompressed size 131 | 132 | if (origSize < 64) { 133 | SmallBlock block; 134 | } else { 135 | Block block; 136 | } 137 | }; 138 | ``` 139 | 140 | ### Small Block Format (< 64 bytes) 141 | 142 | For blocks smaller than 64 bytes, no compression is attempted. The data is stored with just a checksum: 143 | 144 | ```c 145 | struct SmallBlock { 146 | u32_le crc32; // CRC32 checksum 147 | u32_le literal; // Always 0xFFFFFFFF for small blocks. This is basically an invalid `bwtIndex` 148 | u8 data[parent.compressedSize - 8]; // Uncompressed data 149 | }; 150 | ``` 151 | 152 | ### Regular Block Format (≥ 64 bytes) 153 | 154 | Larger blocks use a more complex format that supports multiple compression features: 155 | 156 | ```c 157 | struct Block { 158 | u32_le crc32; // CRC32 checksum of uncompressed data 159 | u32_le bwtIndex; // Burrows-Wheeler transform index 160 | u8 model; // Compression model flags 161 | 162 | if ((model & 0x02) != 0) 163 | u32_le lzpSize; // Size after LZP compression 164 | if ((model & 0x04) != 0) 165 | u32_le rleSize; // Size after RLE compression 166 | 167 | u8 data[parent.compressedSize - (popcnt(model) * 4 + 9)]; 168 | }; 169 | ``` 170 | 171 | #### Compression Model 172 | 173 | The `model` byte in regular blocks indicates which compression features were used: 174 | 175 | - `0x02`: LZP (Lempel Ziv Prediction) filter 176 | - `0x04`: RLE (Run-Length Encoding) filter 177 | 178 | ## External Resources 179 | 180 | - [BZip3 Pattern for ImHex](https://github.com/WerWolv/ImHex-Patterns/pull/329) 181 | 
-------------------------------------------------------------------------------- /doc/overview.md: -------------------------------------------------------------------------------- 1 | # BZip3 Format Documentation 2 | 3 | BZip3 is a modern compression format designed for high compression ratios while maintaining 4 | reasonable decompression speeds. It is intended to provide similar compression ratio and 5 | performance to LZMA and BZip2; as opposed to faster Lempel-Ziv codecs that usually offer worse 6 | compression ratio like ZStandard or LZ4. 7 | 8 | This documentation covers the technical specifications of the BZip3 format. 9 | 10 | ## Format Characteristics 11 | 12 | - Block level compression (no streams) 13 | - Maximum block size ranges from 65KiB to 511MiB 14 | - Memory usage of ~(6 x block size), both compression and decompression 15 | - Little-endian encoding for integers 16 | - Embedded CRC32 checksums for data integrity 17 | - Combines LZP, RLE followed by Burrows-Wheeler transform and arithmetic coding coupled with 18 | a statistical predictor. 19 | 20 | ## Format Overview 21 | 22 | BZip3 uses two main top-level formats: 23 | 24 | 1. **File Format**: The standard format used by the command-line tool 25 | 2. **Frame Format**: Used by the high-level API functions `bz3_compress` and `bz3_decompress`. 26 | 27 | These formats are very similar: the frame format is a superset of the file format, as its header 28 | additionally contains a block count field. 29 | 30 | See [bzip3_format.md](./bzip3_format.md) for more details.
31 | -------------------------------------------------------------------------------- /etc/BENCHMARKS.md: -------------------------------------------------------------------------------- 1 | 2 | [Testing corpus](https://github.com/kspalaiologos/bzip3/releases/download/corpus/corpus.7z) 3 | 4 | ``` 5 | 17256 bee_movie.txt.bz3 6 | 18109 bzip2/bee_movie.txt.bz2 7 | 55315 bee_movie.txt 8 | 9 | 72320 bzip3-master.tar.bz3 10 | 77575 bzip2/bzip3-master.tar.bz2 11 | 501760 bzip3-master.tar 12 | 13 | 256680 lua-5.4.4.tar.bz3 14 | 285841 bzip2/lua-5.4.4.tar.bz2 15 | 1361920 lua-5.4.4.tar 16 | 17 | 468272 cantrbry.tar.bz3 18 | 570856 bzip2/cantrbry.tar.bz2 19 | 2821120 cantrbry.tar 20 | 21 | 807959 calgary.tar.bz3 22 | 891321 bzip2/calgary.tar.bz2 23 | 3265536 calgary.tar 24 | 25 | 1229840 shakespeare.txt.bz3 26 | 1479261 bzip2/shakespeare.txt.bz2 27 | 5458199 shakespeare.txt 28 | 29 | 2347278 decoda.tar.bz3 30 | 2580600 bzip2/decoda.tar.bz2 31 | 6154240 decoda.tar 32 | 33 | 2052611 2b2t_signs.txt.bz3 34 | 2388597 bzip2/2b2t_signs.txt.bz2 35 | 9635520 2b2t_signs.txt 36 | 37 | 41187299 audio.tar.bz3 38 | 95526840 bzip2/audio.tar.bz2 39 | 115742720 audio.tar 40 | 41 | 12753530 chinese.txt.bz3 42 | 17952181 bzip2/chinese.txt.bz2 43 | 79912971 chinese.txt 44 | 45 | 22677651 enwik8.bz3 46 | 29008758 bzip2/enwik8.bz2 47 | 100000000 enwik8 48 | 49 | 47227855 silesia.tar.bz3 50 | 54538771 bzip2/silesia.tar.bz2 51 | 211968000 silesia.tar 52 | 53 | 8437731 lisp.mb.bz3 54 | 13462295 bzip2/lisp.mb.bz2 55 | 371331415 lisp.mb 56 | 57 | 83624620 gcc.tar.bz3 58 | 109065903 bzip2/gcc.tar.bz2 59 | 824309760 gcc.tar 60 | 61 | 157642102 dna.tar.bz3 62 | 180075480 bzip2/dna.tar.bz2 63 | 685619200 dna.tar 64 | 65 | 129023171 linux.tar.bz3 66 | 157810434 bzip2/linux.tar.bz2 67 | 1215221760 linux.tar 68 | 69 | 406343457 Windows NT 4.0.vmdk.bz3 70 | 437184515 bzip2/Windows NT 4.0.vmdk.bz2 71 | 804192256 Windows NT 4.0.vmdk 72 | ``` 73 | 74 | ## Benchmark on the Calgary corpus 75 | 76 | Downloaded 
from http://corpus.canterbury.ac.nz/resources/calgary.tar.gz 77 | 78 | Results: 79 | 80 | ``` 81 | % wc -c corpus/calgary.tar.bz2 corpus/calgary.tar.lzma corpus/calgary.tar.gz corpus/calgary.tar 82 | 891321 corpus/calgary.tar.bz2 83 | 853112 corpus/calgary.tar.lzma 84 | 1062584 corpus/calgary.tar.gz 85 | 3265536 corpus/calgary.tar 86 | ``` 87 | 88 | Performance: 89 | 90 | ``` 91 | Benchmark 1: gzip -9 -k -f corpus/calgary.tar 92 | Time (mean ± σ): 224.3 ms ± 2.6 ms [User: 221.4 ms, System: 2.5 ms] 93 | Range (min … max): 219.9 ms … 230.9 ms 30 runs 94 | 95 | Benchmark 2: lzma -9 -k -f corpus/calgary.tar 96 | Time (mean ± σ): 787.9 ms ± 9.6 ms [User: 753.6 ms, System: 33.7 ms] 97 | Range (min … max): 764.8 ms … 813.1 ms 30 runs 98 | 99 | Benchmark 3: bzip3 -e -b 3 corpus/calgary.tar corpus/calgary.tar.bz3 100 | Time (mean ± σ): 265.3 ms ± 1.8 ms [User: 257.6 ms, System: 5.9 ms] 101 | Range (min … max): 262.5 ms … 269.0 ms 11 runs 102 | 103 | Benchmark 4: bzip2 -9 -k -f corpus/calgary.tar 104 | Time (mean ± σ): 172.9 ms ± 2.4 ms [User: 168.4 ms, System: 4.4 ms] 105 | Range (min … max): 169.5 ms … 179.4 ms 30 runs 106 | ``` 107 | 108 | Memory usage (as reported by `zsh`'s `time`): 109 | 110 | ``` 111 | bzip2 8M memory 112 | bzip3 17M memory 113 | lzma 95M memory 114 | gzip 5M memory 115 | ``` 116 | 117 | ## Benchmark on the Linux kernel 118 | 119 | ``` 120 | bzip3 -e -b 16 corpus/linux.tar corpus/linux.tar.bz3 76.93s user 0.41s system 99% cpu 89M memory 1:17.38 total 121 | bzip2 -9 -k linux.tar 61.23s user 0.35s system 99% cpu 8M memory 1:01.58 total 122 | gzip -9 -k linux.tar 43.08s user 0.35s system 99% cpu 4M memory 43.435 total 123 | lzma -9 -k linux.tar 397.30s user 0.90s system 99% cpu 675M memory 6:38.28 total 124 | ``` 125 | 126 | ``` 127 | wc -c linux.tar* 128 | 1215221760 linux.tar 129 | 157810434 linux.tar.bz2 130 | 208100532 linux.tar.gz 131 | 125725455 linux.tar.lzma 132 | ``` 133 | 134 | ## The Silesia corpus 135 | 136 | ``` 137 | lzma -9 -k silesia.tar 
76.88s user 0.31s system 99% cpu 675M memory 1:17.20 total 138 | bzip3 -e -b 16 silesia.tar 17.42s user 0.08s system 99% cpu 98M memory 17.510 total 139 | zstd -19 silesia.tar 83.43s user 0.20s system 100% cpu 237M memory 1:23.47 total 140 | 141 | % wc -c silesia* 142 | 211968000 silesia.tar 143 | 47227855 silesia.tar.bz3 144 | 48761670 silesia.tar.lzma 145 | 53000145 silesia.tar.zst 146 | ``` 147 | -------------------------------------------------------------------------------- /etc/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kspalaiologos/bzip3/42e1cfc5e65054639517b3517dc61fb0b6d28408/etc/benchmark.png -------------------------------------------------------------------------------- /examples/fuzz-decode-block.c: -------------------------------------------------------------------------------- 1 | /* A tiny utility for fuzzing bzip3 block decompression. 2 | * 3 | * Prerequisites: 4 | * 5 | * - AFL https://github.com/AFLplusplus/AFLplusplus 6 | * - clang (part of LLVM) 7 | * 8 | * On Arch this is `pacman -S afl++ clang` 9 | * 10 | * # Instructions: 11 | * 12 | * 1. Prepare fuzzer directories 13 | * 14 | * mkdir -p afl_in && mkdir -p afl_out 15 | * 16 | * 2. Build binary (to compress test data). 17 | * 18 | * afl-clang fuzz-decode-block.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 19 | * 20 | * 3. Make a fuzzer input file. 21 | * 22 | * With `your_file` being an arbitrary input to test, use this utility 23 | * to generate a compressed test block: 24 | * 25 | * ./fuzz standard_test_files/63_byte_file.bin 63_byte_file.bin.bz3b 8 26 | * ./fuzz standard_test_files/65_byte_file.bin 65_byte_file.bin.bz3b 8 27 | * mv 63_byte_file.bin.bz3b afl_in/ 28 | * mv 65_byte_file.bin.bz3b afl_in/ 29 | * 30 | * For this test, it is recommended to make 2 files, one that's <64 bytes and one that's >64 bytes. 31 | * 32 | * 4. Build binary (for fuzzing). 
33 | * 34 | * afl-clang-fast fuzz-decode-block.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 35 | * 36 | * 5. Run the fuzzer. 37 | * 38 | * AFL_SKIP_CPUFREQ=1 afl-fuzz -i afl_in -o afl_out -- ./fuzz @@ 39 | * 40 | * 6. Wanna go faster? Multithread. 41 | * 42 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -M fuzzer01 -- ./fuzz @@; exec bash" & 43 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer02 -- ./fuzz @@; exec bash" & 44 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer03 -- ./fuzz @@; exec bash" & 45 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer04 -- ./fuzz @@; exec bash" & 46 | * 47 | * etc. Replace `alacritty` with your terminal. 48 | * 49 | * And check progress with `afl-whatsup afl_out` (updates periodically). 50 | * 51 | * 7. Found a crash? 52 | * 53 | * If you find a crash, consider also doing the following: 54 | * 55 | * clang fuzz-decode-block.c -g3 -O3 -march=native -o fuzz_asan -I../include "-DVERSION=\"0.0.0\"" -fsanitize=undefined -fsanitize=address 56 | * 57 | * And run fuzz_asan on the crashing test case (you can find it in one of the `afl_out/crashes/` folders). 58 | * Attach the test case /and/ the output of fuzz_asan to the bug report. 59 | * 60 | * If no error occurs, it could be that there was a memory corruption `between` the runs. 61 | * In which case, you want to run AFL with address sanitizer. Use `export AFL_USE_ASAN=1` to enable 62 | * address sanitizer; then run AFL. 63 | * 64 | * export AFL_USE_ASAN=1 65 | * afl-clang-fast fuzz-decode-block.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 66 | */ 67 | 68 | /* 69 | 70 | This hex editor template can be used to help debug a breaking file. 71 | Would provide for ImHex, but ImHex terminates if template is borked.
72 | 73 | 74 | //------------------------------------------------ 75 | //--- 010 Editor v15.0.1 Binary Template 76 | // 77 | // File: bzip3block.bt 78 | // Authors: Sewer56 79 | // Version: 1.0.0 80 | // Purpose: Parse bzip3 fuzzer block data 81 | // Category: Archive 82 | // File Mask: *.bz3b 83 | //------------------------------------------------ 84 | 85 | // Colors for different sections 86 | #define COLOR_HEADER 0xA0FFA0 // Block metadata 87 | #define COLOR_BLOCKHEAD 0xFFB0B0 // Block headers 88 | #define COLOR_DATA 0xB0B0FF // Compressed data 89 | 90 | local uint32 currentBlockSize; // Store block size globally 91 | 92 | // Block metadata structure 93 | typedef struct { 94 | uint32 orig_size; // Original uncompressed size 95 | uint32 comp_size; // Compressed size 96 | uint32 buffer_size; // Size of decompression buffer 97 | } BLOCK_META ; 98 | 99 | // Regular block header (for blocks >= 64 bytes) 100 | typedef struct { 101 | uint32 crc32; // CRC32 checksum of uncompressed data 102 | uint32 bwtIndex; // Burrows-Wheeler transform index 103 | uint8 model; // Compression model flags: 104 | // bit 1 (0x02): LZP was used 105 | // bit 2 (0x04): RLE was used 106 | 107 | // Optional size fields based on compression flags 108 | if(model & 0x02) 109 | uint32 lzpSize; // Size after LZP compression 110 | if(model & 0x04) 111 | uint32 rleSize; // Size after RLE compression 112 | } BLOCK_HEADER ; 113 | 114 | // Small block header (for blocks < 64 bytes) 115 | typedef struct { 116 | uint32 crc32; // CRC32 checksum 117 | uint32 literal; // Always 0xFFFFFFFF for small blocks 118 | uint8 data[currentBlockSize - 8]; // Uncompressed data 119 | } SMALL_BLOCK ; 120 | 121 | // Block content structure 122 | typedef struct { 123 | currentBlockSize = meta.comp_size; 124 | 125 | if(meta.orig_size < 64) { 126 | SMALL_BLOCK content; 127 | } else { 128 | BLOCK_HEADER header; 129 | uchar data[meta.comp_size - (Popcount(header.model) * 4 + 9)]; 130 | } 131 | } BLOCK_CONTENT ; 132 | 133 | // 
Helper function for bit counting (used for header size calculation) 134 | int Popcount(byte b) { 135 | local int count = 0; 136 | while(b) { 137 | count += b & 1; 138 | b >>= 1; 139 | } 140 | return count; 141 | } 142 | 143 | // Main block structure 144 | typedef struct { 145 | BLOCK_META meta; 146 | BLOCK_CONTENT content; 147 | } BLOCK; 148 | 149 | // Main parsing structure 150 | BLOCK block; 151 | */ 152 | 153 | #include "../include/libbz3.h" 154 | #include "../src/libbz3.c" 155 | #include 156 | #include 157 | #include 158 | #include 159 | 160 | #define KiB(x) ((x)*1024) 161 | 162 | // Required for AFL++ persistent mode 163 | #ifdef __AFL_HAVE_MANUAL_CONTROL 164 | #include 165 | __AFL_FUZZ_INIT(); 166 | #endif 167 | 168 | size_t min_size_t(size_t a, size_t b) { 169 | return (a < b) ? a : b; 170 | } 171 | 172 | // Returns 0 on success, positive on bzip3 errors 173 | static int try_decode_block(const uint8_t *input_buf, size_t input_len) { 174 | // Read whatever metadata we can get 175 | uint32_t orig_size = 0; 176 | uint32_t comp_size = 0; 177 | uint32_t buffer_size = 0; 178 | 179 | if (input_len >= 4) orig_size = *(const uint32_t *)input_buf; 180 | if (input_len >= 8) comp_size = *(const uint32_t *)(input_buf + 4); 181 | if (input_len >= 12) buffer_size = *(const uint32_t *)(input_buf + 8); 182 | 183 | // Initialize state with minimum block size 184 | struct bz3_state *state = bz3_new(KiB(65)); 185 | if (!state) return 0; // not under test 186 | 187 | // Allocate buffer with fuzzer-provided size 188 | uint8_t *buffer = malloc(buffer_size); 189 | if (!buffer) { 190 | bz3_free(state); 191 | return 0; // not under test 192 | } 193 | 194 | // Copy whatever compressed data we can get 195 | size_t data_len = input_len > 12 ? 
input_len - 12 : 0; 196 | if (data_len > 0) { 197 | memcpy(buffer, input_buf + 12, min_size_t(data_len, (size_t)buffer_size)); 198 | } 199 | 200 | // Attempt decompression with potentially invalid parameters 201 | int bzerr = bz3_decode_block(state, buffer, buffer_size, comp_size, orig_size); 202 | // and pray we don't crash :p 203 | 204 | free(buffer); 205 | bz3_free(state); 206 | return bzerr; 207 | } 208 | 209 | static int encode_block(const char *infile, const char *outfile, uint32_t block_size) { 210 | block_size = block_size <= KiB(65) ? KiB(65) : block_size; 211 | 212 | // Read input file 213 | FILE *fp_in = fopen(infile, "rb"); 214 | if (!fp_in) { 215 | perror("Failed to open input file"); 216 | return 1; 217 | } 218 | 219 | fseek(fp_in, 0, SEEK_END); 220 | size_t insize = ftell(fp_in); 221 | fseek(fp_in, 0, SEEK_SET); 222 | 223 | uint8_t *inbuf = malloc(insize); 224 | if (!inbuf) { 225 | fclose(fp_in); 226 | return 1; 227 | } 228 | 229 | fread(inbuf, 1, insize, fp_in); 230 | fclose(fp_in); 231 | 232 | // Initialize compression state 233 | struct bz3_state *state = bz3_new(block_size); 234 | if (!state) { 235 | free(inbuf); 236 | return 1; 237 | } 238 | 239 | // Make output buffer 240 | size_t outsize = bz3_bound(insize); 241 | uint8_t *outbuf = malloc(outsize + 12); // +12 for metadata 242 | if (!outbuf) { 243 | bz3_free(state); 244 | free(inbuf); 245 | return 1; 246 | } 247 | 248 | // Store metadata 249 | *(uint32_t *)outbuf = insize; // Original size 250 | *(uint32_t *)(outbuf + 8) = outsize; // Buffer size needed for decompression 251 | 252 | // Compress the block 253 | int32_t comp_size = bz3_encode_block(state, outbuf + 12, insize); 254 | if (comp_size < 0) { 255 | printf("bz3_encode_block() failed with error code %d\n", comp_size); 256 | bz3_free(state); 257 | free(inbuf); 258 | free(outbuf); 259 | return comp_size; 260 | } 261 | 262 | // Store compressed size 263 | *(uint32_t *)(outbuf + 4) = comp_size; 264 | 265 | FILE *fp_out = fopen(outfile, 
"wb"); 266 | if (!fp_out) { 267 | perror("Failed to open output file"); 268 | bz3_free(state); 269 | free(inbuf); 270 | free(outbuf); 271 | return 1; 272 | } 273 | 274 | fwrite(outbuf, 1, comp_size + 12, fp_out); 275 | fclose(fp_out); 276 | 277 | printf("Encoded block from %s (%zu bytes) to %s (%d bytes)\n", 278 | infile, insize, outfile, comp_size + 12); 279 | 280 | bz3_free(state); 281 | free(inbuf); 282 | free(outbuf); 283 | return 0; 284 | } 285 | 286 | int main(int argc, char **argv) { 287 | #ifdef __AFL_HAVE_MANUAL_CONTROL 288 | __AFL_INIT(); 289 | 290 | while (__AFL_LOOP(1000)) { 291 | try_decode_block(__AFL_FUZZ_TESTCASE_BUF, __AFL_FUZZ_TESTCASE_LEN); 292 | } 293 | #else 294 | if (argc == 4) { 295 | // Compression mode: input_file output_file block_size 296 | return encode_block(argv[1], argv[2], atoi(argv[3])); 297 | } 298 | 299 | if (argc != 2) { 300 | fprintf(stderr, "Usage:\n"); 301 | fprintf(stderr, " Decode: %s \n", argv[0]); 302 | fprintf(stderr, " Encode: %s \n", argv[0]); 303 | return 1; 304 | } 305 | 306 | // Decode mode 307 | FILE *fp = fopen(argv[1], "rb"); 308 | if (!fp) { 309 | perror("Failed to open input file"); 310 | return 1; 311 | } 312 | 313 | fseek(fp, 0, SEEK_END); 314 | size_t size = ftell(fp); 315 | fseek(fp, 0, SEEK_SET); 316 | 317 | uint8_t *buffer = malloc(size); 318 | if (!buffer) { 319 | fclose(fp); 320 | return 1; 321 | } 322 | 323 | fread(buffer, 1, size, fp); 324 | fclose(fp); 325 | 326 | int result = try_decode_block(buffer, size); 327 | free(buffer); 328 | return result > 0 ? result : 0; // Return bzip3 errors but treat validation errors as success 329 | #endif 330 | 331 | return 0; 332 | } -------------------------------------------------------------------------------- /examples/fuzz-decompress.c: -------------------------------------------------------------------------------- 1 | /* A tiny utility for fuzzing bzip3 frame decompression. 
2 | * 3 | * Prerequisites: 4 | * 5 | * - AFL https://github.com/AFLplusplus/AFLplusplus 6 | * - clang (part of LLVM) 7 | * 8 | * On Arch this is `pacman -S afl++ clang` 9 | * 10 | * # Instructions: 11 | * 12 | * 1. Prepare fuzzer directories 13 | * 14 | * mkdir -p afl_in && mkdir -p afl_out 15 | * 16 | * 2. Build binary (to compress test data). 17 | * 18 | * afl-clang fuzz-decompress.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 19 | * 20 | * 3. Make a fuzzer input file. 21 | * 22 | * With `your_file` being an arbitrary input to test, use this utility 23 | * to generate a compressed test frame: 24 | * 25 | * ./fuzz hl-api.c hl-api.c.bz3 8 26 | * mv hl-api.c.bz3 afl_in/ 27 | * 28 | * 4. Build binary (for fuzzing). 29 | * 30 | * afl-clang-fast fuzz-decompress.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 31 | * 32 | * 5. Run the fuzzer. 33 | * 34 | * AFL_SKIP_CPUFREQ=1 afl-fuzz -i afl_in -o afl_out -- ./fuzz @@ 35 | * 36 | * 6. Wanna go faster? Multithread. 37 | * 38 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -M fuzzer01 -- ./fuzz @@; exec bash" & 39 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer02 -- ./fuzz @@; exec bash" & 40 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer03 -- ./fuzz @@; exec bash" & 41 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer04 -- ./fuzz @@; exec bash" & 42 | * 43 | * etc. Replace `alacritty` with your terminal. 44 | * 45 | * And check progress with `afl-whatsup afl_out` (updates periodically). 46 | * 47 | * 7. Found a crash? 48 | * 49 | * If you find a crash, consider also doing the following: 50 | * 51 | * clang fuzz-decompress.c -g3 -O3 -march=native -o fuzz_asan -I../include "-DVERSION=\"0.0.0\"" -fsanitize=undefined -fsanitize=address 52 | * 53 | * And run fuzz_asan on the crashing test case (you can find it in one of the `afl_out/crashes/` folders). 54 | * Attach the test case /and/ the output of fuzz_asan to the bug report. 
55 | * 56 | * If no error occurs, it could be that there was a memory corruption `between` the runs. 57 | * In which case, you want to run AFL with address sanitizer. Use `export AFL_USE_ASAN=1` to enable 58 | * address sanitizer; then run AFL. 59 | * 60 | * export AFL_USE_ASAN=1 61 | * afl-clang-fast fuzz-decompress.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 62 | */ 63 | 64 | 65 | /* 66 | This hex editor template can be used to help debug a breaking file. 67 | Would provide for ImHex, but ImHex terminates if template is borked. 68 | 69 | //------------------------------------------------ 70 | //--- 010 Editor v15.0.1 Binary Template 71 | // 72 | // File: bzip3-fuzz-decompress.bt 73 | // Authors: Sewer56 74 | // Version: 1.0.0 75 | // Purpose: Parse bzip3 fuzzer data 76 | //------------------------------------------------ 77 | 78 | // Colors for different sections 79 | #define COLOR_HEADER 0xA0FFA0 // Frame header 80 | #define COLOR_BLOCKHEAD 0xFFB0B0 // Block headers 81 | #define COLOR_DATA 0xB0B0FF // Compressed data 82 | 83 | local uint32 currentBlockSize; // Store block size globally 84 | 85 | // Frame header structure 86 | typedef struct { 87 | char signature[5]; // "BZ3v1" 88 | uint32 blockSize; // Maximum block size 89 | uint32 block_count; 90 | } FRAME_HEADER ; 91 | 92 | // Regular block header (for blocks >= 64 bytes) 93 | typedef struct { 94 | uint32 crc32; // CRC32 checksum of uncompressed data 95 | uint32 bwtIndex; // Burrows-Wheeler transform index 96 | uint8 model; // Compression model flags: 97 | // bit 1 (0x02): LZP was used 98 | // bit 2 (0x04): RLE was used 99 | 100 | // Optional size fields based on compression flags 101 | if(model & 0x02) 102 | uint32 lzpSize; // Size after LZP compression 103 | if(model & 0x04) 104 | uint32 rleSize; // Size after RLE compression 105 | } BLOCK_HEADER ; 106 | 107 | // Small block header (for blocks < 64 bytes) 108 | typedef struct { 109 | uint32 crc32; // CRC32 checksum 110 | uint32 
literal; // Always 0xFFFFFFFF for small blocks 111 | uint8 data[currentBlockSize - 8]; // Uncompressed data 112 | } SMALL_BLOCK ; 113 | 114 | // Main block structure 115 | typedef struct { 116 | uint32 compressedSize; // Size of compressed block 117 | uint32 origSize; // Original uncompressed size 118 | 119 | currentBlockSize = compressedSize; // Store for use in SMALL_BLOCK 120 | 121 | if(origSize < 64) { 122 | SMALL_BLOCK content; 123 | } else { 124 | BLOCK_HEADER header; 125 | uchar data[compressedSize - (Popcount(header.model) * 4 + 9)]; 126 | } 127 | } BLOCK ; 128 | 129 | // Helper function for bit counting (used for header size calculation) 130 | int Popcount(byte b) { 131 | local int count = 0; 132 | while(b) { 133 | count += b & 1; 134 | b >>= 1; 135 | } 136 | return count; 137 | } 138 | 139 | // Main parsing structure 140 | uint32 orig_size; 141 | FRAME_HEADER frameHeader; 142 | 143 | // Read blocks until end of file 144 | while(!FEof()) { 145 | BLOCK block; 146 | } 147 | 148 | */ 149 | 150 | #include "../include/libbz3.h" 151 | #include "../src/libbz3.c" 152 | #include 153 | #include 154 | #include 155 | #include 156 | 157 | #define KiB(x) ((x)*1024) 158 | 159 | // Required for AFL++ persistent mode 160 | #ifdef __AFL_HAVE_MANUAL_CONTROL 161 | #include 162 | __AFL_FUZZ_INIT(); 163 | #endif 164 | 165 | // Maximum allowed size to prevent excessive memory allocation 166 | #define MAX_SIZE 0x10000000 // 256MB 167 | 168 | // Returns 0 on success, negative on input validation errors, positive on bzip3 errors 169 | static int try_decompress(const uint8_t *input_buf, size_t input_len) { 170 | if (input_len < 8) { // invalid, does not contain orig_size 171 | return -1; 172 | } 173 | 174 | size_t orig_size = *(const uint32_t *)input_buf; 175 | uint8_t *outbuf = malloc(orig_size); 176 | if (!outbuf) { 177 | return -3; 178 | } 179 | 180 | // We read orig_size from the input as we also want to fuzz it. 
181 | int bzerr = bz3_decompress( 182 | input_buf + sizeof(uint32_t), 183 | outbuf, 184 | input_len - sizeof(uint32_t), 185 | &orig_size 186 | ); 187 | 188 | if (bzerr != BZ3_OK) { 189 | printf("bz3_decompress() failed with error code %d\n", bzerr); 190 | } else { 191 | printf("OK, %d => %d\n", (int)input_len, (int)orig_size); 192 | } 193 | 194 | free(outbuf); 195 | return bzerr; 196 | } 197 | 198 | static int compress_file(const char *infile, const char *outfile, uint32_t block_size) { 199 | block_size = block_size <= KiB(65) ? KiB(65) : block_size; 200 | 201 | // Read the data into `inbuf` 202 | FILE *fp_in = fopen(infile, "rb"); 203 | if (!fp_in) { 204 | perror("Failed to open input file"); 205 | return 1; 206 | } 207 | 208 | fseek(fp_in, 0, SEEK_END); 209 | size_t insize = ftell(fp_in); 210 | fseek(fp_in, 0, SEEK_SET); 211 | 212 | uint8_t *inbuf = malloc(insize); 213 | if (!inbuf) { 214 | fclose(fp_in); 215 | return 1; 216 | } 217 | 218 | fread(inbuf, 1, insize, fp_in); 219 | fclose(fp_in); 220 | 221 | // Make buffer for output. 222 | size_t outsize = bz3_bound(insize); 223 | uint8_t *outbuf = malloc(outsize + sizeof(uint32_t)); 224 | if (!outbuf) { 225 | free(inbuf); 226 | return 1; 227 | } 228 | 229 | // Store original size at the start 230 | // This is important, the `try_decompress` will read this field during fuzzing. 231 | // And pass it as a parameter to `bz3_decompress`. 
232 | *(uint32_t *)outbuf = insize; 233 | 234 | int bzerr = bz3_compress(block_size, inbuf, outbuf + sizeof(uint32_t), insize, &outsize); 235 | if (bzerr != BZ3_OK) { 236 | printf("bz3_compress() failed with error code %d\n", bzerr); 237 | free(inbuf); 238 | free(outbuf); 239 | return bzerr; 240 | } 241 | 242 | FILE *fp_out = fopen(outfile, "wb"); 243 | if (!fp_out) { 244 | perror("Failed to open output file"); 245 | free(inbuf); 246 | free(outbuf); 247 | return 1; 248 | } 249 | 250 | fwrite(outbuf, 1, outsize + sizeof(uint32_t), fp_out); 251 | fclose(fp_out); 252 | 253 | printf("Compressed %s (%zu bytes) to %s (%zu bytes)\n", 254 | infile, insize, outfile, outsize + sizeof(uint32_t)); 255 | 256 | free(inbuf); 257 | free(outbuf); 258 | return 0; 259 | } 260 | 261 | int main(int argc, char **argv) { 262 | #ifdef __AFL_HAVE_MANUAL_CONTROL 263 | __AFL_INIT(); 264 | 265 | while (__AFL_LOOP(1000)) { 266 | try_decompress(__AFL_FUZZ_TESTCASE_BUF, __AFL_FUZZ_TESTCASE_LEN); 267 | } 268 | #else 269 | if (argc == 4) { 270 | // Compression mode: input_file output_file block_size 271 | return compress_file(argv[1], argv[2], atoi(argv[3])); 272 | } 273 | 274 | if (argc != 2) { 275 | fprintf(stderr, "Usage:\n"); 276 | fprintf(stderr, " Decompress: %s \n", argv[0]); 277 | fprintf(stderr, " Compress: %s \n", argv[0]); 278 | return 1; 279 | } 280 | 281 | // Decompression mode 282 | FILE *fp = fopen(argv[1], "rb"); 283 | if (!fp) { 284 | perror("Failed to open input file"); 285 | return 1; 286 | } 287 | 288 | fseek(fp, 0, SEEK_END); 289 | size_t size = ftell(fp); 290 | fseek(fp, 0, SEEK_SET); 291 | 292 | if (size < 64) { 293 | fclose(fp); 294 | return 0; 295 | } 296 | 297 | uint8_t *buffer = malloc(size); 298 | if (!buffer) { 299 | fclose(fp); 300 | return 1; 301 | } 302 | 303 | fread(buffer, 1, size, fp); 304 | fclose(fp); 305 | 306 | int result = try_decompress(buffer, size); 307 | free(buffer); 308 | return result > 0 ? 
result : 0; // Return bzip3 errors but treat validation errors as success 309 | #endif 310 | 311 | return 0; 312 | } -------------------------------------------------------------------------------- /examples/fuzz-round-trip.c: -------------------------------------------------------------------------------- 1 | /* A tiny utility for fuzzing bzip3 round-trip compression/decompression. 2 | * 3 | * Prerequisites: 4 | * 5 | * - AFL https://github.com/AFLplusplus/AFLplusplus 6 | * - clang (part of LLVM) 7 | * 8 | * On Arch this is `pacman -S afl++ clang` 9 | * 10 | * # Instructions: 11 | * 12 | * 1. Prepare fuzzer directories 13 | * 14 | * mkdir -p afl_in && mkdir -p afl_out 15 | * 16 | * 2. Insert a test file to afl_in/ 17 | * 18 | * cp ./standard_test_files/63_byte_file.bin afl_in/ 19 | * 20 | * 3. Build binary (for fuzzing) 21 | * 22 | * afl-clang-fast fuzz-round-trip.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 23 | * 24 | * 4. Run the fuzzer 25 | * 26 | * AFL_SKIP_CPUFREQ=1 afl-fuzz -i afl_in -o afl_out -- ./fuzz @@ 27 | * 28 | * 5. Need to go faster? Multithread. 29 | * 30 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -M fuzzer01 -- ./fuzz @@; exec bash" & 31 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer02 -- ./fuzz @@; exec bash" & 32 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer03 -- ./fuzz @@; exec bash" & 33 | * alacritty -e bash -c "afl-fuzz -i afl_in -o afl_out -S fuzzer04 -- ./fuzz @@; exec bash" & 34 | * 35 | * etc. Replace `alacritty` with your terminal. 36 | * 37 | * 6. 
For ASAN testing: 38 | * 39 | * export AFL_USE_ASAN=1 40 | * afl-clang-fast fuzz-round-trip.c -I../include -o fuzz -g3 "-DVERSION=\"0.0.0\"" -O3 -march=native 41 | */ 42 | 43 | #include "../include/libbz3.h" 44 | #include "../src/libbz3.c" 45 | #include 46 | #include 47 | #include 48 | #include 49 | 50 | #define KiB(x) ((x)*1024) 51 | #define DEFAULT_BLOCK_SIZE KiB(65) 52 | 53 | // Required for AFL++ persistent mode 54 | #ifdef __AFL_HAVE_MANUAL_CONTROL 55 | #include 56 | __AFL_FUZZ_INIT(); 57 | #endif 58 | 59 | // Function to emulate a crash for diagnostic purposes 60 | static void __attribute__((noreturn)) crash_with_message(const char* msg) { 61 | fprintf(stderr, "Emulating crash: %s\n", msg); 62 | // Use abort() to generate a crash that ASAN and other tools can catch 63 | abort(); 64 | } 65 | 66 | // Returns 0 on success, crashes on failure 67 | static int try_round_trip(const uint8_t *input_buf, size_t input_len) { 68 | if (input_len == 0) return 0; 69 | 70 | // Use the larger of DEFAULT_BLOCK_SIZE or input_len 71 | size_t block_size = input_len > DEFAULT_BLOCK_SIZE ? input_len : DEFAULT_BLOCK_SIZE; 72 | 73 | struct bz3_state *state = bz3_new(block_size); 74 | if (!state) { 75 | return -1; // allocation failures not tested. 76 | } 77 | 78 | // Allocate buffer for both compression and decompression 79 | // Using block_size to ensure we have enough space for both operations 80 | size_t comp_buf_len = bz3_bound(input_len); 81 | uint8_t *comp_buf = malloc(comp_buf_len); 82 | if (!comp_buf) { 83 | bz3_free(state); 84 | return -1; // allocation failures not tested. 
85 | } 86 | 87 | // Step 0: Move input to compress buffer 88 | memmove(comp_buf, input_buf, input_len); 89 | 90 | // Step 1: Compress the input 91 | int32_t comp_size = bz3_encode_block(state, comp_buf, input_len); 92 | if (comp_size < 0) { 93 | bz3_free(state); 94 | free(comp_buf); 95 | crash_with_message("Compression failed"); 96 | } 97 | 98 | // Step 2: Decompress 99 | int bzerr = bz3_decode_block(state, comp_buf, comp_buf_len, comp_size, input_len); 100 | if (bzerr < 0 || bzerr != input_len) { 101 | bz3_free(state); 102 | free(comp_buf); 103 | crash_with_message("Decompression failed"); 104 | } 105 | 106 | // Step 3: Compare 107 | if (memcmp(input_buf, comp_buf, input_len) != 0) { 108 | bz3_free(state); 109 | free(comp_buf); 110 | crash_with_message("Round-trip data mismatch"); 111 | } 112 | 113 | bz3_free(state); 114 | free(comp_buf); 115 | return 0; 116 | } 117 | 118 | static int test_file(const char *filename) { 119 | FILE *fp = fopen(filename, "rb"); 120 | if (!fp) { 121 | perror("Failed to open input file"); 122 | return 1; 123 | } 124 | 125 | fseek(fp, 0, SEEK_END); 126 | size_t size = ftell(fp); 127 | fseek(fp, 0, SEEK_SET); 128 | 129 | uint8_t *buffer = malloc(size); 130 | if (!buffer) { 131 | fclose(fp); 132 | crash_with_message("Failed to allocate input buffer"); 133 | } 134 | 135 | if (fread(buffer, 1, size, fp) != size) { 136 | fclose(fp); 137 | free(buffer); 138 | crash_with_message("Failed to read input file"); 139 | } 140 | fclose(fp); 141 | 142 | int result = try_round_trip(buffer, size); 143 | free(buffer); 144 | return result; 145 | } 146 | 147 | int main(int argc, char **argv) { 148 | #ifdef __AFL_HAVE_MANUAL_CONTROL 149 | __AFL_INIT(); 150 | 151 | while (__AFL_LOOP(1000)) { 152 | try_round_trip(__AFL_FUZZ_TESTCASE_BUF, __AFL_FUZZ_TESTCASE_LEN); 153 | } 154 | #else 155 | if (argc != 2) { 156 | fprintf(stderr, "Usage: %s \n", argv[0]); 157 | return 1; 158 | } 159 | 160 | return test_file(argv[1]); 161 | #endif 162 | 163 | return 0; 164 | } 
-------------------------------------------------------------------------------- /examples/hl-api.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #define MB (1024 * 1024) 7 | 8 | int main(void) { 9 | printf("Compressing shakespeare.txt back and forth in memory.\n"); 10 | 11 | // Read the entire "shakespeare.txt" file to memory: 12 | FILE * fp = fopen("shakespeare.txt", "rb"); 13 | fseek(fp, 0, SEEK_END); 14 | size_t size = ftell(fp); 15 | fseek(fp, 0, SEEK_SET); 16 | char * buffer = malloc(size); 17 | fread(buffer, 1, size, fp); 18 | fclose(fp); 19 | 20 | // Compress the file: 21 | size_t out_size = bz3_bound(size); 22 | char * outbuf = malloc(out_size); 23 | int bzerr = bz3_compress(1 * MB, buffer, outbuf, size, &out_size); 24 | if (bzerr != BZ3_OK) { 25 | printf("bz3_compress() failed with error code %d", bzerr); 26 | return 1; 27 | } 28 | 29 | printf("%d => %d\n", size, out_size); 30 | 31 | // Decompress the file. 
32 | bzerr = bz3_decompress(outbuf, buffer, out_size, &size); 33 | if (bzerr != BZ3_OK) { 34 | printf("bz3_decompress() failed with error code %d", bzerr); 35 | return 1; 36 | } 37 | 38 | printf("%d => %d\n", out_size, size); 39 | 40 | free(buffer); 41 | free(outbuf); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /examples/shakespeare.txt.bz3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kspalaiologos/bzip3/42e1cfc5e65054639517b3517dc61fb0b6d28408/examples/shakespeare.txt.bz3 -------------------------------------------------------------------------------- /examples/standard_test_files/63_byte_file.bin: -------------------------------------------------------------------------------- 1 |   !"#$%&'()0123456789@ABCDEFGHIPQRSTUVWXY`abc -------------------------------------------------------------------------------- /examples/standard_test_files/65_byte_file.bin: -------------------------------------------------------------------------------- 1 |   !"#$%&'()0123456789@ABCDEFGHIPQRSTUVWXY`abcde -------------------------------------------------------------------------------- /examples/standard_test_files/readme.txt: -------------------------------------------------------------------------------- 1 | This is a standard set of files to use as inputs for fuzzer testing: 2 | 3 | - 65_byte_file.bin: 65 bytes, all unique 4 | - 63_byte_file.bin: 63 bytes, all unique 5 | -------------------------------------------------------------------------------- /include/common.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * BZip3 - A spiritual successor to BZip2. 
4 | * Copyright (C) 2022-2024 Kamila Szewczyk 5 | * 6 | * This program is free software: you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation, either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License along with 17 | * this program. If not, see . 18 | */ 19 | 20 | #ifndef _COMMON_H 21 | #define _COMMON_H 22 | 23 | #define KiB(x) ((x)*1024) 24 | #define MiB(x) ((x)*1024 * 1024) 25 | #define BWT_BOUND(x) (bz3_bound(x) + 128) 26 | 27 | #include 28 | #include 29 | 30 | typedef uint8_t u8; 31 | typedef uint16_t u16; 32 | typedef uint32_t u32; 33 | typedef uint64_t u64; 34 | 35 | typedef int8_t s8; 36 | typedef int16_t s16; 37 | typedef int32_t s32; 38 | 39 | static s32 read_neutral_s32(const u8 * data) { 40 | return ((u32)data[0]) | (((u32)data[1]) << 8) | (((u32)data[2]) << 16) | (((u32)data[3]) << 24); 41 | } 42 | 43 | static void write_neutral_s32(u8 * data, s32 value) { 44 | data[0] = value & 0xFF; 45 | data[1] = (value >> 8) & 0xFF; 46 | data[2] = (value >> 16) & 0xFF; 47 | data[3] = (value >> 24) & 0xFF; 48 | } 49 | 50 | #if defined(__GNUC__) || defined(__clang__) 51 | #define RESTRICT __restrict__ 52 | #elif defined(_MSC_VER) || defined(__INTEL_COMPILER) 53 | #define RESTRICT __restrict 54 | #else 55 | #define RESTRICT restrict 56 | #warning Your compiler, configuration or platform might not be supported. 
57 | #endif 58 | 59 | #if defined(__has_builtin) 60 | #if __has_builtin(__builtin_prefetch) 61 | #define HAS_BUILTIN_PREFETCH 62 | #endif 63 | #elif defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4)) 64 | #define HAS_BUILTIN_PREFETCH 65 | #endif 66 | 67 | #if defined(__has_builtin) 68 | #if __has_builtin(__builtin_bswap16) 69 | #define HAS_BUILTIN_BSWAP16 70 | #endif 71 | #elif defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5)) 72 | #define HAS_BUILTIN_BSWAP16 73 | #endif 74 | 75 | #if defined(HAS_BUILTIN_PREFETCH) 76 | #define prefetch(address) __builtin_prefetch((const void *)(address), 0, 0) 77 | #define prefetchw(address) __builtin_prefetch((const void *)(address), 1, 0) 78 | #elif defined(_M_IX86) || defined(_M_AMD64) || defined(__x86_64__) || defined(i386) || defined(__i386__) || \ 79 | defined(__i386) 80 | #include 81 | #define prefetch(address) _mm_prefetch((const void *)(address), _MM_HINT_NTA) 82 | #define prefetchw(address) _m_prefetchw((const void *)(address)) 83 | #elif defined(_M_ARM) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || \ 84 | defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) 85 | #include 86 | #define prefetch(address) __prefetch((const void *)(address)) 87 | #define prefetchw(address) __prefetchw((const void *)(address)) 88 | #elif defined(_M_ARM64) || defined(__aarch64__) 89 | #include 90 | #define prefetch(address) __prefetch2((const void *)(address), 1) 91 | #define prefetchw(address) __prefetch2((const void *)(address), 17) 92 | #else 93 | #error Your compiler, configuration or platform is not supported. 
94 | #endif 95 | 96 | #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) 97 | #if defined(_LITTLE_ENDIAN) || (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN) || \ 98 | (defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN) || \ 99 | (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || \ 100 | (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 101 | #define __LITTLE_ENDIAN__ 102 | #elif defined(_BIG_ENDIAN) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN) || \ 103 | (defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN) || \ 104 | (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || \ 105 | (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 106 | #define __BIG_ENDIAN__ 107 | #elif defined(_WIN32) 108 | #define __LITTLE_ENDIAN__ 109 | #endif 110 | #endif 111 | 112 | #if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) 113 | #if defined(HAS_BUILTIN_BSWAP16) 114 | #define bswap16(x) (__builtin_bswap16(x)) 115 | #elif defined(_MSC_VER) && !defined(__INTEL_COMPILER) 116 | #define bswap16(x) (_byteswap_ushort(x)) 117 | #else 118 | #define bswap16(x) ((u16)(x >> 8) | (u16)(x << 8)) 119 | #endif 120 | #elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__) 121 | #define bswap16(x) (x) 122 | #else 123 | #error Your compiler, configuration or platform is not supported. 124 | #endif 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /include/libbz3.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * BZip3 - A spiritual successor to BZip2. 
4 | * Copyright (C) 2022-2024 Kamila Szewczyk 5 | * 6 | * This program is free software: you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation, either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License along with 17 | * this program. If not, see . 18 | */ 19 | 20 | #ifndef LIBBZ3_H 21 | #define LIBBZ3_H 22 | 23 | #include 24 | #include 25 | 26 | /* Symbol visibility control. */ 27 | #ifndef BZIP3_VISIBLE 28 | #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__) 29 | #define BZIP3_VISIBLE __attribute__((visibility("default"))) 30 | #else 31 | #define BZIP3_VISIBLE 32 | #endif 33 | #endif 34 | 35 | #if defined(BZIP3_DLL_EXPORT) && (BZIP3_DLL_EXPORT == 1) 36 | #define BZIP3_API __declspec(dllexport) BZIP3_VISIBLE 37 | #elif defined(BZIP3_DLL_IMPORT) && (BZIP3_DLL_IMPORT == 1) 38 | #define BZIP3_API __declspec(dllimport) BZIP3_VISIBLE 39 | #else 40 | #define BZIP3_API BZIP3_VISIBLE 41 | #endif 42 | 43 | #ifdef __cplusplus 44 | extern "C" { 45 | #endif 46 | 47 | #define BZ3_OK 0 48 | #define BZ3_ERR_OUT_OF_BOUNDS -1 49 | #define BZ3_ERR_BWT -2 50 | #define BZ3_ERR_CRC -3 51 | #define BZ3_ERR_MALFORMED_HEADER -4 52 | #define BZ3_ERR_TRUNCATED_DATA -5 53 | #define BZ3_ERR_DATA_TOO_BIG -6 54 | #define BZ3_ERR_INIT -7 55 | #define BZ3_ERR_DATA_SIZE_TOO_SMALL -8 56 | 57 | struct bz3_state; 58 | 59 | /** 60 | * @brief Get bzip3 version. 61 | */ 62 | BZIP3_API const char * bz3_version(void); 63 | 64 | /** 65 | * @brief Get the last error number associated with a given state. 
66 | */ 67 | BZIP3_API int8_t bz3_last_error(struct bz3_state * state); 68 | 69 | /** 70 | * @brief Return a user-readable message explaining the cause of the last error. 71 | */ 72 | BZIP3_API const char * bz3_strerror(struct bz3_state * state); 73 | 74 | /** 75 | * @brief Construct a new block encoder state, which will encode blocks as big as the given block size. 76 | * The decoder will be able to decode blocks at most as big as the given block size. 77 | * Returns NULL in case allocation fails or the block size is not between 65K and 511M 78 | */ 79 | BZIP3_API struct bz3_state * bz3_new(int32_t block_size); 80 | 81 | /** 82 | * @brief Free the memory occupied by a block encoder state. 83 | */ 84 | BZIP3_API void bz3_free(struct bz3_state * state); 85 | 86 | /** 87 | * @brief Return the recommended size of the output buffer for the compression functions. 88 | */ 89 | BZIP3_API size_t bz3_bound(size_t input_size); 90 | 91 | /* ** HIGH LEVEL APIs ** */ 92 | 93 | /** 94 | * @brief Compress a frame. This function does not support parallelism 95 | * by itself, consider using the low level `bz3_encode_blocks()` function instead. 96 | * Using the low level API might provide better performance. 97 | * Returns a bzip3 error code; BZ3_OK when the operation is successful. 98 | * Make sure to set out_size to the size of the output buffer before the operation; 99 | * out_size must be at least equal to `bz3_bound(in_size)'. 100 | */ 101 | BZIP3_API int bz3_compress(uint32_t block_size, const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size); 102 | 103 | /** 104 | * @brief Decompress a frame. This function does not support parallelism 105 | * by itself, consider using the low level `bz3_decode_blocks()` function instead. 106 | * Using the low level API might provide better performance. 107 | * Returns a bzip3 error code; BZ3_OK when the operation is successful. 108 | * Make sure to set out_size to the size of the output buffer before the operation. 
109 | */ 110 | BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size); 111 | 112 | /** 113 | * @brief Calculate the minimal memory required for compression with the given block size. 114 | * This includes all internal buffers and state structures. This calculates the amount of bytes 115 | * that will be allocated by a call to `bz3_new()`. 116 | * 117 | * @details Memory allocation and usage patterns: 118 | * 119 | * bz3_new(): 120 | * - Allocates all memory upfront: 121 | * - Core state structure (sizeof(struct bz3_state)) 122 | * - Swap buffer (bz3_bound(block_size) bytes) 123 | * - SAIS array (BWT_BOUND(block_size) * sizeof(int32_t) bytes) 124 | * - LZP lookup table ((1 << LZP_DICTIONARY) * sizeof(int32_t) bytes) 125 | * - Compression state (sizeof(state)) 126 | * - All memory remains allocated until bz3_free() 127 | * 128 | * Additional memory may be used depending on API used from here. 129 | * 130 | * # Low Level APIs 131 | * 132 | * 1. bz3_encode_block() / bz3_decode_block(): 133 | * - Uses pre-allocated memory from bz3_new() 134 | * - No additional memory allocation except for libsais (usually ~16KiB) 135 | * - Peak memory usage of physical RAM varies with compression stages: 136 | * - LZP: Uses LZP lookup table + swap buffer 137 | * - BWT: Uses SAIS array + swap buffer 138 | * - Entropy coding: Uses compression state (cm_state) + swap buffer 139 | * 140 | * Using the higher level API, `bz3_compress`, expect an additional allocation 141 | * of `bz3_bound(block_size)`. 142 | * 143 | * In the parallel version `bz3_encode_blocks`, each thread gets its own state, 144 | * so memory usage is `n_threads * bz3_min_memory_needed()`. 145 | * 146 | * # High Level APIs 147 | * 148 | * 1. bz3_compress(): 149 | * - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes) 150 | * in addition to the memory amount returned by this method call and libsais. 
151 | * - Everything is freed after compression completes 152 | * 153 | * 2. bz3_decompress(): 154 | * - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes) 155 | * in addition to the memory amount returned by this method call and libsais. 156 | * - Everything is freed after decompression completes 157 | * 158 | * Memory remains constant during operation, with the exception of some small allocations from libsais during 159 | * the BWT stage. That is not accounted for by this function, though it usually amounts to ~16KiB, negligible. 160 | * The worst case of BWT is 2*block_size technically speaking. 161 | * 162 | * No dynamic (re)allocation occurs outside of that. 163 | * 164 | * @param block_size The block size to be used for compression 165 | * @return The total number of bytes required for compression, or 0 if block_size is invalid 166 | */ 167 | BZIP3_API size_t bz3_min_memory_needed(int32_t block_size); 168 | 169 | /* ** LOW LEVEL APIs ** */ 170 | 171 | /** 172 | * @brief Encode a single block. Returns the amount of bytes written to `buffer'. 173 | * `buffer' must be able to hold at least `bz3_bound(size)' bytes. The size must not 174 | * exceed the block size associated with the state. 175 | */ 176 | BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, int32_t size); 177 | 178 | /** 179 | * @brief Decode a single block. 180 | * 181 | * `buffer' must be able to hold at least `bz3_bound(orig_size)' bytes 182 | * in order to ensure decompression will succeed for all possible bzip3 blocks. 183 | * 184 | * In most (but not all) cases, a buffer of `orig_size` bytes is sufficient. 185 | * If it is not sufficient, you must allocate a buffer of size `bz3_bound(orig_size)` temporarily. 186 | * 187 | * If `buffer_size` is too small, `BZ3_ERR_DATA_SIZE_TOO_SMALL` will be returned. 188 | * The size must not exceed the block size associated with the state. 
189 | * 190 | * @param buffer_size The size of the buffer at 'buffer' 191 | * @param compressed_size The size of the compressed data in 'buffer' 192 | * @param orig_size The original size of the data before compression. 193 | */ 194 | BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, size_t buffer_size, int32_t compressed_size, int32_t orig_size); 195 | 196 | /** 197 | * @brief Encode `n' blocks, all in parallel. 198 | * All specifics of the `bz3_encode_block' still hold. The function will launch a thread for each block. 199 | * The compressed sizes are written to the `sizes' array. Every buffer is overwritten and none of them can overlap. 200 | * Precisely `n' states, buffers and sizes must be supplied. 201 | * 202 | * Expects `n' between 2 and 16. 203 | * 204 | * Present in the shared library only if -lpthread was present during building. 205 | */ 206 | BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[], int32_t n); 207 | 208 | /** 209 | * @brief Decode `n' blocks, all in parallel. 210 | * Same specifics as `bz3_encode_blocks', but doesn't overwrite `sizes'. 211 | */ 212 | BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], size_t buffer_sizes[], int32_t sizes[], 213 | int32_t orig_sizes[], int32_t n); 214 | 215 | /** 216 | * @brief Check if using original file size as buffer size is sufficient for decompressing 217 | * a block at `block` pointer. 218 | * 219 | * @param block Pointer to the compressed block data 220 | * @param block_size Size of the block buffer in bytes (must be at least 13 bytes for header) 221 | * @param orig_size Size of the original uncompressed data 222 | * @return 1 if original size is sufficient, 0 if insufficient, -1 on header error (insufficient buffer size) 223 | * 224 | * @remarks 225 | * 226 | * This function is useful for external APIs using the low level block encoding API, 227 | * `bz3_encode_block`. 
You would normally call this directly after `bz3_encode_block` 228 | * on the block that has been output. 229 | * 230 | * The purpose of this function is to prevent encoding blocks that would require an additional 231 | * malloc at decompression time. 232 | * The goal is to prevent erroring with `BZ3_ERR_DATA_SIZE_TOO_SMALL`, thus 233 | * in turn avoiding the need for a temporary `bz3_bound(orig_size)` buffer when decoding. 234 | */ 235 | BZIP3_API int bz3_orig_size_sufficient_for_decode(const uint8_t * block, size_t block_size, int32_t orig_size); 236 | 237 | 238 | #ifdef __cplusplus 239 | } /* extern "C" */ 240 | #endif 241 | 242 | #endif 243 | -------------------------------------------------------------------------------- /include/yarg.h: -------------------------------------------------------------------------------- 1 | /* Written by Kamila Szewczyk (kspalaiologos@gmail.com) */ 2 | 3 | #ifndef _YARG_H 4 | #define _YARG_H 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | typedef enum { 14 | no_argument, 15 | required_argument, 16 | optional_argument 17 | } yarg_arg_type; 18 | 19 | typedef struct { 20 | int opt; 21 | yarg_arg_type type; 22 | const char * long_opt; 23 | } yarg_options; 24 | 25 | typedef enum { 26 | YARG_STYLE_WINDOWS, 27 | YARG_STYLE_UNIX, 28 | YARG_STYLE_UNIX_SHORT 29 | } yarg_style; 30 | 31 | typedef struct { 32 | bool dash_dash; 33 | yarg_style style; 34 | } yarg_settings; 35 | 36 | typedef struct { 37 | int opt; 38 | const char * long_opt; 39 | char * arg; 40 | } yarg_option; 41 | 42 | typedef struct { 43 | yarg_option * args; 44 | int argc; 45 | char ** pos_args; 46 | int pos_argc; 47 | char * error; 48 | } yarg_result; 49 | 50 | static const char yarg_oom[] = "Out of memory"; 51 | static int yarg_asprintf(char ** strp, const char * fmt, ...) 
{ 52 | if (fmt == yarg_oom) 53 | goto use_yarg_oom; 54 | va_list ap; 55 | va_start(ap, fmt); 56 | int len = vsnprintf(NULL, 0, fmt, ap); 57 | va_end(ap); 58 | if (len < 0) 59 | goto use_yarg_oom; 60 | *strp = (char *) malloc(len + 1); 61 | if (!*strp) 62 | goto use_yarg_oom; 63 | va_start(ap, fmt); 64 | len = vsnprintf(*strp, len + 1, fmt, ap); 65 | va_end(ap); 66 | return len; 67 | 68 | use_yarg_oom: 69 | *strp = (char *)yarg_oom; 70 | return sizeof(yarg_oom); 71 | } 72 | 73 | static char * yarg_strdup(const char * str) { 74 | char * new_str = (char *) calloc(strlen(str) + 1, 1); 75 | if (!new_str) return NULL; 76 | strcpy(new_str, str); 77 | return new_str; 78 | } 79 | 80 | static int yarg_parse_unix(int argc, char * argv[], yarg_options opt[], 81 | yarg_result * res, bool dash_dash) { 82 | int no_args = 0, no_pos_args = 0; 83 | for (int i = 1; i < argc; i++) { 84 | if (argv[i][0] == '-') { 85 | if (argv[i][1] == '-') { 86 | if (dash_dash && argv[i][2] == '\0') 87 | { no_pos_args += argc - i - 1; break; } 88 | char * long_opt = argv[i] + 2; yarg_options * o = NULL; 89 | int len = 0; while (long_opt[len] && long_opt[len] != '=') len++; 90 | for (int j = 0; opt[j].opt; j++) 91 | if (opt[j].long_opt && !strncmp(opt[j].long_opt, long_opt, len)) 92 | { o = &opt[j]; break; } 93 | if (!o) { 94 | yarg_asprintf(&res->error, "--%.*s -- unknown option\n", len, long_opt); 95 | return 0; 96 | } 97 | if (o->type == required_argument) { 98 | if (long_opt[len] == '=') { 99 | // Ignore. 
100 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 101 | i++; 102 | } else { 103 | yarg_asprintf(&res->error, "--%s -- missing argument\n", o->long_opt); 104 | return 0; 105 | } 106 | } else if (o->type == optional_argument) { 107 | if (long_opt[len] == '=') { 108 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 109 | i++; 110 | } 111 | } 112 | no_args++; 113 | } else { 114 | for (int j = 1; argv[i][j]; j++) { 115 | char c = argv[i][j]; yarg_options * o = NULL; 116 | for (int k = 0; opt[k].opt; k++) 117 | if (opt[k].opt == c) 118 | { o = &opt[k]; break; } 119 | if (!o) { 120 | yarg_asprintf(&res->error, "-%c -- unknown option\n", c); 121 | return 0; 122 | } 123 | if (o->type == required_argument) { 124 | if (argv[i][j + 1]) { 125 | // Ignore. 126 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 127 | i++; 128 | } else { 129 | yarg_asprintf(&res->error, "-%c -- missing argument\n", c); 130 | return 0; 131 | } 132 | no_args++; 133 | break; 134 | } else if(o->type == optional_argument) { 135 | if (argv[i][j + 1]) { 136 | // Ignore. 
137 | no_args++; 138 | break; 139 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 140 | i++; 141 | no_args++; 142 | break; 143 | } 144 | } 145 | no_args++; 146 | } 147 | } 148 | } else no_pos_args++; 149 | } 150 | 151 | res->args = (yarg_option *) calloc((no_args + 1) * sizeof(yarg_option), 1); 152 | res->pos_args = (char **) calloc((no_pos_args + 1) * sizeof(char *), 1); 153 | if(!res->args || !res->pos_args) { 154 | yarg_asprintf(&res->error, yarg_oom); 155 | return 0; 156 | } 157 | 158 | for (int i = 1; i < argc; i++) { 159 | if (argv[i][0] == '-') { 160 | if (argv[i][1] == '-') { 161 | if (dash_dash && argv[i][2] == '\0') { 162 | for (int j = i + 1; j < argc; j++) 163 | if(!(res->pos_args[res->pos_argc++] = yarg_strdup(argv[j]))) { 164 | yarg_asprintf(&res->error, yarg_oom); 165 | return 0; 166 | } 167 | break; 168 | } 169 | char * long_opt = argv[i] + 2; yarg_options * o = NULL; 170 | int len = 0; while (long_opt[len] && long_opt[len] != '=') len++; 171 | for (int j = 0; opt[j].opt; j++) 172 | if (opt[j].long_opt && !strncmp(opt[j].long_opt, long_opt, len)) 173 | { o = &opt[j]; break; } 174 | res->args[res->argc].opt = o->opt; 175 | res->args[res->argc].long_opt = o->long_opt; 176 | if (o->type == required_argument || o->type == optional_argument) { 177 | if (long_opt[len] == '=') { 178 | if(!(res->args[res->argc].arg = yarg_strdup(long_opt + len + 1))) { 179 | yarg_asprintf(&res->error, yarg_oom); 180 | return 0; 181 | } 182 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 183 | if(!(res->args[res->argc].arg = yarg_strdup(argv[++i]))) { 184 | yarg_asprintf(&res->error, yarg_oom); 185 | return 0; 186 | } 187 | } 188 | } 189 | res->argc++; 190 | } else { 191 | for (int j = 1; argv[i][j]; j++) { 192 | char c = argv[i][j]; yarg_options * o = NULL; 193 | for (int k = 0; opt[k].opt; k++) 194 | if (opt[k].opt == c) 195 | { o = &opt[k]; break; } 196 | if (!o) { 197 | yarg_asprintf(&res->error, "-%c -- unknown option\n", c); 198 | return 0; 199 | } 200 | 
res->args[res->argc].opt = c; 201 | res->args[res->argc].long_opt = o->long_opt; 202 | if (o->type == required_argument || o->type == optional_argument) { 203 | if (argv[i][j + 1]) { 204 | if(!(res->args[res->argc++].arg = yarg_strdup(argv[i] + j + 1))) { 205 | yarg_asprintf(&res->error, yarg_oom); 206 | return 0; 207 | } 208 | break; 209 | } else if (argv[i + 1] && argv[i + 1][0] != '-') { 210 | if(!(res->args[res->argc++].arg = yarg_strdup(argv[++i]))) { 211 | yarg_asprintf(&res->error, yarg_oom); 212 | return 0; 213 | } 214 | break; 215 | } 216 | } 217 | res->argc++; 218 | } 219 | } 220 | } else if(!(res->pos_args[res->pos_argc++] = yarg_strdup(argv[i]))) { 221 | yarg_asprintf(&res->error, yarg_oom); 222 | return 0; 223 | } 224 | } 225 | 226 | return 1; 227 | } 228 | 229 | static int yarg_parse_unix_short(int argc, char * argv[], yarg_options opt[], 230 | yarg_result * res, bool dash_dash, char opt_char) { 231 | int no_args = 0, no_pos_args = 0; 232 | for (int i = 1; i < argc; i++) { 233 | if (argv[i][0] == opt_char) { 234 | if (dash_dash && argv[i][1] == '\0') { 235 | no_pos_args += argc - i - 1; 236 | break; 237 | } 238 | char * long_opt = argv[i] + 1; yarg_options * o = NULL; 239 | int len = 0; while (long_opt[len] && long_opt[len] != '=') len++; 240 | for (int j = 0; opt[j].opt; j++) 241 | if (opt[j].long_opt && !strncmp(opt[j].long_opt, long_opt, len)) 242 | { o = &opt[j]; break; } 243 | if (!o) { 244 | yarg_asprintf(&res->error, "%c%.*s -- unknown option\n", opt_char, len, long_opt); 245 | return 0; 246 | } 247 | if (o->type == required_argument) { 248 | if (long_opt[len] == '=') { 249 | // Ignore. 250 | } else if (argv[i + 1] && argv[i + 1][0] != opt_char) { 251 | i++; 252 | } else { 253 | yarg_asprintf(&res->error, "%c%s -- missing argument\n", opt_char, o->long_opt); 254 | return 0; 255 | } 256 | } else if (o->type == optional_argument) { 257 | if (long_opt[len] == '=') { 258 | // Ignore. 
259 | } else if (argv[i + 1] && argv[i + 1][0] != opt_char) { 260 | i++; 261 | } 262 | } 263 | no_args++; 264 | } else no_pos_args++; 265 | } 266 | 267 | res->args = (yarg_option *) calloc((no_args + 1) * sizeof(yarg_option), 1); 268 | res->pos_args = (char **) calloc((no_pos_args + 1) * sizeof(char *), 1); 269 | if (!res->args || !res->pos_args) { 270 | yarg_asprintf(&res->error, yarg_oom); 271 | return 0; 272 | } 273 | 274 | for (int i = 1; i < argc; i++) { 275 | if (argv[i][0] == opt_char) { 276 | if (dash_dash && argv[i][1] == '\0') { 277 | for (int j = i + 1; j < argc; j++) 278 | if(!(res->pos_args[res->pos_argc++] = yarg_strdup(argv[j]))) { 279 | yarg_asprintf(&res->error, yarg_oom); 280 | return 0; 281 | } 282 | break; 283 | } 284 | char * long_opt = argv[i] + 1; yarg_options * o = NULL; 285 | int len = 0; while (long_opt[len] && long_opt[len] != '=') len++; 286 | for (int j = 0; opt[j].opt; j++) 287 | if (opt[j].long_opt && !strncmp(opt[j].long_opt, long_opt, len)) 288 | { o = &opt[j]; break; } 289 | res->args[res->argc].opt = o->opt; 290 | res->args[res->argc].long_opt = o->long_opt; 291 | if (o->type == required_argument || o->type == optional_argument) { 292 | if (long_opt[len] == '=') { 293 | if(!(res->args[res->argc].arg = yarg_strdup(long_opt + len + 1))) { 294 | yarg_asprintf(&res->error, yarg_oom); 295 | return 0; 296 | } 297 | } else if (argv[i + 1] && argv[i + 1][0] != opt_char) { 298 | if(!(res->args[res->argc].arg = yarg_strdup(argv[++i]))) { 299 | yarg_asprintf(&res->error, yarg_oom); 300 | return 0; 301 | } 302 | } 303 | } 304 | res->argc++; 305 | } else if(!(res->pos_args[res->pos_argc++] = yarg_strdup(argv[i]))) { 306 | yarg_asprintf(&res->error, yarg_oom); 307 | return 0; 308 | } 309 | } 310 | 311 | return 1; 312 | } 313 | 314 | void yarg_destroy(yarg_result * r) { 315 | if(r) { 316 | if(r->args) { 317 | for (int i = 0; i < r->argc; i++) { 318 | free(r->args[i].arg); 319 | } 320 | } 321 | free(r->args); 322 | if(r->pos_args) { 323 | for 
(int i = 0; i < r->pos_argc; i++) { 324 | free(r->pos_args[i]); 325 | } 326 | } 327 | free(r->pos_args); 328 | if (r->error != yarg_oom) 329 | free(r->error); 330 | } 331 | free(r); 332 | } 333 | 334 | yarg_result * yarg_parse(int argc, char * argv[], yarg_options opt[], yarg_settings settings) { 335 | yarg_result * res = (yarg_result *) calloc(sizeof(yarg_result), 1); 336 | if (!res) return NULL; 337 | switch (settings.style) { 338 | case YARG_STYLE_WINDOWS: 339 | yarg_parse_unix_short(argc, argv, opt, res, false, '/'); 340 | break; 341 | case YARG_STYLE_UNIX: 342 | yarg_parse_unix(argc, argv, opt, res, settings.dash_dash); 343 | break; 344 | case YARG_STYLE_UNIX_SHORT: 345 | yarg_parse_unix_short(argc, argv, opt, res, settings.dash_dash, '-'); 346 | break; 347 | } 348 | return res; 349 | } 350 | 351 | #endif 352 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * BZip3 - A spiritual successor to BZip2. 4 | * Copyright (C) 2022-2024 Kamila Szewczyk 5 | * 6 | * This program is free software: you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation, either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, but WITHOUT 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 | * more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License along with 17 | * this program. If not, see . 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #if defined __MSVCRT__ 31 | #include 32 | #include 33 | #endif 34 | 35 | #include "common.h" 36 | #include "libbz3.h" 37 | #include "yarg.h" 38 | 39 | #define MODE_DECODE 0 40 | #define MODE_ENCODE 1 41 | #define MODE_TEST 2 42 | #define MODE_RECOVER 3 43 | 44 | static void version() { 45 | fprintf(stdout, "bzip3 " VERSION 46 | "\n" 47 | "Copyright (C) by Kamila Szewczyk, 2022-2023.\n" 48 | "License: GNU Lesser GPL version 3 \n"); 49 | } 50 | 51 | static void help() { 52 | fprintf(stdout, 53 | "bzip3 - better and stronger spiritual successor to bzip2.\n" 54 | "Usage: bzip3 [-e/-z/-d/-t/-c/-h/-V] [-b block_size] [-j jobs] files...\n" 55 | "Operations:\n" 56 | " -e/-z, --encode compress data (default)\n" 57 | " -d, --decode decompress data\n" 58 | " -r, --recover attempt at recovering corrupted data\n" 59 | " -t, --test verify validity of compressed data\n" 60 | " -h, --help display an usage overview\n" 61 | " -f, --force force overwriting output if it already exists\n" 62 | " --rm remove input files after successful (de)compression\n" 63 | " -k, --keep keep (don't delete) input files (default)\n" 64 | " -v, --verbose verbose mode (display more information)\n" 65 | " -V, --version display version information\n" 66 | "Extra flags:\n" 67 | " -c, --stdout force writing to standard output\n" 68 | " -b N, --block=N set block size in MiB {16}\n" 69 | " -B, --batch process all files specified as inputs\n" 70 | #ifdef PTHREAD 71 | " -j N, --jobs=N set the amount of parallel threads\n" 72 | #endif 73 | "\n" 74 | "Report bugs to: https://github.com/kspalaiologos/bzip3\n"); 75 | } 76 | 77 | static void xwrite(const void * data, size_t size, size_t len, FILE * des) { 78 | if (len == 0 || size == 0) return; 79 | if (fwrite(data, size, len, des) != len) { 80 | fprintf(stderr, "Write error: %s\n", strerror(errno)); 81 | exit(1); 82 | } 
83 | } 84 | 85 | /* Read any amount of items (from 0 to len) as long as there is no error */ 86 | static size_t xread(void * data, size_t size, size_t len, FILE * des) { 87 | size_t written = fread(data, size, len, des); 88 | if (ferror(des)) { 89 | fprintf(stderr, "Read error: %s\n", strerror(errno)); 90 | exit(1); 91 | } 92 | return written; 93 | } 94 | 95 | /* Either read 0 (due to eof) items or exactly len items */ 96 | static size_t xread_eofcheck(void * data, size_t size, size_t len, FILE * des) { 97 | size_t written = xread(data, size, len, des); 98 | /* feof will be true */ 99 | if (!written) return 0; 100 | if (feof(des)) { 101 | fprintf(stderr, "Error: Corrupt file\n"); 102 | exit(1); 103 | } 104 | return written; 105 | } 106 | 107 | /* Always read len items */ 108 | static void xread_noeof(void * data, size_t size, size_t len, FILE * des) { 109 | if (!xread_eofcheck(data, size, len, des)) { 110 | fprintf(stderr, "Error: Corrupt file\n"); 111 | exit(1); 112 | } 113 | } 114 | 115 | static void close_out_file(FILE * des) { 116 | if (des) { 117 | int outfd = fileno(des); 118 | 119 | if (fflush(des)) { 120 | fprintf(stderr, "Error: Failed on fflush: %s\n", strerror(errno)); 121 | exit(1); 122 | } 123 | 124 | #ifdef __linux__ 125 | while (1) { 126 | int status = fsync(outfd); 127 | if (status == -1) { 128 | if (errno == EINVAL) break; 129 | if (errno == EINTR) continue; 130 | fprintf(stderr, "Error: Failed on fsync: %s\n", strerror(errno)); 131 | exit(1); 132 | } 133 | break; 134 | } 135 | #endif 136 | 137 | if (des != stdout && fclose(des)) { 138 | fprintf(stderr, "Error: Failed on fclose: %s\n", strerror(errno)); 139 | exit(1); 140 | } 141 | } 142 | } 143 | 144 | static void remove_in_file(char * file_name, FILE * output_des) { 145 | if (file_name == NULL) { 146 | return; 147 | } 148 | if (output_des == stdout) { 149 | return; 150 | } 151 | if (remove(file_name)) { 152 | fprintf(stderr, "Error: failed to remove input file `%s': %s\n", file_name, 
strerror(errno)); 153 | exit(1); 154 | } 155 | } 156 | 157 | static int process(FILE * input_des, FILE * output_des, int mode, int block_size, int workers, int verbose, 158 | char * file_name) { 159 | uint64_t bytes_read = 0, bytes_written = 0; 160 | 161 | if ((mode == MODE_ENCODE && isatty(fileno(output_des))) || 162 | ((mode == MODE_DECODE || mode == MODE_TEST || mode == MODE_RECOVER) && isatty(fileno(input_des)))) { 163 | fprintf(stderr, "Refusing to read/write binary data from/to the terminal.\n"); 164 | return 1; 165 | } 166 | 167 | // Reset errno after the isatty() call. 168 | errno = 0; 169 | 170 | u8 byteswap_buf[4]; 171 | 172 | switch (mode) { 173 | case MODE_ENCODE: 174 | xwrite("BZ3v1", 5, 1, output_des); 175 | 176 | write_neutral_s32(byteswap_buf, block_size); 177 | xwrite(byteswap_buf, 4, 1, output_des); 178 | 179 | bytes_written += 9; 180 | break; 181 | case MODE_RECOVER: 182 | case MODE_DECODE: 183 | case MODE_TEST: { 184 | char signature[5]; 185 | 186 | if (xread(signature, 5, 1, input_des) != 1 || strncmp(signature, "BZ3v1", 5) != 0) { 187 | fprintf(stderr, "Invalid signature.\n"); 188 | return 1; 189 | } 190 | 191 | xread_noeof(byteswap_buf, 4, 1, input_des); 192 | 193 | block_size = read_neutral_s32(byteswap_buf); 194 | 195 | if (block_size < KiB(65) || block_size > MiB(511)) { 196 | fprintf(stderr, 197 | "The input file is corrupted. 
Reason: Invalid block " 198 | "size in the header.\n"); 199 | if (mode == MODE_RECOVER) { 200 | fprintf(stderr, "Recovery mode: Proceeding.\n"); 201 | block_size = MiB(511); 202 | } else { 203 | return 1; 204 | } 205 | } 206 | 207 | bytes_read += 9; 208 | break; 209 | } 210 | } 211 | 212 | #ifdef PTHREAD 213 | if (workers > 64 || workers < 0) { 214 | fprintf(stderr, "Number of workers must be between 0 and 64.\n"); 215 | return 1; 216 | } 217 | 218 | if (workers <= 1) { 219 | #endif 220 | struct bz3_state * state = bz3_new(block_size); 221 | 222 | if (state == NULL) { 223 | fprintf(stderr, "Failed to create a block encoder state.\n"); 224 | return 1; 225 | } 226 | 227 | size_t buffer_size = bz3_bound(block_size); 228 | u8 * buffer = malloc(buffer_size); 229 | 230 | if (!buffer) { 231 | fprintf(stderr, "Failed to allocate memory.\n"); 232 | return 1; 233 | } 234 | 235 | if (mode == MODE_ENCODE) { 236 | s32 read_count; 237 | while (!feof(input_des)) { 238 | read_count = xread(buffer, 1, block_size, input_des); 239 | bytes_read += read_count; 240 | 241 | if (read_count == 0) break; 242 | 243 | s32 new_size = bz3_encode_block(state, buffer, read_count); 244 | if (new_size == -1) { 245 | fprintf(stderr, "Failed to encode a block: %s\n", bz3_strerror(state)); 246 | return 1; 247 | } 248 | 249 | write_neutral_s32(byteswap_buf, new_size); 250 | xwrite(byteswap_buf, 4, 1, output_des); 251 | write_neutral_s32(byteswap_buf, read_count); 252 | xwrite(byteswap_buf, 4, 1, output_des); 253 | xwrite(buffer, new_size, 1, output_des); 254 | bytes_written += 8 + new_size; 255 | } 256 | fflush(output_des); 257 | } else if (mode == MODE_DECODE) { 258 | s32 new_size, old_size; 259 | while (!feof(input_des)) { 260 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) continue; 261 | 262 | new_size = read_neutral_s32(byteswap_buf); 263 | xread_noeof(&byteswap_buf, 1, 4, input_des); 264 | old_size = read_neutral_s32(byteswap_buf); 265 | if (old_size > bz3_bound(block_size) || new_size > 
bz3_bound(block_size)) { 266 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 267 | return 1; 268 | } 269 | xread_noeof(buffer, 1, new_size, input_des); 270 | bytes_read += 8 + new_size; 271 | if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) { 272 | fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state)); 273 | return 1; 274 | } 275 | xwrite(buffer, old_size, 1, output_des); 276 | bytes_written += old_size; 277 | } 278 | fflush(output_des); 279 | } else if (mode == MODE_RECOVER) { 280 | s32 new_size, old_size; 281 | while (!feof(input_des)) { 282 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) continue; 283 | 284 | new_size = read_neutral_s32(byteswap_buf); 285 | xread_noeof(&byteswap_buf, 1, 4, input_des); 286 | old_size = read_neutral_s32(byteswap_buf); 287 | if (old_size > bz3_bound(block_size) || new_size > bz3_bound(block_size)) { 288 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 289 | return 1; 290 | } 291 | xread_noeof(buffer, 1, new_size, input_des); 292 | bytes_read += 8 + new_size; 293 | if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) { 294 | fprintf(stderr, "Writing invalid block: %s\n", bz3_strerror(state)); 295 | } 296 | xwrite(buffer, old_size, 1, output_des); 297 | bytes_written += old_size; 298 | } 299 | fflush(output_des); 300 | } else if (mode == MODE_TEST) { 301 | s32 new_size, old_size; 302 | while (!feof(input_des)) { 303 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) continue; 304 | new_size = read_neutral_s32(byteswap_buf); 305 | xread_noeof(&byteswap_buf, 1, 4, input_des); 306 | old_size = read_neutral_s32(byteswap_buf); 307 | if (old_size > bz3_bound(block_size) || new_size > bz3_bound(block_size)) { 308 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 309 | return 1; 310 | } 311 | xread_noeof(buffer, 1, new_size, input_des); 312 | bytes_read += 8 + new_size; 313 | bytes_written += 
old_size; 314 | if (bz3_decode_block(state, buffer, buffer_size, new_size, old_size) == -1) { 315 | fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state)); 316 | return 1; 317 | } 318 | } 319 | } 320 | 321 | if (bz3_last_error(state) != BZ3_OK && mode != MODE_RECOVER) { 322 | fprintf(stderr, "Failed to read data: %s\n", bz3_strerror(state)); 323 | return 1; 324 | } 325 | 326 | free(buffer); 327 | 328 | bz3_free(state); 329 | #ifdef PTHREAD 330 | } else { 331 | struct bz3_state * states[workers]; 332 | u8 * buffers[workers]; 333 | s32 sizes[workers]; 334 | size_t buffer_sizes[workers]; 335 | s32 old_sizes[workers]; 336 | for (s32 i = 0; i < workers; i++) { 337 | states[i] = bz3_new(block_size); 338 | if (states[i] == NULL) { 339 | fprintf(stderr, "Failed to create a block encoder state.\n"); 340 | return 1; 341 | } 342 | size_t buffer_size = bz3_bound(block_size); 343 | buffer_sizes[i] = buffer_size; 344 | buffers[i] = malloc(buffer_size); 345 | if (!buffers[i]) { 346 | fprintf(stderr, "Failed to allocate memory.\n"); 347 | return 1; 348 | } 349 | } 350 | 351 | if (mode == MODE_ENCODE) { 352 | while (!feof(input_des)) { 353 | s32 i = 0; 354 | for (; i < workers; i++) { 355 | size_t read_count = xread(buffers[i], 1, block_size, input_des); 356 | bytes_read += read_count; 357 | sizes[i] = old_sizes[i] = read_count; 358 | if (read_count < block_size) { 359 | i++; 360 | break; 361 | } 362 | } 363 | bz3_encode_blocks(states, buffers, sizes, i); 364 | for (s32 j = 0; j < i; j++) { 365 | if (bz3_last_error(states[j]) != BZ3_OK) { 366 | fprintf(stderr, "Failed to encode data: %s\n", bz3_strerror(states[j])); 367 | return 1; 368 | } 369 | } 370 | for (s32 j = 0; j < i; j++) { 371 | write_neutral_s32(byteswap_buf, sizes[j]); 372 | xwrite(byteswap_buf, 4, 1, output_des); 373 | write_neutral_s32(byteswap_buf, old_sizes[j]); 374 | xwrite(byteswap_buf, 4, 1, output_des); 375 | xwrite(buffers[j], sizes[j], 1, output_des); 376 | bytes_written += 8 + sizes[j]; 377 | 
} 378 | } 379 | fflush(output_des); 380 | } else if (mode == MODE_DECODE) { 381 | while (!feof(input_des)) { 382 | s32 i = 0; 383 | for (; i < workers; i++) { 384 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) break; 385 | sizes[i] = read_neutral_s32(byteswap_buf); 386 | xread_noeof(&byteswap_buf, 1, 4, input_des); 387 | old_sizes[i] = read_neutral_s32(byteswap_buf); 388 | if (old_sizes[i] > bz3_bound(block_size) || sizes[i] > bz3_bound(block_size)) { 389 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 390 | return 1; 391 | } 392 | xread_noeof(buffers[i], 1, sizes[i], input_des); 393 | bytes_read += 8 + sizes[i]; 394 | } 395 | bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i); 396 | for (s32 j = 0; j < i; j++) { 397 | if (bz3_last_error(states[j]) != BZ3_OK) { 398 | fprintf(stderr, "Failed to decode data: %s\n", bz3_strerror(states[j])); 399 | return 1; 400 | } 401 | } 402 | for (s32 j = 0; j < i; j++) { 403 | xwrite(buffers[j], old_sizes[j], 1, output_des); 404 | bytes_written += old_sizes[j]; 405 | } 406 | } 407 | fflush(output_des); 408 | } else if (mode == MODE_RECOVER) { 409 | while (!feof(input_des)) { 410 | s32 i = 0; 411 | for (; i < workers; i++) { 412 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) break; 413 | sizes[i] = read_neutral_s32(byteswap_buf); 414 | xread_noeof(&byteswap_buf, 1, 4, input_des); 415 | old_sizes[i] = read_neutral_s32(byteswap_buf); 416 | if (old_sizes[i] > bz3_bound(block_size) || sizes[i] > bz3_bound(block_size)) { 417 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 418 | return 1; 419 | } 420 | xread_noeof(buffers[i], 1, sizes[i], input_des); 421 | bytes_read += 8 + sizes[i]; 422 | } 423 | bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i); 424 | for (s32 j = 0; j < i; j++) { 425 | if (bz3_last_error(states[j]) != BZ3_OK) { 426 | fprintf(stderr, "Writing invalid block: %s\n", bz3_strerror(states[j])); 427 | } 428 | } 429 | for 
(s32 j = 0; j < i; j++) { 430 | xwrite(buffers[j], old_sizes[j], 1, output_des); 431 | bytes_written += old_sizes[j]; 432 | } 433 | } 434 | fflush(output_des); 435 | } else if (mode == MODE_TEST) { 436 | while (!feof(input_des)) { 437 | s32 i = 0; 438 | for (; i < workers; i++) { 439 | if (!xread_eofcheck(&byteswap_buf, 1, 4, input_des)) break; 440 | sizes[i] = read_neutral_s32(byteswap_buf); 441 | xread_noeof(&byteswap_buf, 1, 4, input_des); 442 | old_sizes[i] = read_neutral_s32(byteswap_buf); 443 | if (old_sizes[i] > bz3_bound(block_size) || sizes[i] > bz3_bound(block_size)) { 444 | fprintf(stderr, "Failed to decode a block: Inconsistent headers.\n"); 445 | return 1; 446 | } 447 | xread_noeof(buffers[i], 1, sizes[i], input_des); 448 | bytes_read += 8 + sizes[i]; 449 | bytes_written += old_sizes[i]; 450 | } 451 | bz3_decode_blocks(states, buffers, buffer_sizes, sizes, old_sizes, i); 452 | for (s32 j = 0; j < i; j++) { 453 | if (bz3_last_error(states[j]) != BZ3_OK) { 454 | fprintf(stderr, "Failed to decode data: %s\n", bz3_strerror(states[j])); 455 | return 1; 456 | } 457 | } 458 | } 459 | } 460 | 461 | for (s32 i = 0; i < workers; i++) { 462 | free(buffers[i]); 463 | bz3_free(states[i]); 464 | } 465 | } 466 | #endif 467 | 468 | if (verbose) { 469 | if (file_name) fprintf(stderr, " %s:", file_name); 470 | if (mode == MODE_ENCODE) 471 | fprintf(stderr, "\t%" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, 472 | (double)bytes_written * 100.0 / bytes_read, (double)bytes_written * 8.0 / bytes_read); 473 | else if (mode == MODE_DECODE) 474 | fprintf(stderr, "\t%" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, 475 | (double)bytes_read * 100.0 / bytes_written, (double)bytes_read * 8.0 / bytes_written); 476 | else 477 | fprintf(stderr, "\tOK, %" PRIu64 " -> %" PRIu64 " bytes, %.2f%%, %.2f bpb\n", bytes_read, bytes_written, 478 | (double)bytes_read * 100.0 / bytes_written, (double)bytes_read * 8.0 / 
bytes_written); 479 | } 480 | 481 | return 0; 482 | } 483 | 484 | static int is_dir(const char * path) { 485 | struct stat sb; 486 | if (stat(path, &sb) == 0 && S_ISDIR(sb.st_mode)) return 1; 487 | return 0; 488 | } 489 | 490 | static int is_numeric(const char * str) { 491 | for (; *str; str++) 492 | if (!isdigit(*str)) return 0; 493 | return 1; 494 | } 495 | 496 | static FILE * open_output(char * output, int force) { 497 | FILE * output_des = NULL; 498 | 499 | if (output != NULL) { 500 | if (is_dir(output)) { 501 | fprintf(stderr, "Error: output file `%s' is a directory.\n", output); 502 | exit(1); 503 | } 504 | 505 | if (access(output, F_OK) == 0) { 506 | if (!force) { 507 | fprintf(stderr, "Error: output file `%s' already exists. Use -f to force overwrite.\n", output); 508 | exit(1); 509 | } 510 | } 511 | 512 | output_des = fopen(output, "wb"); 513 | if (output_des == NULL) { 514 | fprintf(stderr, "Error: failed to open output file `%s': %s\n", output, strerror(errno)); 515 | exit(1); 516 | } 517 | } else { 518 | output_des = stdout; 519 | } 520 | 521 | return output_des; 522 | } 523 | 524 | static FILE * open_input(char * input) { 525 | FILE * input_des = NULL; 526 | 527 | if (input != NULL) { 528 | if (is_dir(input)) { 529 | fprintf(stderr, "Error: input `%s' is a directory.\n", input); 530 | exit(1); 531 | } 532 | 533 | input_des = fopen(input, "rb"); 534 | if (input_des == NULL) { 535 | fprintf(stderr, "Error: failed to open input file `%s': %s\n", input, strerror(errno)); 536 | exit(1); 537 | } 538 | } else { 539 | input_des = stdin; 540 | } 541 | 542 | return input_des; 543 | } 544 | 545 | int main(int argc, char * argv[]) { 546 | int mode = MODE_ENCODE; 547 | 548 | // input and output file names 549 | char *input = NULL, *output = NULL; 550 | char *f1 = NULL, *f2 = NULL; 551 | int force = 0; 552 | 553 | // command line arguments 554 | int force_stdstreams = 0, workers = 0, batch = 0, verbose = 0, remove_input_file = 0; 555 | 556 | // the block size 557 | 
u32 block_size = MiB(16); 558 | 559 | enum { RM_OPTION = CHAR_MAX + 1 }; 560 | 561 | yarg_options opt[] = { 562 | { 'e', no_argument, "encode" }, 563 | { 'z', no_argument, "encode" }, /* alias */ 564 | { 'd', no_argument, "decode" }, 565 | { 't', no_argument, "test" }, 566 | { 'c', no_argument, "stdout" }, 567 | { 'f', no_argument, "force" }, 568 | { 'r', no_argument, "recover" }, 569 | { 'h', no_argument, "help" }, 570 | { RM_OPTION, no_argument, "rm" }, 571 | { 'k', no_argument, "keep" }, 572 | { 'V', no_argument, "version" }, 573 | { 'v', no_argument, "verbose" }, 574 | { 'b', required_argument, "block" }, 575 | { 'B', no_argument, "batch" }, 576 | #ifdef PTHREAD 577 | { 'j', required_argument, "jobs" }, 578 | #endif 579 | { 0, no_argument, NULL } 580 | }; 581 | yarg_settings settings = { 582 | .dash_dash = true, 583 | .style = YARG_STYLE_UNIX, 584 | }; 585 | yarg_result * res = yarg_parse(argc, argv, opt, settings); 586 | if (!res) { 587 | fprintf(stderr, "bzip3: out of memory.\n"); 588 | return 1; 589 | } 590 | if (res->error) { 591 | fputs(res->error, stderr); 592 | fputs("Try 'bzip3 --help' for more information.\n", stderr); 593 | return 1; 594 | } 595 | // `res' is not freed later on as it has the approximate lifetime 596 | // equal to the lifetime of the program overall. 
597 | for (int i = 0; i < res->argc; i++) { 598 | switch(res->args[i].opt) { 599 | case 'e': case 'z': mode = MODE_ENCODE; break; 600 | case 'd': mode = MODE_DECODE; break; 601 | case 'r': mode = MODE_RECOVER; break; 602 | case 't': mode = MODE_TEST; break; 603 | case 'c': force_stdstreams = 1; break; 604 | case 'f': force = 1; break; 605 | case RM_OPTION: remove_input_file = 1; break; 606 | case 'k': break; 607 | case 'h': help(); return 0; 608 | case 'V': version(); return 0; 609 | case 'B': batch = 1; break; 610 | case 'v': verbose = 1; break; 611 | case 'b': 612 | if (!is_numeric(res->args[i].arg)) { 613 | fprintf(stderr, "bzip3: invalid block size: %s\n", res->args[i].arg); 614 | return 1; 615 | } 616 | block_size = MiB(atoi(res->args[i].arg)); 617 | break; 618 | #ifdef PTHREAD 619 | case 'j': 620 | if (!is_numeric(res->args[i].arg)) { 621 | fprintf(stderr, "bzip3: invalid amount of jobs: %s\n", res->args[i].arg); 622 | return 1; 623 | } 624 | workers = atoi(res->args[i].arg); 625 | break; 626 | #endif 627 | } 628 | } 629 | 630 | #if defined(__MSVCRT__) 631 | setmode(STDIN_FILENO, O_BINARY); 632 | setmode(STDOUT_FILENO, O_BINARY); 633 | #endif 634 | 635 | if (block_size < KiB(65) || block_size > MiB(511)) { 636 | fprintf(stderr, "Block size must be between 65 KiB and 511 MiB.\n"); 637 | return 1; 638 | } 639 | 640 | if (batch && res->pos_argc) { 641 | switch (mode) { 642 | case MODE_ENCODE: 643 | /* Encode each of the files. 
*/ 644 | for (int i = 0; i < res->pos_argc; i++) { 645 | char * arg = res->pos_args[i]; 646 | 647 | FILE * input_des = open_input(arg); 648 | char * output_name; 649 | if (force_stdstreams) 650 | output_name = NULL; 651 | else { 652 | output_name = malloc(strlen(arg) + 5); 653 | if (!output_name) { 654 | fprintf(stderr, "Failed to allocate memory.\n"); 655 | return 1; 656 | } 657 | strcpy(output_name, arg); 658 | strcat(output_name, ".bz3"); 659 | } 660 | 661 | FILE * output_des = open_output(output_name, force); 662 | process(input_des, output_des, mode, block_size, workers, verbose, arg); 663 | 664 | fclose(input_des); 665 | close_out_file(output_des); 666 | if (!force_stdstreams) free(output_name); 667 | if (remove_input_file) { 668 | remove_in_file(arg, output_des); 669 | } 670 | } 671 | break; 672 | case MODE_RECOVER: 673 | case MODE_DECODE: 674 | /* Decode each of the files. */ 675 | for (int i = 0; i < res->pos_argc; i++) { 676 | char * arg = res->pos_args[i]; 677 | 678 | FILE * input_des = open_input(arg); 679 | char * output_name; 680 | if (force_stdstreams) 681 | output_name = NULL; 682 | else { 683 | output_name = malloc(strlen(arg) + 1); 684 | if (!output_name) { 685 | fprintf(stderr, "Failed to allocate memory.\n"); 686 | return 1; 687 | } 688 | strcpy(output_name, arg); 689 | if (strlen(output_name) > 4 && !strcmp(output_name + strlen(output_name) - 4, ".bz3")) 690 | output_name[strlen(output_name) - 4] = 0; 691 | else { 692 | fprintf(stderr, "Warning: file %s has an unknown extension, skipping.\n", arg); 693 | return 1; 694 | } 695 | } 696 | 697 | FILE * output_des = open_output(output_name, force); 698 | process(input_des, output_des, mode, block_size, workers, verbose, arg); 699 | 700 | fclose(input_des); 701 | close_out_file(output_des); 702 | if (!force_stdstreams) free(output_name); 703 | if (remove_input_file) { 704 | remove_in_file(arg, output_des); 705 | } 706 | } 707 | break; 708 | case MODE_TEST: 709 | /* Test each of the files. 
*/ 710 | for (int i = 0; i < res->pos_argc; i++) { 711 | char * arg = res->pos_args[i]; 712 | 713 | FILE * input_des = open_input(arg); 714 | process(input_des, NULL, mode, block_size, workers, verbose, arg); 715 | fclose(input_des); 716 | } 717 | break; 718 | } 719 | 720 | if (fclose(stdout)) { 721 | fprintf(stderr, "Error: Failed on fclose(stdout): %s\n", strerror(errno)); 722 | return 1; 723 | } 724 | 725 | return 0; 726 | } 727 | 728 | for (int i = 0; i < res->pos_argc; i++) { 729 | char * arg = res->pos_args[i]; 730 | 731 | if (f1 != NULL && f2 != NULL) { 732 | fprintf(stderr, "Error: too many files specified.\n"); 733 | return 1; 734 | } 735 | 736 | if (f1 == NULL) 737 | f1 = arg; 738 | else 739 | f2 = arg; 740 | } 741 | 742 | if (f1 == NULL && f2 == NULL) 743 | input = NULL, output = NULL; 744 | else if (mode == MODE_TEST) 745 | input = f1; 746 | else { 747 | if (mode == MODE_ENCODE) { 748 | if (f2 == NULL) { 749 | // encode from f1? 750 | input = f1; 751 | if (force_stdstreams) 752 | output = NULL; 753 | else { 754 | output = malloc(strlen(f1) + 5); 755 | if (!output) { 756 | fprintf(stderr, "Failed to allocate memory.\n"); 757 | return 1; 758 | } 759 | strcpy(output, f1); 760 | strcat(output, ".bz3"); 761 | } 762 | } else { 763 | // encode from f1 to f2. 764 | input = f1; 765 | output = f2; 766 | } 767 | } else if (mode == MODE_DECODE || mode == MODE_RECOVER) { 768 | if (f2 == NULL) { 769 | // decode from f1 to stdout. 770 | input = f1; 771 | if (force_stdstreams) 772 | output = NULL; 773 | else { 774 | output = malloc(strlen(f1) + 1); 775 | if (!output) { 776 | fprintf(stderr, "Failed to allocate memory.\n"); 777 | return 1; 778 | } 779 | strcpy(output, f1); 780 | if (strlen(output) > 4 && !strcmp(output + strlen(output) - 4, ".bz3")) 781 | output[strlen(output) - 4] = 0; 782 | else { 783 | fprintf(stderr, "Warning: file %s has an unknown extension, skipping.\n", f1); 784 | return 1; 785 | } 786 | } 787 | } else { 788 | // decode from f1 to f2. 
789 | input = f1; 790 | output = f2; 791 | } 792 | } 793 | } 794 | 795 | FILE *input_des = NULL, *output_des = NULL; 796 | 797 | input_des = open_input(input); 798 | output_des = mode != MODE_TEST ? open_output(output, force) : NULL; 799 | 800 | if (output != f2) free(output); 801 | 802 | int r = process(input_des, output_des, mode, block_size, workers, verbose, input); 803 | 804 | fclose(input_des); 805 | close_out_file(output_des); 806 | if (fclose(stdout)) { 807 | fprintf(stderr, "Error: Failed on fclose(stdout): %s\n", strerror(errno)); 808 | return 1; 809 | } 810 | if (remove_input_file) { 811 | remove_in_file(input, output_des); 812 | } 813 | return r; 814 | } 815 | --------------------------------------------------------------------------------