├── .github └── workflows │ ├── c-cpp.yml │ └── codeql.yml ├── Kanzi_VS2008.zip ├── Kanzi_VS2022.zip ├── LICENSE ├── README.md ├── SECURITY.md ├── bin └── .gitignore ├── kanzi.1.gz ├── lib └── .gitignore └── src ├── BitStreamException.hpp ├── CMakeLists.txt ├── Context.hpp ├── EntropyDecoder.hpp ├── EntropyEncoder.hpp ├── Error.hpp ├── Event.cpp ├── Event.hpp ├── Global.cpp ├── Global.hpp ├── InputBitStream.hpp ├── InputStream.hpp ├── Listener.hpp ├── Magic.hpp ├── Makefile ├── Makefile.tcmalloc ├── Memory.hpp ├── OutputBitStream.hpp ├── OutputStream.hpp ├── Predictor.hpp ├── Seekable.hpp ├── SliceArray.hpp ├── Transform.hpp ├── api ├── Compressor.cpp ├── Compressor.hpp ├── Decompressor.cpp └── Decompressor.hpp ├── app ├── BlockCompressor.cpp ├── BlockCompressor.hpp ├── BlockDecompressor.cpp ├── BlockDecompressor.hpp ├── InfoPrinter.cpp ├── InfoPrinter.hpp └── Kanzi.cpp ├── bitstream ├── DebugInputBitStream.cpp ├── DebugInputBitStream.hpp ├── DebugOutputBitStream.cpp ├── DebugOutputBitStream.hpp ├── DefaultInputBitStream.cpp ├── DefaultInputBitStream.hpp ├── DefaultOutputBitStream.cpp └── DefaultOutputBitStream.hpp ├── concurrent.hpp ├── configure ├── entropy ├── ANSRangeDecoder.cpp ├── ANSRangeDecoder.hpp ├── ANSRangeEncoder.cpp ├── ANSRangeEncoder.hpp ├── AdaptiveProbMap.hpp ├── BinaryEntropyDecoder.cpp ├── BinaryEntropyDecoder.hpp ├── BinaryEntropyEncoder.cpp ├── BinaryEntropyEncoder.hpp ├── CMPredictor.cpp ├── CMPredictor.hpp ├── EntropyDecoderFactory.hpp ├── EntropyEncoderFactory.hpp ├── EntropyUtils.cpp ├── EntropyUtils.hpp ├── ExpGolombDecoder.cpp ├── ExpGolombDecoder.hpp ├── ExpGolombEncoder.cpp ├── ExpGolombEncoder.hpp ├── FPAQDecoder.cpp ├── FPAQDecoder.hpp ├── FPAQEncoder.cpp ├── FPAQEncoder.hpp ├── HuffmanCommon.cpp ├── HuffmanCommon.hpp ├── HuffmanDecoder.cpp ├── HuffmanDecoder.hpp ├── HuffmanEncoder.cpp ├── HuffmanEncoder.hpp ├── NullEntropyDecoder.hpp ├── NullEntropyEncoder.hpp ├── RangeDecoder.cpp ├── RangeDecoder.hpp ├── RangeEncoder.cpp ├── 
RangeEncoder.hpp ├── TPAQPredictor.cpp └── TPAQPredictor.hpp ├── io ├── CompressedInputStream.cpp ├── CompressedInputStream.hpp ├── CompressedOutputStream.cpp ├── CompressedOutputStream.hpp ├── IOException.hpp ├── IOUtil.hpp └── NullOutputStream.hpp ├── msvc_dirent.hpp ├── test ├── TestBWT.cpp ├── TestCompressedStream.cpp ├── TestDefaultBitStream.cpp ├── TestEntropyCodec.cpp └── TestTransforms.cpp ├── transform ├── AliasCodec.cpp ├── AliasCodec.hpp ├── BWT.cpp ├── BWT.hpp ├── BWTBlockCodec.cpp ├── BWTBlockCodec.hpp ├── BWTS.cpp ├── BWTS.hpp ├── DivSufSort.cpp ├── DivSufSort.hpp ├── EXECodec.cpp ├── EXECodec.hpp ├── FSDCodec.cpp ├── FSDCodec.hpp ├── LZCodec.cpp ├── LZCodec.hpp ├── NullTransform.hpp ├── RLT.cpp ├── RLT.hpp ├── ROLZCodec.cpp ├── ROLZCodec.hpp ├── SBRT.cpp ├── SBRT.hpp ├── SRT.cpp ├── SRT.hpp ├── TextCodec.cpp ├── TextCodec.hpp ├── TransformFactory.hpp ├── TransformSequence.hpp ├── UTFCodec.cpp ├── UTFCodec.hpp ├── ZRLT.cpp └── ZRLT.hpp ├── types.hpp ├── util.hpp └── util ├── Clock.hpp ├── Printer.hpp ├── XXHash.hpp └── strings.hpp /.github/workflows/c-cpp.yml: -------------------------------------------------------------------------------- 1 | name: C/C++ CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | strategy: 12 | matrix: 13 | os: [ ubuntu-latest, macos-latest ] 14 | compiler: [ clang, gcc ] 15 | runs-on: ${{ matrix.os }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: make 20 | run: cd src && make clean && make all 21 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 
3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "master" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "master" ] 20 | schedule: 21 | - cron: '18 0 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'cpp' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v3 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v3 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 
53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@v3 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 65 | 66 | # If the Autobuild fails above, remove it and uncomment the following three lines. 67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 68 | 69 | # - run: | 70 | # echo "Run, Build Application using script" 71 | # ./location_of_script_within_repo/buildscript.sh 72 | 73 | - name: Perform CodeQL Analysis 74 | uses: github/codeql-action/analyze@v3 75 | with: 76 | category: "/language:${{matrix.language}}" 77 | -------------------------------------------------------------------------------- /Kanzi_VS2008.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/Kanzi_VS2008.zip -------------------------------------------------------------------------------- /Kanzi_VS2022.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/Kanzi_VS2022.zip -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | 
Security updates are applied only to the latest release. 4 | 5 | ## Vulnerability Definition 6 | 7 | A security vulnerability is a bug that, given a certain input, triggers a crash or an infinite loop. Compression and decompression failures do not belong in this category. 8 | 9 | ## Reporting a Vulnerability 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** If you have discovered a security vulnerability in this project, report it privately. 12 | 13 | Please disclose it at [security advisory](https://github.com/flanglet/kanzi-cpp/security/advisories/new). 14 | 15 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 16 | 17 | * Operating system 18 | * Hardware: CPU, memory 19 | * Kanzi version 20 | * Command line invoked 21 | * Error reported/crash data/log output 22 | 23 | If possible provide a minimal reproducer. 24 | -------------------------------------------------------------------------------- /bin/.gitignore: -------------------------------------------------------------------------------- 1 | **/Debug/** 2 | **/Release/** 3 | **/*.obj 4 | **/*.o 5 | **/*.htm 6 | **/*.exe 7 | **/*.idb 8 | **/*.pdb 9 | **/*.ncb 10 | **/*.sln 11 | **/*.suo 12 | **/*vcproj* -------------------------------------------------------------------------------- /kanzi.1.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/flanglet/kanzi-cpp/6a7e95d3936b79140d642875602ab61da52ad632/kanzi.1.gz -------------------------------------------------------------------------------- /lib/.gitignore: -------------------------------------------------------------------------------- 1 | **/Debug/** 2 | **/Release/** 3 | **/*.obj 4 | **/*.o 5 | **/*.htm 6 | **/*.exe 7 | **/*.idb 8 | **/*.pdb 9 | **/*.ncb 10 | **/*.sln 11 | **/*.suo 12 | **/*vcproj* 
-------------------------------------------------------------------------------- /src/BitStreamException.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BitStreamException_ 18 | #define _BitStreamException_ 19 | 20 | #include 21 | #include 22 | #include "types.hpp" 23 | 24 | 25 | namespace kanzi 26 | { 27 | 28 | class BitStreamException : public std::runtime_error 29 | { 30 | private: 31 | int _code; 32 | 33 | public: 34 | enum BitStreamStatus { 35 | UNDEFINED = 0, 36 | INPUT_OUTPUT = 1, 37 | END_OF_STREAM = 2, 38 | INVALID_STREAM = 3, 39 | STREAM_CLOSED = 4 40 | }; 41 | 42 | BitStreamException(const std::string& msg) : std::runtime_error(msg) 43 | { 44 | _code = UNDEFINED; 45 | } 46 | 47 | BitStreamException(const std::string& msg, int code) : std::runtime_error(msg), _code(code) 48 | { 49 | } 50 | 51 | int error() const { return _code; } 52 | 53 | virtual ~BitStreamException() NOEXCEPT {} 54 | }; 55 | 56 | } 57 | #endif 58 | 59 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(kanzi) 3 | 4 | # Set C++ standard 5 | set(CMAKE_CXX_STANDARD 17) 6 | #set(CMAKE_CXX_STANDARD_REQUIRED True) 7 | 
#set(CMAKE_CXX_COMPILER "clang++") 8 | 9 | if(CONCURRENCY_DISABLED) 10 | add_definitions(-DCONCURRENCY_DISABLED) 11 | endif() 12 | 13 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -O3 -fomit-frame-pointer -fPIC -DNDEBUG -pedantic -march=native -fno-rtti") 14 | 15 | if(MSVC) 16 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 17 | endif() 18 | 19 | # Source files 20 | set(LIB_COMMON_SOURCES 21 | Global.cpp 22 | Event.cpp 23 | entropy/EntropyUtils.cpp 24 | entropy/HuffmanCommon.cpp 25 | entropy/CMPredictor.cpp 26 | entropy/TPAQPredictor.cpp 27 | transform/AliasCodec.cpp 28 | transform/BWT.cpp 29 | transform/BWTS.cpp 30 | transform/DivSufSort.cpp 31 | transform/SBRT.cpp 32 | transform/BWTBlockCodec.cpp 33 | transform/LZCodec.cpp 34 | transform/FSDCodec.cpp 35 | transform/ROLZCodec.cpp 36 | transform/RLT.cpp 37 | transform/SRT.cpp 38 | transform/TextCodec.cpp 39 | transform/UTFCodec.cpp 40 | transform/EXECodec.cpp 41 | transform/ZRLT.cpp 42 | ) 43 | 44 | set(LIB_COMP_SOURCES 45 | api/Compressor.cpp 46 | bitstream/DebugOutputBitStream.cpp 47 | bitstream/DefaultOutputBitStream.cpp 48 | io/CompressedOutputStream.cpp 49 | entropy/ANSRangeEncoder.cpp 50 | entropy/BinaryEntropyEncoder.cpp 51 | entropy/ExpGolombEncoder.cpp 52 | entropy/FPAQEncoder.cpp 53 | entropy/HuffmanEncoder.cpp 54 | entropy/RangeEncoder.cpp 55 | ) 56 | 57 | set(LIB_DECOMP_SOURCES 58 | api/Decompressor.cpp 59 | bitstream/DebugInputBitStream.cpp 60 | bitstream/DefaultInputBitStream.cpp 61 | io/CompressedInputStream.cpp 62 | entropy/ANSRangeDecoder.cpp 63 | entropy/BinaryEntropyDecoder.cpp 64 | entropy/ExpGolombDecoder.cpp 65 | entropy/FPAQDecoder.cpp 66 | entropy/HuffmanDecoder.cpp 67 | entropy/RangeDecoder.cpp 68 | ) 69 | 70 | set(TEST_SOURCES 71 | test/TestEntropyCodec.cpp 72 | test/TestBWT.cpp 73 | test/TestCompressedStream.cpp 74 | test/TestDefaultBitStream.cpp 75 | test/TestTransforms.cpp 76 | ) 77 | 78 | set(APP_SOURCES 79 | app/Kanzi.cpp 80 | app/InfoPrinter.cpp 81 | 
app/BlockCompressor.cpp 82 | app/BlockDecompressor.cpp 83 | ) 84 | 85 | # Libraries 86 | add_library(libkanzi STATIC ${LIB_COMMON_SOURCES} ${LIB_COMP_SOURCES} ${LIB_DECOMP_SOURCES}) 87 | add_library(libkanzi_shared SHARED ${LIB_COMMON_SOURCES} ${LIB_COMP_SOURCES} ${LIB_DECOMP_SOURCES}) 88 | set_target_properties(libkanzi PROPERTIES OUTPUT_NAME "kanzi") 89 | set_target_properties(libkanzi_shared PROPERTIES OUTPUT_NAME "kanzi") 90 | 91 | #add_library(libkanzi_comp STATIC ${LIB_COMP_SOURCES}) 92 | #add_library(libkanzi_decomp STATIC ${LIB_DECOMP_SOURCES}) 93 | 94 | #add_library(libkanzi_comp_shared SHARED ${LIB_COMP_SOURCES}) 95 | #add_library(libkanzi_decomp_shared SHARED ${LIB_DECOMP_SOURCES}) 96 | 97 | # Test executables 98 | add_executable(testBWT test/TestBWT.cpp) 99 | target_link_libraries(testBWT libkanzi) 100 | 101 | add_executable(testTransforms test/TestTransforms.cpp) 102 | target_link_libraries(testTransforms libkanzi) 103 | 104 | add_executable(testEntropyCodec test/TestEntropyCodec.cpp) 105 | target_link_libraries(testEntropyCodec libkanzi) 106 | 107 | add_executable(testDefaultBitStream test/TestDefaultBitStream.cpp) 108 | target_link_libraries(testDefaultBitStream libkanzi) 109 | 110 | add_executable(testCompressedStream test/TestCompressedStream.cpp) 111 | target_link_libraries(testCompressedStream libkanzi) 112 | 113 | # Main executable 114 | add_executable(kanzi ${APP_SOURCES}) 115 | target_link_libraries(kanzi libkanzi) 116 | 117 | # Custom target to build all tests 118 | add_custom_target(test 119 | DEPENDS testBWT testTransforms testEntropyCodec testDefaultBitStream testCompressedStream 120 | ) 121 | # Custom target to build static libraries 122 | add_custom_target(static_lib 123 | DEPENDS libkanzi #libkanzi_comp libkanzi_decomp 124 | ) 125 | 126 | # Custom target to build shared libraries 127 | add_custom_target(shared_lib 128 | DEPENDS libkanzi_shared #libkanzi_comp_shared libkanzi_decomp_shared 129 | ) 130 | 131 | # Custom target to build all 
libraries (static and shared) 132 | add_custom_target(lib 133 | DEPENDS static_lib shared_lib 134 | ) 135 | -------------------------------------------------------------------------------- /src/Context.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Context_ 18 | #define _Context_ 19 | 20 | #include 21 | #include 22 | #include 23 | #include "concurrent.hpp" 24 | #include "util/strings.hpp" 25 | 26 | namespace kanzi 27 | { 28 | 29 | // Poor man's equivalent to std::variant used to support C++98 and up. 30 | // A union cannot be used due to the std::string field. 31 | // The extra memory used does not matter for the application context since 32 | // the map is small. 
33 | typedef struct ContextVal { 34 | int64 lVal; 35 | std::string sVal; 36 | bool isString; 37 | 38 | ContextVal(bool b, int64 val, const std::string& str) : lVal(val), sVal(str), isString(b) {} 39 | ContextVal() { isString = false; lVal = 0; } 40 | } ctxVal; 41 | 42 | class Context 43 | { 44 | public: 45 | 46 | #ifdef CONCURRENCY_ENABLED 47 | Context(ThreadPool* p = nullptr) : _pool(p) {} 48 | Context(const Context& c) : _map(c._map), _pool(c._pool) {} 49 | Context(const Context& c, ThreadPool* p) : _map(c._map), _pool(p) {} 50 | Context& operator=(const Context& c) = default; 51 | #else 52 | Context() {} 53 | Context(const Context& c) : _map(c._map) {} 54 | Context& operator=(const Context& c) { _map = c._map; return *this; }; 55 | #endif 56 | 57 | virtual ~Context() {} 58 | bool has(const std::string& key) const; 59 | int getInt(const std::string& key, int defValue = 0) const; 60 | int64 getLong(const std::string& key, int64 defValue = 0) const; 61 | std::string getString(const std::string& key, const std::string& defValue = "") const; 62 | void putInt(const std::string& key, int value); 63 | void putLong(const std::string& key, int64 value); 64 | void putString(const std::string& key, const std::string& value); 65 | 66 | #ifdef CONCURRENCY_ENABLED 67 | ThreadPool* getPool() const { return _pool; } 68 | #endif 69 | 70 | private: 71 | std::map _map; 72 | 73 | #ifdef CONCURRENCY_ENABLED 74 | ThreadPool* _pool; 75 | #endif 76 | }; 77 | 78 | 79 | inline bool Context::has(const std::string& key) const 80 | { 81 | return _map.find(key) != _map.end(); 82 | } 83 | 84 | 85 | inline int Context::getInt(const std::string& key, int defValue) const 86 | { 87 | return int(this->getLong(key, defValue)); 88 | } 89 | 90 | 91 | inline int64 Context::getLong(const std::string& key, int64 defValue) const 92 | { 93 | std::map::const_iterator it = _map.find(key); 94 | 95 | if (it == _map.end()) 96 | return defValue; 97 | 98 | return it->second.isString == true ? 
defValue : it->second.lVal; 99 | } 100 | 101 | 102 | inline std::string Context::getString(const std::string& key, const std::string& defValue) const 103 | { 104 | std::map::const_iterator it = _map.find(key); 105 | 106 | if (it == _map.end()) 107 | return defValue; 108 | 109 | return it->second.isString == true ? it->second.sVal : defValue; 110 | } 111 | 112 | 113 | inline void Context::putInt(const std::string& key, int value) 114 | { 115 | _map[key] = ctxVal(false, value, ""); 116 | } 117 | 118 | 119 | inline void Context::putLong(const std::string& key, int64 value) 120 | { 121 | _map[key] = ctxVal(false, value, ""); 122 | } 123 | 124 | 125 | inline void Context::putString(const std::string& key, const std::string& value) 126 | { 127 | _map[key] = ctxVal(true, 0, value); 128 | } 129 | 130 | } 131 | #endif 132 | 133 | 134 | -------------------------------------------------------------------------------- /src/EntropyDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _EntropyDecoder_ 18 | #define _EntropyDecoder_ 19 | 20 | #include "InputBitStream.hpp" 21 | 22 | namespace kanzi 23 | { 24 | // EntropyDecoder entropy decodes data from a bitstream 25 | class EntropyDecoder 26 | { 27 | public: 28 | // Decode the array provided from the bitstream. 
// EntropyEncoder is the abstract interface of the entropy encoders: an
// implementation entropy encodes bytes and writes the result to an
// OutputBitStream. All operations are pure virtual.
class EntropyEncoder
{
public:
    // Encode the array provided into the bitstream.
    // block:  bytes to encode
    // blkptr: presumably the index in block of the first byte to encode
    //         (TODO confirm against the implementations)
    // len:    presumably the number of bytes to encode (TODO confirm)
    // Return the number of bytes written to the bitstream.
    virtual int encode(const byte block[], uint blkptr, uint len) = 0;

    // Return the underlying bitstream
    virtual OutputBitStream& getBitStream() const = 0;

    // Must be called before getting rid of the entropy encoder.
    // Trying to encode after a call to dispose gives undefined behavior
    virtual void dispose() = 0;

    // Virtual so that an encoder can be destroyed through this interface.
    virtual ~EntropyEncoder(){}
};
Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include 18 | #include 19 | #include "Event.hpp" 20 | 21 | using namespace kanzi; 22 | 23 | Event::Event(Event::Type type, int id, const std::string& msg, clock_t evtTime) 24 | : _type(type) 25 | , _time(evtTime) 26 | , _msg(msg) 27 | , _id(id) 28 | { 29 | _size = 0; 30 | _hash = 0; 31 | _hashType = NO_HASH; 32 | _offset = -1; 33 | _skipFlags = 0; 34 | } 35 | 36 | Event::Event(Event::Type type, int id, int64 size, clock_t evtTime, 37 | uint64 hash, HashType hashType, int64 offset, uint8 skipFlags) 38 | : _type(type) 39 | , _time(evtTime) 40 | , _id(id) 41 | , _size(size) 42 | , _offset(offset) 43 | , _hash(hash) 44 | , _hashType(hashType) 45 | , _skipFlags(skipFlags) 46 | { 47 | } 48 | 49 | std::string Event::toString() const 50 | { 51 | if (_msg.size() > 0) 52 | return _msg; 53 | 54 | std::stringstream ss; 55 | ss << "{ \"type\":\"" << getTypeAsString() << "\""; 56 | 57 | if (_id >= 0) 58 | ss << ", \"id\":" << getId(); 59 | 60 | ss << ", \"size\":" << getSize(); 61 | 62 | if (getType() != BLOCK_INFO) 63 | ss << ", \"time\":" << getTime(); 64 | 65 | if (_hashType != NO_HASH) { 66 | ss << ", \"hash\":\""; 67 | ss << std::uppercase << std::setfill('0'); 68 | 69 | if (_hashType == SIZE_32) 70 | ss << std::setw(8) << std::hex << getHash() << "\""; 71 | else 72 | ss << std::setw(16) << std::hex << getHash() << "\""; 73 | } 74 | 75 | if (getType() == BLOCK_INFO) { 76 
| ss << ", \"offset\":" << getOffset(); 77 | ss << ", \"skipFlags\": "; 78 | 79 | for (int i = 128; i >= 1; i >>= 1) 80 | ss << ((_skipFlags & i) == 0 ? "0" : "1"); 81 | } 82 | 83 | ss << " }"; 84 | return ss.str(); 85 | } 86 | 87 | std::string Event::getTypeAsString() const 88 | { 89 | switch (_type) { 90 | case AFTER_HEADER_DECODING: 91 | return "AFTER_HEADER_DECODING"; 92 | 93 | case COMPRESSION_END: 94 | return "COMPRESSION_END"; 95 | 96 | case BEFORE_TRANSFORM: 97 | return "BEFORE_TRANSFORM"; 98 | 99 | case AFTER_TRANSFORM: 100 | return "AFTER_TRANSFORM"; 101 | 102 | case BEFORE_ENTROPY: 103 | return "BEFORE_ENTROPY"; 104 | 105 | case AFTER_ENTROPY: 106 | return "AFTER_ENTROPY"; 107 | 108 | case DECOMPRESSION_START: 109 | return "DECOMPRESSION_START"; 110 | 111 | case DECOMPRESSION_END: 112 | return "DECOMPRESSION_END"; 113 | 114 | case COMPRESSION_START: 115 | return "COMPRESSION_START"; 116 | 117 | case BLOCK_INFO: 118 | return "BLOCK_INFO"; 119 | 120 | default: 121 | return "Unknown Type"; 122 | } 123 | } 124 | 125 | -------------------------------------------------------------------------------- /src/Event.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _Event_ 18 | #define _Event_ 19 | 20 | #include 21 | #include 22 | #include "types.hpp" 23 | 24 | namespace kanzi 25 | { 26 | 27 | class Event { 28 | public: 29 | enum Type { 30 | COMPRESSION_START, 31 | COMPRESSION_END, 32 | BEFORE_TRANSFORM, 33 | AFTER_TRANSFORM, 34 | BEFORE_ENTROPY, 35 | AFTER_ENTROPY, 36 | DECOMPRESSION_START, 37 | DECOMPRESSION_END, 38 | AFTER_HEADER_DECODING, 39 | BLOCK_INFO 40 | }; 41 | 42 | enum HashType { 43 | NO_HASH, 44 | SIZE_32, 45 | SIZE_64 46 | }; 47 | 48 | Event(Type type, int id, const std::string& msg, clock_t evtTime = 0); 49 | 50 | Event(Type type, int id, int64 size, clock_t evtTime, uint64 hash = 0, 51 | HashType hashType = NO_HASH, int64 offset = -1, uint8 skipFlags = 0); 52 | 53 | virtual ~Event() {} 54 | 55 | int getId() const { return _id; } 56 | 57 | int64 getSize() const { return _size; } 58 | 59 | Event::Type getType() const { return _type; } 60 | 61 | std::string getTypeAsString() const; 62 | 63 | clock_t getTime() const { return _time; } 64 | 65 | uint64 getHash() const { return _hashType != NO_HASH ? _hash : 0; } 66 | 67 | int64 getOffset() const { return _offset; } 68 | 69 | HashType getHashType() const { return _hashType; } 70 | 71 | std::string toString() const; 72 | 73 | private: 74 | Event::Type _type; 75 | clock_t _time; 76 | std::string _msg; 77 | int _id; 78 | int64 _size; 79 | int64 _offset; 80 | uint64 _hash; 81 | HashType _hashType; 82 | uint8 _skipFlags; 83 | }; 84 | } 85 | #endif 86 | 87 | -------------------------------------------------------------------------------- /src/Global.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Global_ 18 | #define _Global_ 19 | 20 | #include 21 | #include 22 | 23 | #include "types.hpp" 24 | 25 | namespace kanzi { 26 | 27 | class Global { 28 | public: 29 | enum DataType { UNDEFINED, TEXT, MULTIMEDIA, EXE, NUMERIC, BASE64, DNA, BIN, UTF8, SMALL_ALPHABET }; 30 | 31 | static int stretch(int d); // ln(x / (1 - x)) 32 | 33 | static int squash(int d); // 1 / (1 + e-x) (inverse of stretch) 34 | 35 | static int log2(uint32 x); // fast, integer rounded 36 | 37 | static int log2(uint64 x); // fast, integer rounded 38 | 39 | static int _log2(uint32 x); // same as log2 minus check on input value 40 | 41 | static int _log2(uint64 x); // same as log2 minus check on input value 42 | 43 | static int trailingZeros(uint32 x); 44 | 45 | static int trailingZeros(uint64 x); 46 | 47 | static int log2_1024(uint32 x); // slow, accurate to 1/1024th 48 | 49 | static void computeJobsPerTask(int jobsPerTask[], int jobs, int tasks); 50 | 51 | static int computeFirstOrderEntropy1024(int blockLen, const uint histo[]); 52 | 53 | static void computeHistogram(const byte block[], int end, uint freqs[], bool isOrder0=true, bool withTotal=false); 54 | 55 | static DataType detectSimpleType(int count, const uint histo[]); 56 | 57 | static bool isReservedName(std::string fileName); 58 | 59 | private: 60 | Global(); 61 | ~Global() {} 62 | 63 | static const Global _singleton; 64 | static const int LOG2_4096[257]; // 4096*Math.log2(x) 65 | static const int LOG2[256]; // int(Math.log2(x-1)) 66 | static int STRETCH[4096]; 67 | static 
int SQUASH[4096]; 68 | static char BASE64_SYMBOLS[]; 69 | static char DNA_SYMBOLS[]; 70 | static char NUMERIC_SYMBOLS[]; 71 | 72 | std::set _reservedNames; 73 | }; 74 | 75 | 76 | // return p = 1/(1 + exp(-d)), d scaled by 8 bits, p scaled by 12 bits 77 | inline int Global::squash(int d) 78 | { 79 | if (d >= 2048) 80 | return 4095; 81 | 82 | return (d <= -2048) ? 0 : SQUASH[d + 2047]; 83 | } 84 | 85 | inline int Global::stretch(int d) 86 | { 87 | return STRETCH[d]; 88 | } 89 | 90 | // x cannot be 0 91 | inline int Global::_log2(uint32 x) 92 | { 93 | #if defined(_MSC_VER) 94 | unsigned long res; 95 | _BitScanReverse(&res, x); 96 | return int(res); 97 | #elif defined(__GNUG__) 98 | return 31 ^ __builtin_clz(x); 99 | #elif defined(__clang__) 100 | return 31 ^ __builtin_clz(x); 101 | #else 102 | int res = 0; 103 | 104 | if (x >= 1 << 16) { 105 | x >>= 16; 106 | res = 16; 107 | } 108 | 109 | if (x >= 1 << 8) { 110 | x >>= 8; 111 | res += 8; 112 | } 113 | 114 | return res + Global::LOG2[x - 1]; 115 | #endif 116 | } 117 | 118 | 119 | // x cannot be 0 120 | inline int Global::_log2(uint64 x) 121 | { 122 | #if defined(_MSC_VER) && defined(_M_AMD64) 123 | unsigned long res; 124 | _BitScanReverse64(&res, x); 125 | return int(res); 126 | #elif defined(__GNUG__) 127 | return 63 ^ __builtin_clzll(x); 128 | #elif defined(__clang__) 129 | return 63 ^ __builtin_clzll(x); 130 | #else 131 | int res = 0; 132 | 133 | if (x >= uint64(1) << 32) { 134 | x >>= 32; 135 | res = 32; 136 | } 137 | 138 | if (x >= uint64(1) << 16) { 139 | x >>= 16; 140 | res += 16; 141 | } 142 | 143 | if (x >= uint64(1) << 8) { 144 | x >>= 8; 145 | res += 8; 146 | } 147 | 148 | return res + Global::LOG2[x - 1]; 149 | #endif 150 | } 151 | 152 | 153 | // x cannot be 0 154 | inline int Global::trailingZeros(uint32 x) 155 | { 156 | #if defined(_MSC_VER) 157 | unsigned long res; 158 | _BitScanForward(&res, x); 159 | return int(res); 160 | #elif defined(__GNUG__) 161 | return __builtin_ctz(x); 162 | #elif 
defined(__clang__) 163 | return __builtin_ctz(x); 164 | #else 165 | return _log2((x & (~x + 1)) - 1); 166 | #endif 167 | } 168 | 169 | 170 | // x cannot be 0 171 | inline int Global::trailingZeros(uint64 x) 172 | { 173 | #if defined(_MSC_VER) && defined(_M_AMD64) 174 | unsigned long res; 175 | _BitScanForward64(&res, x); 176 | return int(res); 177 | #elif defined(__GNUG__) 178 | return __builtin_ctzll(x); 179 | #elif defined(__clang__) 180 | return __builtin_ctzll(x); 181 | #else 182 | return _log2((x & (~x + 1)) - 1); 183 | #endif 184 | } 185 | } 186 | #endif 187 | 188 | -------------------------------------------------------------------------------- /src/InputBitStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _InputBitStream_ 18 | #define _InputBitStream_ 19 | 20 | #include "types.hpp" 21 | 22 | namespace kanzi 23 | { 24 | 25 | class InputBitStream 26 | { 27 | public: 28 | // Returns 1 or 0 29 | virtual int readBit() = 0; 30 | 31 | // Length is the number of bits in [1..64]. Return the bits read as a long 32 | // Throws if the stream is closed. 33 | virtual uint64 readBits(uint length) = 0; 34 | 35 | // Read bits and put them in the byte array. Length is the number of bits 36 | // Return the number of bits read. 37 | // Throws if the stream is closed. 
38 | virtual uint readBits(byte bits[], uint length) = 0; 39 | 40 | virtual void close() = 0; 41 | 42 | // Number of bits read 43 | virtual uint64 read() const = 0; 44 | 45 | // Return false when the bitstream is closed or the End-Of-Stream has been reached 46 | virtual bool hasMoreToRead() = 0; 47 | 48 | InputBitStream(){} 49 | 50 | virtual ~InputBitStream(){} 51 | }; 52 | 53 | } 54 | #endif 55 | 56 | -------------------------------------------------------------------------------- /src/InputStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _InputStream_ 18 | #define _InputStream_ 19 | 20 | #include 21 | 22 | namespace kanzi 23 | { 24 | // Maps to istream 25 | typedef std::istream InputStream; 26 | } 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /src/Listener.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Listener_ 18 | #define _Listener_ 19 | 20 | namespace kanzi 21 | { 22 | 23 | template 24 | class Listener 25 | { 26 | public: 27 | Listener(){} 28 | 29 | virtual void processEvent(const T& evt) = 0; 30 | 31 | virtual ~Listener(){} 32 | }; 33 | 34 | } 35 | #endif 36 | 37 | -------------------------------------------------------------------------------- /src/OutputBitStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _OutputBitStream_ 18 | #define _OutputBitStream_ 19 | 20 | #include "types.hpp" 21 | 22 | namespace kanzi 23 | { 24 | 25 | class OutputBitStream 26 | { 27 | public: 28 | // Write the least significant bit of the input integer 29 | // Throws if the stream is closed. 30 | virtual void writeBit(int bit) = 0; 31 | 32 | // Length is the number of bits in [1..64]. Return the number of bits written. 
33 | // Throws if the stream is closed. 34 | virtual uint writeBits(uint64 bits, uint length) = 0; 35 | 36 | // Write bits ouf of the byte array. Length is the number of bits. 37 | // Return the number of bits written. 38 | // Throws if the stream is closed. 39 | virtual uint writeBits(const byte bits[], uint length) = 0; 40 | 41 | virtual void close() = 0; 42 | 43 | // Number of bits written 44 | virtual uint64 written() const = 0; 45 | 46 | OutputBitStream(){} 47 | 48 | virtual ~OutputBitStream(){} 49 | }; 50 | 51 | } 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /src/OutputStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _OutputStream_ 18 | #define _OutputStream_ 19 | 20 | #include 21 | 22 | namespace kanzi 23 | { 24 | // Maps to ostream 25 | typedef std::ostream OutputStream; 26 | } 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /src/Predictor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Predictor_ 18 | #define _Predictor_ 19 | 20 | namespace kanzi 21 | { 22 | 23 | // Predictor predicts the probability of the next bit being 1. 24 | class Predictor 25 | { 26 | public: 27 | Predictor(){} 28 | 29 | // Updates the internal probability model based on the observed bit 30 | virtual void update(int bit) = 0; 31 | 32 | // Returns the value representing the probability of the next bit being 1 33 | // in the [0..4095] range. 34 | // E.G. 410 represents roughly a probability of 10% for 1 35 | virtual int get() = 0; 36 | 37 | virtual ~Predictor(){} 38 | }; 39 | 40 | } 41 | #endif 42 | 43 | -------------------------------------------------------------------------------- /src/Seekable.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _Seekable_ 18 | #define _Seekable_ 19 | 20 | #include "types.hpp" 21 | 22 | 23 | namespace kanzi 24 | { 25 | 26 | class Seekable 27 | { 28 | public: 29 | Seekable(){} 30 | 31 | // return position in bits 32 | virtual int64 tell() = 0; 33 | 34 | // position in bits 35 | // return success or failure 36 | virtual bool seek(int64 position) = 0; 37 | 38 | virtual ~Seekable(){} 39 | }; 40 | 41 | } 42 | #endif 43 | 44 | -------------------------------------------------------------------------------- /src/SliceArray.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _SliceArray_ 18 | #define _SliceArray_ 19 | 20 | namespace kanzi 21 | { 22 | 23 | template 24 | class SliceArray 25 | { 26 | public: 27 | T* _array; 28 | int _length; // buffer length (a.k.a capacity) 29 | int _index; 30 | 31 | SliceArray(T* arr, int len, int index = 0) : _array(arr), _length(len), _index(index) {} 32 | 33 | #if __cplusplus < 201103L 34 | SliceArray(const SliceArray& sa) { _array = sa._array; _length = sa._length; _index = sa._index; } 35 | 36 | SliceArray& operator=(const SliceArray& sa); 37 | 38 | ~SliceArray(){} // does not deallocate buffer memory 39 | #else 40 | SliceArray(SliceArray&& sa) noexcept = default; 41 | 42 | SliceArray& operator=(SliceArray&& sa) noexcept = default; 43 | 44 | ~SliceArray() = default; 45 | #endif 46 | 47 | // Utility methods 48 | static bool isValid(const SliceArray& sa); 49 | }; 50 | 51 | template 52 | inline bool SliceArray::isValid(const SliceArray& sa) { 53 | return ((sa._array != nullptr) && (sa._index >= 0) && (sa._length >= 0) && (sa._index <= sa._length)); 54 | } 55 | 56 | #if __cplusplus < 201103L 57 | template 58 | inline SliceArray& SliceArray::operator=(const SliceArray& sa) { 59 | _array = sa._array; 60 | _length = sa._length; 61 | _index = sa._index; 62 | return *this; 63 | } 64 | #endif 65 | 66 | } 67 | #endif 68 | 69 | -------------------------------------------------------------------------------- /src/Transform.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Transform_ 18 | #define _Transform_ 19 | 20 | #include "SliceArray.hpp" 21 | 22 | namespace kanzi 23 | { 24 | 25 | // Transform is a class used to transform an input byte array and write 26 | // the result to an output byte array. The result may have a different size. 27 | // The transform must be stateless to ensure that the compression results 28 | // are the same regardless of the number of jobs (ie no information is retained 29 | // between to invocations of forward or inverse). 30 | template 31 | class Transform 32 | { 33 | public: 34 | Transform(){} 35 | 36 | virtual bool forward(SliceArray& src, SliceArray& dst, int length) = 0; 37 | 38 | virtual bool inverse(SliceArray& src, SliceArray& dst, int length) = 0; 39 | 40 | virtual int getMaxEncodedLength(int srcLen) const = 0; 41 | 42 | virtual ~Transform(){} 43 | }; 44 | 45 | } 46 | #endif 47 | 48 | -------------------------------------------------------------------------------- /src/api/Compressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Compressor_ 18 | #define _Compressor_ 19 | 20 | #ifdef _WIN32 21 | #define CDECL __cdecl 22 | #else 23 | #define CDECL 24 | #endif 25 | 26 | #include 27 | 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | typedef unsigned char BYTE; 34 | 35 | /** 36 | * Compression parameters 37 | */ 38 | struct cData { 39 | char transform[64]; /* name of transforms [None|PACK|BWT|BWTS|LZ|LZX|LZP|ROLZ|ROLZX] 40 | [RLT|ZRLT|MTFT|RANK|SRT|TEXT|MM|EXE|UTF|DNA] */ 41 | char entropy[16]; /* name of entropy codec [None|Huffman|ANS0|ANS1|Range|FPAQ|TPAQ|TPAQX|CM] */ 42 | unsigned int blockSize; /* size of block in bytes */ 43 | unsigned int jobs; /* max number of concurrent tasks */ 44 | int checksum; /* 0, 32 or 64 to indicate size of block checksum */ 45 | int headerless; /* bool to indicate if the bitstream has a header (usually set to 0) */ 46 | }; 47 | 48 | /** 49 | * Compression context: encapsulates compressor state (opaque: could change in future versions) 50 | */ 51 | struct cContext { 52 | void* pCos; 53 | unsigned int blockSize; 54 | void* fos; 55 | }; 56 | 57 | 58 | /** 59 | * Initialize the compressor internal states. 
60 | * 61 | * @param cParam [IN] - the compression parameters 62 | * @param dst [IN] - the destination stream of compressed data 63 | * @param ctx [IN|OUT] - pointer to the compression context created by the call 64 | * 65 | * @return 0 in case of success 66 | */ 67 | int CDECL initCompressor(struct cData* cParam, FILE* dst, struct cContext** ctx); 68 | 69 | /** 70 | * Compress a block of data. The compressor must have been initialized. 71 | * 72 | * @param ctx [IN] - the compression context created during initialization 73 | * @param src [IN] - the source block of data to compress 74 | * @param inSize [IN|OUT] - the size of the source block to compress. 75 | Updated to reflect the number bytes written to the destination. 76 | * @param outSize [OUT] - the size of the compressed data 77 | * 78 | * @return 0 in case of success 79 | */ 80 | int CDECL compress(struct cContext* ctx, const BYTE* src, int* inSize, int* outSize); 81 | 82 | /** 83 | * Dispose the compressor and cleanup memory resources. 84 | * 85 | * @param ctx [IN] - the compression context created during initialization 86 | * @param outSize [IN|OUT] - the number of bytes written to the destination 87 | * (the compressor may flush internal data) 88 | * 89 | * @return 0 in case of success 90 | */ 91 | int CDECL disposeCompressor(struct cContext* ctx, int* outSize); 92 | 93 | #ifdef __cplusplus 94 | } 95 | #endif 96 | 97 | 98 | #endif 99 | 100 | -------------------------------------------------------------------------------- /src/api/Decompressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Decompressor_ 18 | #define _Decompressor_ 19 | 20 | #ifdef _WIN32 21 | #define CDECL __cdecl 22 | #else 23 | #define CDECL 24 | #endif 25 | 26 | #include 27 | 28 | #ifdef __cplusplus 29 | extern "C" { 30 | #endif 31 | 32 | typedef unsigned char BYTE; 33 | 34 | /** 35 | * Decompression parameters 36 | */ 37 | struct dData { 38 | // Required fields 39 | unsigned int bufferSize; /* read buffer size (at least block size) */ 40 | unsigned int jobs; /* max number of concurrent tasks */ 41 | int headerless; /* bool to indicate if the bitstream has a header (usually set to 0) */ 42 | 43 | // Optional fields: only required if headerless is true 44 | char transform[64]; /* name of transforms [None|PACK|BWT|BWTS|LZ|LZX|LZP|ROLZ|ROLZX] 45 | [RLT|ZRLT|MTFT|RANK|SRT|TEXT|MM|EXE|UTF|DNA] */ 46 | char entropy[16]; /* name of entropy codec [None|Huffman|ANS0|ANS1|Range|FPAQ|TPAQ|TPAQX|CM] */ 47 | unsigned int blockSize; /* size of block in bytes */ 48 | unsigned long originalSize; /* size of original file in bytes */ 49 | int checksum; /* 0, 32 or 64 to indicate size of block checksum */ 50 | int bsVersion; /* version of the bitstream */ 51 | }; 52 | 53 | /** 54 | * Decompression context: encapsulates decompressor state (opaque: could change in future versions) 55 | */ 56 | struct dContext { 57 | void* pCis; 58 | unsigned int bufferSize; 59 | void* fis; 60 | }; 61 | 62 | /** 63 | * Initialize the decompressor internal states. 
64 | * 65 | * @param dParam [IN] - the decompression parameters 66 | * @param src [IN] - the source stream of compressed data 67 | * @param ctx [IN|OUT] - a pointer to the decompression context created by the call 68 | * 69 | * @return 0 in case of success 70 | */ 71 | int CDECL initDecompressor(struct dData* dParam, FILE* src, struct dContext** ctx); 72 | 73 | /** 74 | * Decompress a block of data. The decompressor must have been initialized. 75 | * 76 | * @param ctx [IN] - the decompression context created during initialization 77 | * @param dst [IN] - the destination block of decompressed data 78 | * @param inSize [OUT] - the number of bytes read from source. 79 | * @param outSize [IN|OUT] - the size of the block to decompress. 80 | * Updated to reflect the number of decompressed bytes 81 | * 82 | * @return 0 in case of success 83 | */ 84 | int CDECL decompress(struct dContext* ctx, BYTE* dst, int* inSize, int* outSize); 85 | 86 | /** 87 | * Dispose the decompressor and cleanup memory resources. 88 | * 89 | * @param ctx [IN] - the compression context created during initialization 90 | * 91 | * @return 0 in case of success 92 | */ 93 | int CDECL disposeDecompressor(struct dContext* ctx); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | 100 | #endif 101 | 102 | -------------------------------------------------------------------------------- /src/app/BlockCompressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BlockCompressor_ 18 | #define _BlockCompressor_ 19 | 20 | #include 21 | #include 22 | #include "../InputStream.hpp" 23 | #include "../io/CompressedOutputStream.hpp" 24 | 25 | namespace kanzi { 26 | 27 | class FileCompressResult { 28 | public: 29 | int _code; 30 | uint64 _read; 31 | uint64 _written; 32 | std::string _errMsg; 33 | 34 | FileCompressResult() 35 | : _code(0) 36 | , _read(0) 37 | , _written(0) 38 | , _errMsg() 39 | { 40 | } 41 | 42 | FileCompressResult(int code, uint64 read, uint64 written, const std::string& errMsg) 43 | : _code(code) 44 | , _read(read) 45 | , _written(written) 46 | , _errMsg(errMsg) 47 | { 48 | } 49 | 50 | #if __cplusplus < 201103L 51 | FileCompressResult(const FileCompressResult& fcr) 52 | : _code(fcr._code) 53 | , _read(fcr._read) 54 | , _written(fcr._written) 55 | , _errMsg(fcr._errMsg) 56 | { 57 | } 58 | 59 | FileCompressResult& operator=(const FileCompressResult& fcr) 60 | { 61 | _errMsg = fcr._errMsg; 62 | _code = fcr._code; 63 | _read = fcr._read; 64 | _written = fcr._written; 65 | return *this; 66 | } 67 | 68 | ~FileCompressResult() {} 69 | #else 70 | FileCompressResult(const FileCompressResult& fdr) = delete; 71 | 72 | FileCompressResult& operator=(const FileCompressResult& fdr) = delete; 73 | 74 | FileCompressResult(FileCompressResult&& fdr) = default; 75 | 76 | FileCompressResult& operator=(FileCompressResult&& fdr) = default; 77 | 78 | ~FileCompressResult() = default; 79 | #endif 80 | }; 81 | 82 | #ifdef CONCURRENCY_ENABLED 83 | template 84 | class FileCompressWorker FINAL : public Task { 85 | public: 86 | FileCompressWorker(BoundedConcurrentQueue* queue) : _queue(queue) { } 87 | 88 | ~FileCompressWorker() {} 89 | 90 | R run(); 91 | 92 | private: 93 | BoundedConcurrentQueue* _queue; 94 | }; 95 | #endif 96 | 97 | template 98 | class FileCompressTask FINAL : public Task { 99 
| public: 100 | static const int DEFAULT_BUFFER_SIZE = 65536; 101 | 102 | FileCompressTask(const Context& ctx, std::vector*>& listeners); 103 | 104 | ~FileCompressTask(); 105 | 106 | T run(); 107 | 108 | void dispose(); 109 | 110 | private: 111 | Context _ctx; 112 | InputStream* _is; 113 | CompressedOutputStream* _cos; 114 | std::vector*> _listeners; 115 | }; 116 | 117 | 118 | typedef FileCompressTask FCTask; 119 | 120 | class BlockCompressor { 121 | friend class FileCompressTask; 122 | 123 | public: 124 | BlockCompressor(const Context& ctx); 125 | 126 | ~BlockCompressor(); 127 | 128 | int compress(uint64& written); 129 | 130 | bool addListener(Listener& bl); 131 | 132 | bool removeListener(Listener& bl); 133 | 134 | void dispose() const {}; 135 | 136 | private: 137 | static const int DEFAULT_BLOCK_SIZE; 138 | static const int MIN_BLOCK_SIZE; 139 | static const int MAX_BLOCK_SIZE; 140 | 141 | int _verbosity; 142 | int _checksum; 143 | bool _overwrite; 144 | bool _skipBlocks; 145 | std::string _inputName; 146 | std::string _outputName; 147 | std::string _codec; 148 | std::string _transform; 149 | int _blockSize; 150 | bool _autoBlockSize; // derive block size from input size and jobs 151 | int _jobs; 152 | std::vector*> _listeners; 153 | bool _reorderFiles; 154 | bool _noDotFiles; 155 | bool _noLinks; 156 | Context _ctx; 157 | 158 | static void notifyListeners(std::vector*>& listeners, const Event& evt); 159 | 160 | static void getTransformAndCodec(int level, std::string tranformAndCodec[2]); 161 | }; 162 | } 163 | #endif 164 | 165 | -------------------------------------------------------------------------------- /src/app/BlockDecompressor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BlockDecompressor_ 18 | #define _BlockDecompressor_ 19 | 20 | #include 21 | #include 22 | #include "../OutputStream.hpp" 23 | #include "../io/CompressedInputStream.hpp" 24 | 25 | namespace kanzi { 26 | class FileDecompressResult { 27 | public: 28 | int _code; 29 | uint64 _read; 30 | std::string _errMsg; 31 | 32 | FileDecompressResult() 33 | : _code(0) 34 | , _read(0) 35 | , _errMsg() 36 | { 37 | } 38 | 39 | FileDecompressResult(int code, uint64 read, const std::string& errMsg) 40 | : _code(code) 41 | , _read(read) 42 | , _errMsg(errMsg) 43 | { 44 | } 45 | 46 | #if __cplusplus < 201103L 47 | FileDecompressResult(const FileDecompressResult& fdr) 48 | : _code(fdr._code) 49 | , _read(fdr._read) 50 | , _errMsg(fdr._errMsg) 51 | { 52 | } 53 | 54 | FileDecompressResult& operator=(const FileDecompressResult& fdr) 55 | { 56 | _errMsg = fdr._errMsg; 57 | _code = fdr._code; 58 | _read = fdr._read; 59 | return *this; 60 | } 61 | 62 | ~FileDecompressResult() {} 63 | #else 64 | FileDecompressResult(const FileDecompressResult& fcr) = delete; 65 | 66 | FileDecompressResult& operator=(const FileDecompressResult& fcr) = delete; 67 | 68 | FileDecompressResult(FileDecompressResult&& fcr) = default; 69 | 70 | FileDecompressResult& operator=(FileDecompressResult&& fcr) = default; 71 | 72 | ~FileDecompressResult() = default; 73 | #endif 74 | }; 75 | 76 | #ifdef CONCURRENCY_ENABLED 77 | template 78 | class FileDecompressWorker FINAL : public Task { 79 | public: 80 | FileDecompressWorker(BoundedConcurrentQueue* queue) : 
_queue(queue) { } 81 | 82 | ~FileDecompressWorker() {} 83 | 84 | R run(); 85 | 86 | private: 87 | BoundedConcurrentQueue* _queue; 88 | }; 89 | #endif 90 | 91 | template 92 | class FileDecompressTask FINAL : public Task { 93 | public: 94 | FileDecompressTask(const Context& ctx, std::vector*>& listeners); 95 | 96 | ~FileDecompressTask(); 97 | 98 | T run(); 99 | 100 | void dispose(); 101 | 102 | private: 103 | Context _ctx; 104 | OutputStream* _os; 105 | CompressedInputStream* _cis; 106 | std::vector*> _listeners; 107 | }; 108 | 109 | typedef FileDecompressTask FDTask; 110 | 111 | class BlockDecompressor { 112 | friend class FileDecompressTask; 113 | 114 | public: 115 | BlockDecompressor(const Context& ctx); 116 | 117 | ~BlockDecompressor(); 118 | 119 | int decompress(uint64& read); 120 | 121 | bool addListener(Listener& bl); 122 | 123 | bool removeListener(Listener& bl); 124 | 125 | void dispose() const {}; 126 | 127 | private: 128 | int _verbosity; 129 | bool _overwrite; 130 | std::string _inputName; 131 | std::string _outputName; 132 | int _blockSize; 133 | int _jobs; 134 | std::vector*> _listeners; 135 | bool _noDotFiles; 136 | bool _noLinks; 137 | Context _ctx; 138 | 139 | static void notifyListeners(std::vector*>& listeners, const Event& evt); 140 | }; 141 | } 142 | #endif 143 | 144 | -------------------------------------------------------------------------------- /src/app/InfoPrinter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include 18 | #include 19 | #include "InfoPrinter.hpp" 20 | 21 | using namespace kanzi; 22 | using namespace std; 23 | 24 | InfoPrinter::InfoPrinter(int infoLevel, InfoPrinter::Type type, OutputStream& os) 25 | : _os(os) 26 | , _type(type) 27 | , _level(infoLevel) 28 | { 29 | if (type == InfoPrinter::ENCODING) { 30 | _thresholds[0] = Event::COMPRESSION_START; 31 | _thresholds[1] = Event::BEFORE_TRANSFORM; 32 | _thresholds[2] = Event::AFTER_TRANSFORM; 33 | _thresholds[3] = Event::BEFORE_ENTROPY; 34 | _thresholds[4] = Event::AFTER_ENTROPY; 35 | _thresholds[5] = Event::COMPRESSION_END; 36 | } 37 | else { 38 | _thresholds[0] = Event::DECOMPRESSION_START; 39 | _thresholds[1] = Event::BEFORE_ENTROPY; 40 | _thresholds[2] = Event::AFTER_ENTROPY; 41 | _thresholds[3] = Event::BEFORE_TRANSFORM; 42 | _thresholds[4] = Event::AFTER_TRANSFORM; 43 | _thresholds[5] = Event::DECOMPRESSION_END; 44 | } 45 | 46 | for (int i = 0; i < 1024; i++) 47 | _map[i] = nullptr; 48 | } 49 | 50 | void InfoPrinter::processEvent(const Event& evt) 51 | { 52 | int currentBlockId = evt.getId(); 53 | 54 | if (evt.getType() == _thresholds[1]) { 55 | // Register initial block size 56 | BlockInfo* bi = new BlockInfo(); 57 | _clock12.start(); 58 | 59 | bi->_stage0Size = evt.getSize(); 60 | _map[hash(currentBlockId)] = bi; 61 | 62 | if (_level >= 5) { 63 | _os << evt.toString() << endl; 64 | } 65 | } 66 | else if (evt.getType() == _thresholds[2]) { 67 | BlockInfo* bi = _map[hash(currentBlockId)]; 68 | 69 | if (bi == nullptr) 70 | return; 71 | 72 | 
_clock12.stop(); 73 | _clock23.start(); 74 | 75 | if (_level >= 5) { 76 | stringstream ss; 77 | ss << evt.toString() << " [" << int64(_clock12.elapsed()) << " ms]"; 78 | _os << ss.str() << endl; 79 | } 80 | } 81 | else if (evt.getType() == _thresholds[3]) { 82 | BlockInfo* bi = _map[hash(currentBlockId)]; 83 | 84 | if (bi == nullptr) 85 | return; 86 | 87 | _clock23.stop(); 88 | _clock34.start(); 89 | bi->_stage1Size = evt.getSize(); 90 | 91 | if (_level >= 5) { 92 | _os << evt.toString() << endl; 93 | } 94 | } 95 | else if (evt.getType() == _thresholds[4]) { 96 | BlockInfo* bi = _map[hash(currentBlockId)]; 97 | 98 | if (bi == nullptr) 99 | return; 100 | 101 | if (_level < 3) { 102 | delete bi; 103 | _map[hash(currentBlockId)] = nullptr; 104 | return; 105 | } 106 | 107 | int64 stage2Size = evt.getSize(); 108 | _clock34.stop(); 109 | stringstream ss; 110 | 111 | if (_level >= 5) { 112 | ss << evt.toString() << endl; 113 | } 114 | 115 | // Display block info 116 | if (_level >= 4) { 117 | ss << "Block " << currentBlockId << ": " << bi->_stage0Size << " => "; 118 | ss << bi->_stage1Size << " [" << int64(_clock12.elapsed()) << " ms] => " << stage2Size; 119 | ss << " [" << int64(_clock34.elapsed()) << " ms]"; 120 | 121 | // Add compression ratio for encoding 122 | if ((_type == InfoPrinter::ENCODING) && (bi->_stage0Size != 0)) { 123 | ss << " (" << uint(double(stage2Size) * double(100) / double(bi->_stage0Size)); 124 | ss << "%)"; 125 | } 126 | 127 | // Optionally add hash 128 | if (evt.getHash() != 0) { 129 | ss << std::uppercase << std::hex << " [" << evt.getHash() << "]"; 130 | } 131 | 132 | _os << ss.str() << endl; 133 | } 134 | 135 | delete bi; 136 | _map[hash(currentBlockId)] = nullptr; 137 | } 138 | else if ((evt.getType() == Event::AFTER_HEADER_DECODING) && (_level >= 3)) { 139 | _os << evt.toString() << endl; 140 | } 141 | else if (_level >= 5) { 142 | _os << evt.toString() << endl; 143 | } 144 | } 145 | 
--------------------------------------------------------------------------------
/src/app/InfoPrinter.hpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at

                http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once
#ifndef _InfoPrinter_
#define _InfoPrinter_

#include "../Event.hpp"
#include "../Listener.hpp"
#include "../OutputStream.hpp"
#include "../util/Clock.hpp"


namespace kanzi
{

   // Sizes recorded for one block: at the first tracked event (_stage0Size)
   // and at the third tracked event (_stage1Size).
   class BlockInfo {
   public:
       int64 _stage0Size;
       int64 _stage1Size;
   };

   // An implementation of Listener to display block information (verbose option
   // of the BlockCompressor/BlockDecompressor)
   // NOTE(review): the template argument of Listener appears to have been
   // stripped from this listing -- confirm against the repository.
   class InfoPrinter : public Listener {
   public:
       enum Type {
           ENCODING,
           DECODING
       };

       InfoPrinter(int infoLevel, InfoPrinter::Type type, OutputStream& os);

       // Frees every BlockInfo still referenced by the table.
       ~InfoPrinter() {
           for (int i = 0; i < 1024; i++) {
               if (_map[i] != nullptr)
                   delete _map[i];
           }
       }

       void processEvent(const Event& evt);

   private:
       OutputStream& _os;           // destination of the printed text
       BlockInfo* _map[1024];       // owned records, indexed by hash(block id)
       Event::Type _thresholds[6];  // event types that drive the block tracking
       InfoPrinter::Type _type;
       int _level;
       Clock _clock12;              // started at threshold 1, stopped at threshold 2
       Clock _clock23;              // started at threshold 2, stopped at threshold 3
       Clock _clock34;              // started at threshold 3, stopped at threshold 4

       // NOTE(review): the implicit copy constructor would duplicate the owning
       // pointers held in _map (double delete); instances should not be copied.

       // Multiplicative hash reduced to 10 bits: a slot index in [0..1023].
       static uint hash(uint id) { return (id * 0x1E35A7BD) & 0x03FF; }
   };
}
#endif


-------------------------------------------------------------------------------- /src/bitstream/DebugInputBitStream.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include 18 | #include "../bitstream/DebugInputBitStream.hpp" 19 | 20 | using namespace kanzi; 21 | using namespace std; 22 | 23 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs) : _delegate(ibs), _out(cout), _width(80) 24 | { 25 | _idx = 0; 26 | _mark = false; 27 | _show = false; 28 | _hexa = false; 29 | _current = byte(0); 30 | } 31 | 32 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs, ostream& os) : _delegate(ibs), _out(os), _width(80) 33 | { 34 | _idx = 0; 35 | _mark = false; 36 | _show = false; 37 | _hexa = false; 38 | _current = byte(0); 39 | } 40 | 41 | DebugInputBitStream::DebugInputBitStream(InputBitStream& ibs, ostream& os, int width) : _delegate(ibs), _out(os) 42 | { 43 | if ((width != -1) && (width < 8)) 44 | width = 8; 45 | 46 | if (width != -1) 47 | width &= 0xFFFFFFF8; 48 | 49 | _width = width; 50 | _idx = 0; 51 | _mark = false; 52 | _show = false; 53 | _hexa = false; 54 | _current = byte(0); 55 | } 56 | 57 | DebugInputBitStream::~DebugInputBitStream() 58 | { 59 | _close(); 60 | } 61 | 62 | // Returns 1 or 0 63 | int DebugInputBitStream::readBit() 64 | { 65 | int res = _delegate.readBit(); 
66 | _current <<= 1; 67 | _current |= byte(res); 68 | _out << ((res & 1) == 1 ? "1" : "0"); 69 | _idx++; 70 | 71 | if (_mark == true) 72 | _out << "r"; 73 | 74 | if ((_width != -1) && ((_idx - 1) % _width == _width - 1)) { 75 | if (showByte()) 76 | printByte(_current); 77 | 78 | _out << endl; 79 | _idx = 0; 80 | } 81 | else if ((_idx & 7) == 0) { 82 | if (showByte()) 83 | printByte(_current); 84 | else 85 | _out << " "; 86 | } 87 | 88 | return res; 89 | } 90 | 91 | uint64 DebugInputBitStream::readBits(uint count) 92 | { 93 | uint64 res = _delegate.readBits(count); 94 | 95 | for (uint i = 1; i <= count; i++) { 96 | int bit = (res >> (count - i)) & 1; 97 | _idx++; 98 | _current <<= 1; 99 | _current |= byte(bit); 100 | _out << ((bit == 1) ? "1" : "0"); 101 | 102 | if ((_mark == true) && (i == count)) 103 | _out << "r"; 104 | 105 | if ((_width != -1) && (_idx % _width == 0)) { 106 | if (showByte()) 107 | printByte(_current); 108 | 109 | _out << endl; 110 | _idx = 0; 111 | } 112 | else if ((_idx & 7) == 0) { 113 | if (showByte()) 114 | printByte(_current); 115 | else 116 | _out << " "; 117 | } 118 | } 119 | 120 | return res; 121 | } 122 | 123 | uint DebugInputBitStream::readBits(byte bits[], uint count) 124 | { 125 | count = _delegate.readBits(bits, count); 126 | 127 | for (uint i = 0; i < (count >> 3); i++) { 128 | for (int j = 7; j >= 0; j--) { 129 | int bit = int(bits[i] >> j) & 1; 130 | _idx++; 131 | _current <<= 1; 132 | _current |= byte(bit); 133 | _out << ((bit == 1) ? 
"1" : "0"); 134 | 135 | if ((_mark == true) && (j == int(count))) 136 | _out << "r"; 137 | 138 | if ((_width != -1) && (_idx % _width == 0)) { 139 | if (showByte()) 140 | printByte(_current); 141 | 142 | _out << endl; 143 | _idx = 0; 144 | } 145 | else if ((_idx & 7) == 0) { 146 | if (showByte()) 147 | printByte(_current); 148 | else 149 | _out << " "; 150 | } 151 | } 152 | } 153 | 154 | return count; 155 | } 156 | 157 | void DebugInputBitStream::printByte(byte b) 158 | { 159 | int val = int(b); 160 | 161 | if (_hexa == true) { 162 | _out << hex << " [0x"; 163 | _out << ((val < 16) ? "0" : ""); 164 | _out << val << "] "; 165 | _out << dec; 166 | return; 167 | } 168 | 169 | _out << " ["; 170 | 171 | if (val < 10) 172 | _out << "00"; 173 | else if (val < 100) 174 | _out << "0"; 175 | 176 | _out << val << "] "; 177 | } 178 | 179 | 180 | -------------------------------------------------------------------------------- /src/bitstream/DebugInputBitStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/

#pragma once
#ifndef _DebugInputBitStream_
#define _DebugInputBitStream_

#include "../InputBitStream.hpp"
#include "../OutputStream.hpp"


namespace kanzi {

   // Input bitstream wrapper: every read is forwarded to a delegate and the
   // bits obtained are echoed as '0'/'1' characters to an output stream.
   class DebugInputBitStream FINAL : public InputBitStream
   {
   private:
       InputBitStream& _delegate; // stream that performs the actual reads
       OutputStream& _out;        // where the bits are echoed
       int _width;                // bits per output line, -1 for no line break
       int _idx;                  // bits echoed on the current output line
       bool _mark;                // append "r" after each read when true
       bool _hexa;                // byte values printed in hexadecimal when true
       bool _show;                // print the byte value after each 8 bits when true
       byte _current;             // last 8 bits echoed

       void printByte(byte val);

       void _close() { _delegate.close(); }

   public:
       DebugInputBitStream(InputBitStream& ibs);

       DebugInputBitStream(InputBitStream& ibs, OutputStream& os);

       DebugInputBitStream(InputBitStream& ibs, OutputStream& os, int width);

       // Closes the delegate.
       ~DebugInputBitStream();

       // Returns 1 or 0
       int readBit();

       uint64 readBits(uint length);

       uint readBits(byte bits[], uint length);

       // Number of bits read
       uint64 read() const { return _delegate.read(); }

       // Return false when the bitstream is closed or the End-Of-Stream has been reached
       bool hasMoreToRead() { return _delegate.hasMoreToRead(); }

       void close() { _close(); }

       void showByte(bool show) { _show = show; }

       void setHexa(bool hexa) { _hexa = hexa; }

       bool hexa() const { return _hexa; }

       bool showByte() const { return _show; }

       void setMark(bool mark) { _mark = mark; }

       bool mark() const { return _mark; }
   };
}
#endif


--------------------------------------------------------------------------------
/src/bitstream/DebugOutputBitStream.cpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include "DebugOutputBitStream.hpp" 18 | 19 | using namespace kanzi; 20 | using namespace std; 21 | 22 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs) : _delegate(obs), _out(cout), _width(80) 23 | { 24 | _mark = false; 25 | _hexa = false; 26 | _show = false; 27 | _current = byte(0); 28 | _idx = 0; 29 | } 30 | 31 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs, OutputStream& os) : _delegate(obs), _out(os), _width(80) 32 | { 33 | _mark = false; 34 | _hexa = false; 35 | _show = false; 36 | _current = byte(0); 37 | _idx = 0; 38 | } 39 | 40 | DebugOutputBitStream::DebugOutputBitStream(OutputBitStream& obs, OutputStream& os, int width) : _delegate(obs), _out(os) 41 | { 42 | if ((width != -1) && (width < 8)) 43 | width = 8; 44 | 45 | if (width != -1) 46 | width &= 0xFFFFFFF8; 47 | 48 | _width = width; 49 | _mark = false; 50 | _hexa = false; 51 | _show = false; 52 | _current = byte(0); 53 | _idx = 0; 54 | } 55 | 56 | DebugOutputBitStream::~DebugOutputBitStream() 57 | { 58 | _close(); 59 | } 60 | 61 | void DebugOutputBitStream::writeBit(int bit) 62 | { 63 | bit &= 1; 64 | _out << ((bit == 1) ? 
"1" : "0"); 65 | _current <<= 1; 66 | _current |= byte(bit); 67 | _idx++; 68 | 69 | if (_mark == true) 70 | _out << "w"; 71 | 72 | if ((_width != -1) && ((_idx - 1) % _width == _width - 1)) { 73 | if (showByte()) 74 | printByte(_current); 75 | 76 | _out << endl; 77 | _idx = 0; 78 | } 79 | else if ((_idx & 7) == 0) { 80 | if (showByte()) 81 | printByte(_current); 82 | else 83 | _out << " "; 84 | } 85 | 86 | _delegate.writeBit(bit); 87 | } 88 | 89 | uint DebugOutputBitStream::writeBits(uint64 bits, uint count) 90 | { 91 | uint res = _delegate.writeBits(bits, count); 92 | 93 | for (uint i = 1; i <= res; i++) { 94 | uint64 bit = (bits >> (res - i)) & 1; 95 | _current <<= 1; 96 | _current |= byte(bit); 97 | _idx++; 98 | _out << ((bit == 1) ? "1" : "0"); 99 | 100 | if ((_mark == true) && (i == res)) 101 | _out << "w"; 102 | 103 | if ((_width != -1) && (_idx % _width == 0)) { 104 | if (showByte()) 105 | printByte(_current); 106 | 107 | _out << endl; 108 | _idx = 0; 109 | } 110 | else if ((_idx & 7) == 0) { 111 | if (showByte()) 112 | printByte(_current); 113 | else 114 | _out << " "; 115 | } 116 | } 117 | 118 | return res; 119 | } 120 | 121 | uint DebugOutputBitStream::writeBits(const byte bits[], uint count) 122 | { 123 | int res = _delegate.writeBits(bits, count); 124 | const int end = int(count >> 3); 125 | 126 | for (int i = 0; i < end; i++) { 127 | for (int j = 7; j >=0 ; j--) { 128 | uint64 bit = uint64(bits[i] >> j) & 1; 129 | _current <<= 1; 130 | _current |= byte(bit); 131 | _idx++; 132 | _out << ((bit == 1) ? 
"1" : "0"); 133 | 134 | if ((_mark == true) && (i == res)) 135 | _out << "w"; 136 | 137 | if ((_width != -1) && (_idx % _width == 0)) { 138 | if (showByte()) 139 | printByte(_current); 140 | 141 | _out << endl; 142 | _idx = 0; 143 | } 144 | else if ((_idx & 7) == 0) { 145 | if (showByte()) 146 | printByte(_current); 147 | else 148 | _out << " "; 149 | } 150 | } 151 | } 152 | 153 | return res; 154 | } 155 | 156 | void DebugOutputBitStream::printByte(byte b) 157 | { 158 | int val = int(b); 159 | 160 | if (_hexa == true) { 161 | _out << hex << " [0x"; 162 | _out << ((val < 16) ? "0" : ""); 163 | _out << val << "] "; 164 | _out << dec; 165 | return; 166 | } 167 | 168 | _out << " ["; 169 | 170 | if (val < 10) 171 | _out << "00"; 172 | else if (val < 100) 173 | _out << "0"; 174 | 175 | _out << val << "] "; 176 | } 177 | 178 | 179 | -------------------------------------------------------------------------------- /src/bitstream/DebugOutputBitStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/

#pragma once
#ifndef _DebugOutputBitStream_
#define _DebugOutputBitStream_

#include "../OutputBitStream.hpp"
#include "../OutputStream.hpp"

namespace kanzi
{

   // Output bitstream wrapper: every write is forwarded to a delegate and the
   // bits are echoed as '0'/'1' characters to an output stream.
   class DebugOutputBitStream FINAL : public OutputBitStream
   {
   private:
       OutputBitStream& _delegate; // stream that performs the actual writes
       OutputStream& _out;         // where the bits are echoed
       int _width;                 // bits per output line, -1 for no line break
       int _idx;                   // bits echoed on the current output line
       bool _mark;                 // append "w" after each write when true
       bool _show;                 // print the byte value after each 8 bits when true
       bool _hexa;                 // byte values printed in hexadecimal when true
       byte _current;              // last 8 bits echoed

       void printByte(byte val);

       void _close() { _delegate.close(); }

   public:
       DebugOutputBitStream(OutputBitStream& obs);

       DebugOutputBitStream(OutputBitStream& obs, OutputStream& os);

       DebugOutputBitStream(OutputBitStream& obs, OutputStream& os, int width);

       // Closes the delegate.
       ~DebugOutputBitStream();

       void writeBit(int bit);

       uint writeBits(uint64 bits, uint length);

       uint writeBits(const byte bits[], uint length);

       // Return number of bits written so far
       uint64 written() const { return _delegate.written(); }

       void close() { _close(); }

       void showByte(bool show) { _show = show; }

       void setHexa(bool hexa) { _hexa = hexa; }

       bool hexa() const { return _hexa; }

       bool showByte() const { return _show; }

       void setMark(bool mark) { _mark = mark; }

       bool mark() const { return _mark; }
   };

}
#endif


--------------------------------------------------------------------------------
/src/bitstream/DefaultOutputBitStream.hpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _DefaultOutputBitStream_ 18 | #define _DefaultOutputBitStream_ 19 | 20 | #include "../BitStreamException.hpp" 21 | #include "../OutputStream.hpp" 22 | #include "../OutputBitStream.hpp" 23 | #include "../Memory.hpp" 24 | #include "../Seekable.hpp" 25 | #include "../util/strings.hpp" 26 | 27 | 28 | namespace kanzi 29 | { 30 | 31 | #if defined(_MSC_VER) && _MSC_VER <= 1500 32 | class DefaultOutputBitStream FINAL : public OutputBitStream 33 | #else 34 | class DefaultOutputBitStream FINAL : public OutputBitStream, public Seekable 35 | #endif 36 | { 37 | private: 38 | OutputStream& _os; 39 | byte* _buffer; 40 | bool _closed; 41 | uint _bufferSize; 42 | uint _position; // index of current byte in buffer 43 | uint _availBits; // bits not consumed in _current 44 | int64 _written; 45 | uint64 _current; // cached bits 46 | 47 | void pushCurrent(); 48 | 49 | void flush(); 50 | 51 | void _close(); 52 | 53 | public: 54 | DefaultOutputBitStream(OutputStream& os, uint bufferSize=65536); 55 | 56 | ~DefaultOutputBitStream(); 57 | 58 | void writeBit(int bit); 59 | 60 | uint writeBits(uint64 bits, uint length); 61 | 62 | uint writeBits(const byte bits[], uint length); 63 | 64 | void close() { _close(); } 65 | 66 | #if !defined(_MSC_VER) || _MSC_VER > 1500 67 | int64 tell(); 68 | 69 | bool seek(int64 pos); 70 | #endif 71 | 72 | // Return number of bits written so far 73 | uint64 written() const 74 | { 75 | // Number of bits flushed + bytes written in memory + bits written in memory 76 | return uint64(_written + 
(int64(_position) << 3) + int64(64 - _availBits)); 77 | } 78 | 79 | bool isClosed() const { return _closed; } 80 | }; 81 | 82 | // Write least significant bit of the input integer. Trigger exception if stream is closed 83 | inline void DefaultOutputBitStream::writeBit(int bit) 84 | { 85 | if (_availBits <= 1) { // _availBits = 0 if stream is closed => force pushCurrent() 86 | _current |= (uint64(bit) & 1); 87 | pushCurrent(); 88 | } 89 | else { 90 | _availBits--; 91 | _current |= (uint64(bit & 1) << _availBits); 92 | } 93 | } 94 | 95 | // Write 'count' (in [1..64]) bits. Trigger exception if stream is closed 96 | inline uint DefaultOutputBitStream::writeBits(uint64 value, uint count) 97 | { 98 | if (count > 64) 99 | throw BitStreamException("Invalid bit count: " + TOSTR(count) + " (must be in [1..64])"); 100 | 101 | _current |= ((value << (64 - count)) >> (64 - _availBits)); 102 | 103 | if (count >= _availBits) { 104 | // Not enough spots available in 'current' 105 | const uint remaining = count - _availBits; 106 | pushCurrent(); 107 | 108 | if (remaining != 0) { 109 | _availBits -= remaining; 110 | _current = value << _availBits; 111 | } 112 | } 113 | else { 114 | _availBits -= count; 115 | } 116 | 117 | return count; 118 | } 119 | 120 | // Push 64 bits of current value into buffer. 121 | inline void DefaultOutputBitStream::pushCurrent() 122 | { 123 | BigEndian::writeLong64(&_buffer[_position], _current); 124 | _availBits = 64; 125 | _current = 0; 126 | _position += 8; 127 | 128 | if (_position >= _bufferSize - 8) 129 | flush(); 130 | } 131 | 132 | #if !defined(_MSC_VER) || _MSC_VER > 1500 133 | inline int64 DefaultOutputBitStream::tell() 134 | { 135 | if (isClosed() == true) 136 | return -1; 137 | 138 | _os.clear(); 139 | const int64 res = int64(_os.tellp()); 140 | return (res < 0) ? 
-1 : 8 * res + (int64(_position) << 3) + int64(64 - _availBits); 141 | } 142 | 143 | // Only support a new position at the byte boundary (pos & 7 == 0) 144 | inline bool DefaultOutputBitStream::seek(int64 pos) 145 | { 146 | if (isClosed() == true) 147 | return false; 148 | 149 | if ((pos < 0) || ((pos & 7) != 0)) 150 | return false; 151 | 152 | // Flush buffer 153 | // Round down to byte alignment 154 | const uint a = _availBits & -8; 155 | 156 | for (uint i = 56; i >= a; i -= 8) { 157 | _buffer[_position++] = byte(_current >> i); 158 | 159 | if (_position >= _bufferSize) 160 | flush(); 161 | } 162 | 163 | _availBits = 64; 164 | flush(); 165 | _os.clear(); 166 | _os.seekp(std::streampos(pos >> 3)); 167 | return true; 168 | } 169 | #endif 170 | 171 | } 172 | #endif 173 | 174 | -------------------------------------------------------------------------------- /src/configure: -------------------------------------------------------------------------------- 1 | # Fake config file 2 | -------------------------------------------------------------------------------- /src/entropy/ANSRangeDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
*/

#pragma once
#ifndef _ANSRangeDecoder_
#define _ANSRangeDecoder_

#include "../EntropyDecoder.hpp"
#include "../types.hpp"


// Implementation of an Asymmetric Numeral System decoder.
// See "Asymmetric Numeral System" by Jarek Duda at http://arxiv.org/abs/0902.0271
// Some code has been ported from https://github.com/rygorous/ryg_rans
// For an alternate C implementation example, see https://github.com/Cyan4973/FiniteStateEntropy

namespace kanzi
{

   // Per-symbol decoding data: cumulative frequency and frequency, both
   // stored on 16 bits.
   struct ANSDecSymbol
   {
      void reset(int cumFreq, int freq, int logRange);

      uint16 _cumFreq;
      uint16 _freq;
   };


   class ANSRangeDecoder : public EntropyDecoder {
   public:
       static const uint ANS_TOP;

       ANSRangeDecoder(InputBitStream& bitstream,
                      int order = 0,
                      int chunkSize = DEFAULT_ANS0_CHUNK_SIZE);

       ~ANSRangeDecoder();

       int decode(byte block[], uint blkptr, uint len);

       InputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }


   private:
       static const int DEFAULT_ANS0_CHUNK_SIZE;
       static const int DEFAULT_LOG_RANGE;
       static const int MIN_CHUNK_SIZE;
       static const int MAX_CHUNK_SIZE;

       InputBitStream& _bitstream;
       uint* _freqs;
       uint8* _f2s;
       int _f2sSize;
       ANSDecSymbol* _symbols;
       byte* _buffer;
       uint _bufferSize;
       uint _chunkSize;
       uint _order;
       uint _logRange;

       bool decodeChunk(byte block[], uint count);

       uint decodeSymbol(byte*& p, uint& st, const ANSDecSymbol& sym, const int mask) const;

       int decodeHeader(uint frequencies[], uint alphabet[]);

       // No-op: nothing is released here.
       void _dispose() const {}
   };


   // Stores the symbol statistics. A frequency of (1 << logRange) or more
   // is clamped to (1 << logRange) - 1.
   inline void ANSDecSymbol::reset(int cumFreq, int freq, int logRange)
   {
       _cumFreq = uint16(cumFreq);
       _freq = (freq >= (1 << logRange)) ? uint16((1 << logRange) - 1) : uint16(freq); // Mirror encoder
   }


   // Updates the state 'st' for symbol 'sym' and renormalizes it.
   // 'p' is advanced by 2 bytes when a refill happens. Returns the new state.
   inline uint ANSRangeDecoder::decodeSymbol(byte*& p, uint& st, const ANSDecSymbol& sym, const int mask) const
   {
       // Compute next ANS state
       // D(x) = (s, q_s (x/M) + mod(x,M) - b_s) where s is such b_s <= x mod M < b_{s+1}
       st = uint(sym._freq) * (st >> _logRange) + (st & mask) - uint(sym._cumFreq);

       // Normalize
       // Branchless: x is -1 (all bits set) when the state dropped below ANS_TOP,
       // 0 otherwise. When x is -1 the state is shifted left by 16 and the next
       // two bytes (p[0] high, p[1] low) are merged in; when x is 0 both the
       // shift and the merged value are 0.
       const int x = (st < ANS_TOP) ? -1 : 0;
       st = (st << (x & 16)) | (x & ((uint(p[0]) << 8) | uint(p[1])));
       p -= (x + x); // p += 2 when x == -1
       return st;
   }

}
#endif


--------------------------------------------------------------------------------
/src/entropy/ANSRangeEncoder.hpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at

                http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once
#ifndef _ANSRangeEncoder_
#define _ANSRangeEncoder_

#include "../EntropyEncoder.hpp"


// Implementation of an Asymmetric Numeral System encoder.
// See "Asymmetric Numeral System" by Jarek Duda at http://arxiv.org/abs/0902.0271
// Some code has been ported from https://github.com/rygorous/ryg_rans
// For an alternate C implementation example, see https://github.com/Cyan4973/FiniteStateEntropy

namespace kanzi
{

   // Per-symbol encoding data, precomputed by reset() so that encodeSymbol()
   // needs no division.
   struct ANSEncSymbol
   {
      void reset(int cumFreq, int freq, uint logRange);

      int _xMax; // (Exclusive) upper bound of pre-normalization interval
      int _bias; // Bias
      int _cmplFreq; // Complement of frequency: (1 << scale_bits) - freq
      int _invShift; // Reciprocal shift
      uint64 _invFreq; // Fixed-point reciprocal frequency
   };


   class ANSRangeEncoder : public EntropyEncoder
   {
   public:
       static const int ANS_TOP;

       ANSRangeEncoder(OutputBitStream& bitstream,
                      int order = 0,
                      int chunkSize = DEFAULT_ANS0_CHUNK_SIZE,
                      int logRange = DEFAULT_LOG_RANGE);

       ~ANSRangeEncoder();

       int updateFrequencies(uint frequencies[], uint lr);

       int encode(const byte block[], uint blkptr, uint len);

       OutputBitStream& getBitStream() const { return _bitstream; }

       void dispose() { _dispose(); }


   private:
       static const int DEFAULT_ANS0_CHUNK_SIZE;
       static const int DEFAULT_LOG_RANGE;
       static const int MIN_CHUNK_SIZE;
       static const int MAX_CHUNK_SIZE;

       ANSEncSymbol* _symbols;
       uint* _freqs;
       byte* _buffer;
       uint _bufferSize;
       OutputBitStream& _bitstream;
       uint _chunkSize;
       uint _logRange;
       uint _order;


       int rebuildStatistics(const byte block[], int end, uint lr);

       void encodeChunk(const byte block[], int end);

       int encodeSymbol(byte*& p, int& st, const ANSEncSymbol& sym) const;

       bool encodeHeader(int alphabetSize, const uint alphabet[], const uint frequencies[], uint lr) const;

       // No-op: nothing is released here.
       void _dispose() const {}
   };


   // Precomputes the encoding data for a symbol with the given cumulative
   // frequency and frequency. A frequency of (1 << logRange) or more is
   // clamped to (1 << logRange) - 1.
   inline void ANSEncSymbol::reset(int cumFreq, int freq, uint logRange)
   {
       // Make sure xMax is a positive int32. Compatibility with Java implementation
       if (freq >= 1 << logRange)
           freq = (1 << logRange) - 1;

       _xMax = ((ANSRangeEncoder::ANS_TOP >> logRange) << 16) * freq;
       _cmplFreq = (1 << logRange) - freq;

       if (freq < 2) {
           // Frequencies 0 and 1 use a fixed reciprocal and fold the
           // (1 << logRange) - 1 term into the bias
           _invFreq = uint64(0xFFFFFFFF);
           _invShift = 32;
           _bias = cumFreq + (1 << logRange) - 1;
       }
       else {
           int shift = 0;

           // Smallest shift such that (1 << shift) >= freq
           while (freq > (1 << shift))
               shift++;

           // Alverson, "Integer Division using reciprocals"
           _invFreq = (((uint64(1) << (shift + 31)) + freq - 1) / freq) & uint64(0xFFFFFFFF);
           _invShift = 32 + shift - 1;
           _bias = cumFreq;
       }
   }

   // Renormalizes the state 'st' if needed, then returns the next state for
   // symbol 'sym'. 'p' moves back by 2 bytes when renormalization happens.
   inline int ANSRangeEncoder::encodeSymbol(byte*& p, int& st, const ANSEncSymbol& sym) const
   {
       // Branchless: x is 1 when the state reached the symbol's upper bound.
       // The two low bytes of the state are always stored through p, but p only
       // moves (one byte after each store) and the state is only shifted right
       // by 16 when x is 1; when x is 0 both stores hit the same byte and
       // nothing is consumed.
       const int x = (st >= sym._xMax) ? 1 : 0;
       *p = byte(st);
       p -= x;
       *p = byte(st >> 8);
       p -= x;
       st >>= (-x & 16);

       // Compute next ANS state
       // C(s,x) = M floor(x/q_s) + mod(x,q_s) + b_s where b_s = q_0 + ... + q_{s-1}
       // st = ((st / freq) << lr) + (st % freq) + cumFreq;
       return st + sym._bias + int((st * sym._invFreq) >> sym._invShift) * sym._cmplFreq;
   }
}
#endif


--------------------------------------------------------------------------------
/src/entropy/AdaptiveProbMap.hpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at

                http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once
#ifndef _AdaptiveProbMap_
#define _AdaptiveProbMap_

#include "../Global.hpp"

// APM maps a probability and a context into a new probability
// that the next bit will be 1. After each guess, it updates
// its state to improve future guesses.

// NOTE(review): the template parameter lists (which declare RATE and FAST and
// qualify the out-of-class member definitions) appear to have been stripped
// from this listing. They are kept as found; restore them from the repository
// before compiling.

namespace kanzi {
   template
   class LinearAdaptiveProbMap {
   public:
#if __cplusplus >= 202002L // simple-template-id in ctors and dtors rejected in C++20
       LinearAdaptiveProbMap(int n);

       ~LinearAdaptiveProbMap() { delete[] _data; }
#else
       LinearAdaptiveProbMap(int n);

       ~LinearAdaptiveProbMap() { delete[] _data; }
#endif

       int get(int bit, int pr, int ctx);

   private:
       int _index; // last p, context
       uint16* _data; // [NbCtx][65]: p, context -> p
   };

   // Allocates 65 entries per context (65 entries in total when n is 0).
   // The first row is a linear ramp and is copied to every other context.
   template
   inline LinearAdaptiveProbMap::LinearAdaptiveProbMap(int n)
   {
       const int size = (n == 0) ? 65 : n * 65;
       _data = new uint16[size];
       _index = 0;

       for (int j = 0; j <= 64; j++) {
           _data[j] = uint16(j << 6) << 4;
       }

       for (int i = 1; i < n; i++) {
           memcpy(&_data[i * 65], &_data[0], 65 * sizeof(uint16));
       }
   }

   // Return improved prediction given current bit, prediction and context
   template
   inline int LinearAdaptiveProbMap::get(int bit, int pr, int ctx)
   {
       // Update probability based on error and learning rate
       // g is 65528 when bit is 1 and 0 when bit is 0: the two entries selected
       // by the previous call move towards it
       const int g = -bit & 65528;
       _data[_index] += (((g - int(_data[_index])) >> RATE) + bit);
       _data[_index + 1] += (((g - int(_data[_index + 1])) >> RATE) + bit);

       // Find index: 65*ctx + quantized prediction in [0..64]
       _index = (pr >> 6) + 65 * ctx;

       // Return interpolated probability
       const uint16 w = uint16(pr & 127);
       return int(_data[_index] * (128 - w) + _data[_index + 1] * w) >> 11;
   }



   template
   class LogisticAdaptiveProbMap {
   public:
#if __cplusplus >= 202002L // simple-template-id in ctors and dtors rejected in C++20
       LogisticAdaptiveProbMap(int n);

       ~LogisticAdaptiveProbMap() { delete[] _data; }
#else
       LogisticAdaptiveProbMap(int n);

       ~LogisticAdaptiveProbMap() { delete[] _data; }
#endif

       int get(int bit, int pr, int ctx);

   private:
       int _index; // last p, context
       uint16* _data; // [NbCtx][33]: p, context -> p ([NbCtx][32] when FAST)
   };

   // Allocates 33 entries per context (32 when FAST), or a single row when
   // n is 0. The first row holds squash() samples and is copied to every
   // other context.
   template
   inline LogisticAdaptiveProbMap::LogisticAdaptiveProbMap(int n)
   {
       const int mult = (FAST == false) ? 33 : 32;
       const int size = (n == 0) ? mult : n * mult;
       _data = new uint16[size];
       _index = 0;

       for (int j = 0; j < mult; j++)
           _data[j] = uint16(Global::squash((j - 16) * 128) << 4);

       for (int i = 1; i < n; i++)
           memcpy(&_data[i * mult], &_data[0], mult * sizeof(uint16));
   }

   // Return improved prediction given current bit, prediction and context
   template
   inline int LogisticAdaptiveProbMap::get(int bit, int pr, int ctx)
   {
       // Update probability based on error and learning rate
       const int g = -bit & 65528;
       _data[_index] += (((g - int(_data[_index])) >> RATE) + bit);

       if (FAST == false) {
           // Two entries are maintained and interpolated
           _data[_index + 1] += (((g - int(_data[_index + 1])) >> RATE) + bit);
           pr = Global::stretch(pr);
           _index = ((pr + 2048) >> 7) + 33 * ctx;

           // Return interpolated probability
           const uint16 w = uint16(pr & 127);
           return int(_data[_index] * (128 - w) + _data[_index + 1] * w) >> 11;
       } else {
           // Single entry, no interpolation
           _index = ((Global::stretch(pr) + 2048) >> 7) + 32 * ctx;
           return int(_data[_index]) >> 4;
       }
   }

}
#endif


--------------------------------------------------------------------------------
/src/entropy/BinaryEntropyDecoder.cpp:
--------------------------------------------------------------------------------
/*
Copyright 2011-2024 Frederic Langlet
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may obtain a copy of the License at

                http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
14 | */ 15 | 16 | #include 17 | #include 18 | #include "BinaryEntropyDecoder.hpp" 19 | #include "../Memory.hpp" 20 | #include "EntropyUtils.hpp" 21 | 22 | using namespace kanzi; 23 | using namespace std; 24 | 25 | const uint64 BinaryEntropyDecoder::TOP = 0x00FFFFFFFFFFFFFF; 26 | const uint64 BinaryEntropyDecoder::MASK_0_56 = 0x00FFFFFFFFFFFFFF; 27 | const uint64 BinaryEntropyDecoder::MASK_0_32 = 0x00000000FFFFFFFF; 28 | const int BinaryEntropyDecoder::MAX_BLOCK_SIZE = 1 << 30; 29 | const int BinaryEntropyDecoder::MAX_CHUNK_SIZE = 1 << 26; 30 | 31 | 32 | BinaryEntropyDecoder::BinaryEntropyDecoder(InputBitStream& bitstream, Predictor* predictor, bool deallocate) 33 | : _predictor(predictor) 34 | , _bitstream(bitstream) 35 | , _deallocate(deallocate) 36 | , _sba(new byte[0], 0) 37 | { 38 | if (predictor == nullptr) 39 | throw invalid_argument("Invalid null predictor parameter"); 40 | 41 | _low = 0; 42 | _high = TOP; 43 | _current = 0; 44 | } 45 | 46 | BinaryEntropyDecoder::~BinaryEntropyDecoder() 47 | { 48 | _dispose(); 49 | delete[] _sba._array; 50 | 51 | if (_deallocate) 52 | delete _predictor; 53 | } 54 | 55 | int BinaryEntropyDecoder::decode(byte block[], uint blkptr, uint count) 56 | { 57 | if (count >= MAX_BLOCK_SIZE) 58 | throw invalid_argument("Invalid block size parameter (max is 1<<30)"); 59 | 60 | uint startChunk = blkptr; 61 | const uint end = blkptr + count; 62 | uint length = max(count, 64u); 63 | 64 | if (length >= MAX_CHUNK_SIZE) { 65 | // If the block is big (>=64MB), split the decoding to avoid allocating 66 | // too much memory. 67 | length = (length / 8 < MAX_CHUNK_SIZE) ? 
count >> 3 : count >> 4; 68 | } 69 | 70 | const uint bufSize = length + (length >> 3); 71 | 72 | if (_sba._length < int(bufSize)) { 73 | delete[] _sba._array; 74 | _sba._length = int(bufSize); 75 | _sba._array = new byte[_sba._length]; 76 | } 77 | 78 | // Split block into chunks, read bit array from bitstream and decode chunk 79 | while (startChunk < end) { 80 | const uint chunkSize = min(length, end - startChunk); 81 | const uint szBytes = uint(EntropyUtils::readVarInt(_bitstream)); 82 | 83 | if (szBytes > bufSize) 84 | return 0; 85 | 86 | _current = _bitstream.readBits(56); 87 | 88 | if (szBytes != 0) 89 | _bitstream.readBits(&_sba._array[0], 8 * szBytes); 90 | 91 | _sba._index = 0; 92 | const uint endChunk = startChunk + chunkSize; 93 | 94 | for (uint i = startChunk; i < endChunk; i++) { 95 | block[i] = byte((decodeBit(_predictor->get()) << 7) 96 | | (decodeBit(_predictor->get()) << 6) 97 | | (decodeBit(_predictor->get()) << 5) 98 | | (decodeBit(_predictor->get()) << 4) 99 | | (decodeBit(_predictor->get()) << 3) 100 | | (decodeBit(_predictor->get()) << 2) 101 | | (decodeBit(_predictor->get()) << 1) 102 | | decodeBit(_predictor->get())); 103 | } 104 | 105 | startChunk = endChunk; 106 | } 107 | 108 | return count; 109 | } 110 | 111 | 112 | // no inline 113 | void BinaryEntropyDecoder::read() 114 | { 115 | _low = (_low << 32) & MASK_0_56; 116 | _high = ((_high << 32) | MASK_0_32) & MASK_0_56; 117 | const uint64 val = BigEndian::readInt32(&_sba._array[_sba._index]) & MASK_0_32; 118 | _current = ((_current << 32) | val) & MASK_0_56; 119 | _sba._index += 4; 120 | } 121 | 122 | // no inline 123 | byte BinaryEntropyDecoder::decodeByte() 124 | { 125 | return byte((decodeBit(_predictor->get()) << 7) 126 | | (decodeBit(_predictor->get()) << 6) 127 | | (decodeBit(_predictor->get()) << 5) 128 | | (decodeBit(_predictor->get()) << 4) 129 | | (decodeBit(_predictor->get()) << 3) 130 | | (decodeBit(_predictor->get()) << 2) 131 | | (decodeBit(_predictor->get()) << 1) 132 | | 
decodeBit(_predictor->get())); 133 | } 134 | 135 | -------------------------------------------------------------------------------- /src/entropy/BinaryEntropyDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BinaryEntropyDecoder_ 18 | #define _BinaryEntropyDecoder_ 19 | 20 | #include "../EntropyDecoder.hpp" 21 | #include "../Predictor.hpp" 22 | #include "../SliceArray.hpp" 23 | 24 | namespace kanzi 25 | { 26 | 27 | // This class is a generic implementation of a bool entropy decoder 28 | class BinaryEntropyDecoder FINAL : public EntropyDecoder 29 | { 30 | private: 31 | static const uint64 TOP; 32 | static const uint64 MASK_0_56; 33 | static const uint64 MASK_0_32; 34 | static const int MAX_BLOCK_SIZE; 35 | static const int MAX_CHUNK_SIZE; 36 | 37 | Predictor* _predictor; 38 | uint64 _low; 39 | uint64 _high; 40 | uint64 _current; 41 | InputBitStream& _bitstream; 42 | bool _deallocate; 43 | SliceArray _sba; 44 | 45 | void read(); 46 | 47 | void _dispose() const {} 48 | 49 | public: 50 | BinaryEntropyDecoder(InputBitStream& bitstream, Predictor* predictor, bool deallocate=true); 51 | 52 | ~BinaryEntropyDecoder(); 53 | 54 | int decode(byte block[], uint blkptr, uint count); 55 | 56 | InputBitStream& getBitStream() const { return _bitstream; } 57 | 58 | void dispose() { _dispose(); 
} 59 | 60 | byte decodeByte(); 61 | 62 | int decodeBit(int pred = 2048); 63 | }; 64 | 65 | 66 | inline int BinaryEntropyDecoder::decodeBit(int pred) 67 | { 68 | // Calculate interval split 69 | const uint64 split = ((((_high - _low) >> 4) * uint64(pred)) >> 8) + _low; 70 | int bit; 71 | 72 | // Update predictor 73 | if (split >= _current) { 74 | bit = 1; 75 | _high = split; 76 | _predictor->update(1); 77 | } 78 | else { 79 | bit = 0; 80 | _low = split + 1; 81 | _predictor->update(0); 82 | } 83 | 84 | // Read 32 bits from bitstream 85 | if (((_low ^ _high) >> 24) == 0) 86 | read(); 87 | 88 | return bit; 89 | } 90 | 91 | } 92 | #endif 93 | 94 | -------------------------------------------------------------------------------- /src/entropy/BinaryEntropyEncoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #include 17 | #include 18 | #include "BinaryEntropyEncoder.hpp" 19 | #include "../Memory.hpp" 20 | #include "EntropyUtils.hpp" 21 | 22 | using namespace kanzi; 23 | using namespace std; 24 | 25 | const uint64 BinaryEntropyEncoder::TOP = 0x00FFFFFFFFFFFFFF; 26 | const uint64 BinaryEntropyEncoder::MASK_0_24 = 0x0000000000FFFFFF; 27 | const uint64 BinaryEntropyEncoder::MASK_0_32 = 0x00000000FFFFFFFF; 28 | const int BinaryEntropyEncoder::MAX_BLOCK_SIZE = 1 << 30; 29 | const int BinaryEntropyEncoder::MAX_CHUNK_SIZE = 1 << 26; 30 | 31 | 32 | BinaryEntropyEncoder::BinaryEntropyEncoder(OutputBitStream& bitstream, Predictor* predictor, bool deallocate) 33 | : _predictor(predictor) 34 | , _bitstream(bitstream) 35 | , _deallocate(deallocate) 36 | , _sba(new byte[0], 0) 37 | { 38 | if (predictor == nullptr) 39 | throw invalid_argument("Invalid null predictor parameter"); 40 | 41 | _low = 0; 42 | _high = TOP; 43 | _disposed = false; 44 | } 45 | 46 | BinaryEntropyEncoder::~BinaryEntropyEncoder() 47 | { 48 | _dispose(); 49 | delete[] _sba._array; 50 | 51 | if (_deallocate) 52 | delete _predictor; 53 | } 54 | 55 | int BinaryEntropyEncoder::encode(const byte block[], uint blkptr, uint count) 56 | { 57 | if (count >= MAX_BLOCK_SIZE) 58 | throw invalid_argument("Invalid block size parameter (max is 1<<30)"); 59 | 60 | uint startChunk = blkptr; 61 | const uint end = blkptr + count; 62 | uint length = max(count, 64u); 63 | 64 | if (length >= MAX_CHUNK_SIZE) { 65 | // If the block is big (>=64MB), split the encoding to avoid allocating 66 | // too much memory. 67 | length = (length / 8 < MAX_CHUNK_SIZE) ? 
count >> 3 : count >> 4; 68 | } 69 | 70 | const uint bufSize = length + (length >> 3); 71 | 72 | if (_sba._length < int(bufSize)) { 73 | delete[] _sba._array; 74 | _sba._length = int(bufSize); 75 | _sba._array = new byte[_sba._length]; 76 | } 77 | 78 | // Split block into chunks, encode chunk and write bit array to bitstream 79 | while (startChunk < end) { 80 | const uint chunkSize = min(length, end - startChunk); 81 | const uint endChunk = startChunk + chunkSize; 82 | _sba._index = 0; 83 | 84 | for (uint i = startChunk; i < endChunk; i++) { 85 | encodeBit(int(block[i]) & 0x80, _predictor->get()); 86 | encodeBit(int(block[i]) & 0x40, _predictor->get()); 87 | encodeBit(int(block[i]) & 0x20, _predictor->get()); 88 | encodeBit(int(block[i]) & 0x10, _predictor->get()); 89 | encodeBit(int(block[i]) & 0x08, _predictor->get()); 90 | encodeBit(int(block[i]) & 0x04, _predictor->get()); 91 | encodeBit(int(block[i]) & 0x02, _predictor->get()); 92 | encodeBit(int(block[i]) & 0x01, _predictor->get()); 93 | } 94 | 95 | EntropyUtils::writeVarInt(_bitstream, uint32(_sba._index)); 96 | _bitstream.writeBits(&_sba._array[0], 8 * _sba._index); 97 | startChunk = endChunk; 98 | 99 | if (startChunk < end) 100 | _bitstream.writeBits(_low | MASK_0_24, 56); 101 | } 102 | 103 | return count; 104 | } 105 | 106 | void BinaryEntropyEncoder::_dispose() 107 | { 108 | if (_disposed == true) 109 | return; 110 | 111 | _disposed = true; 112 | _bitstream.writeBits(_low | MASK_0_24, 56); 113 | } 114 | 115 | // no inline 116 | void BinaryEntropyEncoder::flush() 117 | { 118 | BigEndian::writeInt32(&_sba._array[_sba._index], int32(_high >> 24)); 119 | _sba._index += 4; 120 | _low <<= 32; 121 | _high = (_high << 32) | MASK_0_32; 122 | } 123 | 124 | // no inline 125 | void BinaryEntropyEncoder::encodeByte(byte val) 126 | { 127 | encodeBit(int(val) & 0x80, _predictor->get()); 128 | encodeBit(int(val) & 0x40, _predictor->get()); 129 | encodeBit(int(val) & 0x20, _predictor->get()); 130 | encodeBit(int(val) & 
0x10, _predictor->get()); 131 | encodeBit(int(val) & 0x08, _predictor->get()); 132 | encodeBit(int(val) & 0x04, _predictor->get()); 133 | encodeBit(int(val) & 0x02, _predictor->get()); 134 | encodeBit(int(val) & 0x01, _predictor->get()); 135 | } 136 | 137 | -------------------------------------------------------------------------------- /src/entropy/BinaryEntropyEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _BinaryEntropyEncoder_ 18 | #define _BinaryEntropyEncoder_ 19 | 20 | #include "../EntropyEncoder.hpp" 21 | #include "../Predictor.hpp" 22 | #include "../SliceArray.hpp" 23 | 24 | namespace kanzi 25 | { 26 | 27 | // This class is a generic implementation of a bool entropy encoder 28 | class BinaryEntropyEncoder FINAL : public EntropyEncoder 29 | { 30 | private: 31 | static const uint64 TOP; 32 | static const uint64 MASK_0_24; 33 | static const uint64 MASK_0_32; 34 | static const int MAX_BLOCK_SIZE; 35 | static const int MAX_CHUNK_SIZE; 36 | 37 | Predictor* _predictor; 38 | uint64 _low; 39 | uint64 _high; 40 | OutputBitStream& _bitstream; 41 | bool _disposed; 42 | bool _deallocate; 43 | SliceArray _sba; 44 | 45 | void _dispose(); 46 | 47 | void flush(); 48 | 49 | public: 50 | BinaryEntropyEncoder(OutputBitStream& bitstream, Predictor* predictor, bool deallocate=true); 51 | 52 | ~BinaryEntropyEncoder(); 53 | 54 | int encode(const byte block[], uint blkptr, uint count); 55 | 56 | OutputBitStream& getBitStream() const { return _bitstream; } 57 | 58 | void dispose() { _dispose(); } 59 | 60 | void encodeByte(byte val); 61 | 62 | void encodeBit(int bit, int pred = 2048); 63 | }; 64 | 65 | 66 | inline void BinaryEntropyEncoder::encodeBit(int bit, int pred) 67 | { 68 | // Update fields with new interval bounds and predictor 69 | const uint64 mid = _low + ((((_high - _low) >> 4) * uint64(pred)) >> 8); 70 | (bit != 0) ? 
_high = mid : _low = mid + 1; 71 | _predictor->update(bit != 0); 72 | 73 | // Write unchanged first 32 bits to bitstream 74 | if (((_low ^ _high) >> 24) == 0) 75 | flush(); 76 | } 77 | } 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /src/entropy/CMPredictor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | 17 | #include "CMPredictor.hpp" 18 | 19 | using namespace kanzi; 20 | 21 | const int CMPredictor::FAST_RATE = 2; 22 | const int CMPredictor::MEDIUM_RATE = 4; 23 | const int CMPredictor::SLOW_RATE = 6; 24 | const int CMPredictor::PSCALE = 65536; 25 | 26 | 27 | CMPredictor::CMPredictor() 28 | { 29 | _ctx = 1; 30 | _runMask = 0; 31 | _c1 = 0; 32 | _c2 = 0; 33 | 34 | for (int i = 0; i < 256; i++) { 35 | for (int j = 0; j <= 256; j++) 36 | _counter1[i][j] = 32768; 37 | 38 | for (int j = 0; j < 16; j++) { 39 | _counter2[2 * i][j] = j << 12; 40 | _counter2[2 * i + 1][j] = j << 12; 41 | } 42 | 43 | _counter2[2 * i][16] = 65535; 44 | _counter2[2 * i + 1][16] = 65535; 45 | } 46 | 47 | _pc1 = _counter1[_ctx]; 48 | _pc2 = &_counter2[_ctx | _runMask][8]; 49 | } 50 | -------------------------------------------------------------------------------- /src/entropy/CMPredictor.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _CMPredictor_ 18 | #define _CMPredictor_ 19 | 20 | #include "../Predictor.hpp" 21 | #include "../types.hpp" 22 | 23 | namespace kanzi 24 | { 25 | 26 | class CMPredictor FINAL : public Predictor 27 | { 28 | private: 29 | static const int FAST_RATE; 30 | static const int MEDIUM_RATE; 31 | static const int SLOW_RATE; 32 | static const int PSCALE; 33 | 34 | int _c1; 35 | int _c2; 36 | int _ctx; 37 | int _runMask; 38 | int _counter1[256][257]; 39 | int _counter2[512][17]; 40 | int* _pc1; 41 | int* _pc2; 42 | 43 | public: 44 | CMPredictor(); 45 | 46 | ~CMPredictor(){} 47 | 48 | void update(int bit); 49 | 50 | int get(); 51 | }; 52 | 53 | // Update the probability model 54 | inline void CMPredictor::update(int bit) 55 | { 56 | if (bit == 0) { 57 | _pc1[256] -= (_pc1[256] >> FAST_RATE); 58 | _pc1[_c1] -= (_pc1[_c1] >> MEDIUM_RATE); 59 | _pc2[0] -= (_pc2[0]>> SLOW_RATE); 60 | _pc2[1] -= (_pc2[1]>> SLOW_RATE); 61 | _ctx += _ctx; 62 | } 63 | else { 64 | _pc1[256] -= ((_pc1[256] - PSCALE + 16) >> FAST_RATE); 65 | _pc1[_c1] -= ((_pc1[_c1] - PSCALE + 16) >> MEDIUM_RATE); 66 | _pc2[0] -= ((_pc2[0] - PSCALE + 16) >> SLOW_RATE); 67 | _pc2[1] -= ((_pc2[1] - PSCALE + 16) >> SLOW_RATE); 68 | _ctx += (_ctx + 1); 69 | } 70 | 71 | if (_ctx > 255) { 72 | _c2 = _c1; 73 | _c1 = _ctx & 0xFF; 74 | _ctx = 1; 75 | _runMask = (_c1 == _c2) ? 0x100 : 0; 76 | } 77 | } 78 | 79 | // Return the split value representing the probability of 1 in the [0..4095] range. 
80 | inline int CMPredictor::get() 81 | { 82 | _pc1 = _counter1[_ctx]; 83 | const int p = (13 * (_pc1[256] + _pc1[_c1]) + 6 * _pc1[_c2]) >> 5; 84 | _pc2 = &_counter2[_ctx | _runMask][p >> 12]; 85 | return (p + p + 3 * (_pc2[0] + _pc2[1]) + 64) >> 7; // rescale to [0..4095] 86 | } 87 | } 88 | #endif 89 | 90 | -------------------------------------------------------------------------------- /src/entropy/EntropyDecoderFactory.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _EntropyDecoderFactory_ 18 | #define _EntropyDecoderFactory_ 19 | 20 | #include 21 | #include "../Context.hpp" 22 | #include "ANSRangeDecoder.hpp" 23 | #include "BinaryEntropyDecoder.hpp" 24 | #include "HuffmanDecoder.hpp" 25 | #include "NullEntropyDecoder.hpp" 26 | #include "RangeDecoder.hpp" 27 | #include "CMPredictor.hpp" 28 | #include "FPAQDecoder.hpp" 29 | #include "TPAQPredictor.hpp" 30 | 31 | 32 | namespace kanzi { 33 | 34 | class EntropyDecoderFactory { 35 | public: 36 | static const short NONE_TYPE = 0; // No compression 37 | static const short HUFFMAN_TYPE = 1; // Huffman 38 | static const short FPAQ_TYPE = 2; // Fast PAQ (order 0) 39 | static const short PAQ_TYPE = 3; // Obsolete 40 | static const short RANGE_TYPE = 4; // Range 41 | static const short ANS0_TYPE = 5; // Asymmetric Numerical System order 0 42 | static const short CM_TYPE = 6; // Context Model 43 | static const short TPAQ_TYPE = 7; // Tangelo PAQ 44 | static const short ANS1_TYPE = 8; // Asymmetric Numerical System order 1 45 | static const short TPAQX_TYPE = 9; // Tangelo PAQ Extra 46 | static const short RESERVED1 = 10; //Reserved 47 | static const short RESERVED2 = 11; //Reserved 48 | static const short RESERVED3 = 12; //Reserved 49 | static const short RESERVED4 = 13; //Reserved 50 | static const short RESERVED5 = 14; //Reserved 51 | static const short RESERVED6 = 15; //Reserved 52 | 53 | static EntropyDecoder* newDecoder(InputBitStream& ibs, Context& ctx, short entropyType); 54 | 55 | static const char* getName(short entropyType); 56 | 57 | static short getType(const char* name); 58 | }; 59 | 60 | 61 | inline EntropyDecoder* EntropyDecoderFactory::newDecoder(InputBitStream& ibs, Context& ctx, short entropyType) 62 | { 63 | switch (entropyType) { 64 | // Each block is decoded separately 65 | // Rebuild the entropy decoder to reset block statistics 66 | case HUFFMAN_TYPE: 67 | return new HuffmanDecoder(ibs); 68 | 69 | case ANS0_TYPE: 70 | 
return new ANSRangeDecoder(ibs, 0); 71 | 72 | case ANS1_TYPE: 73 | return new ANSRangeDecoder(ibs, 1); 74 | 75 | case RANGE_TYPE: 76 | return new RangeDecoder(ibs); 77 | 78 | case FPAQ_TYPE: 79 | return new FPAQDecoder(ibs); 80 | 81 | case CM_TYPE: 82 | return new BinaryEntropyDecoder(ibs, new CMPredictor()); 83 | 84 | case TPAQ_TYPE: 85 | return new BinaryEntropyDecoder(ibs, new TPAQPredictor(&ctx)); 86 | 87 | case TPAQX_TYPE: 88 | return new BinaryEntropyDecoder(ibs, new TPAQPredictor(&ctx)); 89 | 90 | case NONE_TYPE: 91 | return new NullEntropyDecoder(ibs); 92 | 93 | default: 94 | std::string msg = "Unknown entropy codec type: '"; 95 | msg += char(entropyType); 96 | msg += '\''; 97 | throw std::invalid_argument(msg); 98 | } 99 | } 100 | 101 | 102 | inline const char* EntropyDecoderFactory::getName(short entropyType) 103 | { 104 | switch (entropyType) { 105 | case HUFFMAN_TYPE: 106 | return "HUFFMAN"; 107 | 108 | case ANS0_TYPE: 109 | return "ANS0"; 110 | 111 | case ANS1_TYPE: 112 | return "ANS1"; 113 | 114 | case RANGE_TYPE: 115 | return "RANGE"; 116 | 117 | case FPAQ_TYPE: 118 | return "FPAQ"; 119 | 120 | case CM_TYPE: 121 | return "CM"; 122 | 123 | case TPAQ_TYPE: 124 | return "TPAQ"; 125 | 126 | case TPAQX_TYPE: 127 | return "TPAQX"; 128 | 129 | case NONE_TYPE: 130 | return "NONE"; 131 | 132 | default: 133 | std::string msg = "Unknown entropy codec type: '"; 134 | msg += char(entropyType); 135 | msg += '\''; 136 | throw std::invalid_argument(msg); 137 | } 138 | } 139 | 140 | 141 | inline short EntropyDecoderFactory::getType(const char* str) 142 | { 143 | std::string name = str; 144 | transform(name.begin(), name.end(), name.begin(), ::toupper); 145 | 146 | if (name == "HUFFMAN") 147 | return HUFFMAN_TYPE; 148 | 149 | if (name == "ANS0") 150 | return ANS0_TYPE; 151 | 152 | if (name == "ANS1") 153 | return ANS1_TYPE; 154 | 155 | if (name == "FPAQ") 156 | return FPAQ_TYPE; 157 | 158 | if (name == "RANGE") 159 | return RANGE_TYPE; 160 | 161 | if (name == "CM") 
162 | return CM_TYPE; 163 | 164 | if (name == "TPAQ") 165 | return TPAQ_TYPE; 166 | 167 | if (name == "TPAQX") 168 | return TPAQX_TYPE; 169 | 170 | if (name == "NONE") 171 | return NONE_TYPE; 172 | 173 | throw std::invalid_argument("Unsupported entropy codec type: '" + name + "'"); 174 | } 175 | } 176 | #endif 177 | 178 | -------------------------------------------------------------------------------- /src/entropy/EntropyEncoderFactory.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _EntropyEncoderFactory_ 18 | #define _EntropyEncoderFactory_ 19 | 20 | #include 21 | #include "../Context.hpp" 22 | #include "ANSRangeEncoder.hpp" 23 | #include "BinaryEntropyEncoder.hpp" 24 | #include "HuffmanEncoder.hpp" 25 | #include "NullEntropyEncoder.hpp" 26 | #include "RangeEncoder.hpp" 27 | #include "CMPredictor.hpp" 28 | #include "FPAQEncoder.hpp" 29 | #include "TPAQPredictor.hpp" 30 | 31 | 32 | namespace kanzi { 33 | 34 | class EntropyEncoderFactory { 35 | public: 36 | static const short NONE_TYPE = 0; // No compression 37 | static const short HUFFMAN_TYPE = 1; // Huffman 38 | static const short FPAQ_TYPE = 2; // Fast PAQ (order 0) 39 | static const short PAQ_TYPE = 3; // Obsolete 40 | static const short RANGE_TYPE = 4; // Range 41 | static const short ANS0_TYPE = 5; // Asymmetric Numerical System order 0 42 | static const short CM_TYPE = 6; // Context Model 43 | static const short TPAQ_TYPE = 7; // Tangelo PAQ 44 | static const short ANS1_TYPE = 8; // Asymmetric Numerical System order 1 45 | static const short TPAQX_TYPE = 9; // Tangelo PAQ Extra 46 | static const short RESERVED1 = 10; //Reserved 47 | static const short RESERVED2 = 11; //Reserved 48 | static const short RESERVED3 = 12; //Reserved 49 | static const short RESERVED4 = 13; //Reserved 50 | static const short RESERVED5 = 14; //Reserved 51 | static const short RESERVED6 = 15; //Reserved 52 | 53 | static EntropyEncoder* newEncoder(OutputBitStream& obs, Context& ctx, short entropyType); 54 | 55 | static const char* getName(short entropyType); 56 | 57 | static short getType(const char* name); 58 | }; 59 | 60 | 61 | inline EntropyEncoder* EntropyEncoderFactory::newEncoder(OutputBitStream& obs, Context& ctx, short entropyType) 62 | { 63 | switch (entropyType) { 64 | case HUFFMAN_TYPE: 65 | return new HuffmanEncoder(obs); 66 | 67 | case ANS0_TYPE: 68 | return new ANSRangeEncoder(obs, 0); 69 | 70 | case ANS1_TYPE: 71 | return new ANSRangeEncoder(obs, 1); 72 
| 73 | case RANGE_TYPE: 74 | return new RangeEncoder(obs); 75 | 76 | case FPAQ_TYPE: 77 | return new FPAQEncoder(obs); 78 | 79 | case CM_TYPE: 80 | return new BinaryEntropyEncoder(obs, new CMPredictor()); 81 | 82 | case TPAQ_TYPE: 83 | return new BinaryEntropyEncoder(obs, new TPAQPredictor(&ctx)); 84 | 85 | case TPAQX_TYPE: 86 | return new BinaryEntropyEncoder(obs, new TPAQPredictor(&ctx)); 87 | 88 | case NONE_TYPE: 89 | return new NullEntropyEncoder(obs); 90 | 91 | default: 92 | std::string msg = "Unknown entropy codec type: '"; 93 | msg += char(entropyType); 94 | msg += '\''; 95 | throw std::invalid_argument(msg); 96 | } 97 | } 98 | 99 | 100 | inline const char* EntropyEncoderFactory::getName(short entropyType) 101 | { 102 | switch (entropyType) { 103 | case HUFFMAN_TYPE: 104 | return "HUFFMAN"; 105 | 106 | case ANS0_TYPE: 107 | return "ANS0"; 108 | 109 | case ANS1_TYPE: 110 | return "ANS1"; 111 | 112 | case RANGE_TYPE: 113 | return "RANGE"; 114 | 115 | case FPAQ_TYPE: 116 | return "FPAQ"; 117 | 118 | case CM_TYPE: 119 | return "CM"; 120 | 121 | case TPAQ_TYPE: 122 | return "TPAQ"; 123 | 124 | case TPAQX_TYPE: 125 | return "TPAQX"; 126 | 127 | case NONE_TYPE: 128 | return "NONE"; 129 | 130 | default: 131 | std::string msg = "Unknown entropy codec type: '"; 132 | msg += char(entropyType); 133 | msg += '\''; 134 | throw std::invalid_argument(msg); 135 | } 136 | } 137 | 138 | 139 | inline short EntropyEncoderFactory::getType(const char* str) 140 | { 141 | std::string name = str; 142 | transform(name.begin(), name.end(), name.begin(), ::toupper); 143 | 144 | if (name == "HUFFMAN") 145 | return HUFFMAN_TYPE; 146 | 147 | if (name == "ANS0") 148 | return ANS0_TYPE; 149 | 150 | if (name == "ANS1") 151 | return ANS1_TYPE; 152 | 153 | if (name == "FPAQ") 154 | return FPAQ_TYPE; 155 | 156 | if (name == "RANGE") 157 | return RANGE_TYPE; 158 | 159 | if (name == "CM") 160 | return CM_TYPE; 161 | 162 | if (name == "TPAQ") 163 | return TPAQ_TYPE; 164 | 165 | if (name == "TPAQX") 
166 | return TPAQX_TYPE; 167 | 168 | if (name == "NONE") 169 | return NONE_TYPE; 170 | 171 | throw std::invalid_argument("Unsupported entropy codec type: '" + name + "'"); 172 | } 173 | } 174 | #endif 175 | 176 | -------------------------------------------------------------------------------- /src/entropy/EntropyUtils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _EntropyUtils_ 18 | #define _EntropyUtils_ 19 | 20 | #include "../InputBitStream.hpp" 21 | #include "../OutputBitStream.hpp" 22 | 23 | namespace kanzi 24 | { 25 | 26 | class EntropyUtils 27 | { 28 | private: 29 | static const int FULL_ALPHABET; 30 | static const int PARTIAL_ALPHABET; 31 | static const int ALPHABET_256; 32 | static const int ALPHABET_0; 33 | 34 | public: 35 | static const int INCOMPRESSIBLE_THRESHOLD; 36 | 37 | EntropyUtils() {} 38 | 39 | ~EntropyUtils() {} 40 | 41 | static int encodeAlphabet(OutputBitStream& obs, const uint alphabet[], int length, int count); 42 | 43 | static int decodeAlphabet(InputBitStream& ibs, uint alphabet[]); 44 | 45 | static int normalizeFrequencies(uint freqs[], uint alphabet[], int length, uint totalFreq, uint scale); 46 | 47 | static int writeVarInt(OutputBitStream& obs, uint32 val); 48 | 49 | static uint32 readVarInt(InputBitStream& ibs); 50 | }; 51 | 52 | } 53 | #endif 54 | 55 | -------------------------------------------------------------------------------- /src/entropy/ExpGolombDecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #include "ExpGolombDecoder.hpp" 17 | 18 | using namespace kanzi; 19 | 20 | ExpGolombDecoder::ExpGolombDecoder(InputBitStream& bitstream, bool sgn) 21 | : _bitstream(bitstream), _signed(sgn) 22 | { 23 | } 24 | 25 | 26 | int ExpGolombDecoder::decode(byte block[], uint blkptr, uint len) 27 | { 28 | byte* buf = &block[blkptr]; 29 | const uint len8 = len & uint(-8); 30 | 31 | for (uint i = 0; i < len8; i += 8) { 32 | buf[i] = decodeByte(); 33 | buf[i+1] = decodeByte(); 34 | buf[i+2] = decodeByte(); 35 | buf[i+3] = decodeByte(); 36 | buf[i+4] = decodeByte(); 37 | buf[i+5] = decodeByte(); 38 | buf[i+6] = decodeByte(); 39 | buf[i+7] = decodeByte(); 40 | } 41 | 42 | for (uint i = len8; i < len; i++) 43 | buf[i] = decodeByte(); 44 | 45 | return len; 46 | } 47 | -------------------------------------------------------------------------------- /src/entropy/ExpGolombDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _ExpGolombDecoder_ 18 | #define _ExpGolombDecoder_ 19 | 20 | #include "../EntropyDecoder.hpp" 21 | 22 | namespace kanzi 23 | { 24 | 25 | class ExpGolombDecoder : public EntropyDecoder 26 | { 27 | private: 28 | InputBitStream& _bitstream; 29 | const bool _signed; 30 | 31 | void flush(); 32 | 33 | void _dispose() const {} 34 | 35 | public: 36 | ExpGolombDecoder(InputBitStream& bitstream, bool sign=true); 37 | 38 | ~ExpGolombDecoder() { _dispose(); } 39 | 40 | int decode(byte arr[], uint blkptr, uint len); 41 | 42 | InputBitStream& getBitStream() const { return _bitstream; } 43 | 44 | byte decodeByte(); 45 | 46 | void dispose() { _dispose(); } 47 | 48 | bool isSigned() const { return _signed; } 49 | }; 50 | 51 | 52 | inline byte ExpGolombDecoder::decodeByte() 53 | { 54 | if (_bitstream.readBit() == 1) 55 | return byte(0); 56 | 57 | uint log2 = 1; 58 | 59 | while (_bitstream.readBit() == 0) 60 | log2++; 61 | 62 | // Clamp. Do not attempt to detect a corrupted bitstream 63 | log2 &= 7; 64 | 65 | if (_signed == true) { 66 | // Decode signed: read value + sign 67 | int res = int(_bitstream.readBits(log2 + 1)); 68 | const int sgn = res & 1; 69 | res = (res >> 1) + (1 << log2) - 1; 70 | return byte((res - sgn) ^ -sgn); // res or -res 71 | } 72 | 73 | // Decode unsigned 74 | return byte((1 << log2) - 1 + _bitstream.readBits(log2)); 75 | } 76 | } 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /src/entropy/ExpGolombEncoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include "ExpGolombEncoder.hpp" 17 | 18 | using namespace kanzi; 19 | 20 | const int ExpGolombEncoder::CACHE[2][256] = { 21 | // Unsigned 22 | { 23 | 513, 1538, 1539, 2564, 2565, 2566, 2567, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 4624, 24 | 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 5664, 25 | 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 26 | 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 6720, 27 | 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736, 28 | 6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 29 | 6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768, 30 | 6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 7808, 31 | 7809, 7808, 6783, 6782, 6781, 6780, 6779, 6778, 6777, 6776, 6775, 6774, 6773, 6772, 6771, 6770, 32 | 6769, 6768, 6767, 6766, 6765, 6764, 6763, 6762, 6761, 6760, 6759, 6758, 6757, 6756, 6755, 6754, 33 | 6753, 6752, 6751, 6750, 6749, 6748, 6747, 6746, 6745, 6744, 6743, 6742, 6741, 6740, 6739, 6738, 34 | 6737, 6736, 6735, 6734, 6733, 6732, 6731, 6730, 6729, 6728, 6727, 6726, 6725, 6724, 6723, 6722, 35 | 6721, 6720, 5695, 5694, 5693, 5692, 5691, 5690, 5689, 5688, 5687, 5686, 5685, 5684, 5683, 5682, 36 | 5681, 5680, 5679, 5678, 5677, 5676, 5675, 5674, 5673, 5672, 5671, 5670, 5669, 5668, 5667, 
5666, 37 | 5665, 5664, 4639, 4638, 4637, 4636, 4635, 4634, 4633, 4632, 4631, 4630, 4629, 4628, 4627, 4626, 38 | 4625, 4624, 3599, 3598, 3597, 3596, 3595, 3594, 3593, 3592, 2567, 2566, 2565, 2564, 1539, 1538 39 | }, 40 | // Signed 41 | { 42 | 513, 2052, 2054, 3080, 3082, 3084, 3086, 4112, 4114, 4116, 4118, 4120, 4122, 4124, 4126, 5152, 43 | 5154, 5156, 5158, 5160, 5162, 5164, 5166, 5168, 5170, 5172, 5174, 5176, 5178, 5180, 5182, 6208, 44 | 6210, 6212, 6214, 6216, 6218, 6220, 6222, 6224, 6226, 6228, 6230, 6232, 6234, 6236, 6238, 6240, 45 | 6242, 6244, 6246, 6248, 6250, 6252, 6254, 6256, 6258, 6260, 6262, 6264, 6266, 6268, 6270, 7296, 46 | 7298, 7300, 7302, 7304, 7306, 7308, 7310, 7312, 7314, 7316, 7318, 7320, 7322, 7324, 7326, 7328, 47 | 7330, 7332, 7334, 7336, 7338, 7340, 7342, 7344, 7346, 7348, 7350, 7352, 7354, 7356, 7358, 7360, 48 | 7362, 7364, 7366, 7368, 7370, 7372, 7374, 7376, 7378, 7380, 7382, 7384, 7386, 7388, 7390, 7392, 49 | 7394, 7396, 7398, 7400, 7402, 7404, 7406, 7408, 7410, 7412, 7414, 7416, 7418, 7420, 7422, 8448, 50 | 8451, 8449, 7423, 7421, 7419, 7417, 7415, 7413, 7411, 7409, 7407, 7405, 7403, 7401, 7399, 7397, 51 | 7395, 7393, 7391, 7389, 7387, 7385, 7383, 7381, 7379, 7377, 7375, 7373, 7371, 7369, 7367, 7365, 52 | 7363, 7361, 7359, 7357, 7355, 7353, 7351, 7349, 7347, 7345, 7343, 7341, 7339, 7337, 7335, 7333, 53 | 7331, 7329, 7327, 7325, 7323, 7321, 7319, 7317, 7315, 7313, 7311, 7309, 7307, 7305, 7303, 7301, 54 | 7299, 7297, 6271, 6269, 6267, 6265, 6263, 6261, 6259, 6257, 6255, 6253, 6251, 6249, 6247, 6245, 55 | 6243, 6241, 6239, 6237, 6235, 6233, 6231, 6229, 6227, 6225, 6223, 6221, 6219, 6217, 6215, 6213, 56 | 6211, 6209, 5183, 5181, 5179, 5177, 5175, 5173, 5171, 5169, 5167, 5165, 5163, 5161, 5159, 5157, 57 | 5155, 5153, 4127, 4125, 4123, 4121, 4119, 4117, 4115, 4113, 3087, 3085, 3083, 3081, 2055, 2053 58 | } 59 | }; 60 | 61 | ExpGolombEncoder::ExpGolombEncoder(OutputBitStream& bitstream, bool sgn) 62 | : _bitstream(bitstream), _signed((sgn == 
true) ? 1 : 0) 63 | { 64 | } 65 | 66 | int ExpGolombEncoder::encode(const byte block[], uint blkptr, uint len) 67 | { 68 | const byte* buf = &block[blkptr]; 69 | const uint len8 = len & uint(-8); 70 | 71 | for (uint i = 0; i < len8; i += 8) { 72 | encodeByte(buf[i]); 73 | encodeByte(buf[i+1]); 74 | encodeByte(buf[i+2]); 75 | encodeByte(buf[i+3]); 76 | encodeByte(buf[i+4]); 77 | encodeByte(buf[i+5]); 78 | encodeByte(buf[i+6]); 79 | encodeByte(buf[i+7]); 80 | } 81 | 82 | for (uint i = len8; i < len; i++) 83 | encodeByte(buf[i]); 84 | 85 | return len; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /src/entropy/ExpGolombEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _ExpGolombEncoder_ 18 | #define _ExpGolombEncoder_ 19 | 20 | #include "../EntropyEncoder.hpp" 21 | 22 | namespace kanzi 23 | { 24 | 25 | class ExpGolombEncoder : public EntropyEncoder 26 | { 27 | private: 28 | static const int CACHE[2][256]; 29 | OutputBitStream& _bitstream; 30 | const int _signed; 31 | 32 | void _dispose() const {} 33 | 34 | public: 35 | ExpGolombEncoder(OutputBitStream& bitstream, bool sign=true); 36 | 37 | ~ExpGolombEncoder() { _dispose(); } 38 | 39 | int encode(const byte block[], uint blkptr, uint len); 40 | 41 | OutputBitStream& getBitStream() const { return _bitstream; } 42 | 43 | void encodeByte(byte val); 44 | 45 | void dispose() { _dispose(); } 46 | 47 | bool isSigned() const { return _signed == 1; } 48 | }; 49 | 50 | 51 | inline void ExpGolombEncoder::encodeByte(byte val) 52 | { 53 | if (val == byte(0)) { 54 | // shortcut when input is 0 55 | _bitstream.writeBit(1); 56 | return; 57 | } 58 | 59 | const int emit = CACHE[_signed][uint8(val)]; 60 | _bitstream.writeBits(emit & 0x1FF, emit >> 9); 61 | } 62 | } 63 | #endif 64 | 65 | -------------------------------------------------------------------------------- /src/entropy/FPAQDecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #include 17 | #include 18 | #include "FPAQDecoder.hpp" 19 | #include "EntropyUtils.hpp" 20 | 21 | using namespace kanzi; 22 | using namespace std; 23 | 24 | 25 | const uint64 FPAQDecoder::TOP = 0x00FFFFFFFFFFFFFF; 26 | const uint64 FPAQDecoder::MASK_0_56 = 0x00FFFFFFFFFFFFFF; 27 | const uint64 FPAQDecoder::MASK_0_32 = 0x00000000FFFFFFFF; 28 | const uint FPAQDecoder::DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; 29 | const uint FPAQDecoder::MAX_BLOCK_SIZE = 1 << 30; 30 | const int FPAQDecoder::PSCALE = 65536; 31 | 32 | 33 | FPAQDecoder::FPAQDecoder(InputBitStream& bitstream) 34 | : _bitstream(bitstream) 35 | { 36 | reset(); 37 | } 38 | 39 | FPAQDecoder::~FPAQDecoder() 40 | { 41 | _dispose(); 42 | } 43 | 44 | bool FPAQDecoder::reset() 45 | { 46 | _low = 0; 47 | _high = TOP; 48 | _current = 0; 49 | _ctx = 1; 50 | 51 | for (int i = 0; i < 4; i++) { 52 | for (int j = 0; j < 256; j++) 53 | _probs[i][j] = PSCALE >> 1; 54 | } 55 | 56 | _p = _probs[0]; 57 | return true; 58 | } 59 | 60 | int FPAQDecoder::decode(byte block[], uint blkptr, uint count) 61 | { 62 | if (count >= MAX_BLOCK_SIZE) 63 | throw invalid_argument("Invalid block size parameter (max is 1<<30)"); 64 | 65 | uint startChunk = blkptr; 66 | const uint end = blkptr + count; 67 | 68 | // Read bit array from bitstream and decode chunk 69 | while (startChunk < end) { 70 | const uint szBytes = uint(EntropyUtils::readVarInt(_bitstream)); 71 | 72 | // Sanity check 73 | if (szBytes >= 2 * count) 74 | return 0; 75 | 76 | const size_t bufSize = max(szBytes + (szBytes >> 2), 8192u); 77 | 78 | if (_buf.size() < bufSize) 79 | _buf.resize(bufSize); 80 | 81 | _current = _bitstream.readBits(56); 82 | 83 | if (bufSize > szBytes) 84 | memset(&_buf[szBytes], 0, bufSize - szBytes); 85 | 86 | _bitstream.readBits(&_buf[0], 8 * szBytes); 87 | _index = 0; 88 | const uint chunkSize = min(DEFAULT_CHUNK_SIZE, end - startChunk); 89 | const uint endChunk = startChunk + chunkSize; 90 | _p = _probs[0]; 91 | 92 | for (uint i = 
startChunk; i < endChunk; i++) { 93 | _ctx = 1; 94 | decodeBit(_p[_ctx]); 95 | decodeBit(_p[_ctx]); 96 | decodeBit(_p[_ctx]); 97 | decodeBit(_p[_ctx]); 98 | decodeBit(_p[_ctx]); 99 | decodeBit(_p[_ctx]); 100 | decodeBit(_p[_ctx]); 101 | decodeBit(_p[_ctx]); 102 | block[i] = byte(_ctx); 103 | _p = _probs[(_ctx & 0xFF) >> 6]; 104 | } 105 | 106 | startChunk = endChunk; 107 | } 108 | 109 | return count; 110 | } 111 | 112 | 113 | -------------------------------------------------------------------------------- /src/entropy/FPAQDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _FPAQDecoder_ 18 | #define _FPAQDecoder_ 19 | 20 | #include 21 | 22 | #include "../EntropyDecoder.hpp" 23 | #include "../Memory.hpp" 24 | #include "../SliceArray.hpp" 25 | 26 | namespace kanzi 27 | { 28 | 29 | // Derived from fpaq0r by Matt Mahoney & Alexander Ratushnyak. 30 | // See http://mattmahoney.net/dc/#fpaq0. 
31 | // Simple (and fast) adaptive entropy bit coder 32 | class FPAQDecoder : public EntropyDecoder 33 | { 34 | private: 35 | static const uint64 TOP; 36 | static const uint64 MASK_0_56; 37 | static const uint64 MASK_0_32; 38 | static const uint DEFAULT_CHUNK_SIZE; 39 | static const uint MAX_BLOCK_SIZE; 40 | static const int PSCALE; 41 | 42 | uint64 _low; 43 | uint64 _high; 44 | uint64 _current; 45 | InputBitStream& _bitstream; 46 | std::vector _buf; 47 | int _index; 48 | uint16 _probs[4][256]; // probability of bit=1 49 | uint16* _p; // pointer to current prob 50 | int _ctx; // previous bits 51 | 52 | void _dispose() const {} 53 | 54 | int decodeBit(int pred = 2048); 55 | 56 | bool reset(); 57 | 58 | public: 59 | FPAQDecoder(InputBitStream& bitstream); 60 | 61 | ~FPAQDecoder(); 62 | 63 | int decode(byte block[], uint blkptr, uint count); 64 | 65 | InputBitStream& getBitStream() const { return _bitstream; } 66 | 67 | void dispose() { _dispose(); } 68 | 69 | void read(); 70 | }; 71 | 72 | 73 | inline int FPAQDecoder::decodeBit(int prob) 74 | { 75 | // Calculate interval split 76 | // Written in a way to maximize accuracy of multiplication/division 77 | const uint64 split = ((((_high - _low) >> 8) * uint64(prob)) >> 8) + _low; 78 | int bit; 79 | 80 | // Update probabilities 81 | if (split >= _current) { 82 | _high = split; 83 | _p[_ctx] -= uint16((_p[_ctx] - PSCALE + 64) >> 6); 84 | _ctx += (_ctx + 1); 85 | bit = 1; 86 | } 87 | else { 88 | _low = split + 1; 89 | _p[_ctx] -= uint16(_p[_ctx] >> 6); 90 | _ctx += _ctx; 91 | bit = 0; 92 | } 93 | 94 | // Read 32 bits from bitstream 95 | if (((_low ^ _high) >> 24) == 0) 96 | read(); 97 | 98 | return bit; 99 | } 100 | 101 | 102 | inline void FPAQDecoder::read() 103 | { 104 | _low = (_low << 32) & MASK_0_56; 105 | _high = ((_high << 32) | MASK_0_32) & MASK_0_56; 106 | const uint64 val = BigEndian::readInt32(&_buf[_index]) & MASK_0_32; 107 | _current = ((_current << 32) | val) & MASK_0_56; 108 | _index += 4; 109 | } 110 | } 
111 | #endif 112 | 113 | -------------------------------------------------------------------------------- /src/entropy/FPAQEncoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include 18 | #include "FPAQEncoder.hpp" 19 | #include "EntropyUtils.hpp" 20 | 21 | using namespace kanzi; 22 | using namespace std; 23 | 24 | const uint64 FPAQEncoder::TOP = 0x00FFFFFFFFFFFFFF; 25 | const uint64 FPAQEncoder::MASK_0_24 = 0x0000000000FFFFFF; 26 | const uint64 FPAQEncoder::MASK_0_32 = 0x00000000FFFFFFFF; 27 | const uint FPAQEncoder::DEFAULT_CHUNK_SIZE = 4 * 1024 * 1024; 28 | const uint FPAQEncoder::MAX_BLOCK_SIZE = 1 << 30; 29 | const int FPAQEncoder::PSCALE = 65536; 30 | 31 | 32 | FPAQEncoder::FPAQEncoder(OutputBitStream& bitstream) 33 | : _bitstream(bitstream) 34 | { 35 | reset(); 36 | } 37 | 38 | FPAQEncoder::~FPAQEncoder() 39 | { 40 | _dispose(); 41 | } 42 | 43 | bool FPAQEncoder::reset() 44 | { 45 | _index = 0; 46 | _low = 0; 47 | _high = TOP; 48 | _disposed = false; 49 | 50 | for (int i = 0; i < 4; i++) { 51 | for (int j = 0; j < 256; j++) 52 | _probs[i][j] = PSCALE >> 1; 53 | } 54 | 55 | return true; 56 | } 57 | 58 | int FPAQEncoder::encode(const byte block[], uint blkptr, uint count) 59 | { 60 | if (count >= MAX_BLOCK_SIZE) 61 | throw invalid_argument("Invalid block size parameter (max is 
1<<30)"); 62 | 63 | uint startChunk = blkptr; 64 | const uint end = blkptr + count; 65 | 66 | // Split block into chunks, encode chunk and write bit array to bitstream 67 | while (startChunk < end) { 68 | const uint chunkSize = min(DEFAULT_CHUNK_SIZE, end - startChunk); 69 | const size_t bufSize = max(chunkSize + (chunkSize >> 3), 1024u); 70 | 71 | if (_buf.size() < bufSize) 72 | _buf.resize(bufSize); 73 | 74 | _index = 0; 75 | const uint endChunk = startChunk + chunkSize; 76 | uint16* p = _probs[0]; 77 | 78 | for (uint i = startChunk; i < endChunk; i++) { 79 | const int val = int(block[i]); 80 | const int bits = val + 256; 81 | encodeBit(val & 0x80, p[1]); 82 | encodeBit(val & 0x40, p[bits >> 7]); 83 | encodeBit(val & 0x20, p[bits >> 6]); 84 | encodeBit(val & 0x10, p[bits >> 5]); 85 | encodeBit(val & 0x08, p[bits >> 4]); 86 | encodeBit(val & 0x04, p[bits >> 3]); 87 | encodeBit(val & 0x02, p[bits >> 2]); 88 | encodeBit(val & 0x01, p[bits >> 1]); 89 | p = _probs[val >> 6]; 90 | } 91 | 92 | EntropyUtils::writeVarInt(_bitstream, uint32(_index)); 93 | _bitstream.writeBits(&_buf[0], 8 * _index); 94 | startChunk += chunkSize; 95 | 96 | if (startChunk < end) 97 | _bitstream.writeBits(_low | MASK_0_24, 56); 98 | } 99 | 100 | return count; 101 | } 102 | 103 | void FPAQEncoder::_dispose() 104 | { 105 | if (_disposed == true) 106 | return; 107 | 108 | _disposed = true; 109 | _bitstream.writeBits(_low | MASK_0_24, 56); 110 | } 111 | -------------------------------------------------------------------------------- /src/entropy/FPAQEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _FPAQEncoder_ 18 | #define _FPAQEncoder_ 19 | 20 | #include 21 | 22 | #include "../EntropyEncoder.hpp" 23 | #include "../Memory.hpp" 24 | #include "../SliceArray.hpp" 25 | 26 | namespace kanzi 27 | { 28 | 29 | // Derived from fpaq0r by Matt Mahoney & Alexander Ratushnyak. 30 | // See http://mattmahoney.net/dc/#fpaq0. 31 | // Simple (and fast) adaptive entropy bit coder 32 | class FPAQEncoder : public EntropyEncoder 33 | { 34 | private: 35 | static const uint64 TOP; 36 | static const uint64 MASK_0_24; 37 | static const uint64 MASK_0_32; 38 | static const uint DEFAULT_CHUNK_SIZE; 39 | static const uint MAX_BLOCK_SIZE; 40 | static const int PSCALE; 41 | 42 | uint64 _low; 43 | uint64 _high; 44 | bool _disposed; 45 | OutputBitStream& _bitstream; 46 | std::vector _buf; 47 | int _index; 48 | uint16 _probs[4][256]; // probability of bit=1 49 | 50 | 51 | void encodeBit(int bit, uint16& prob); 52 | 53 | bool reset(); 54 | 55 | void _dispose(); 56 | 57 | public: 58 | FPAQEncoder(OutputBitStream& bitstream); 59 | 60 | ~FPAQEncoder(); 61 | 62 | int encode(const byte block[], uint blkptr, uint count); 63 | 64 | OutputBitStream& getBitStream() const { return _bitstream; } 65 | 66 | void dispose() { _dispose(); } 67 | 68 | void flush(); 69 | }; 70 | 71 | 72 | inline void FPAQEncoder::encodeBit(int bit, uint16& prob) 73 | { 74 | // Update probabilities 75 | if (bit == 0) { 76 | _low = _low + ((((_high - _low) >> 8) * uint64(prob)) >> 8) + 1; 77 | prob -= uint16(prob >> 6); 78 | } else { 79 | _high = _low + ((((_high - 
_low) >> 8) * uint64(prob)) >> 8); 80 | prob -= uint16((prob - PSCALE + 64) >> 6); 81 | } 82 | 83 | // Write unchanged first 32 bits to bitstream 84 | if (((_low ^ _high) >> 24) == 0) 85 | flush(); 86 | } 87 | 88 | inline void FPAQEncoder::flush() 89 | { 90 | BigEndian::writeInt32(&_buf[_index], int32(_high >> 24)); 91 | _index += 4; 92 | _low <<= 32; 93 | _high = (_high << 32) | MASK_0_32; 94 | } 95 | } 96 | #endif 97 | 98 | -------------------------------------------------------------------------------- /src/entropy/HuffmanCommon.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #include "HuffmanCommon.hpp" 17 | 18 | using namespace kanzi; 19 | 20 | 21 | const int HuffmanCommon::LOG_MAX_CHUNK_SIZE = 14; 22 | const int HuffmanCommon::MAX_CHUNK_SIZE = 1 << LOG_MAX_CHUNK_SIZE; 23 | const int HuffmanCommon::MAX_SYMBOL_SIZE = 12; 24 | const int HuffmanCommon::BUFFER_SIZE = (MAX_SYMBOL_SIZE << 8) + 256; 25 | 26 | 27 | // Return the number of codes generated 28 | // codes and symbols are updated 29 | int HuffmanCommon::generateCanonicalCodes(const uint16 sizes[], uint16 codes[], uint symbols[], int count) 30 | { 31 | if (count == 0) 32 | return 0; 33 | 34 | if (count > 1) { 35 | int8 buf[BUFFER_SIZE] = { int8(0) }; 36 | 37 | for (int i = 0; i < count; i++) { 38 | const uint s = symbols[i]; 39 | 40 | if ((s > 255) || (sizes[s] > MAX_SYMBOL_SIZE)) 41 | return -1; 42 | 43 | buf[((sizes[s] - 1) << 8) | s] = int8(1); 44 | } 45 | 46 | for (int i = 0, n = 0; n < count; i++) { 47 | symbols[n] = i & 0xFF; 48 | n += buf[i]; 49 | } 50 | } 51 | 52 | int curLen = sizes[symbols[0]]; 53 | 54 | for (int i = 0, code = 0; i < count; i++) { 55 | const int s = symbols[i]; 56 | code <<= (sizes[s] - curLen); 57 | curLen = sizes[s]; 58 | codes[s] = uint16(code); 59 | code++; 60 | } 61 | 62 | return count; 63 | } 64 | -------------------------------------------------------------------------------- /src/entropy/HuffmanCommon.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _HuffmanCommon_ 18 | #define _HuffmanCommon_ 19 | 20 | #include "../types.hpp" 21 | 22 | 23 | namespace kanzi 24 | { 25 | 26 | class HuffmanCommon 27 | { 28 | public: 29 | static const int LOG_MAX_CHUNK_SIZE; 30 | static const int MAX_CHUNK_SIZE; 31 | static const int MAX_SYMBOL_SIZE; 32 | 33 | static int generateCanonicalCodes(const uint16 sizes[], uint16 codes[], uint ranks[], int count); 34 | 35 | private: 36 | static const int BUFFER_SIZE; 37 | }; 38 | 39 | } 40 | #endif 41 | 42 | -------------------------------------------------------------------------------- /src/entropy/HuffmanDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _HuffmanDecoder_ 18 | #define _HuffmanDecoder_ 19 | 20 | #include "HuffmanCommon.hpp" 21 | #include "../EntropyDecoder.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | 27 | // Implementation of a static Huffman coder. 
28 | class HuffmanDecoder : public EntropyDecoder 29 | { 30 | public: 31 | HuffmanDecoder(InputBitStream& bitstream, int chunkSize = HuffmanCommon::MAX_CHUNK_SIZE); 32 | 33 | ~HuffmanDecoder() { _dispose(); delete[] _buffer; } 34 | 35 | int decode(byte block[], uint blkptr, uint len); 36 | 37 | InputBitStream& getBitStream() const { return _bitstream; } 38 | 39 | void dispose() { _dispose(); } 40 | 41 | private: 42 | static const int DECODING_BATCH_SIZE; 43 | static const int TABLE_MASK; 44 | 45 | InputBitStream& _bitstream; 46 | byte* _buffer; 47 | uint _bufferSize; 48 | uint16 _codes[256]; 49 | uint _alphabet[256]; 50 | uint16 _sizes[256]; 51 | uint16 _table[1 << 12]; // decoding table: code -> size, symbol 52 | int _chunkSize; 53 | 54 | int readLengths(); 55 | 56 | bool buildDecodingTable(int count); 57 | 58 | bool reset(); 59 | 60 | void _dispose() const {} 61 | }; 62 | 63 | 64 | } 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /src/entropy/HuffmanEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _HuffmanEncoder_ 18 | #define _HuffmanEncoder_ 19 | 20 | #include "HuffmanCommon.hpp" 21 | #include "../EntropyEncoder.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | 27 | // Implementation of a static Huffman encoder. 
28 | // Uses in place generation of canonical codes instead of a tree 29 | class HuffmanEncoder : public EntropyEncoder 30 | { 31 | public: 32 | HuffmanEncoder(OutputBitStream& bitstream, int chunkSize = HuffmanCommon::MAX_CHUNK_SIZE); 33 | 34 | ~HuffmanEncoder() { _dispose(); delete[] _buffer; } 35 | 36 | int updateFrequencies(uint frequencies[]); 37 | 38 | int encode(const byte block[], uint blkptr, uint len); 39 | 40 | OutputBitStream& getBitStream() const { return _bitstream; } 41 | 42 | void dispose() { _dispose(); } 43 | 44 | 45 | private: 46 | OutputBitStream& _bitstream; 47 | uint16 _codes[256]; 48 | int _chunkSize; 49 | byte* _buffer; 50 | uint _bufferSize; 51 | 52 | int computeCodeLengths(uint16 sizes[], uint sranks[], int count) const; 53 | 54 | int limitCodeLengths(const uint alphabet[], uint freqs[], uint16 sizes[], uint sranks[], int count) const; 55 | 56 | void _dispose() const {} 57 | 58 | bool reset(); 59 | 60 | static void computeInPlaceSizesPhase1(uint data[], int n); 61 | 62 | static uint computeInPlaceSizesPhase2(uint data[], int n); 63 | }; 64 | 65 | } 66 | #endif 67 | 68 | -------------------------------------------------------------------------------- /src/entropy/NullEntropyDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _NullEntropyDecoder_ 18 | #define _NullEntropyDecoder_ 19 | 20 | #include "../EntropyDecoder.hpp" 21 | #include "../InputBitStream.hpp" 22 | 23 | namespace kanzi { 24 | 25 | // Null entropy decoder 26 | // Pass through that writes the data directly to the bitstream 27 | class NullEntropyDecoder FINAL : public EntropyDecoder { 28 | private: 29 | InputBitStream& _bitstream; 30 | 31 | 32 | public: 33 | NullEntropyDecoder(InputBitStream& bitstream); 34 | 35 | ~NullEntropyDecoder() {} 36 | 37 | int decode(byte block[], uint blkptr, uint len); 38 | 39 | byte decodeByte(); 40 | 41 | InputBitStream& getBitStream() const { return _bitstream; } 42 | 43 | void dispose() {} 44 | }; 45 | 46 | inline NullEntropyDecoder::NullEntropyDecoder(InputBitStream& bitstream) 47 | : _bitstream(bitstream) 48 | { 49 | } 50 | 51 | inline int NullEntropyDecoder::decode(byte block[], uint blkptr, uint count) 52 | { 53 | uint res = 0; 54 | 55 | while (count != 0) { 56 | const uint ckSize = (count < 1<<23) ? count : 1<<23; 57 | const uint r = uint(_bitstream.readBits(&block[blkptr], 8 * ckSize) >> 3); 58 | 59 | if (r == 0) 60 | break; 61 | 62 | res += r; 63 | blkptr += r; 64 | count -= r; 65 | } 66 | 67 | return res; 68 | } 69 | 70 | inline byte NullEntropyDecoder::decodeByte() 71 | { 72 | return byte(_bitstream.readBits(8)); 73 | } 74 | } 75 | #endif 76 | 77 | -------------------------------------------------------------------------------- /src/entropy/NullEntropyEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _NullEntropyEncoder_ 18 | #define _NullEntropyEncoder_ 19 | 20 | #include "../EntropyEncoder.hpp" 21 | #include "../OutputBitStream.hpp" 22 | 23 | namespace kanzi { 24 | 25 | // Null entropy encoder 26 | // Pass through that writes the data directly to the bitstream 27 | class NullEntropyEncoder FINAL : public EntropyEncoder { 28 | private: 29 | OutputBitStream& _bitstream; 30 | 31 | 32 | public: 33 | NullEntropyEncoder(OutputBitStream& bitstream); 34 | 35 | ~NullEntropyEncoder() {} 36 | 37 | int encode(const byte block[], uint blkptr, uint len); 38 | 39 | void encodeByte(byte val); 40 | 41 | OutputBitStream& getBitStream() const { return _bitstream; } 42 | 43 | void dispose() {} 44 | }; 45 | 46 | inline NullEntropyEncoder::NullEntropyEncoder(OutputBitStream& bitstream) 47 | : _bitstream(bitstream) 48 | { 49 | } 50 | 51 | inline int NullEntropyEncoder::encode(const byte block[], uint blkptr, uint count) 52 | { 53 | uint res = 0; 54 | 55 | while (count != 0) { 56 | const uint ckSize = (count < 1<<23) ? 
count : 1<<23; 57 | const uint w = uint(_bitstream.writeBits(&block[blkptr], 8 * ckSize) >> 3); 58 | 59 | if (w == 0) 60 | break; 61 | 62 | res += w; 63 | blkptr += w; 64 | count -= w; 65 | } 66 | 67 | return res; 68 | } 69 | 70 | inline void NullEntropyEncoder::encodeByte(byte val) 71 | { 72 | _bitstream.writeBits(uint64(val), 8); 73 | } 74 | } 75 | #endif 76 | 77 | -------------------------------------------------------------------------------- /src/entropy/RangeDecoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | 17 | #pragma once 18 | #ifndef _RangeDecoder_ 19 | #define _RangeDecoder_ 20 | 21 | #include "../EntropyDecoder.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | 27 | // Based on Order 0 range coder by Dmitry Subbotin itself derived from the algorithm 28 | // described by G.N.N Martin in his seminal article in 1979. 29 | // [G.N.N. Martin on the Data Recording Conference, Southampton, 1979] 30 | // Optimized for speed. 
31 | 32 | class RangeDecoder : public EntropyDecoder { 33 | public: 34 | static const int DECODING_BATCH_SIZE; 35 | static const int DECODING_MASK; 36 | 37 | RangeDecoder(InputBitStream& bitstream, int chunkSize = DEFAULT_CHUNK_SIZE); 38 | 39 | ~RangeDecoder() { _dispose(); delete[] _f2s; } 40 | 41 | int decode(byte block[], uint blkptr, uint len); 42 | 43 | InputBitStream& getBitStream() const { return _bitstream; } 44 | 45 | void dispose() { _dispose(); } 46 | 47 | private: 48 | static const uint64 TOP_RANGE; 49 | static const uint64 BOTTOM_RANGE; 50 | static const uint64 RANGE_MASK; 51 | static const int DEFAULT_CHUNK_SIZE; 52 | static const int DEFAULT_LOG_RANGE; 53 | static const int MAX_CHUNK_SIZE; 54 | 55 | uint64 _code; 56 | uint64 _low; 57 | uint64 _range; 58 | uint _alphabet[256]; 59 | uint _freqs[256]; 60 | uint64 _cumFreqs[257]; 61 | short* _f2s; 62 | int _lenF2S; 63 | InputBitStream& _bitstream; 64 | uint _chunkSize; 65 | uint _shift; 66 | 67 | int decodeHeader(uint frequencies[]); 68 | 69 | byte decodeByte(); 70 | 71 | bool reset(); 72 | 73 | void _dispose() const {} 74 | }; 75 | 76 | } 77 | #endif 78 | 79 | -------------------------------------------------------------------------------- /src/entropy/RangeEncoder.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _RangeEncoder_ 18 | #define _RangeEncoder_ 19 | 20 | #include "../EntropyEncoder.hpp" 21 | 22 | 23 | namespace kanzi 24 | { 25 | 26 | // Based on Order 0 range coder by Dmitry Subbotin itself derived from the algorithm 27 | // described by G.N.N Martin in his seminal article in 1979. 28 | // [G.N.N. Martin on the Data Recording Conference, Southampton, 1979] 29 | // Optimized for speed. 30 | 31 | class RangeEncoder : public EntropyEncoder 32 | { 33 | public: 34 | RangeEncoder(OutputBitStream& bitstream, int chunkSize = DEFAULT_CHUNK_SIZE, int logRange=DEFAULT_LOG_RANGE); 35 | 36 | ~RangeEncoder() { _dispose(); } 37 | 38 | int encode(const byte block[], uint blkptr, uint len); 39 | 40 | OutputBitStream& getBitStream() const { return _bitstream; } 41 | 42 | void dispose() { _dispose(); } 43 | 44 | private: 45 | static const uint64 TOP_RANGE; 46 | static const uint64 BOTTOM_RANGE; 47 | static const uint64 RANGE_MASK; 48 | static const int DEFAULT_CHUNK_SIZE; 49 | static const int DEFAULT_LOG_RANGE; 50 | static const int MAX_CHUNK_SIZE; 51 | 52 | uint64 _low; 53 | uint64 _range; 54 | uint _alphabet[256]; 55 | uint _freqs[256]; 56 | uint64 _cumFreqs[257]; 57 | OutputBitStream& _bitstream; 58 | uint _chunkSize; 59 | uint _logRange; 60 | uint _shift; 61 | 62 | int rebuildStatistics(const byte block[], int start, int end, int lr); 63 | 64 | int updateFrequencies(uint frequencies[], int size, int lr); 65 | 66 | void encodeByte(byte b); 67 | 68 | bool encodeHeader(int alphabetSize, const uint alphabet[], const uint frequencies[], int lr) const; 69 | 70 | bool reset(); 71 | 72 | void _dispose() const {} 73 | }; 74 | 75 | } 76 | #endif 77 | 78 | -------------------------------------------------------------------------------- /src/entropy/TPAQPredictor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 
(the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include "TPAQPredictor.hpp" 17 | 18 | using namespace kanzi; 19 | 20 | const int TPAQMixer::BEGIN_LEARN_RATE = 60 << 7; 21 | const int TPAQMixer::END_LEARN_RATE = 11 << 7; 22 | 23 | template<> 24 | const int TPAQPredictor::MAX_LENGTH = 88; 25 | template<> 26 | const int TPAQPredictor::BUFFER_SIZE = 64 * 1024 * 1024; 27 | template<> 28 | const int TPAQPredictor::HASH_SIZE = 16 * 1024 * 1024; 29 | template<> 30 | const int TPAQPredictor::HASH = 0x7FEB352D; 31 | template<> 32 | const int TPAQPredictor::MAX_LENGTH = 88; 33 | template<> 34 | const int TPAQPredictor::BUFFER_SIZE = 64 * 1024 * 1024; 35 | template<> 36 | const int TPAQPredictor::HASH_SIZE = 16 * 1024 * 1024; 37 | template<> 38 | const int TPAQPredictor::HASH = 0x7FEB352D; 39 | 40 | 41 | TPAQMixer::TPAQMixer() 42 | { 43 | _pr = 2048; 44 | _skew = 0; 45 | _w0 = _w1 = _w2 = _w3 = _w4 = _w5 = _w6 = _w7 = 32768; 46 | _p0 = _p1 = _p2 = _p3 = _p4 = _p5 = _p6 = _p7 = 0; 47 | _learnRate = BEGIN_LEARN_RATE; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/io/IOException.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _IOException_ 18 | #define _IOException_ 19 | 20 | #include 21 | #include 22 | #include "../Error.hpp" 23 | #include "../types.hpp" 24 | #include "../util/strings.hpp" 25 | 26 | 27 | namespace kanzi 28 | { 29 | 30 | class IOException : public std::runtime_error 31 | { 32 | private: 33 | int _code; 34 | 35 | public: 36 | IOException(const std::string& msg) : std::runtime_error(msg + ". Error code: " + TOSTR(Error::ERR_UNKNOWN)) 37 | { 38 | _code = Error::ERR_UNKNOWN; 39 | } 40 | 41 | IOException(const std::string& msg, int error) : std::runtime_error(msg + ". Error code: " + TOSTR(error)) 42 | { 43 | _code = error; 44 | } 45 | 46 | int error() const { return _code; } 47 | 48 | ~IOException() NOEXCEPT {} 49 | }; 50 | 51 | } 52 | #endif 53 | 54 | -------------------------------------------------------------------------------- /src/io/NullOutputStream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _NullOutputStream_ 18 | #define _NullOutputStream_ 19 | 20 | namespace kanzi 21 | { 22 | template > 23 | class basic_nullbuf : public std::basic_streambuf 24 | { 25 | typename traits::int_type overflow(typename traits::int_type c) 26 | { 27 | return traits::not_eof(c); 28 | } 29 | 30 | void close() {} 31 | }; 32 | 33 | template > 34 | class basic_onullstream : public std::basic_ostream 35 | { 36 | public: 37 | basic_onullstream() : 38 | std::basic_ios(&_sbuf), 39 | std::basic_ostream(&_sbuf) 40 | { 41 | this->init(&_sbuf); 42 | } 43 | 44 | private: 45 | basic_nullbuf _sbuf; 46 | }; 47 | 48 | typedef basic_onullstream NullOutputStream; 49 | } 50 | 51 | #endif 52 | 53 | -------------------------------------------------------------------------------- /src/transform/AliasCodec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _AliasCodec_ 18 | #define _AliasCodec_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | namespace kanzi { 24 | typedef struct ssAlias 25 | { 26 | uint32 val; 27 | uint32 freq; 28 | 29 | ssAlias(uint32 v, uint32 f) : val(v), freq(f) { } 30 | 31 | friend bool operator< (ssAlias const& lhs, ssAlias const& rhs) { 32 | int r; 33 | return ((r = int(lhs.freq - rhs.freq)) != 0) ? r > 0 : lhs.val > rhs.val; 34 | } 35 | } sdAlias; 36 | 37 | 38 | // Simple codec replacing large symbols with small aliases whenever possible 39 | class AliasCodec FINAL : public Transform 40 | { 41 | 42 | public: 43 | AliasCodec() { _pCtx = nullptr; _onlyDNA = false; } 44 | 45 | AliasCodec(Context& ctx); 46 | 47 | ~AliasCodec() {} 48 | 49 | bool forward(SliceArray& src, SliceArray& dst, int length); 50 | 51 | bool inverse(SliceArray& src, SliceArray& dst, int length); 52 | 53 | 54 | // Required encoding output buffer size 55 | int getMaxEncodedLength(int srcLen) const 56 | { 57 | return srcLen + 1024; 58 | } 59 | 60 | private: 61 | static const int MIN_BLOCK_SIZE; 62 | 63 | Context* _pCtx; 64 | bool _onlyDNA; 65 | }; 66 | } 67 | 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /src/transform/BWT.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BWT_ 18 | #define _BWT_ 19 | 20 | #include "../concurrent.hpp" 21 | #include "../Context.hpp" 22 | #include "../Transform.hpp" 23 | #include "DivSufSort.hpp" 24 | 25 | 26 | namespace kanzi { 27 | // The Burrows-Wheeler Transform is a reversible transform based on 28 | // permutation of the data in the original message to reduce the entropy. 29 | 30 | // The initial text can be found here: 31 | // Burrows M and Wheeler D, [A block sorting lossless data compression algorithm] 32 | // Technical Report 124, Digital Equipment Corporation, 1994 33 | 34 | // See also Peter Fenwick, [Block sorting text compression - final report] 35 | // Technical Report 130, 1996 36 | 37 | // This implementation replaces the 'slow' sorting of permutation strings 38 | // with the construction of a suffix array (faster but more complex). 39 | // 40 | // E.G. 0123456789A 41 | // Source: mississippi\0 42 | // Suffixes: rank sorted 43 | // mississippi\0 0 -> 4 i\0 44 | // ississippi\0 1 -> 3 ippi\0 45 | // ssissippi\0 2 -> 10 issippi\0 46 | // sissippi\0 3 -> 8 ississippi\0 47 | // issippi\0 4 -> 2 mississippi\0 48 | // ssippi\0 5 -> 9 pi\0 49 | // sippi\0 6 -> 7 ppi\0 50 | // ippi\0 7 -> 1 sippi\0 51 | // ppi\0 8 -> 6 sissippi\0 52 | // pi\0 9 -> 5 ssippi\0 53 | // i\0 10 -> 0 ssissippi\0 54 | // Suffix array SA : 10 7 4 1 0 9 8 6 3 5 2 55 | // BWT[i] = input[SA[i]-1] => BWT(input) = ipssmpissii (+ primary index 5) 56 | // The suffix array and permutation vector are equal when the input is 0 terminated 57 | // The insertion of a guard is done internally and is entirely transparent. 58 | // 59 | // This implementation extends the canonical algorithm to use up to MAX_CHUNKS primary 60 | // indexes (based on input block size). Each primary index corresponds to a data chunk. 61 | // Chunks may be inverted concurrently. 
62 | template 63 | class InverseBiPSIv2Task FINAL : public Task { 64 | private: 65 | uint* _data; 66 | uint* _buckets; 67 | uint16* _fastBits; 68 | int* _primaryIndexes; 69 | byte* _dst; 70 | int _total; 71 | int _start; 72 | int _ckSize; 73 | int _firstChunk; 74 | int _lastChunk; 75 | 76 | public: 77 | InverseBiPSIv2Task(uint* buf, uint* buckets, uint16* fastBits, byte* output, 78 | int* primaryIndexes, int total, int start, int ckSize, int firstChunk, int lastChunk); 79 | ~InverseBiPSIv2Task() {} 80 | 81 | T run(); 82 | }; 83 | 84 | class BWT FINAL : public Transform { 85 | 86 | private: 87 | static const int MAX_BLOCK_SIZE; 88 | static const int NB_FASTBITS; 89 | static const int BLOCK_SIZE_THRESHOLD1; 90 | static const int BLOCK_SIZE_THRESHOLD2; 91 | 92 | uint* _buffer; 93 | int* _sa; 94 | int _bufferSize; 95 | int _saSize; 96 | int _primaryIndexes[8]; 97 | DivSufSort _saAlgo; 98 | int _jobs; 99 | #ifdef CONCURRENCY_ENABLED 100 | ThreadPool* _pool; 101 | #endif 102 | 103 | bool inverseBiPSIv2(SliceArray& input, SliceArray& output, int count); 104 | 105 | bool inverseMergeTPSI(SliceArray& input, SliceArray& output, int count); 106 | 107 | public: 108 | static const int MASK_FASTBITS; 109 | 110 | BWT(int jobs = 1); 111 | 112 | BWT(Context& ctx); 113 | 114 | ~BWT() { delete[] _buffer; delete[] _sa; } 115 | 116 | bool forward(SliceArray& input, SliceArray& output, int length); 117 | 118 | bool inverse(SliceArray& input, SliceArray& output, int length); 119 | 120 | int getPrimaryIndex(int n) const { return _primaryIndexes[n]; } 121 | 122 | bool setPrimaryIndex(int n, int primaryIndex); 123 | 124 | int getMaxEncodedLength(int srcLen) const { return srcLen; } 125 | 126 | static int getBWTChunks(int size); 127 | }; 128 | 129 | 130 | inline int BWT::getBWTChunks(int size) 131 | { 132 | return (size < BLOCK_SIZE_THRESHOLD1) ? 
1 : 8; 133 | } 134 | } 135 | #endif 136 | 137 | -------------------------------------------------------------------------------- /src/transform/BWTBlockCodec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BWTBlockCodec_ 18 | #define _BWTBlockCodec_ 19 | 20 | #include "../transform/BWT.hpp" 21 | #include "../Context.hpp" 22 | 23 | 24 | namespace kanzi { 25 | 26 | // Utility class to en/de-code a BWT data block and its associated primary index(es) 27 | 28 | // BWT stream format: Header (mode + primary index(es)) | Data (n bytes) 29 | // mode (8 bits): xxxyyyzz 30 | // xxx: ignored 31 | // yyy: log(chunks) 32 | // zz: primary index size - 1 (in bytes) 33 | // primary indexes (chunks * (8|16|24|32 bits)) 34 | 35 | class BWTBlockCodec FINAL : public Transform { 36 | public: 37 | 38 | BWTBlockCodec(Context& ctx); 39 | 40 | ~BWTBlockCodec() { delete _pBWT; } 41 | 42 | bool forward(SliceArray& input, SliceArray& output, int length); 43 | 44 | bool inverse(SliceArray& input, SliceArray& output, int length); 45 | 46 | // Required encoding output buffer size 47 | int getMaxEncodedLength(int srcLen) const 48 | { 49 | return srcLen + 32 /* max header size */; 50 | } 51 | 52 | private: 53 | BWT* _pBWT; 54 | int _bsVersion; 55 | }; 56 | } 57 | #endif 58 | 59 | 
-------------------------------------------------------------------------------- /src/transform/BWTS.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _BWTS_ 18 | #define _BWTS_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | #include "DivSufSort.hpp" 23 | 24 | 25 | namespace kanzi 26 | { 27 | 28 | // Bijective version of the Burrows-Wheeler Transform 29 | // The main advantage over the regular BWT is that there is no need for a primary 30 | // index (hence the bijectivity). BWTS is about 10% slower than BWT. 
31 | // Forward transform based on the code at https://code.google.com/p/mk-bwts/ 32 | // by Neal Burns and DivSufSort (port of libDivSufSort by Yuta Mori) 33 | 34 | class BWTS FINAL : public Transform { 35 | 36 | private: 37 | static const int MAX_BLOCK_SIZE; 38 | 39 | int* _buffer1; 40 | int* _buffer2; 41 | int _bufferSize; 42 | DivSufSort _saAlgo; 43 | 44 | int moveLyndonWordHead(int sa[], int isa[], const byte data[], 45 | int count, int start, int size, int rank) const; 46 | 47 | public: 48 | BWTS() 49 | { 50 | _buffer1 = new int[0]; 51 | _buffer2 = new int[0]; 52 | _bufferSize = 0; 53 | } 54 | 55 | BWTS(Context&) 56 | { 57 | _buffer1 = new int[0]; 58 | _buffer2 = new int[0]; 59 | _bufferSize = 0; 60 | } 61 | 62 | ~BWTS() 63 | { 64 | delete[] _buffer1; 65 | delete[] _buffer2; 66 | } 67 | 68 | bool forward(SliceArray& input, SliceArray& output, int length); 69 | 70 | bool inverse(SliceArray& input, SliceArray& output, int length); 71 | 72 | int getMaxEncodedLength(int srcLen) const { return srcLen; } 73 | }; 74 | 75 | } 76 | #endif 77 | 78 | -------------------------------------------------------------------------------- /src/transform/EXECodec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _EXECodec_ 18 | #define _EXECodec_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | namespace kanzi 24 | { 25 | class EXECodec FINAL : public Transform { 26 | public: 27 | EXECodec() { _pCtx = nullptr; } 28 | 29 | EXECodec(Context& ctx) : _pCtx(&ctx) {} 30 | 31 | ~EXECodec() {} 32 | 33 | bool forward(SliceArray& source, SliceArray& destination, int length); 34 | 35 | bool inverse(SliceArray& source, SliceArray& destination, int length); 36 | 37 | int getMaxEncodedLength(int inputLen) const; 38 | 39 | private: 40 | 41 | static const byte X86_MASK_JUMP; 42 | static const byte X86_INSTRUCTION_JUMP; 43 | static const byte X86_INSTRUCTION_JCC; 44 | static const byte X86_TWO_BYTE_PREFIX; 45 | static const byte X86_MASK_JCC; 46 | static const byte X86_ESCAPE; 47 | static const byte NOT_EXE; 48 | static const byte X86; 49 | static const byte ARM64; 50 | static const byte MASK_DT; 51 | static const int X86_ADDR_MASK; 52 | static const int MASK_ADDRESS; 53 | static const int ARM_B_ADDR_MASK; 54 | static const int ARM_B_OPCODE_MASK; 55 | static const int ARM_B_ADDR_SGN_MASK; 56 | static const int ARM_OPCODE_B; 57 | static const int ARM_OPCODE_BL; 58 | static const int ARM_CB_REG_BITS; 59 | static const int ARM_CB_ADDR_MASK; 60 | static const int ARM_CB_ADDR_SGN_MASK; 61 | static const int ARM_CB_OPCODE_MASK; 62 | static const int ARM_OPCODE_CBZ; 63 | static const int ARM_OPCODE_CBNZ; 64 | static const int WIN_PE; 65 | static const uint16 WIN_X86_ARCH; 66 | static const uint16 WIN_AMD64_ARCH; 67 | static const uint16 WIN_ARM64_ARCH; 68 | static const int ELF_X86_ARCH; 69 | static const int ELF_AMD64_ARCH; 70 | static const int ELF_ARM64_ARCH; 71 | static const int MAC_AMD64_ARCH; 72 | static const int MAC_ARM64_ARCH; 73 | static const int MAC_MH_EXECUTE; 74 | static const int MAC_LC_SEGMENT; 75 | static const int MAC_LC_SEGMENT64; 76 | static const int MIN_BLOCK_SIZE; 77 | static const int 
MAX_BLOCK_SIZE; 78 | 79 | 80 | bool forwardARM(SliceArray& source, SliceArray& destination, int length, int codeStart, int codeEnd); 81 | 82 | bool forwardX86(SliceArray& source, SliceArray& destination, int length, int codeStart, int codeEnd); 83 | 84 | bool inverseARM(SliceArray& source, SliceArray& destination, int length); 85 | 86 | bool inverseX86(SliceArray& source, SliceArray& destination, int length); 87 | 88 | static byte detectType(const byte src[], int count, int& codeStart, int& codeEnd); 89 | 90 | static bool parseHeader(const byte src[], int count, uint magic, int& arch, int& codeStart, int& codeEnd); 91 | 92 | Context* _pCtx; 93 | }; 94 | 95 | 96 | inline int EXECodec::getMaxEncodedLength(int srcLen) const 97 | { 98 | // Allocate some extra buffer for incompressible data. 99 | return (srcLen <= 256) ? srcLen + 32 : srcLen + srcLen / 8; 100 | } 101 | 102 | } 103 | #endif 104 | 105 | -------------------------------------------------------------------------------- /src/transform/FSDCodec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _FSDCodec_ 18 | #define _FSDCodec_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | 24 | // Fixed Step Delta codec 25 | // Decorrelate values separated by a constant distance (step) and encode residuals 26 | namespace kanzi { 27 | 28 | class FSDCodec FINAL : public Transform { 29 | 30 | public: 31 | FSDCodec() { _pCtx = nullptr; } 32 | 33 | FSDCodec(Context& ctx) : _pCtx(&ctx) {} 34 | 35 | ~FSDCodec() {} 36 | 37 | bool forward(SliceArray& src, SliceArray& dst, int length); 38 | 39 | bool inverse(SliceArray& src, SliceArray& dst, int length); 40 | 41 | // Required encoding output buffer size 42 | int getMaxEncodedLength(int srcLen) const 43 | { 44 | return srcLen + ((srcLen < 1024) ? 64 : srcLen >> 4); // limit expansion 45 | } 46 | 47 | private: 48 | static const int MIN_LENGTH; 49 | static const byte ESCAPE_TOKEN; 50 | static const byte DELTA_CODING; 51 | static const byte XOR_CODING; 52 | static const uint8 ZIGZAG1[256]; 53 | static const int8 ZIGZAG2[256]; 54 | 55 | Context* _pCtx; 56 | }; 57 | } 58 | #endif 59 | 60 | -------------------------------------------------------------------------------- /src/transform/NullTransform.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _NullTransform_ 18 | #define _NullTransform_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | 27 | class NullTransform FINAL : public Transform { 28 | public: 29 | NullTransform() {} 30 | NullTransform(Context&) {} 31 | ~NullTransform() {} 32 | 33 | bool forward(SliceArray& input, SliceArray& output, int length) { return doCopy(input, output, length); } 34 | 35 | bool inverse(SliceArray& input, SliceArray& output, int length) { return doCopy(input, output, length); } 36 | 37 | // Required encoding output buffer size 38 | int getMaxEncodedLength(int inputLen) const { return inputLen; } 39 | 40 | private: 41 | bool doCopy(SliceArray& input, SliceArray& output, int length) const; 42 | 43 | }; 44 | 45 | inline bool NullTransform::doCopy(SliceArray& input, SliceArray& output, int length) const 46 | { 47 | if (length == 0) 48 | return true; 49 | 50 | if (!SliceArray::isValid(input)) 51 | throw std::invalid_argument("Invalid input block"); 52 | 53 | if (!SliceArray::isValid(output)) 54 | throw std::invalid_argument("Invalid output block"); 55 | 56 | if (input._index + length > input._length) 57 | return false; 58 | 59 | if (output._index + length > output._length) 60 | return false; 61 | 62 | memcpy(&output._array[output._index], &input._array[input._index], size_t(length)); 63 | input._index += length; 64 | output._index += length; 65 | return true; 66 | } 67 | 68 | } 69 | #endif 70 | 71 | -------------------------------------------------------------------------------- /src/transform/RLT.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _RLT_ 18 | #define _RLT_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | namespace kanzi 24 | { 25 | 26 | // Implementation of an escaped RLE 27 | // Run length encoding: 28 | // RUN_LEN_ENCODE1 = 224 => RUN_LEN_ENCODE2 = 31*224 = 6944 29 | // 4 <= runLen < 224+4 -> 1 byte 30 | // 228 <= runLen < 6944+228 -> 2 bytes 31 | // 7172 <= runLen < 65535+7172 -> 3 bytes 32 | 33 | class RLT FINAL : public Transform 34 | { 35 | public: 36 | RLT() { _pCtx = nullptr; } 37 | RLT(Context& ctx) : _pCtx(&ctx) {} 38 | ~RLT() {} 39 | 40 | bool forward(SliceArray& pSrc, SliceArray& pDst, int length); 41 | 42 | bool inverse(SliceArray& pSrc, SliceArray& pDst, int length); 43 | 44 | int getMaxEncodedLength(int srcLen) const { return (srcLen <= 512) ? 
srcLen + 32 : srcLen; } 45 | 46 | private: 47 | static const int RUN_LEN_ENCODE1; 48 | static const int RUN_LEN_ENCODE2; 49 | static const int RUN_THRESHOLD; 50 | static const int MAX_RUN; 51 | static const int MAX_RUN4; 52 | static const int MIN_BLOCK_LENGTH; 53 | static const byte DEFAULT_ESCAPE; 54 | 55 | static int emitRunLength(byte dst[], int run, byte escape, byte val); 56 | 57 | Context* _pCtx; 58 | }; 59 | 60 | } 61 | #endif 62 | 63 | -------------------------------------------------------------------------------- /src/transform/SBRT.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #include 17 | #include "SBRT.hpp" 18 | 19 | using namespace kanzi; 20 | 21 | 22 | const int SBRT::MODE_MTF = 1; // alpha = 0 23 | const int SBRT::MODE_RANK = 2; // alpha = 1/2 24 | const int SBRT::MODE_TIMESTAMP = 3; // alpha = 1 25 | 26 | 27 | 28 | SBRT::SBRT(int mode) : 29 | _mask1((mode == MODE_TIMESTAMP) ? 0 : -1) 30 | , _mask2((mode == MODE_MTF) ? 0 : -1) 31 | , _shift((mode == MODE_RANK) ? 1 : 0) 32 | { 33 | if ((mode != MODE_MTF) && (mode != MODE_RANK) && (mode != MODE_TIMESTAMP)) 34 | throw std::invalid_argument("Invalid mode parameter"); 35 | } 36 | 37 | SBRT::SBRT(int mode, Context&) : 38 | _mask1((mode == MODE_TIMESTAMP) ? 0 : -1) 39 | , _mask2((mode == MODE_MTF) ? 0 : -1) 40 | , _shift((mode == MODE_RANK) ? 
1 : 0) 41 | { 42 | if ((mode != MODE_MTF) && (mode != MODE_RANK) && (mode != MODE_TIMESTAMP)) 43 | throw std::invalid_argument("Invalid mode parameter"); 44 | } 45 | 46 | bool SBRT::forward(SliceArray& input, SliceArray& output, int count) 47 | { 48 | if (count == 0) 49 | return true; 50 | 51 | if (!SliceArray::isValid(input)) 52 | throw std::invalid_argument("SBRT: Invalid input block"); 53 | 54 | if (!SliceArray::isValid(output)) 55 | throw std::invalid_argument("SBRT: Invalid output block"); 56 | 57 | // Aliasing 58 | const byte* src = &input._array[input._index]; 59 | byte* dst = &output._array[output._index]; 60 | int p[256] = { 0 }; 61 | int q[256] = { 0 }; 62 | uint8 s2r[256]; 63 | uint8 r2s[256]; 64 | 65 | for (int i = 0; i < 256; i++) { 66 | s2r[i] = uint8(i); 67 | r2s[i] = uint8(i); 68 | } 69 | 70 | for (int i = 0; i < count; i++) { 71 | const uint8 c = uint8(src[i]); 72 | int r = int(s2r[c]); 73 | dst[i] = byte(r); 74 | const int qc = ((i & _mask1) + (p[c] & _mask2)) >> _shift; 75 | p[c] = i; 76 | q[c] = qc; 77 | 78 | // Move up symbol to correct rank 79 | while ((r > 0) && (q[r2s[r - 1]] <= qc)) { 80 | r2s[r] = r2s[r - 1]; 81 | s2r[r2s[r]] = uint8(r); 82 | r--; 83 | } 84 | 85 | r2s[r] = c; 86 | s2r[c] = uint8(r); 87 | } 88 | 89 | input._index += count; 90 | output._index += count; 91 | return true; 92 | } 93 | 94 | bool SBRT::inverse(SliceArray& input, SliceArray& output, int count) 95 | { 96 | if (count == 0) 97 | return true; 98 | 99 | if (!SliceArray::isValid(input)) 100 | throw std::invalid_argument("SBRT: Invalid input block"); 101 | 102 | if (!SliceArray::isValid(output)) 103 | throw std::invalid_argument("SBRT: Invalid output block"); 104 | 105 | // Aliasing 106 | const byte* src = &input._array[input._index]; 107 | byte* dst = &output._array[output._index]; 108 | int p[256] = { 0 }; 109 | int q[256] = { 0 }; 110 | uint8 r2s[256]; 111 | 112 | for (int i = 0; i < 256; i++) 113 | r2s[i] = uint8(i); 114 | 115 | for (int i = 0; i < count; i++) { 116 
| int r = int(src[i]); 117 | const int c = int(r2s[r]); 118 | dst[i] = byte(r2s[r]); 119 | const int qc = ((i & _mask1) + (p[c] & _mask2)) >> _shift; 120 | p[c] = i; 121 | q[c] = qc; 122 | 123 | // Move up symbol to correct rank 124 | while ((r > 0) && (q[r2s[r - 1]] <= qc)) { 125 | r2s[r] = r2s[r - 1]; 126 | r--; 127 | } 128 | 129 | r2s[r] = uint8(c); 130 | } 131 | 132 | input._index += count; 133 | output._index += count; 134 | return true; 135 | } 136 | -------------------------------------------------------------------------------- /src/transform/SBRT.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _SBRT_ 18 | #define _SBRT_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | // Sort by Rank Transform is a family of transforms typically used after 27 | // a BWT to reduce the variance of the data prior to entropy coding. 28 | // SBR(alpha) is defined by sbr(x, alpha) = (1-alpha)*(t-w1(x,t)) + alpha*(t-w2(x,t)) 29 | // where x is an item in the data list, t is the current access time and wk(x,t) is 30 | // the k-th access time to x at time t (with 0 <= alpha <= 1). 31 | // See [Two new families of list update algorithms] by Frank Schulz for details. 
32 | // SBR(0)= Move to Front Transform 33 | // SBR(1)= Time Stamp Transform 34 | // This code implements SBR(0), SBR(1/2) and SBR(1). Code derived from openBWT 35 | class SBRT FINAL : public Transform 36 | { 37 | public: 38 | static const int MODE_MTF; 39 | static const int MODE_RANK; 40 | static const int MODE_TIMESTAMP; 41 | 42 | SBRT(int mode); 43 | SBRT(int mode, Context&); 44 | ~SBRT() {} 45 | 46 | bool forward(SliceArray& input, SliceArray& output, int length); 47 | 48 | bool inverse(SliceArray& input, SliceArray& output, int length); 49 | 50 | int getMaxEncodedLength(int srcLen) const { return srcLen; } 51 | 52 | private: 53 | 54 | const int _mask1; 55 | const int _mask2; 56 | const int _shift; 57 | }; 58 | 59 | } 60 | #endif 61 | 62 | -------------------------------------------------------------------------------- /src/transform/SRT.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _SRT_ 18 | #define _SRT_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | namespace kanzi { 24 | 25 | // Sorted Rank Transform is typically used after a BWT to reduce the variance 26 | // of the data prior to entropy coding. 
27 | 28 | class SRT FINAL : public Transform { 29 | public: 30 | SRT() {} 31 | SRT(Context&) {} 32 | ~SRT() {} 33 | 34 | bool forward(SliceArray& pSrc, SliceArray& pDst, int length); 35 | 36 | bool inverse(SliceArray& pSrc, SliceArray& pDst, int length); 37 | 38 | int getMaxEncodedLength(int srcLen) const { return srcLen + 1024 /* max header size */; } 39 | 40 | private: 41 | static int preprocess(const uint freqs[], uint8 symbols[]); 42 | 43 | static int encodeHeader(const uint freqs[], byte dst[]); 44 | 45 | static int decodeHeader(const byte src[], uint freqs[]); 46 | }; 47 | } 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /src/transform/UTFCodec.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _UTFCodec_ 18 | #define _UTFCodec_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | 24 | namespace kanzi 25 | { 26 | typedef struct ssUTF 27 | { 28 | uint32 val; 29 | uint32 freq; 30 | 31 | ssUTF(uint32 v, uint32 f) : val(v), freq(f) {} 32 | 33 | friend bool operator< (ssUTF const& lhs, ssUTF const& rhs) { 34 | int r; 35 | return ((r = int(lhs.freq - rhs.freq)) != 0) ? 
r > 0 : lhs.val > rhs.val; 36 | } 37 | } sdUTF; 38 | 39 | 40 | // UTF8 encoder/decoder 41 | class UTFCodec FINAL : public Transform { 42 | public: 43 | UTFCodec() { _pCtx = nullptr; } 44 | 45 | UTFCodec(Context& ctx) : _pCtx(&ctx) {} 46 | 47 | ~UTFCodec() {} 48 | 49 | bool forward(SliceArray& source, SliceArray& destination, int length); 50 | 51 | bool inverse(SliceArray& source, SliceArray& destination, int length); 52 | 53 | int getMaxEncodedLength(int srcLen) const { return srcLen + 8192; } 54 | 55 | private: 56 | 57 | static const int MIN_BLOCK_SIZE; 58 | static const int LEN_SEQ[256]; 59 | 60 | Context* _pCtx; 61 | 62 | static bool validate(const byte block[], int count); 63 | 64 | static int pack(const byte in[], uint32& out); 65 | 66 | static int unpack(uint32 in, byte out[]); 67 | }; 68 | 69 | 70 | inline int UTFCodec::pack(const byte in[], uint32& out) 71 | { 72 | int s; 73 | 74 | switch (int(in[0]) >> 4) { 75 | case 0: 76 | case 1: 77 | case 2: 78 | case 3: 79 | case 4: 80 | case 5: 81 | case 6: 82 | case 7: 83 | out = uint32(in[0]); 84 | s = 1; 85 | break; 86 | 87 | case 12: 88 | case 13: 89 | out = (1 << 19) | (uint32(in[0]) << 8) | uint32(in[1]); 90 | s = 2; 91 | break; 92 | 93 | case 14: 94 | out = (2 << 19) | ((uint32(in[0]) & 0x0F) << 12) | ((uint32(in[1]) & 0x3F) << 6) | (uint32(in[2]) & 0x3F); 95 | s = 3; 96 | break; 97 | 98 | case 15: 99 | out = (4 << 19) | ((uint32(in[0]) & 0x07) << 18) | ((uint32(in[1]) & 0x3F) << 12) | ((uint32(in[2]) & 0x3F) << 6) | (uint32(in[3]) & 0x3F); 100 | s = 4; 101 | break; 102 | 103 | default: 104 | out = 0; 105 | s = 0; // signal invalid value 106 | break; 107 | } 108 | 109 | return s; 110 | } 111 | 112 | 113 | inline int UTFCodec::unpack(uint32 in, byte out[]) 114 | { 115 | int s; 116 | 117 | switch (in >> 19) { 118 | case 0: 119 | out[0] = byte(in); 120 | s = 1; 121 | break; 122 | 123 | case 1: 124 | out[0] = byte(in >> 8); 125 | out[1] = byte(in); 126 | s = 2; 127 | break; 128 | 129 | case 2: 130 | out[0] = 
byte(((in >> 12) & 0x0F) | 0xE0); 131 | out[1] = byte(((in >> 6) & 0x3F) | 0x80); 132 | out[2] = byte((in & 0x3F) | 0x80); 133 | s = 3; 134 | break; 135 | 136 | case 4: 137 | case 5: 138 | case 6: 139 | case 7: 140 | out[0] = byte(((in >> 18) & 0x07) | 0xF0); 141 | out[1] = byte(((in >> 12) & 0x3F) | 0x80); 142 | out[2] = byte(((in >> 6) & 0x3F) | 0x80); 143 | out[3] = byte((in & 0x3F) | 0x80); 144 | s = 4; 145 | break; 146 | 147 | default: 148 | s = 0; // signal invalid value 149 | break; 150 | } 151 | 152 | return s; 153 | } 154 | } 155 | #endif 156 | 157 | -------------------------------------------------------------------------------- /src/transform/ZRLT.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _ZRLT_ 18 | #define _ZRLT_ 19 | 20 | #include "../Context.hpp" 21 | #include "../Transform.hpp" 22 | 23 | namespace kanzi 24 | { 25 | // Zero Run Length Encoding is a simple encoding algorithm by Wheeler 26 | // closely related to Run Length Encoding. The main difference is 27 | // that only runs of 0 values are processed. Also, the length is 28 | // encoded in a different way (each digit in a different byte) 29 | // This algorithm is well adapted to process post BWT/MTFT data. 
30 | 31 | class ZRLT FINAL : public Transform 32 | { 33 | public: 34 | ZRLT() {} 35 | ZRLT(Context&) {} 36 | ~ZRLT() {} 37 | 38 | bool forward(SliceArray& pSrc, SliceArray& pDst, int length); 39 | 40 | bool inverse(SliceArray& pSrc, SliceArray& pDst, int length); 41 | 42 | // Required encoding output buffer size unknown => guess 43 | int getMaxEncodedLength(int srcLen) const { return srcLen; } 44 | }; 45 | 46 | } 47 | #endif 48 | 49 | -------------------------------------------------------------------------------- /src/util.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _util_ 18 | #define _util_ 19 | 20 | 21 | #include 22 | #include "types.hpp" 23 | 24 | 25 | 26 | // Ahem ... Visual Studio 27 | // This ostreambuf class is required because Microsoft cannot bother to implement 28 | // streambuf::pubsetbuf(). 
29 | template 30 | struct ostreambuf : public std::basic_streambuf > 31 | { 32 | ostreambuf(T* buffer, std::streamsize length) { 33 | this->setp(buffer, &buffer[length]); 34 | } 35 | }; 36 | 37 | template 38 | struct istreambuf : public std::basic_streambuf > 39 | { 40 | istreambuf(T* buffer, std::streamsize length) { 41 | this->setg(buffer, buffer, &buffer[length]); 42 | } 43 | }; 44 | 45 | #endif 46 | 47 | -------------------------------------------------------------------------------- /src/util/Clock.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | */ 15 | 16 | #pragma once 17 | #ifndef _Clock_ 18 | #define _Clock_ 19 | 20 | 21 | #if __cplusplus >= 201103L || _MSC_VER >= 1700 22 | 23 | #include 24 | 25 | namespace kanzi 26 | { 27 | class Clock { 28 | private: 29 | std::chrono::steady_clock::time_point _start; 30 | std::chrono::steady_clock::time_point _stop; 31 | 32 | public: 33 | Clock() 34 | { 35 | start(); 36 | _stop = _start; 37 | } 38 | 39 | void start() 40 | { 41 | _start = std::chrono::steady_clock::now(); 42 | } 43 | 44 | void stop() 45 | { 46 | _stop = std::chrono::steady_clock::now(); 47 | } 48 | 49 | double elapsed() const 50 | { 51 | // In millisec 52 | return double(std::chrono::duration_cast(_stop - _start).count()); 53 | } 54 | }; 55 | } 56 | #else 57 | 58 | #include 59 | 60 | namespace kanzi 61 | { 62 | 63 | class Clock { 64 | private: 65 | clock_t _start; 66 | clock_t _stop; 67 | 68 | public: 69 | Clock() 70 | { 71 | start(); 72 | _stop = _start; 73 | } 74 | 75 | void start() 76 | { 77 | _start = clock(); 78 | } 79 | 80 | void stop() 81 | { 82 | _stop = clock(); 83 | } 84 | 85 | double elapsed() const 86 | { 87 | // In millisec 88 | return (_stop <= _start) ? 0.0 : double(_stop - _start) / CLOCKS_PER_SEC * 1000.0; 89 | } 90 | }; 91 | 92 | } 93 | #endif 94 | 95 | #endif 96 | 97 | -------------------------------------------------------------------------------- /src/util/Printer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _Printer_ 18 | #define _Printer_ 19 | 20 | 21 | #ifdef CONCURRENCY_ENABLED 22 | #include 23 | #endif 24 | 25 | namespace kanzi 26 | { 27 | 28 | // Thread safe printer 29 | class Printer 30 | { 31 | public: 32 | Printer(std::ostream& os) { _os = &os; } 33 | 34 | ~Printer() { 35 | try { 36 | _os->flush(); 37 | } 38 | catch (std::exception&) { 39 | // Ignore: best effort 40 | } 41 | } 42 | 43 | void print(const char* msg, bool print) { 44 | if ((print == true) && (msg != nullptr)) { 45 | #ifdef CONCURRENCY_ENABLED 46 | std::lock_guard lock(_mtx); 47 | #endif 48 | (*_os) << msg ; 49 | } 50 | } 51 | 52 | void println(const char* msg, bool print) { 53 | if ((print == true) && (msg != nullptr)) { 54 | #ifdef CONCURRENCY_ENABLED 55 | std::lock_guard lock(_mtx); 56 | #endif 57 | (*_os) << msg << std::endl; 58 | } 59 | } 60 | 61 | void print(const std::string& msg, bool print) { 62 | if (print == true) { 63 | #ifdef CONCURRENCY_ENABLED 64 | std::lock_guard lock(_mtx); 65 | #endif 66 | (*_os) << msg ; 67 | } 68 | } 69 | 70 | void println(const std::string& msg, bool print) { 71 | if (print == true) { 72 | #ifdef CONCURRENCY_ENABLED 73 | std::lock_guard lock(_mtx); 74 | #endif 75 | (*_os) << msg << std::endl; 76 | } 77 | } 78 | 79 | 80 | private: 81 | #ifdef CONCURRENCY_ENABLED 82 | static std::mutex _mtx; 83 | #endif 84 | std::ostream* _os; 85 | }; 86 | 87 | } 88 | #endif 89 | 90 | -------------------------------------------------------------------------------- /src/util/strings.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2011-2024 Frederic Langlet 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | you may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | */ 15 | 16 | #pragma once 17 | #ifndef _strings_ 18 | #define _strings_ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | 26 | #if __cplusplus < 201103L 27 | // to_string() not available before C++ 11 28 | template 29 | std::string to_string(T value) 30 | { 31 | std::ostringstream os; 32 | os << value; 33 | return os.str(); 34 | } 35 | 36 | #define TOSTR(v) to_string(v) 37 | #else 38 | #define TOSTR(v) std::to_string(v) 39 | #endif 40 | 41 | 42 | inline void to_binary(int num, char* buffer, int length) 43 | { 44 | for (int i = length - 2; i >= 0; i--) { 45 | buffer[i] = (num & 1) ? 
'1' : '0'; 46 | num >>= 1; 47 | } 48 | 49 | buffer[length - 1] = '\0'; 50 | } 51 | 52 | // trim from end of string (right) 53 | inline std::string& rtrim(std::string& s) 54 | { 55 | static const char* whitespaces = " \t\f\v\n\r"; 56 | std::size_t pos = s.find_last_not_of(whitespaces); 57 | 58 | if (pos != std::string::npos) 59 | s.erase(pos + 1); 60 | 61 | return s; 62 | } 63 | 64 | // trim from beginning of string (left) 65 | inline std::string& ltrim(std::string& s) 66 | { 67 | static const char* whitespaces = " \t\f\v\n\r"; 68 | std::size_t pos = s.find_first_not_of(whitespaces); 69 | 70 | if (pos != std::string::npos) 71 | s.erase(0, pos); 72 | 73 | return s; 74 | } 75 | 76 | // trim from both ends of string (right then left) 77 | inline std::string& trim(std::string& s) 78 | { 79 | return ltrim(rtrim(s)); 80 | } 81 | 82 | inline void tokenize(const std::string& str, std::vector& v, char token) 83 | { 84 | std::istringstream ss(str); 85 | std::string s; 86 | 87 | while (getline(ss, s, token)) 88 | v.push_back(s); 89 | } 90 | 91 | #endif 92 | 93 | --------------------------------------------------------------------------------